From: Tom Lane Date: Sat, 12 Mar 2011 21:30:36 +0000 (-0500) Subject: Make all comparisons done for/with statistics use the default collation. X-Git-Tag: REL9_1_ALPHA5~89 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=696d1f7f064402840a60b7177a838d1452ad13e6;p=postgresql Make all comparisons done for/with statistics use the default collation. While this will give wrong answers when estimating selectivity for a comparison operator that's using a non-default collation, the estimation error probably won't be large; and anyway the former approach created estimation errors of its own by trying to use a histogram that might have been computed with some other collation. So we'll adopt this simplified approach for now and perhaps improve it sometime in the future. This patch incorporates changes from Andres Freund to make sure that selfuncs.c passes a valid collation OID to any datatype-specific function it calls, in case that function wants collation information. Said OID will now always be DEFAULT_COLLATION_OID, but at least we won't get errors. --- diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index bafdc80d58..a9acc7c303 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -24,6 +24,7 @@ #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/pg_collation.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_namespace.h" #include "commands/dbcommands.h" @@ -862,13 +863,11 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr) { stats->attrtypid = exprType(index_expr); stats->attrtypmod = exprTypmod(index_expr); - stats->attrcollation = exprCollation(index_expr); } else { stats->attrtypid = attr->atttypid; stats->attrtypmod = attr->atttypmod; - stats->attrcollation = attr->attcollation; } typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid)); @@ -1931,7 +1930,8 @@ compute_minimal_stats(VacAttrStatsP stats, track_cnt = 0; fmgr_info(mystats->eqfunc, &f_cmpeq); - fmgr_info_collation(stats->attrcollation, &f_cmpeq); + /* We always use the default collation for statistics */ + fmgr_info_collation(DEFAULT_COLLATION_OID, &f_cmpeq); for (i = 0; i < samplerows; i++) { @@ -2253,7 +2253,8 @@ compute_scalar_stats(VacAttrStatsP stats, SelectSortFunction(mystats->ltopr, false, &cmpFn, &cmpFlags); fmgr_info(cmpFn, &f_cmpfn); - fmgr_info_collation(stats->attrcollation, &f_cmpfn); + /* We always use the default collation for statistics */ + fmgr_info_collation(DEFAULT_COLLATION_OID, &f_cmpfn); /* Initial scan to find sortable values */ for (i = 0; i < samplerows; i++) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index c292e24ac3..756874b817 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2056,7 +2056,6 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey) mergejoinscansel(root, (Node *) rinfo->clause, pathkey->pk_opfamily, - pathkey->pk_collation, pathkey->pk_strategy, pathkey->pk_nulls_first, &leftstartsel, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index f10110b1b7..5cad1b88ad 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -145,7 +145,7 @@ static double eqjoinsel_inner(Oid operator, static double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, - Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid, + Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); static double convert_numeric_to_scalar(Datum value, Oid typid); static void convert_string_to_scalar(char *value, @@ -164,10 +164,10 @@ static double convert_one_string_to_scalar(char *value, int rangelo, int rangehi); static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi); -static char *convert_string_datum(Datum value, Oid typid, Oid collid); +static char *convert_string_datum(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid); static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, Oid collation, Datum *min, Datum *max); + Oid sortop, Datum *min, Datum *max); static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, @@ -285,6 +285,7 @@ var_eq_const(VariableStatData *vardata, Oid operator, FmgrInfo eqproc; fmgr_info(get_opcode(operator), &eqproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc); for (i = 0; i < nvalues; i++) { @@ -514,7 +515,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); fmgr_info(get_opcode(operator), &opproc); - fmgr_info_collation(vardata->attcollation, &opproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc); /* * If we have most-common-values info, add up the fractions of the MCV @@ -839,7 +840,7 @@ ineq_histogram_selectivity(PlannerInfo *root, */ if (convert_to_scalar(constval, consttype, &val, values[i - 1], values[i], - vardata->vartype, vardata->attcollation, + vardata->vartype, &low, &high)) { if (high <= low) @@ -1700,6 +1701,7 @@ scalararraysel(PlannerInfo *root, if (!oprsel) return (Selectivity) 0.5; fmgr_info(oprsel, &oprselproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &oprselproc); /* deconstruct the expression */ Assert(list_length(clause->args) == 2); @@ -2116,6 +2118,7 @@ eqjoinsel_inner(Oid operator, nmatches; fmgr_info(get_opcode(operator), &eqproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc); hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); @@ -2338,6 +2341,7 @@ eqjoinsel_semi(Oid operator, nmatches; fmgr_info(get_opcode(operator), &eqproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc); hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); @@ -2588,7 +2592,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS) */ void mergejoinscansel(PlannerInfo *root, Node *clause, - Oid opfamily, Oid collation, int strategy, bool nulls_first, + Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend) { @@ -2757,20 +2761,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause, /* Try to get ranges of both inputs */ if (!isgt) { - if (!get_variable_range(root, &leftvar, lstatop, collation, + if (!get_variable_range(root, &leftvar, lstatop, &leftmin, &leftmax)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, collation, + if (!get_variable_range(root, &rightvar, rstatop, &rightmin, &rightmax)) goto fail; /* no range available from stats */ } else { /* need to swap the max and min */ - if (!get_variable_range(root, &leftvar, lstatop, collation, + if (!get_variable_range(root, &leftvar, lstatop, &leftmax, &leftmin)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, collation, + if (!get_variable_range(root, &rightvar, rstatop, &rightmax, &rightmin)) goto fail; /* no range available from stats */ } @@ -3371,7 +3375,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) */ static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, - Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid, + Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound) { /* @@ -3424,9 +3428,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, case TEXTOID: case NAMEOID: { - char *valstr = convert_string_datum(value, valuetypid, boundscollid); - char *lostr = convert_string_datum(lobound, boundstypid, boundscollid); - char *histr = convert_string_datum(hibound, boundstypid, boundscollid); + char *valstr = convert_string_datum(value, valuetypid); + char *lostr = convert_string_datum(lobound, boundstypid); + char *histr = convert_string_datum(hibound, boundstypid); convert_string_to_scalar(valstr, scaledvalue, lostr, scaledlobound, @@ -3670,7 +3674,7 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi) * before continuing, so as to generate correct locale-specific results. */ static char * -convert_string_datum(Datum value, Oid typid, Oid collid) +convert_string_datum(Datum value, Oid typid) { char *val; @@ -3703,7 +3707,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid) return NULL; } - if (!lc_collate_is_c(collid)) + if (!lc_collate_is_c(DEFAULT_COLLATION_OID)) { char *xfrmstr; size_t xfrmlen; @@ -4102,7 +4106,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, vardata->rel = find_base_rel(root, var->varno); vardata->atttype = var->vartype; vardata->atttypmod = var->vartypmod; - vardata->attcollation = var->varcollid; vardata->isunique = has_unique_index(vardata->rel, var->varattno); rte = root->simple_rte_array[var->varno]; @@ -4188,7 +4191,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, vardata->var = node; vardata->atttype = exprType(node); vardata->atttypmod = exprTypmod(node); - vardata->attcollation = exprCollation(node); if (onerel) { @@ -4397,7 +4399,7 @@ get_variable_numdistinct(VariableStatData *vardata) * be "<" not ">", as only the former is likely to be found in pg_statistic. */ static bool -get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation, +get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max) { Datum tmin = 0; @@ -4482,7 +4484,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid FmgrInfo opproc; fmgr_info(get_opcode(sortop), &opproc); - fmgr_info_collation(collation, &opproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc); for (i = 0; i < nvalues; i++) { @@ -5109,6 +5111,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, if (cmpopr == InvalidOid) elog(ERROR, "no >= operator for opfamily %u", opfamily); fmgr_info(get_opcode(cmpopr), &opproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc); prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true, prefixcon->constvalue, @@ -5130,6 +5133,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, if (cmpopr == InvalidOid) elog(ERROR, "no < operator for opfamily %u", opfamily); fmgr_info(get_opcode(cmpopr), &opproc); + fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc); greaterstrcon = make_greater_string(prefixcon, &opproc); if (greaterstrcon) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 3ad7b4bd05..cc1441372d 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -50,6 +50,10 @@ * the information to be stored in a pg_statistic row for the column. Be * careful to allocate any pointed-to data in anl_context, which will NOT * be CurrentMemoryContext when compute_stats is called. + * + * Note: for the moment, all comparisons done for statistical purposes + * should use the database's default collation (DEFAULT_COLLATION_OID). + * This might change in some future release. *---------- */ typedef struct VacAttrStats *VacAttrStatsP; @@ -66,13 +70,12 @@ typedef struct VacAttrStats * Note: do not assume that the data being analyzed has the same datatype * shown in attr, ie do not trust attr->atttypid, attlen, etc. This is * because some index opclasses store a different type than the underlying - * column/expression. Instead use attrtypid, attrtypmod, attrcollation, and attrtype for + * column/expression. Instead use attrtypid, attrtypmod, and attrtype for * information about the datatype being fed to the typanalyze function. */ Form_pg_attribute attr; /* copy of pg_attribute row for column */ Oid attrtypid; /* type of data being analyzed */ int32 attrtypmod; /* typmod of data being analyzed */ - Oid attrcollation; /* collation of the data being analyzed */ Form_pg_type attrtype; /* copy of pg_type row for attrtypid */ MemoryContext anl_context; /* where to save long-lived data */ diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index baf6d8caf8..e9913aa049 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -74,7 +74,6 @@ typedef struct VariableStatData Oid vartype; /* exposed type of expression */ Oid atttype; /* type to pass to get_attstatsslot */ int32 atttypmod; /* typmod to pass to get_attstatsslot */ - Oid attcollation; /* collation of the variable */ bool isunique; /* true if matched to a unique index */ } VariableStatData; @@ -179,7 +178,7 @@ extern Selectivity rowcomparesel(PlannerInfo *root, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo); extern void mergejoinscansel(PlannerInfo *root, Node *clause, - Oid opfamily, Oid collation, int strategy, bool nulls_first, + Oid opfamily, int strategy, bool nulls_first, Selectivity *leftstart, Selectivity *leftend, Selectivity *rightstart, Selectivity *rightend);