granicus.if.org Git - postgresql/commitdiff
Make all comparisons done for/with statistics use the default collation.
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 12 Mar 2011 21:30:36 +0000 (16:30 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 12 Mar 2011 21:30:36 +0000 (16:30 -0500)
While this will give wrong answers when estimating selectivity for a
comparison operator that's using a non-default collation, the estimation
error probably won't be large; and anyway the former approach created
estimation errors of its own by trying to use a histogram that might have
been computed with some other collation.  So we'll adopt this simplified
approach for now and perhaps improve it sometime in the future.

This patch incorporates changes from Andres Freund to make sure that
selfuncs.c passes a valid collation OID to any datatype-specific function
it calls, in case that function wants collation information.  Said OID will
now always be DEFAULT_COLLATION_OID, but at least we won't get errors.

src/backend/commands/analyze.c
src/backend/optimizer/path/costsize.c
src/backend/utils/adt/selfuncs.c
src/include/commands/vacuum.h
src/include/utils/selfuncs.h

index bafdc80d5847e43842b51591ad4f21bce1bbff74..a9acc7c30367c96f74bceabf1c953455c20ae4c6 100644 (file)
@@ -24,6 +24,7 @@
 #include "catalog/index.h"
 #include "catalog/indexing.h"
 #include "catalog/namespace.h"
+#include "catalog/pg_collation.h"
 #include "catalog/pg_inherits_fn.h"
 #include "catalog/pg_namespace.h"
 #include "commands/dbcommands.h"
@@ -862,13 +863,11 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
        {
                stats->attrtypid = exprType(index_expr);
                stats->attrtypmod = exprTypmod(index_expr);
-               stats->attrcollation = exprCollation(index_expr);
        }
        else
        {
                stats->attrtypid = attr->atttypid;
                stats->attrtypmod = attr->atttypmod;
-               stats->attrcollation = attr->attcollation;
        }
 
        typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid));
@@ -1931,7 +1930,8 @@ compute_minimal_stats(VacAttrStatsP stats,
        track_cnt = 0;
 
        fmgr_info(mystats->eqfunc, &f_cmpeq);
-       fmgr_info_collation(stats->attrcollation, &f_cmpeq);
+       /* We always use the default collation for statistics */
+       fmgr_info_collation(DEFAULT_COLLATION_OID, &f_cmpeq);
 
        for (i = 0; i < samplerows; i++)
        {
@@ -2253,7 +2253,8 @@ compute_scalar_stats(VacAttrStatsP stats,
 
        SelectSortFunction(mystats->ltopr, false, &cmpFn, &cmpFlags);
        fmgr_info(cmpFn, &f_cmpfn);
-       fmgr_info_collation(stats->attrcollation, &f_cmpfn);
+       /* We always use the default collation for statistics */
+       fmgr_info_collation(DEFAULT_COLLATION_OID, &f_cmpfn);
 
        /* Initial scan to find sortable values */
        for (i = 0; i < samplerows; i++)
index c292e24ac303ffce75cd2d0c4516be9360da4fa5..756874b817c3698cc37fde4947ea62c10ffb3b36 100644 (file)
@@ -2056,7 +2056,6 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
        mergejoinscansel(root,
                                         (Node *) rinfo->clause,
                                         pathkey->pk_opfamily,
-                                        pathkey->pk_collation,
                                         pathkey->pk_strategy,
                                         pathkey->pk_nulls_first,
                                         &leftstartsel,
index f10110b1b7ea592a357ce9bbdbe16644c8406321..5cad1b88ad5f588d07106174faebb5f0e0923c98 100644 (file)
@@ -145,7 +145,7 @@ static double eqjoinsel_inner(Oid operator,
 static double eqjoinsel_semi(Oid operator,
                           VariableStatData *vardata1, VariableStatData *vardata2);
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
-                                 Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid,
+                                 Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound);
 static double convert_numeric_to_scalar(Datum value, Oid typid);
 static void convert_string_to_scalar(char *value,
@@ -164,10 +164,10 @@ static double convert_one_string_to_scalar(char *value,
                                                         int rangelo, int rangehi);
 static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
                                                        int rangelo, int rangehi);
-static char *convert_string_datum(Datum value, Oid typid, Oid collid);
+static char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
-                                  Oid sortop, Oid collation, Datum *min, Datum *max);
+                                  Oid sortop, Datum *min, Datum *max);
 static bool get_actual_variable_range(PlannerInfo *root,
                                                  VariableStatData *vardata,
                                                  Oid sortop,
@@ -285,6 +285,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                        FmgrInfo        eqproc;
 
                        fmgr_info(get_opcode(operator), &eqproc);
+                       fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc);
 
                        for (i = 0; i < nvalues; i++)
                        {
@@ -514,7 +515,7 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
        stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
 
        fmgr_info(get_opcode(operator), &opproc);
-       fmgr_info_collation(vardata->attcollation, &opproc);
+       fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
 
        /*
         * If we have most-common-values info, add up the fractions of the MCV
@@ -839,7 +840,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                                 */
                                if (convert_to_scalar(constval, consttype, &val,
                                                                          values[i - 1], values[i],
-                                                                         vardata->vartype, vardata->attcollation,
+                                                                         vardata->vartype,
                                                                          &low, &high))
                                {
                                        if (high <= low)
@@ -1700,6 +1701,7 @@ scalararraysel(PlannerInfo *root,
        if (!oprsel)
                return (Selectivity) 0.5;
        fmgr_info(oprsel, &oprselproc);
+       fmgr_info_collation(DEFAULT_COLLATION_OID, &oprselproc);
 
        /* deconstruct the expression */
        Assert(list_length(clause->args) == 2);
@@ -2116,6 +2118,7 @@ eqjoinsel_inner(Oid operator,
                                        nmatches;
 
                fmgr_info(get_opcode(operator), &eqproc);
+               fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc);
                hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
                hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
 
@@ -2338,6 +2341,7 @@ eqjoinsel_semi(Oid operator,
                                        nmatches;
 
                fmgr_info(get_opcode(operator), &eqproc);
+               fmgr_info_collation(DEFAULT_COLLATION_OID, &eqproc);
                hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool));
                hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
 
@@ -2588,7 +2592,7 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  */
 void
 mergejoinscansel(PlannerInfo *root, Node *clause,
-                                Oid opfamily, Oid collation, int strategy, bool nulls_first,
+                                Oid opfamily, int strategy, bool nulls_first,
                                 Selectivity *leftstart, Selectivity *leftend,
                                 Selectivity *rightstart, Selectivity *rightend)
 {
@@ -2757,20 +2761,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
        /* Try to get ranges of both inputs */
        if (!isgt)
        {
-               if (!get_variable_range(root, &leftvar, lstatop, collation,
+               if (!get_variable_range(root, &leftvar, lstatop,
                                                                &leftmin, &leftmax))
                        goto fail;                      /* no range available from stats */
-               if (!get_variable_range(root, &rightvar, rstatop, collation,
+               if (!get_variable_range(root, &rightvar, rstatop,
                                                                &rightmin, &rightmax))
                        goto fail;                      /* no range available from stats */
        }
        else
        {
                /* need to swap the max and min */
-               if (!get_variable_range(root, &leftvar, lstatop, collation,
+               if (!get_variable_range(root, &leftvar, lstatop,
                                                                &leftmax, &leftmin))
                        goto fail;                      /* no range available from stats */
-               if (!get_variable_range(root, &rightvar, rstatop, collation,
+               if (!get_variable_range(root, &rightvar, rstatop,
                                                                &rightmax, &rightmin))
                        goto fail;                      /* no range available from stats */
        }
@@ -3371,7 +3375,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
  */
 static bool
 convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
-                                 Datum lobound, Datum hibound, Oid boundstypid, Oid boundscollid,
+                                 Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound)
 {
        /*
@@ -3424,9 +3428,9 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                case TEXTOID:
                case NAMEOID:
                        {
-                               char       *valstr = convert_string_datum(value, valuetypid, boundscollid);
-                               char       *lostr = convert_string_datum(lobound, boundstypid, boundscollid);
-                               char       *histr = convert_string_datum(hibound, boundstypid, boundscollid);
+                               char       *valstr = convert_string_datum(value, valuetypid);
+                               char       *lostr = convert_string_datum(lobound, boundstypid);
+                               char       *histr = convert_string_datum(hibound, boundstypid);
 
                                convert_string_to_scalar(valstr, scaledvalue,
                                                                                 lostr, scaledlobound,
@@ -3670,7 +3674,7 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
  * before continuing, so as to generate correct locale-specific results.
  */
 static char *
-convert_string_datum(Datum value, Oid typid, Oid collid)
+convert_string_datum(Datum value, Oid typid)
 {
        char       *val;
 
@@ -3703,7 +3707,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid)
                        return NULL;
        }
 
-       if (!lc_collate_is_c(collid))
+       if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
        {
                char       *xfrmstr;
                size_t          xfrmlen;
@@ -4102,7 +4106,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
                vardata->rel = find_base_rel(root, var->varno);
                vardata->atttype = var->vartype;
                vardata->atttypmod = var->vartypmod;
-               vardata->attcollation = var->varcollid;
                vardata->isunique = has_unique_index(vardata->rel, var->varattno);
 
                rte = root->simple_rte_array[var->varno];
@@ -4188,7 +4191,6 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
        vardata->var = node;
        vardata->atttype = exprType(node);
        vardata->atttypmod = exprTypmod(node);
-       vardata->attcollation = exprCollation(node);
 
        if (onerel)
        {
@@ -4397,7 +4399,7 @@ get_variable_numdistinct(VariableStatData *vardata)
  * be "<" not ">", as only the former is likely to be found in pg_statistic.
  */
 static bool
-get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid collation,
+get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
                                   Datum *min, Datum *max)
 {
        Datum           tmin = 0;
@@ -4482,7 +4484,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Oid
                FmgrInfo        opproc;
 
                fmgr_info(get_opcode(sortop), &opproc);
-               fmgr_info_collation(collation, &opproc);
+               fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
 
                for (i = 0; i < nvalues; i++)
                {
@@ -5109,6 +5111,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
        if (cmpopr == InvalidOid)
                elog(ERROR, "no >= operator for opfamily %u", opfamily);
        fmgr_info(get_opcode(cmpopr), &opproc);
+       fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
 
        prefixsel = ineq_histogram_selectivity(root, vardata, &opproc, true,
                                                                                   prefixcon->constvalue,
@@ -5130,6 +5133,7 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
        if (cmpopr == InvalidOid)
                elog(ERROR, "no < operator for opfamily %u", opfamily);
        fmgr_info(get_opcode(cmpopr), &opproc);
+       fmgr_info_collation(DEFAULT_COLLATION_OID, &opproc);
 
        greaterstrcon = make_greater_string(prefixcon, &opproc);
        if (greaterstrcon)
index 3ad7b4bd05495c465925ae8cd5067efa37c57eb9..cc1441372dc6b04619b4f634b3c543c83d7bf162 100644 (file)
  * the information to be stored in a pg_statistic row for the column.  Be
  * careful to allocate any pointed-to data in anl_context, which will NOT
  * be CurrentMemoryContext when compute_stats is called.
+ *
+ * Note: for the moment, all comparisons done for statistical purposes
+ * should use the database's default collation (DEFAULT_COLLATION_OID).
+ * This might change in some future release.
  *----------
  */
 typedef struct VacAttrStats *VacAttrStatsP;
@@ -66,13 +70,12 @@ typedef struct VacAttrStats
         * Note: do not assume that the data being analyzed has the same datatype
         * shown in attr, ie do not trust attr->atttypid, attlen, etc.  This is
         * because some index opclasses store a different type than the underlying
-        * column/expression.  Instead use attrtypid, attrtypmod, attrcollation, and attrtype for
+        * column/expression.  Instead use attrtypid, attrtypmod, and attrtype for
         * information about the datatype being fed to the typanalyze function.
         */
        Form_pg_attribute attr;         /* copy of pg_attribute row for column */
        Oid                     attrtypid;              /* type of data being analyzed */
        int32           attrtypmod;             /* typmod of data being analyzed */
-       Oid                     attrcollation;  /* collation of the data being analyzed */
        Form_pg_type attrtype;          /* copy of pg_type row for attrtypid */
        MemoryContext anl_context;      /* where to save long-lived data */
 
index baf6d8caf86fdf118889428e140aa0b87d81e15d..e9913aa049f28da92efe296e9925e21cf7719f21 100644 (file)
@@ -74,7 +74,6 @@ typedef struct VariableStatData
        Oid                     vartype;                /* exposed type of expression */
        Oid                     atttype;                /* type to pass to get_attstatsslot */
        int32           atttypmod;              /* typmod to pass to get_attstatsslot */
-       Oid                     attcollation;   /* collation of the variable */
        bool            isunique;               /* true if matched to a unique index */
 } VariableStatData;
 
@@ -179,7 +178,7 @@ extern Selectivity rowcomparesel(PlannerInfo *root,
                          int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
 
 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
-                                Oid opfamily, Oid collation, int strategy, bool nulls_first,
+                                Oid opfamily, int strategy, bool nulls_first,
                                 Selectivity *leftstart, Selectivity *leftend,
                                 Selectivity *rightstart, Selectivity *rightend);