</entry>
</row>
+ <row>
+ <entry><structfield>stacoll<replaceable>N</replaceable></structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry>
+ <entry>
+ The collation used to derive the statistics stored in the
+ <replaceable>N</replaceable>th <quote>slot</quote>. For example, a
+ histogram slot for a collatable column would show the collation that
+ defines the sort order of the data. Zero for noncollatable data.
+ </entry>
+ </row>
+
<row>
<entry><structfield>stanumbers<replaceable>N</replaceable></structfield></entry>
<entry><type>float4[]</type></entry>
{
stats->attrtypid = exprType(index_expr);
stats->attrtypmod = exprTypmod(index_expr);
+
+ /*
+ * If a collation has been specified for the index column, use that in
+ * preference to anything else; but if not, fall back to whatever we
+ * can get from the expression.
+ */
+ if (OidIsValid(onerel->rd_indcollation[attnum - 1]))
+ stats->attrcollid = onerel->rd_indcollation[attnum - 1];
+ else
+ stats->attrcollid = exprCollation(index_expr);
}
else
{
stats->attrtypid = attr->atttypid;
stats->attrtypmod = attr->atttypmod;
+ stats->attrcollid = attr->attcollation;
}
typtuple = SearchSysCacheCopy1(TYPEOID,
{
values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
}
+ i = Anum_pg_statistic_stacoll1 - 1;
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = ObjectIdGetDatum(stats->stacoll[k]); /* stacollN */
+ }
i = Anum_pg_statistic_stanumbers1 - 1;
for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
{
firstcount1 = track_cnt;
for (j = 0; j < track_cnt; j++)
{
- /* We always use the default collation for statistics */
if (DatumGetBool(FunctionCall2Coll(&f_cmpeq,
- DEFAULT_COLLATION_OID,
+ stats->attrcollid,
value, track[j].value)))
{
match = true;
stats->stakind[0] = STATISTIC_KIND_MCV;
stats->staop[0] = mystats->eqopr;
+ stats->stacoll[0] = stats->attrcollid;
stats->stanumbers[0] = mcv_freqs;
stats->numnumbers[0] = num_mcv;
stats->stavalues[0] = mcv_values;
memset(&ssup, 0, sizeof(ssup));
ssup.ssup_cxt = CurrentMemoryContext;
- /* We always use the default collation for statistics */
- ssup.ssup_collation = DEFAULT_COLLATION_OID;
+ ssup.ssup_collation = stats->attrcollid;
ssup.ssup_nulls_first = false;
/*
stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
stats->staop[slot_idx] = mystats->eqopr;
+ stats->stacoll[slot_idx] = stats->attrcollid;
stats->stanumbers[slot_idx] = mcv_freqs;
stats->numnumbers[slot_idx] = num_mcv;
stats->stavalues[slot_idx] = mcv_values;
stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
stats->staop[slot_idx] = mystats->ltopr;
+ stats->stacoll[slot_idx] = stats->attrcollid;
stats->stavalues[slot_idx] = hist_values;
stats->numvalues[slot_idx] = num_hist;
stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
stats->staop[slot_idx] = mystats->ltopr;
+ stats->stacoll[slot_idx] = stats->attrcollid;
stats->stanumbers[slot_idx] = corrs;
stats->numnumbers[slot_idx] = 1;
slot_idx++;
* (b) split the data into groups by first (k-1) columns
*
* (c) for each group count different values in the last column
+ *
+ * We use the column data types' default sort operators and collations;
+ * perhaps at some point it'd be worth using column-specific collations?
*/
/* prepare the sort function for the first dimension, and SortItem array */
colstat->attrtypid);
/* prepare the sort function for this dimension */
- multi_sort_add_dimension(mss, i, type->lt_opr);
+ multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
/* accumulate all the data for both columns into an array and sort it */
for (j = 0; j < numrows; j++)
}
/*
- * Prepare sort support info using the given sort operator
+ * Prepare sort support info using the given sort operator and collation
* at the position 'sortdim'
*/
void
-multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
+multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
+ Oid oper, Oid collation)
{
SortSupport ssup = &mss->ssup[sortdim];
ssup->ssup_cxt = CurrentMemoryContext;
- ssup->ssup_collation = DEFAULT_COLLATION_OID;
+ ssup->ssup_collation = collation;
ssup->ssup_nulls_first = false;
- ssup->ssup_cxt = CurrentMemoryContext;
PrepareSortSupportFromOrderingOp(oper, ssup);
}
/*
* For each dimension, set up sort-support and fill in the values from the
* sample data.
+ *
+ * We use the column data types' default sort operators and collations;
+ * perhaps at some point it'd be worth using column-specific collations?
*/
for (i = 0; i < k; i++)
{
colstat->attrtypid);
/* prepare the sort function for this dimension */
- multi_sort_add_dimension(mss, i, type->lt_opr);
+ multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
/* accumulate all the data for this dimension into the arrays */
for (j = 0; j < numrows; j++)
#include "postgres.h"
#include "access/hash.h"
+#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "tsearch/ts_type.h"
stats->stakind[0] = STATISTIC_KIND_MCELEM;
stats->staop[0] = TextEqualOperator;
+ stats->stacoll[0] = DEFAULT_COLLATION_OID;
stats->stanumbers[0] = mcelem_freqs;
/* See above comment about two extra frequency fields */
stats->numnumbers[0] = num_mcelem + 2;
Datum *mcelem, int nmcelem,
float4 *numbers, int nnumbers,
float4 *hist, int nhist,
- Oid operator, FmgrInfo *cmpfunc);
+ Oid operator);
static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem,
float4 *numbers, int nnumbers,
Datum *array_data, int nitems,
- Oid operator, FmgrInfo *cmpfunc);
+ Oid operator, TypeCacheEntry *typentry);
static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
float4 *numbers, int nnumbers,
Datum *array_data, int nitems,
float4 *hist, int nhist,
- Oid operator, FmgrInfo *cmpfunc);
+ Oid operator, TypeCacheEntry *typentry);
static float *calc_hist(const float4 *hist, int nhist, int n);
static float *calc_distr(const float *p, int n, int m, float rest);
static int floor_log2(uint32 n);
static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value,
- int *index, FmgrInfo *cmpfunc);
+ int *index, TypeCacheEntry *typentry);
static int element_compare(const void *key1, const void *key2, void *arg);
static int float_compare_desc(const void *key1, const void *key2);
sslot.nnumbers,
&constval, 1,
OID_ARRAY_CONTAINS_OP,
- cmpfunc);
+ typentry);
else
selec = mcelem_array_contained_selec(sslot.values,
sslot.nvalues,
hslot.numbers,
hslot.nnumbers,
OID_ARRAY_CONTAINED_OP,
- cmpfunc);
+ typentry);
free_attstatsslot(&hslot);
free_attstatsslot(&sslot);
NULL, 0,
&constval, 1,
OID_ARRAY_CONTAINS_OP,
- cmpfunc);
+ typentry);
else
selec = mcelem_array_contained_selec(NULL, 0,
NULL, 0,
&constval, 1,
NULL, 0,
OID_ARRAY_CONTAINED_OP,
- cmpfunc);
+ typentry);
}
/*
NULL, 0,
&constval, 1,
OID_ARRAY_CONTAINS_OP,
- cmpfunc);
+ typentry);
else
selec = mcelem_array_contained_selec(NULL, 0,
NULL, 0,
&constval, 1,
NULL, 0,
OID_ARRAY_CONTAINED_OP,
- cmpfunc);
+ typentry);
/* we assume no nulls here, so no stanullfrac correction */
}
sslot.values, sslot.nvalues,
sslot.numbers, sslot.nnumbers,
hslot.numbers, hslot.nnumbers,
- operator, cmpfunc);
+ operator);
free_attstatsslot(&hslot);
free_attstatsslot(&sslot);
/* No most-common-elements info, so do without */
selec = mcelem_array_selec(array, typentry,
NULL, 0, NULL, 0, NULL, 0,
- operator, cmpfunc);
+ operator);
}
/*
/* No stats at all, so do without */
selec = mcelem_array_selec(array, typentry,
NULL, 0, NULL, 0, NULL, 0,
- operator, cmpfunc);
+ operator);
/* we assume no nulls here, so no stanullfrac correction */
}
Datum *mcelem, int nmcelem,
float4 *numbers, int nnumbers,
float4 *hist, int nhist,
- Oid operator, FmgrInfo *cmpfunc)
+ Oid operator)
{
Selectivity selec;
int num_elems;
/* Sort extracted elements using their default comparison function. */
qsort_arg(elem_values, nonnull_nitems, sizeof(Datum),
- element_compare, cmpfunc);
+ element_compare, typentry);
/* Separate cases according to operator */
if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP)
selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
numbers, nnumbers,
elem_values, nonnull_nitems,
- operator, cmpfunc);
+ operator, typentry);
else if (operator == OID_ARRAY_CONTAINED_OP)
selec = mcelem_array_contained_selec(mcelem, nmcelem,
numbers, nnumbers,
elem_values, nonnull_nitems,
hist, nhist,
- operator, cmpfunc);
+ operator, typentry);
else
{
elog(ERROR, "arraycontsel called for unrecognized operator %u",
mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem,
float4 *numbers, int nnumbers,
Datum *array_data, int nitems,
- Oid operator, FmgrInfo *cmpfunc)
+ Oid operator, TypeCacheEntry *typentry)
{
Selectivity selec,
elem_selec;
/* Ignore any duplicates in the array data. */
if (i > 0 &&
- element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0)
+ element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
continue;
/* Find the smallest MCELEM >= this array item. */
if (use_bsearch)
{
match = find_next_mcelem(mcelem, nmcelem, array_data[i],
- &mcelem_index, cmpfunc);
+ &mcelem_index, typentry);
}
else
{
{
int cmp = element_compare(&mcelem[mcelem_index],
&array_data[i],
- cmpfunc);
+ typentry);
if (cmp < 0)
mcelem_index++;
float4 *numbers, int nnumbers,
Datum *array_data, int nitems,
float4 *hist, int nhist,
- Oid operator, FmgrInfo *cmpfunc)
+ Oid operator, TypeCacheEntry *typentry)
{
int mcelem_index,
i,
/* Ignore any duplicates in the array data. */
if (i > 0 &&
- element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0)
+ element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
continue;
/*
{
int cmp = element_compare(&mcelem[mcelem_index],
&array_data[i],
- cmpfunc);
+ typentry);
if (cmp < 0)
{
*/
static bool
find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index,
- FmgrInfo *cmpfunc)
+ TypeCacheEntry *typentry)
{
int l = *index,
r = nmcelem - 1,
while (l <= r)
{
i = (l + r) / 2;
- res = element_compare(&mcelem[i], &value, cmpfunc);
+ res = element_compare(&mcelem[i], &value, typentry);
if (res == 0)
{
*index = i;
/*
* Comparison function for elements.
*
- * We use the element type's default btree opclass, and the default collation
+ * We use the element type's default btree opclass, and its default collation
* if the type is collation-sensitive.
*
* XXX consider using SortSupport infrastructure
{
Datum d1 = *((const Datum *) key1);
Datum d2 = *((const Datum *) key2);
- FmgrInfo *cmpfunc = (FmgrInfo *) arg;
+ TypeCacheEntry *typentry = (TypeCacheEntry *) arg;
+ FmgrInfo *cmpfunc = &typentry->cmp_proc_finfo;
Datum c;
- c = FunctionCall2Coll(cmpfunc, DEFAULT_COLLATION_OID, d1, d2);
+ c = FunctionCall2Coll(cmpfunc, typentry->typcollation, d1, d2);
return DatumGetInt32(c);
}
#include "postgres.h"
#include "access/tuptoaster.h"
-#include "catalog/pg_collation.h"
#include "commands/vacuum.h"
#include "utils/array.h"
#include "utils/builtins.h"
/* Information about array element type */
Oid type_id; /* element type's OID */
Oid eq_opr; /* default equality operator's OID */
+ Oid coll_id; /* collation to use */
bool typbyval; /* physical properties of element type */
int16 typlen;
char typalign;
extra_data = (ArrayAnalyzeExtraData *) palloc(sizeof(ArrayAnalyzeExtraData));
extra_data->type_id = typentry->type_id;
extra_data->eq_opr = typentry->eq_opr;
+ extra_data->coll_id = stats->attrcollid; /* collation we should use */
extra_data->typbyval = typentry->typbyval;
extra_data->typlen = typentry->typlen;
extra_data->typalign = typentry->typalign;
stats->stakind[slot_idx] = STATISTIC_KIND_MCELEM;
stats->staop[slot_idx] = extra_data->eq_opr;
+ stats->stacoll[slot_idx] = extra_data->coll_id;
stats->stanumbers[slot_idx] = mcelem_freqs;
/* See above comment about extra stanumber entries */
stats->numnumbers[slot_idx] = num_mcelem + 3;
stats->stakind[slot_idx] = STATISTIC_KIND_DECHIST;
stats->staop[slot_idx] = extra_data->eq_opr;
+ stats->stacoll[slot_idx] = extra_data->coll_id;
stats->stanumbers[slot_idx] = hist;
stats->numnumbers[slot_idx] = num_hist + 1;
slot_idx++;
/*
* Hash function for elements.
*
- * We use the element type's default hash opclass, and the default collation
+ * We use the element type's default hash opclass, and the column collation
* if the type is collation-sensitive.
*/
static uint32
Datum d = *((const Datum *) key);
Datum h;
- h = FunctionCall1Coll(array_extra_data->hash, DEFAULT_COLLATION_OID, d);
+ h = FunctionCall1Coll(array_extra_data->hash,
+ array_extra_data->coll_id,
+ d);
return DatumGetUInt32(h);
}
/*
* Comparison function for elements.
*
- * We use the element type's default btree opclass, and the default collation
+ * We use the element type's default btree opclass, and the column collation
* if the type is collation-sensitive.
*
* XXX consider using SortSupport infrastructure
Datum d2 = *((const Datum *) key2);
Datum c;
- c = FunctionCall2Coll(array_extra_data->cmp, DEFAULT_COLLATION_OID, d1, d2);
+ c = FunctionCall2Coll(array_extra_data->cmp,
+ array_extra_data->coll_id,
+ d1, d2);
return DatumGetInt32(c);
}
num_hist = 0;
}
stats->staop[slot_idx] = Float8LessOperator;
+ stats->stacoll[slot_idx] = InvalidOid;
stats->stavalues[slot_idx] = length_hist_values;
stats->numvalues[slot_idx] = num_hist;
stats->statypid[slot_idx] = FLOAT8OID;
* For both oprrest and oprjoin functions, the operator's input collation OID
* (if any) is passed using the standard fmgr mechanism, so that the estimator
* function can fetch it with PG_GET_COLLATION(). Note, however, that all
- * statistics in pg_statistic are currently built using the database's default
+ * statistics in pg_statistic are currently built using the relevant column's
* collation. Thus, in most cases where we are looking at statistics, we
- * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
+ * should ignore the operator collation and use the stats entry's collation.
* We expect that the error induced by doing this is usually not large enough
- * to justify complicating matters.
+ * to justify complicating matters. In any case, doing otherwise would yield
+ * entirely garbage results for ordered stats data such as histograms.
*----------
*/
RelOptInfo *inner_rel);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
RelOptInfo *rel, List **varinfos, double *ndistinct);
-static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
+static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
+ double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
int rangelo, int rangehi);
-static char *convert_string_datum(Datum value, Oid typid, bool *failure);
+static char *convert_string_datum(Datum value, Oid typid, Oid collid,
+ bool *failure);
static double convert_timevalue_to_scalar(Datum value, Oid typid,
bool *failure);
static void examine_simple_variable(PlannerInfo *root, Var *var,
/* be careful to apply operator right way 'round */
if (varonleft)
match = DatumGetBool(FunctionCall2Coll(&eqproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
sslot.values[i],
constval));
else
match = DatumGetBool(FunctionCall2Coll(&eqproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
constval,
sslot.values[i]));
if (match)
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
constval,
sslot.values[i])))
mcv_selec += sslot.numbers[i];
{
if (varonleft ?
DatumGetBool(FunctionCall2Coll(opproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
sslot.values[i],
constval)) :
DatumGetBool(FunctionCall2Coll(opproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
constval,
sslot.values[i])))
nmatch++;
&sslot.values[probe]);
ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
sslot.values[probe],
constval));
if (isgt)
* values to a uniform comparison scale, and do a linear
* interpolation within this bin.
*/
- if (convert_to_scalar(constval, consttype, &val,
+ if (convert_to_scalar(constval, consttype, sslot.stacoll,
+ &val,
sslot.values[i - 1], sslot.values[i],
vardata->vartype,
&low, &high))
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
- DEFAULT_COLLATION_OID,
+ sslot1->stacoll,
sslot1->values[i],
sslot2->values[j])))
{
if (hasmatch2[j])
continue;
if (DatumGetBool(FunctionCall2Coll(&eqproc,
- DEFAULT_COLLATION_OID,
+ sslot1->stacoll,
sslot1->values[i],
sslot2->values[j])))
{
* converted to measurements expressed in seconds.
*/
static bool
-convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
+convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound)
{
case NAMEOID:
{
char *valstr = convert_string_datum(value, valuetypid,
- &failure);
+ collid, &failure);
char *lostr = convert_string_datum(lobound, boundstypid,
- &failure);
+ collid, &failure);
char *histr = convert_string_datum(hibound, boundstypid,
- &failure);
+ collid, &failure);
/*
* Bail out if any of the values is not of string type. We
* before continuing, so as to generate correct locale-specific results.
*/
static char *
-convert_string_datum(Datum value, Oid typid, bool *failure)
+convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
{
char *val;
return NULL;
}
- if (!lc_collate_is_c(DEFAULT_COLLATION_OID))
+ if (!lc_collate_is_c(collid))
{
char *xfrmstr;
size_t xfrmlen;
continue;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
sslot.values[i], tmin)))
{
tmin = sslot.values[i];
tmin_is_mcv = true;
}
if (DatumGetBool(FunctionCall2Coll(&opproc,
- DEFAULT_COLLATION_OID,
+ sslot.stacoll,
tmax, sslot.values[i])))
{
tmax = sslot.values[i];
Selectivity prefixsel;
Oid cmpopr;
FmgrInfo opproc;
+ AttStatsSlot sslot;
Const *greaterstrcon;
Selectivity eq_sel;
/*-------
* If we can create a string larger than the prefix, say
- * "x < greaterstr".
+ * "x < greaterstr". We try to generate the string referencing the
+ * collation of the var's statistics, but if that's not available,
+ * use DEFAULT_COLLATION_OID.
*-------
*/
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
+ /* sslot.stacoll is set up */ ;
+ else
+ sslot.stacoll = DEFAULT_COLLATION_OID;
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
BTLessStrategyNumber);
if (cmpopr == InvalidOid)
elog(ERROR, "no < operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc);
- greaterstrcon = make_greater_string(prefixcon, &opproc,
- DEFAULT_COLLATION_OID);
+ greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
if (greaterstrcon)
{
Selectivity topsel;
*
* If a matching slot is found, true is returned, and *sslot is filled thus:
* staop: receives the actual STAOP value.
+ * stacoll: receives the actual STACOLL value.
* valuetype: receives actual datatype of the elements of stavalues.
* values: receives pointer to an array of the slot's stavalues.
* nvalues: receives number of stavalues.
*
* If no matching slot is found, false is returned, and *sslot is zeroed.
*
+ * Note that the current API doesn't allow for searching for a slot with
+ * a particular collation. If we ever actually support recording more than
+ * one collation, we'll have to extend the API, but for now simple is good.
+ *
* The data referred to by the fields of sslot is locally palloc'd and
* is independent of the original pg_statistic tuple. When the caller
* is done with it, call free_attstatsslot to release the palloc'd data.
return false; /* not there */
sslot->staop = (&stats->staop1)[i];
+ sslot->stacoll = (&stats->stacoll1)[i];
+
+ /*
+ * XXX Hopefully-temporary hack: if stacoll isn't set, inject the default
+ * collation. This won't matter for non-collation-aware datatypes. For
+ * those that are, this covers cases where stacoll has not been set. In
+ * the short term we need this because some code paths involving type NAME
+ * do not pass any collation to prefix_selectivity and related functions.
+ * Even when that's been fixed, it's likely that some add-on typanalyze
+ * functions won't get the word right away about filling stacoll during
+ * ANALYZE, so we'll probably need this for a while.
+ */
+ if (sslot->stacoll == InvalidOid)
+ sslot->stacoll = DEFAULT_COLLATION_OID;
if (flags & ATTSTATSSLOT_VALUES)
{
typentry->typtype = typtup->typtype;
typentry->typrelid = typtup->typrelid;
typentry->typelem = typtup->typelem;
+ typentry->typcollation = typtup->typcollation;
/* If it's a domain, immediately thread it into the domain cache list */
if (typentry->typtype == TYPTYPE_DOMAIN)
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201812091
+#define CATALOG_VERSION_NO 201812141
#endif
* statistical data can be placed. Each slot includes:
* kind integer code identifying kind of data (see below)
* op OID of associated operator, if needed
+ * coll OID of relevant collation, or 0 if none
* numbers float4 array (for statistical values)
* values anyarray (for representations of data values)
- * The ID and operator fields are never NULL; they are zeroes in an
- * unused slot. The numbers and values fields are NULL in an unused
- * slot, and might also be NULL in a used slot if the slot kind has
- * no need for one or the other.
+ * The ID, operator, and collation fields are never NULL; they are zeroes
+ * in an unused slot. The numbers and values fields are NULL in an
+ * unused slot, and might also be NULL in a used slot if the slot kind
+ * has no need for one or the other.
* ----------------
*/
Oid staop4;
Oid staop5;
+ Oid stacoll1;
+ Oid stacoll2;
+ Oid stacoll3;
+ Oid stacoll4;
+ Oid stacoll5;
+
#ifdef CATALOG_VARLEN /* variable-length fields start here */
float4 stanumbers1[1];
float4 stanumbers2[1];
/*
* In a "most common values" slot, staop is the OID of the "=" operator
- * used to decide whether values are the same or not. stavalues contains
+ * used to decide whether values are the same or not, and stacoll is the
+ * collation used (same as column's collation). stavalues contains
* the K most common non-null values appearing in the column, and stanumbers
* contains their frequencies (fractions of total row count). The values
* shall be ordered in decreasing frequency. Note that since the arrays are
/*
* A "histogram" slot describes the distribution of scalar data. staop is
- * the OID of the "<" operator that describes the sort ordering. (In theory,
- * more than one histogram could appear, if a datatype has more than one
- * useful sort operator.) stavalues contains M (>=2) non-null values that
+ * the OID of the "<" operator that describes the sort ordering, and stacoll
+ * is the relevant collation. (In theory more than one histogram could appear,
+ * if a datatype has more than one useful sort operator or we care about more
+ * than one collation. Currently the collation will always be that of the
+ * underlying column.) stavalues contains M (>=2) non-null values that
* divide the non-null column data values into M-1 bins of approximately equal
* population. The first stavalues item is the MIN and the last is the MAX.
* stanumbers is not used and should be NULL. IMPORTANT POINT: if an MCV
/*
* A "correlation" slot describes the correlation between the physical order
* of table tuples and the ordering of data values of this column, as seen
- * by the "<" operator identified by staop. (As with the histogram, more
- * than one entry could theoretically appear.) stavalues is not used and
- * should be NULL. stanumbers contains a single entry, the correlation
- * coefficient between the sequence of data values and the sequence of
- * their actual tuple positions. The coefficient ranges from +1 to -1.
+ * by the "<" operator identified by staop with the collation identified by
+ * stacoll. (As with the histogram, more than one entry could theoretically
+ * appear.) stavalues is not used and should be NULL. stanumbers contains
+ * a single entry, the correlation coefficient between the sequence of data
+ * values and the sequence of their actual tuple positions. The coefficient
+ * ranges from +1 to -1.
*/
#define STATISTIC_KIND_CORRELATION 3
* except that it stores the most common non-null *elements* of the column
* values. This is useful when the column datatype is an array or some other
* type with identifiable elements (for instance, tsvector). staop contains
- * the equality operator appropriate to the element type. stavalues contains
+ * the equality operator appropriate to the element type, and stacoll
+ * contains the collation to use with it. stavalues contains
* the most common element values, and stanumbers their frequencies. Unlike
* MCV slots, frequencies are measured as the fraction of non-null rows the
* element value appears in, not the frequency of all rows. Also unlike
* A "distinct elements count histogram" slot describes the distribution of
* the number of distinct element values present in each row of an array-type
* column. Only non-null rows are considered, and only non-null elements.
- * staop contains the equality operator appropriate to the element type.
+ * staop contains the equality operator appropriate to the element type,
+ * and stacoll contains the collation to use with it.
* stavalues is not used and should be NULL. The last member of stanumbers is
* the average count of distinct element values over all non-null rows. The
* preceding M (>=2) members form a histogram that divides the population of
* careful to allocate any pointed-to data in anl_context, which will NOT
* be CurrentMemoryContext when compute_stats is called.
*
- * Note: for the moment, all comparisons done for statistical purposes
- * should use the database's default collation (DEFAULT_COLLATION_OID).
- * This might change in some future release.
+ * Note: all comparisons done for statistical purposes should use the
+ * underlying column's collation (attcollation), except in situations
+ * where a noncollatable container type contains a collatable type;
+ * in that case use the contained type's default collation. Be sure to record
+ * the appropriate collation in stacoll.
*----------
*/
typedef struct VacAttrStats *VacAttrStatsP;
* because some index opclasses store a different type than the underlying
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
* information about the datatype being fed to the typanalyze function.
+ * Likewise, use attrcollid not attr->attcollation.
*/
Form_pg_attribute attr; /* copy of pg_attribute row for column */
Oid attrtypid; /* type of data being analyzed */
int32 attrtypmod; /* typmod of data being analyzed */
Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
+ Oid attrcollid; /* collation of data being analyzed */
MemoryContext anl_context; /* where to save long-lived data */
/*
float4 stadistinct; /* # distinct values */
int16 stakind[STATISTIC_NUM_SLOTS];
Oid staop[STATISTIC_NUM_SLOTS];
+ Oid stacoll[STATISTIC_NUM_SLOTS];
int numnumbers[STATISTIC_NUM_SLOTS];
float4 *stanumbers[STATISTIC_NUM_SLOTS];
int numvalues[STATISTIC_NUM_SLOTS];
extern MultiSortSupport multi_sort_init(int ndims);
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
- Oid oper);
+ Oid oper, Oid collation);
extern int multi_sort_compare(const void *a, const void *b, void *arg);
extern int multi_sort_compare_dim(int dim, const SortItem *a,
const SortItem *b, MultiSortSupport mss);
{
/* Always filled: */
Oid staop; /* Actual staop for the found slot */
+ Oid stacoll; /* Actual collation for the found slot */
/* Filled if ATTSTATSSLOT_VALUES is specified: */
Oid valuetype; /* Actual datatype of the values */
Datum *values; /* slot's "values" array, or NULL if none */
char typtype;
Oid typrelid;
Oid typelem;
+ Oid typcollation;
/*
* Information obtained from opfamily entries