]> granicus.if.org Git - postgresql/commitdiff
Use column collation for extended statistics
author: Tomas Vondra <tomas.vondra@postgresql.org>
Thu, 18 Jul 2019 10:28:16 +0000 (12:28 +0200)
committer: Tomas Vondra <tomas.vondra@postgresql.org>
Sat, 20 Jul 2019 14:37:37 +0000 (16:37 +0200)
The current extended statistics code was a bit confused about which collation
to use.  When building the statistics, the collations defined as default
for the data types were used (since commit 5e0928005).  The MCV code was
however using the column collations for MCV serialization, and then
DEFAULT_COLLATION_OID when computing estimates. So overall the code was
using all three possible options, inconsistently.

This uses the column collation everywhere - this makes it consistent with
what 5e0928005 did for regular stats.  We however do not track the
collations in a catalog, because we can derive them from column-level
information.  This may need to change in the future, e.g. after allowing
statistics on expressions.

Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/8736jdhbhc.fsf%40ansel.ydns.eu
Backpatch-to: 12
src/backend/commands/statscmds.c
src/backend/statistics/dependencies.c
src/backend/statistics/mcv.c
src/backend/statistics/mvdistinct.c

index cf406f6f96b8c29595dd598ce1f3b1dbbb5c962b..34d11c2a9802675ae8df6bfabe0a51e80e813f38 100644 (file)
@@ -485,6 +485,10 @@ RemoveStatisticsById(Oid statsOid)
  *
  * For MCV lists that's not the case, as those statistics store the datums
  * internally. In this case we simply reset the statistics value to NULL.
+ *
+ * Note that "type change" includes collation change, which means we can rely
+ * on the MCV list being consistent with the collation info in pg_attribute
+ * during estimation.
  */
 void
 UpdateStatisticsForTypeChange(Oid statsOid, Oid relationOid, int attnum,
index 66c38ce2bc9900f4f321f836106b1e7c2d89a1d5..585cad2ad946b4e7c6f09f8f4b71420f12015cc3 100644 (file)
@@ -273,7 +273,7 @@ dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
                                 colstat->attrtypid);
 
                /* prepare the sort function for this dimension */
-               multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
+               multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
        }
 
        /*
index 2b685ec67a2c844f099b904b55a4812769aac8c3..cec06f8c444eebf1ec6a75ad06644522d3a539d6 100644 (file)
@@ -366,7 +366,7 @@ build_mss(VacAttrStats **stats, int numattrs)
                        elog(ERROR, "cache lookup failed for ordering operator for type %u",
                                 colstat->attrtypid);
 
-               multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
+               multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
        }
 
        return mss;
@@ -686,7 +686,7 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
 
                /* sort and deduplicate the data */
                ssup[dim].ssup_cxt = CurrentMemoryContext;
-               ssup[dim].ssup_collation = DEFAULT_COLLATION_OID;
+               ssup[dim].ssup_collation = stats[dim]->attrcollid;
                ssup[dim].ssup_nulls_first = false;
 
                PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
@@ -1630,15 +1630,22 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
                                         * First check whether the constant is below the lower
                                         * boundary (in that case we can skip the bucket, because
                                         * there's no overlap).
+                                        *
+                                        * We don't store collations used to build the statistics,
+                                        * but we can use the collation for the attribute itself,
+                                        * as stored in varcollid. We do reset the statistics after
+                                        * a type change (including collation change), so this is
+                                        * OK. We may need to relax this after allowing extended
+                                        * statistics on expressions.
                                         */
                                        if (varonleft)
                                                match = DatumGetBool(FunctionCall2Coll(&opproc,
-                                                                                                                          DEFAULT_COLLATION_OID,
+                                                                                                                          var->varcollid,
                                                                                                                           item->values[idx],
                                                                                                                           cst->constvalue));
                                        else
                                                match = DatumGetBool(FunctionCall2Coll(&opproc,
-                                                                                                                          DEFAULT_COLLATION_OID,
+                                                                                                                          var->varcollid,
                                                                                                                           cst->constvalue,
                                                                                                                           item->values[idx]));
 
index 536605b83df70d76f4393a291248a30d1b4f3992..f3383c05d912b870bc3915129d11e90cea6f5b2b 100644 (file)
@@ -477,7 +477,7 @@ ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
                                 colstat->attrtypid);
 
                /* prepare the sort function for this dimension */
-               multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation);
+               multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
 
                /* accumulate all the data for this dimension into the arrays */
                for (j = 0; j < numrows; j++)