granicus.if.org Git - postgresql/commitdiff
Fix planner to pass correct collation to operator selectivity estimators.
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 9 Jul 2012 03:51:08 +0000 (23:51 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 9 Jul 2012 03:51:08 +0000 (23:51 -0400)
We can do this without creating an API break for estimation functions
by passing the collation using the existing fmgr functionality for
passing an input collation as a hidden parameter.

The need for this was foreseen at the outset, but we didn't get around to
making it happen in 9.1 because of the decision to sort all pg_statistic
histograms according to the database's default collation.  That meant that
selectivity estimators generally need to use the default collation too,
even if they're estimating for an operator that will do something
different.  The reason it's suddenly become more interesting is that
regexp interpretation also uses a collation (for its LC_TYPE not LC_COLLATE
property), and we no longer want to use the wrong collation when examining
regexps during planning.  It's not that the selectivity estimate is likely
to change much from this; rather that we are thinking of caching compiled
regexps during planner estimation, and we won't get the intended benefit
if we cache them with a different collation than the executor will use.

Back-patch to 9.1, both because the regexp change is likely to get
back-patched and because we might as well get this right in all
collation-supporting branches, in case any third-party code wants to
rely on getting the collation.  The patch turns out to be minuscule
now that I've done it ...

src/backend/optimizer/path/clausesel.c
src/backend/optimizer/util/plancat.c
src/backend/utils/adt/selfuncs.c
src/include/optimizer/plancat.h

index bb148d7b0717262705b8725fc03738a08e9b2e60..6b8d92773f7b740a886017fefb4a90067268139c 100644 (file)
@@ -578,6 +578,7 @@ clause_selectivity(PlannerInfo *root,
                                                                                 list_make2(var,
                                                                                                        makeBoolConst(true,
                                                                                                                                  false)),
+                                                                                InvalidOid,
                                                                                 varRelid);
                }
        }
@@ -649,13 +650,15 @@ clause_selectivity(PlannerInfo *root,
        }
        else if (is_opclause(clause) || IsA(clause, DistinctExpr))
        {
-               Oid                     opno = ((OpExpr *) clause)->opno;
+               OpExpr     *opclause = (OpExpr *) clause;
+               Oid                     opno = opclause->opno;
 
                if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
                {
                        /* Estimate selectivity for a join clause. */
                        s1 = join_selectivity(root, opno,
-                                                                 ((OpExpr *) clause)->args,
+                                                                 opclause->args,
+                                                                 opclause->inputcollid,
                                                                  jointype,
                                                                  sjinfo);
                }
@@ -663,7 +666,8 @@ clause_selectivity(PlannerInfo *root,
                {
                        /* Estimate selectivity for a restriction clause. */
                        s1 = restriction_selectivity(root, opno,
-                                                                                ((OpExpr *) clause)->args,
+                                                                                opclause->args,
+                                                                                opclause->inputcollid,
                                                                                 varRelid);
                }
 
index 38b81a05ff712d527370af834fbbc8298d980f9e..1818a2a8718cf6fa3e32ad9397e436bcef788cbc 100644 (file)
@@ -1010,6 +1010,7 @@ Selectivity
 restriction_selectivity(PlannerInfo *root,
                                                Oid operatorid,
                                                List *args,
+                                               Oid inputcollid,
                                                int varRelid)
 {
        RegProcedure oprrest = get_oprrest(operatorid);
@@ -1022,11 +1023,12 @@ restriction_selectivity(PlannerInfo *root,
        if (!oprrest)
                return (Selectivity) 0.5;
 
-       result = DatumGetFloat8(OidFunctionCall4(oprrest,
-                                                                                        PointerGetDatum(root),
-                                                                                        ObjectIdGetDatum(operatorid),
-                                                                                        PointerGetDatum(args),
-                                                                                        Int32GetDatum(varRelid)));
+       result = DatumGetFloat8(OidFunctionCall4Coll(oprrest,
+                                                                                                inputcollid,
+                                                                                                PointerGetDatum(root),
+                                                                                                ObjectIdGetDatum(operatorid),
+                                                                                                PointerGetDatum(args),
+                                                                                                Int32GetDatum(varRelid)));
 
        if (result < 0.0 || result > 1.0)
                elog(ERROR, "invalid restriction selectivity: %f", result);
@@ -1045,6 +1047,7 @@ Selectivity
 join_selectivity(PlannerInfo *root,
                                 Oid operatorid,
                                 List *args,
+                                Oid inputcollid,
                                 JoinType jointype,
                                 SpecialJoinInfo *sjinfo)
 {
@@ -1058,12 +1061,13 @@ join_selectivity(PlannerInfo *root,
        if (!oprjoin)
                return (Selectivity) 0.5;
 
-       result = DatumGetFloat8(OidFunctionCall5(oprjoin,
-                                                                                        PointerGetDatum(root),
-                                                                                        ObjectIdGetDatum(operatorid),
-                                                                                        PointerGetDatum(args),
-                                                                                        Int16GetDatum(jointype),
-                                                                                        PointerGetDatum(sjinfo)));
+       result = DatumGetFloat8(OidFunctionCall5Coll(oprjoin,
+                                                                                                inputcollid,
+                                                                                                PointerGetDatum(root),
+                                                                                                ObjectIdGetDatum(operatorid),
+                                                                                                PointerGetDatum(args),
+                                                                                                Int16GetDatum(jointype),
+                                                                                                PointerGetDatum(sjinfo)));
 
        if (result < 0.0 || result > 1.0)
                elog(ERROR, "invalid join selectivity: %f", result);
index 95e46276f0a8911758f4ec02b993193bf55eee15..dc38034104eb88560f083dbd2adc1db66656c356 100644 (file)
  * joins, however, the selectivity is defined as the fraction of the left-hand
  * side relation's rows that are expected to have a match (ie, at least one
  * row with a TRUE result) in the right-hand side.
+ *
+ * For both oprrest and oprjoin functions, the operator's input collation OID
+ * (if any) is passed using the standard fmgr mechanism, so that the estimator
+ * function can fetch it with PG_GET_COLLATION().  Note, however, that all
+ * statistics in pg_statistic are currently built using the database's default
+ * collation.  Thus, in most cases where we are looking at statistics, we
+ * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
+ * We expect that the error induced by doing this is usually not large enough
+ * to justify complicating matters.
  *----------
  */
 
@@ -1097,6 +1106,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        Oid                     operator = PG_GETARG_OID(1);
        List       *args = (List *) PG_GETARG_POINTER(2);
        int                     varRelid = PG_GETARG_INT32(3);
+       Oid                     collation = PG_GET_COLLATION();
        VariableStatData vardata;
        Node       *other;
        bool            varonleft;
@@ -1197,12 +1207,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
        }
 
        /*
-        * Divide pattern into fixed prefix and remainder.      XXX we have to assume
-        * default collation here, because we don't have access to the actual
-        * input collation for the operator.  FIXME ...
+        * Divide pattern into fixed prefix and remainder.  Unlike many of the
+        * other functions in this file, we use the pattern operator's actual
+        * collation for this step.  This is not because we expect the collation
+        * to make a big difference in the selectivity estimate (it seldom would),
+        * but because we want to be sure we cache compiled regexps under the
+        * right cache key, so that they can be re-used at runtime.
         */
        patt = (Const *) other;
-       pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
+       pstatus = pattern_fixed_prefix(patt, ptype, collation,
                                                                   &prefix, &rest);
 
        /*
@@ -1847,18 +1860,20 @@ scalararraysel(PlannerInfo *root,
                                                                                elem_nulls[i],
                                                                                elmbyval));
                        if (is_join_clause)
-                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
-                                                                                                 PointerGetDatum(root),
-                                                                                                 ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int16GetDatum(jointype),
-                                                                                                 PointerGetDatum(sjinfo)));
+                               s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
+                                                                                                         ObjectIdGetDatum(operator),
+                                                                                                         PointerGetDatum(args),
+                                                                                                         Int16GetDatum(jointype),
+                                                                                                         PointerGetDatum(sjinfo)));
                        else
-                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                                 PointerGetDatum(root),
-                                                                                                 ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int32GetDatum(varRelid)));
+                               s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
+                                                                                                         ObjectIdGetDatum(operator),
+                                                                                                         PointerGetDatum(args),
+                                                                                                         Int32GetDatum(varRelid)));
 
                        if (useOr)
                        {
@@ -1912,18 +1927,20 @@ scalararraysel(PlannerInfo *root,
                         */
                        args = list_make2(leftop, elem);
                        if (is_join_clause)
-                               s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
-                                                                                                 PointerGetDatum(root),
-                                                                                                 ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int16GetDatum(jointype),
-                                                                                                 PointerGetDatum(sjinfo)));
+                               s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
+                                                                                                         ObjectIdGetDatum(operator),
+                                                                                                         PointerGetDatum(args),
+                                                                                                         Int16GetDatum(jointype),
+                                                                                                         PointerGetDatum(sjinfo)));
                        else
-                               s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                                 PointerGetDatum(root),
-                                                                                                 ObjectIdGetDatum(operator),
-                                                                                                 PointerGetDatum(args),
-                                                                                                 Int32GetDatum(varRelid)));
+                               s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+                                                                                                         clause->inputcollid,
+                                                                                                         PointerGetDatum(root),
+                                                                                                         ObjectIdGetDatum(operator),
+                                                                                                         PointerGetDatum(args),
+                                                                                                         Int32GetDatum(varRelid)));
 
                        if (useOr)
                        {
@@ -1962,18 +1979,20 @@ scalararraysel(PlannerInfo *root,
                dummyexpr->collation = clause->inputcollid;
                args = list_make2(leftop, dummyexpr);
                if (is_join_clause)
-                       s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         Int16GetDatum(jointype),
-                                                                                         PointerGetDatum(sjinfo)));
+                       s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+                                                                                                 clause->inputcollid,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int16GetDatum(jointype),
+                                                                                                 PointerGetDatum(sjinfo)));
                else
-                       s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
-                                                                                         PointerGetDatum(root),
-                                                                                         ObjectIdGetDatum(operator),
-                                                                                         PointerGetDatum(args),
-                                                                                         Int32GetDatum(varRelid)));
+                       s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+                                                                                                 clause->inputcollid,
+                                                                                                 PointerGetDatum(root),
+                                                                                                 ObjectIdGetDatum(operator),
+                                                                                                 PointerGetDatum(args),
+                                                                                                 Int32GetDatum(varRelid)));
                s1 = useOr ? 0.0 : 1.0;
 
                /*
@@ -2046,6 +2065,7 @@ rowcomparesel(PlannerInfo *root,
 {
        Selectivity s1;
        Oid                     opno = linitial_oid(clause->opnos);
+       Oid                     inputcollid = linitial_oid(clause->inputcollids);
        List       *opargs;
        bool            is_join_clause;
 
@@ -2086,6 +2106,7 @@ rowcomparesel(PlannerInfo *root,
                /* Estimate selectivity for a join clause. */
                s1 = join_selectivity(root, opno,
                                                          opargs,
+                                                         inputcollid,
                                                          jointype,
                                                          sjinfo);
        }
@@ -2094,6 +2115,7 @@ rowcomparesel(PlannerInfo *root,
                /* Estimate selectivity for a restriction clause. */
                s1 = restriction_selectivity(root, opno,
                                                                         opargs,
+                                                                        inputcollid,
                                                                         varRelid);
        }
 
index cf4a3f2b9935dbe706d06e3a9632bf0ba524b1da..e0d04dbecd7a58d1f79af62b5156ea2f4eef1cbc 100644 (file)
@@ -43,11 +43,13 @@ extern bool has_unique_index(RelOptInfo *rel, AttrNumber attno);
 extern Selectivity restriction_selectivity(PlannerInfo *root,
                                                Oid operatorid,
                                                List *args,
+                                               Oid inputcollid,
                                                int varRelid);
 
 extern Selectivity join_selectivity(PlannerInfo *root,
                                 Oid operatorid,
                                 List *args,
+                                Oid inputcollid,
                                 JoinType jointype,
                                 SpecialJoinInfo *sjinfo);