]> granicus.if.org Git - postgresql/commitdiff
Fix range-query estimation to not double-exclude NULLs, per gripe from
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 19 Oct 2002 02:56:16 +0000 (02:56 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 19 Oct 2002 02:56:16 +0000 (02:56 +0000)
Ray Ontko 28-June-02.  Also, fix prefix_selectivity for NAME lefthand
variables (it was bogusly assuming binary compatibility), and adjust
make_greater_string() to not call pg_mbcliplen() with invalid multibyte
data (this last per bug report that I can't find at the moment, but it
was in July '02).

src/backend/optimizer/path/clausesel.c
src/backend/utils/adt/selfuncs.c
src/include/utils/selfuncs.h

index 25350c480ccfb223463ba44ba5e4c7d1e9c41e4c..8a1ab28ac1e9737ec084ba24ded99b1fec5e4a3f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.51 2002/06/20 20:29:29 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.52 2002/10/19 02:56:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -102,7 +102,9 @@ restrictlist_selectivity(Query *root,
  * see that hisel is the fraction of the range below the high bound, while
  * losel is the fraction above the low bound; so hisel can be interpreted
  * directly as a 0..1 value but we need to convert losel to 1-losel before
- * interpreting it as a value. Then the available range is 1-losel to hisel.)
+ * interpreting it as a value. Then the available range is 1-losel to hisel.
+ * However, this calculation double-excludes nulls, so really we need
+ * hisel + losel + null_frac - 1.)
  * If the calculation yields zero or negative, however, we chicken out and
  * use a default estimate; that probably means that one or both
  * selectivities is a default estimate rather than an actual range value.
@@ -199,6 +201,9 @@ clauselist_selectivity(Query *root,
                        /* Successfully matched a pair of range clauses */
                        Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
 
+                       /* Adjust for double-exclusion of NULLs */
+                       s2 += nulltestsel(root, IS_NULL, rqlist->var, varRelid);
+
                        /*
                         * A zero or slightly negative s2 should be converted into a
                         * small positive value; we probably are dealing with a very
@@ -503,12 +508,18 @@ clause_selectivity(Query *root,
        else if (IsA(clause, NullTest))
        {
                /* Use node specific selectivity calculation function */
-               s1 = nulltestsel(root, (NullTest *) clause, varRelid);
+               s1 = nulltestsel(root,
+                                                ((NullTest *) clause)->nulltesttype,
+                                                ((NullTest *) clause)->arg,
+                                                varRelid);
        }
        else if (IsA(clause, BooleanTest))
        {
                /* Use node specific selectivity calculation function */
-               s1 = booltestsel(root, (BooleanTest *) clause, varRelid);
+               s1 = booltestsel(root,
+                                                ((BooleanTest *) clause)->booltesttype,
+                                                ((BooleanTest *) clause)->arg,
+                                                varRelid);
        }
        else if (IsA(clause, RelabelType))
        {
index 693925b7ab5a2e4fa1934317633f8692208a9d37..211ccbf87bd397fed882d572b91bf216db151135 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.118 2002/09/20 03:55:40 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.119 2002/10/19 02:56:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1007,10 +1007,9 @@ icnlikesel(PG_FUNCTION_ARGS)
  *             booltestsel             - Selectivity of BooleanTest Node.
  */
 Selectivity
-booltestsel(Query *root, BooleanTest *clause, int varRelid)
+booltestsel(Query *root, BoolTestType booltesttype, Node *arg, int varRelid)
 {
        Var                *var;
-       Node       *arg;
        Oid                     relid;
        HeapTuple       statsTuple;
        Datum      *values;
@@ -1019,10 +1018,6 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
        int                     nnumbers;
        double          selec;
 
-       Assert(clause && IsA(clause, BooleanTest));
-
-       arg = (Node *) clause->arg;
-
        /*
         * Ignore any binary-compatible relabeling (probably unnecessary, but
         * can't hurt)
@@ -1040,7 +1035,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                 * the possibility of a NULL value when using clause_selectivity,
                 * and just assume the value is either TRUE or FALSE.
                 */
-               switch (clause->booltesttype)
+               switch (booltesttype)
                {
                        case IS_UNKNOWN:
                                selec = DEFAULT_UNK_SEL;
@@ -1058,7 +1053,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                                break;
                        default:
                                elog(ERROR, "booltestsel: unexpected booltesttype %d",
-                                        (int) clause->booltesttype);
+                                        (int) booltesttype);
                                selec = 0.0;    /* Keep compiler quiet */
                                break;
                }
@@ -1107,7 +1102,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                         */
                        freq_false = 1.0 - freq_true - freq_null;
 
-                       switch (clause->booltesttype)
+                       switch (booltesttype)
                        {
                                case IS_UNKNOWN:
                                        /* select only NULL values */
@@ -1135,7 +1130,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                                        break;
                                default:
                                        elog(ERROR, "booltestsel: unexpected booltesttype %d",
-                                                (int) clause->booltesttype);
+                                                (int) booltesttype);
                                        selec = 0.0;    /* Keep compiler quiet */
                                        break;
                        }
@@ -1151,7 +1146,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                         * Otherwise adjust for null fraction and assume an even split
                         * for boolean tests.
                         */
-                       switch (clause->booltesttype)
+                       switch (booltesttype)
                        {
                                case IS_UNKNOWN:
 
@@ -1176,7 +1171,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                                        break;
                                default:
                                        elog(ERROR, "booltestsel: unexpected booltesttype %d",
-                                                (int) clause->booltesttype);
+                                                (int) booltesttype);
                                        selec = 0.0;    /* Keep compiler quiet */
                                        break;
                        }
@@ -1190,7 +1185,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                 * No VACUUM ANALYZE stats available, so use a default value.
                 * (Note: not much point in recursing to clause_selectivity here.)
                 */
-               switch (clause->booltesttype)
+               switch (booltesttype)
                {
                        case IS_UNKNOWN:
                                selec = DEFAULT_UNK_SEL;
@@ -1206,7 +1201,7 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
                                break;
                        default:
                                elog(ERROR, "booltestsel: unexpected booltesttype %d",
-                                        (int) clause->booltesttype);
+                                        (int) booltesttype);
                                selec = 0.0;    /* Keep compiler quiet */
                                break;
                }
@@ -1222,19 +1217,16 @@ booltestsel(Query *root, BooleanTest *clause, int varRelid)
  *             nulltestsel             - Selectivity of NullTest Node.
  */
 Selectivity
-nulltestsel(Query *root, NullTest *clause, int varRelid)
+nulltestsel(Query *root, NullTestType nulltesttype, Node *arg, int varRelid)
 {
        Var                *var;
-       Node       *arg;
        Oid                     relid;
        HeapTuple       statsTuple;
        double          selec;
        double          defselec;
        double          freq_null;
 
-       Assert(clause && IsA(clause, NullTest));
-
-       switch (clause->nulltesttype)
+       switch (nulltesttype)
        {
                case IS_NULL:
                        defselec = DEFAULT_UNK_SEL;
@@ -1244,25 +1236,22 @@ nulltestsel(Query *root, NullTest *clause, int varRelid)
                        break;
                default:
                        elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
-                                (int) clause->nulltesttype);
+                                (int) nulltesttype);
                        return (Selectivity) 0;         /* keep compiler quiet */
        }
 
-       arg = (Node *) clause->arg;
-
        /*
         * Ignore any binary-compatible relabeling
         */
        if (IsA(arg, RelabelType))
                arg = ((RelabelType *) arg)->arg;
 
-       if (IsA(arg, Var) &&(varRelid == 0 || varRelid == ((Var *) arg)->varno))
+       if (IsA(arg, Var) &&
+               (varRelid == 0 || varRelid == ((Var *) arg)->varno))
                var = (Var *) arg;
        else
        {
-               /*
-                * punt if non-Var argument
-                */
+               /* punt if non-Var argument */
                return (Selectivity) defselec;
        }
 
@@ -1282,7 +1271,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid)
                stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
                freq_null = stats->stanullfrac;
 
-               switch (clause->nulltesttype)
+               switch (nulltesttype)
                {
                        case IS_NULL:
 
@@ -1301,7 +1290,7 @@ nulltestsel(Query *root, NullTest *clause, int varRelid)
                                break;
                        default:
                                elog(ERROR, "nulltestsel: unexpected nulltesttype %d",
-                                        (int) clause->nulltesttype);
+                                        (int) nulltesttype);
                                return (Selectivity) 0; /* keep compiler quiet */
                }
 
@@ -2978,6 +2967,10 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon)
        else
                prefix = DatumGetCString(DirectFunctionCall1(byteaout, prefixcon->constvalue));
 
+       /* If var is type NAME, must adjust type of comparison constant */
+       if (var->vartype == NAMEOID)
+               prefixcon = string_to_const(prefix, NAMEOID);
+
        cmpargs = makeList2(var, prefixcon);
        /* Assume scalargtsel is appropriate for all supported types */
        prefixsel = DatumGetFloat8(DirectFunctionCall4(scalargtsel,
@@ -3014,6 +3007,9 @@ prefix_selectivity(Query *root, Var *var, Const *prefixcon)
                 */
                prefixsel = topsel + prefixsel - 1.0;
 
+               /* Adjust for double-exclusion of NULLs */
+               prefixsel += nulltestsel(root, IS_NULL, (Node *) var, var->varno);
+
                /*
                 * A zero or slightly negative prefixsel should be converted into
                 * a small positive value; we probably are dealing with a very
@@ -3351,6 +3347,7 @@ make_greater_string(const Const *str_const)
        while (len > 0)
        {
                unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
+               unsigned char savelastchar = *lastchar;
 
                /*
                 * Try to generate a larger string by incrementing the last byte.
@@ -3369,6 +3366,9 @@ make_greater_string(const Const *str_const)
                        }
                }
 
+               /* restore last byte so we don't confuse pg_mbcliplen */
+               *lastchar = savelastchar;
+
                /*
                 * Truncate off the last character, which might be more than 1
                 * byte, depending on the character encoding.
index 4ed6c1755c74c3f63ac12cab7b7f394de3d6deac..8e73e61ffdc8b998030e439f98f0ce7161fe5c95 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: selfuncs.h,v 1.8 2002/09/04 20:31:46 momjian Exp $
+ * $Id: selfuncs.h,v 1.9 2002/10/19 02:56:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -66,8 +66,10 @@ extern Datum icregexnejoinsel(PG_FUNCTION_ARGS);
 extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
 extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
 
-extern Selectivity booltestsel(Query *root, BooleanTest *clause, int varRelid);
-extern Selectivity nulltestsel(Query *root, NullTest *clause, int varRelid);
+extern Selectivity booltestsel(Query *root, BoolTestType booltesttype,
+                                                          Node *arg, int varRelid);
+extern Selectivity nulltestsel(Query *root, NullTestType nulltesttype,
+                                                          Node *arg, int varRelid);
 
 extern void mergejoinscansel(Query *root, Node *clause,
                                 Selectivity *leftscan,