granicus.if.org Git - postgresql/commitdiff
First step in fixing selectivity-estimation code. eqsel and
author: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 1 Aug 1999 04:54:25 +0000 (04:54 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 1 Aug 1999 04:54:25 +0000 (04:54 +0000)
neqsel now behave as per my suggestions in pghackers a few days ago.
selectivity for < > <= >= should work OK for integral types as well, but
still need work for nonintegral types.  Since these routines have never
actually executed before :-(, this may result in some significant changes
in the optimizer's choices of execution plans.  Let me know if you see
any serious misbehavior.
CAUTION: THESE CHANGES REQUIRE INITDB.  pg_statistic table has changed.

src/backend/commands/vacuum.c
src/backend/utils/adt/selfuncs.c
src/include/catalog/pg_statistic.h
src/include/commands/vacuum.h
src/include/utils/builtins.h

index 204bd8e2693351457505478f4d7e43b4f2f0b839..12d2bc9bf99b856d2308f842562dcb61bb07adfc 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.115 1999/07/19 07:07:20 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.116 1999/08/01 04:54:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -78,7 +78,7 @@ static void vc_vacpage(Page page, VPageDescr vpd);
 static void vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples);
 static void vc_scanoneind(Relation indrel, int num_tuples);
 static void vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple);
-static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len);
+static void vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
 static void vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
 static void vc_delhilowstats(Oid relid, int attcnt, int *attnums);
 static VPageDescr vc_tidreapped(ItemPointer itemptr, VPageList vpl);
@@ -473,9 +473,13 @@ vc_vacone(Oid relid, bool analyze, List *va_cols)
                        {
                                pgopform = (Form_pg_operator) GETSTRUCT(func_operator);
                                fmgr_info(pgopform->oprcode, &(stats->f_cmplt));
+                               stats->op_cmplt = oprid(func_operator);
                        }
                        else
+                       {
                                stats->f_cmplt.fn_addr = NULL;
+                               stats->op_cmplt = InvalidOid;
+                       }
 
                        func_operator = oper(">", stats->attr->atttypid, stats->attr->atttypid, true);
                        if (func_operator != NULL)
@@ -2200,8 +2204,8 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
                        {
                                swapDatum(stats->guess1, stats->guess2);
                                swapInt(stats->guess1_len, stats->guess2_len);
-                               stats->guess1_cnt = stats->guess2_hits;
                                swapLong(stats->guess1_hits, stats->guess2_hits);
+                               stats->guess1_cnt = stats->guess1_hits;
                        }
                        if (stats->guess1_cnt > stats->best_cnt)
                        {
@@ -2227,7 +2231,7 @@ vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
  *
  */
 static void
-vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len)
+vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
 {
        if (attr->attbyval && attr->attlen != -1)
                *bucket = value;
@@ -2340,13 +2344,14 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
                                        selratio = 0;
                                else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
                                {
+                                       /* exact result when there are just 1 or 2 values... */
                                        double          min_cnt_d = stats->min_cnt,
                                                                max_cnt_d = stats->max_cnt,
                                                                null_cnt_d = stats->null_cnt,
-                                                               nonnullcnt_d = stats->nonnull_cnt;              /* prevent overflow */
+                                                               nonnull_cnt_d = stats->nonnull_cnt;             /* prevent overflow */
 
                                        selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) /
-                                               (nonnullcnt_d + null_cnt_d) / (nonnullcnt_d + null_cnt_d);
+                                               (nonnull_cnt_d + null_cnt_d) / (nonnull_cnt_d + null_cnt_d);
                                }
                                else
                                {
@@ -2359,7 +2364,9 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
                                         */
                                        selratio = (most * most + 0.20 * most * (total - most)) / total / total;
                                }
-                               if (selratio > 1.0)
+                               if (selratio < 0.0)
+                                       selratio = 0.0;
+                               else if (selratio > 1.0)
                                        selratio = 1.0;
                                attp->attdisbursion = selratio;
 
@@ -2375,13 +2382,22 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
                                 * doing system relations, especially pg_statistic is a
                                 * problem
                                 */
-                               if (VacAttrStatsLtGtValid(stats) && stats->initialized  /* &&
-                                                                                                                                                * !IsSystemRelationName(
-                                                                                                                                                *
-                                        pgcform->relname.data) */ )
+                               if (VacAttrStatsLtGtValid(stats) && stats->initialized
+                                       /* && !IsSystemRelationName(pgcform->relname.data)
+                                        */ )
                                {
+                                       float32data nullratio;
+                                       float32data bestratio;
                                        FmgrInfo        out_function;
                                        char       *out_string;
+                                       double          best_cnt_d = stats->best_cnt,
+                                                               null_cnt_d = stats->null_cnt,
+                                                               nonnull_cnt_d = stats->nonnull_cnt;             /* prevent overflow */
+
+                                       nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
+                                       bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
+
+                                       fmgr_info(stats->outfunc, &out_function);
 
                                        for (i = 0; i < Natts_pg_statistic; ++i)
                                                nulls[i] = ' ';
@@ -2391,26 +2407,34 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
                                         * ----------------
                                         */
                                        i = 0;
-                                       values[i++] = (Datum) relid;            /* 1 */
-                                       values[i++] = (Datum) attp->attnum; /* 2 */
-                                       values[i++] = (Datum) InvalidOid;       /* 3 */
-                                       fmgr_info(stats->outfunc, &out_function);
-                                       out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid);
-                                       values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
+                                       values[i++] = (Datum) relid;            /* starelid */
+                                       values[i++] = (Datum) attp->attnum; /* staattnum */
+                                       values[i++] = (Datum) stats->op_cmplt;  /* staop */
+                                       /* hack: this code knows float4 is pass-by-ref */
+                                       values[i++] = PointerGetDatum(&nullratio);      /* stanullfrac */
+                                       values[i++] = PointerGetDatum(&bestratio);      /* stacommonfrac */
+                                       out_string = (*fmgr_faddr(&out_function)) (stats->best, stats->attr->atttypid, stats->attr->atttypmod);
+                                       values[i++] = PointerGetDatum(textin(out_string)); /* stacommonval */
                                        pfree(out_string);
-                                       out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid);
-                                       values[i++] = (Datum) fmgr(F_TEXTIN, out_string);
+                                       out_string = (*fmgr_faddr(&out_function)) (stats->min, stats->attr->atttypid, stats->attr->atttypmod);
+                                       values[i++] = PointerGetDatum(textin(out_string)); /* staloval */
+                                       pfree(out_string);
+                                       out_string = (char *) (*fmgr_faddr(&out_function)) (stats->max, stats->attr->atttypid, stats->attr->atttypmod);
+                                       values[i++] = PointerGetDatum(textin(out_string)); /* stahival */
                                        pfree(out_string);
 
                                        stup = heap_formtuple(sd->rd_att, values, nulls);
 
                                        /* ----------------
-                                        *      insert the tuple in the relation and get the tuple's oid.
+                                        *      insert the tuple in the relation.
                                         * ----------------
                                         */
                                        heap_insert(sd, stup);
-                                       pfree(DatumGetPointer(values[3]));
-                                       pfree(DatumGetPointer(values[4]));
+
+                                       /* release allocated space */
+                                       pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval-1]));
+                                       pfree(DatumGetPointer(values[Anum_pg_statistic_staloval-1]));
+                                       pfree(DatumGetPointer(values[Anum_pg_statistic_stahival-1]));
                                        pfree(stup);
                                }
                        }
index db78c48525649537e49dea428040a276a08c8182..0b6afc814b6ed849811f48cf10309e6dedd6ecd6 100644 (file)
@@ -6,13 +6,11 @@
  *       These routines are registered in the operator catalog in the
  *       "oprrest" and "oprjoin" attributes.
  *
- *       XXX check all the functions--I suspect them to be 1-based.
- *
  * Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.35 1999/07/17 20:17:59 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.36 1999/08/01 04:54:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "access/heapam.h"
 #include "catalog/catname.h"
+#include "catalog/pg_operator.h"
 #include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "parser/parse_oper.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 /* N is not a valid var/constant or relation id */
 #define NONVALUE(N)            ((N) == -1)
 
-/*
- * generalize the test for functional index selectivity request
- */
-#define FunctionalSelectivity(nIndKeys,attNum) (attNum==InvalidAttrNumber)
+/* are we looking at a functional index selectivity request? */
+#define FunctionalSelectivity(nIndKeys,attNum) ((attNum)==InvalidAttrNumber)
 
-static float32data getattdisbursion(Oid relid, AttrNumber attnum);
-static void gethilokey(Oid relid, AttrNumber attnum, Oid opid,
-                  char **high, char **low);
+/* default selectivity estimate for inequalities such as "A < b" */
+#define DEFAULT_INEQ_SEL  (1.0 / 3.0)
+
+static void getattproperties(Oid relid, AttrNumber attnum,
+                                                        Oid *typid,
+                                                        int *typlen,
+                                                        bool *typbyval,
+                                                        int32 *typmod);
+static bool getattstatistics(Oid relid, AttrNumber attnum,
+                                                        Oid typid, int32 typmod,
+                                                        double *nullfrac,
+                                                        double *commonfrac,
+                                                        Datum *commonval,
+                                                        Datum *loval,
+                                                        Datum *hival);
+static double getattdisbursion(Oid relid, AttrNumber attnum);
 
 
 /*
- *             eqsel                   - Selectivity of "=" for any data type.
+ *             eqsel                   - Selectivity of "=" for any data types.
  */
 float64
 eqsel(Oid opid,
          Oid relid,
          AttrNumber attno,
-         char *value,
+         Datum value,
          int32 flag)
 {
        float64         result;
@@ -55,18 +67,124 @@ eqsel(Oid opid,
        if (NONVALUE(attno) || NONVALUE(relid))
                *result = 0.1;
        else
-               *result = (float64data) getattdisbursion(relid, (int) attno);
+       {
+               Oid                     typid;
+               int                     typlen;
+               bool            typbyval;
+               int32           typmod;
+               double          nullfrac;
+               double          commonfrac;
+               Datum           commonval;
+               double          selec;
+
+               /* get info about the attribute */
+               getattproperties(relid, attno,
+                                                &typid, &typlen, &typbyval, &typmod);
+
+               if (getattstatistics(relid, attno, typid, typmod,
+                                                        &nullfrac, &commonfrac, &commonval,
+                                                        NULL, NULL))
+               {
+                       if (flag & SEL_CONSTANT)
+                       {
+                               /* Is the constant the same as the most common value? */
+                               HeapTuple       oprtuple;
+                               Oid                     ltype,
+                                                       rtype;
+                               Operator        func_operator;
+                               bool            mostcommon = false;
+
+                               /* get left and right datatypes of the operator */
+                               oprtuple = get_operator_tuple(opid);
+                               if (! HeapTupleIsValid(oprtuple))
+                                       elog(ERROR, "eqsel: no tuple for operator %u", opid);
+                               ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
+                               rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
+
+                               /* and find appropriate equality operator (no, it ain't
+                                * necessarily opid itself...)
+                                */
+                               func_operator = oper("=", ltype, rtype, true);
+
+                               if (func_operator != NULL)
+                               {
+                                       RegProcedure eqproc = ((Form_pg_operator) GETSTRUCT(func_operator))->oprcode;
+                                       if (flag & SEL_RIGHT) /* given value on the right? */
+                                               mostcommon = (bool)
+                                                       DatumGetUInt8(fmgr(eqproc, commonval, value));
+                                       else
+                                               mostcommon = (bool)
+                                                       DatumGetUInt8(fmgr(eqproc, value, commonval));
+                               }
+
+                               if (mostcommon)
+                               {
+                                       /* Search is for the most common value.  We know the
+                                        * selectivity exactly (or as exactly as VACUUM could
+                                        * calculate it, anyway).
+                                        */
+                                       selec = commonfrac;
+                               }
+                               else
+                               {
+                                       /* Comparison is against a constant that is neither the
+                                        * most common value nor null.  Its selectivity cannot
+                                        * be more than this:
+                                        */
+                                       selec = 1.0 - commonfrac - nullfrac;
+                                       if (selec > commonfrac)
+                                               selec = commonfrac;
+                                       /* and in fact it's probably less, so apply a fudge
+                                        * factor.
+                                        */
+                                       selec *= 0.5;
+                               }
+                       }
+                       else
+                       {
+                               /* Search is for a value that we do not know a priori,
+                                * but we will assume it is not NULL.  Selectivity
+                                * cannot be more than this:
+                                */
+                               selec = 1.0 - nullfrac;
+                               if (selec > commonfrac)
+                                       selec = commonfrac;
+                               /* and in fact it's probably less, so apply a fudge
+                                * factor.
+                                */
+                               selec *= 0.5;
+                       }
+
+                       /* result should be in range, but make sure... */
+                       if (selec < 0.0)
+                               selec = 0.0;
+                       else if (selec > 1.0)
+                               selec = 1.0;
+
+                       if (! typbyval)
+                               pfree(DatumGetPointer(commonval));
+               }
+               else
+               {
+                       /* No VACUUM ANALYZE stats available, so make a guess using
+                        * the disbursion stat (if we have that, which is unlikely...)
+                        */
+                       selec = getattdisbursion(relid, attno);
+               }
+
+               *result = (float64data) selec;
+       }
        return result;
 }
 
 /*
- *             neqsel                  - Selectivity of "!=" for any data type.
+ *             neqsel                  - Selectivity of "!=" for any data types.
  */
 float64
 neqsel(Oid opid,
           Oid relid,
           AttrNumber attno,
-          char *value,
+          Datum value,
           int32 flag)
 {
        float64         result;
@@ -77,96 +195,164 @@ neqsel(Oid opid,
 }
 
 /*
- *             intltsel                - Selectivity of "<" for integers.
+ *             intltsel                - Selectivity of "<" (also "<=") for integers.
  *                                               Should work for both longs and shorts.
  */
 float64
 intltsel(Oid opid,
                 Oid relid,
                 AttrNumber attno,
-                int32 value,
+                Datum value,
                 int32 flag)
 {
        float64         result;
-       char       *highchar,
-                          *lowchar;
-       long            val,
-                               high,
-                               low,
-                               top,
-                               bottom;
 
        result = (float64) palloc(sizeof(float64data));
-       if (NONVALUE(attno) || NONVALUE(relid))
-               *result = 1.0 / 3;
+       if (! (flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
+               *result = DEFAULT_INEQ_SEL;
        else
        {
-               /* XXX                  val = atol(value); */
-               val = value;
-               gethilokey(relid, (int) attno, opid, &highchar, &lowchar);
-               if (*highchar == 'n' || *lowchar == 'n')
+               HeapTuple       oprtuple;
+               Oid                     ltype,
+                                       rtype;
+               Oid                     typid;
+               int                     typlen;
+               bool            typbyval;
+               int32           typmod;
+               Datum           hival,
+                                       loval;
+               long            val,
+                                       high,
+                                       low,
+                                       numerator,
+                                       denominator;
+
+               /* get left and right datatypes of the operator */
+               oprtuple = get_operator_tuple(opid);
+               if (! HeapTupleIsValid(oprtuple))
+                       elog(ERROR, "intltsel: no tuple for operator %u", opid);
+               ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
+               rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
+
+               /*
+                * TEMPORARY HACK: this code is currently getting called for
+                * a bunch of non-integral types.  Give a default estimate if
+                * either side is not pass-by-val.  Need better solution.
+                */
+               if (! get_typbyval(ltype) || ! get_typbyval(rtype))
                {
-                       *result = 1.0 / 3.0;
+                       *result = DEFAULT_INEQ_SEL;
                        return result;
                }
-               high = atol(highchar);
-               low = atol(lowchar);
-               if ((flag & SEL_RIGHT && val < low) ||
-                       (!(flag & SEL_RIGHT) && val > high))
+
+               /* Deduce type of the constant, and convert to uniform "long" format.
+                * Note that constant might well be a different type than attribute.
+                * XXX this ought to use a type-specific "convert to double" op.
+                */
+               typid = (flag & SEL_RIGHT) ? rtype : ltype;
+               switch (get_typlen(typid))
                {
-                       float32data nvals;
+                       case 1:
+                               val = (long) DatumGetUInt8(value);
+                               break;
+                       case 2:
+                               val = (long) DatumGetInt16(value);
+                               break;
+                       case 4:
+                               val = (long) DatumGetInt32(value);
+                               break;
+                       default:
+                               elog(ERROR, "intltsel: unsupported type %u", typid);
+                               *result = DEFAULT_INEQ_SEL;
+                               return result;
+               }
 
-                       nvals = getattdisbursion(relid, (int) attno);
-                       if (nvals == 0)
-                               *result = 1.0 / 3.0;
-                       else
-                       {
-                               *result = 3.0 * (float64data) nvals;
-                               if (*result > 1.0)
-                                       *result = 1;
-                       }
+               /* Now get info about the attribute */
+               getattproperties(relid, attno,
+                                                &typid, &typlen, &typbyval, &typmod);
+
+               if (! getattstatistics(relid, attno, typid, typmod,
+                                                          NULL, NULL, NULL,
+                                                          &loval, &hival))
+               {
+                       *result = DEFAULT_INEQ_SEL;
+                       return result;
+               }
+               /*
+                * Convert loval/hival to common "long int" representation.
+                */
+               switch (typlen)
+               {
+                       case 1:
+                               low = (long) DatumGetUInt8(loval);
+                               high = (long) DatumGetUInt8(hival);
+                               break;
+                       case 2:
+                               low = (long) DatumGetInt16(loval);
+                               high = (long) DatumGetInt16(hival);
+                               break;
+                       case 4:
+                               low = (long) DatumGetInt32(loval);
+                               high = (long) DatumGetInt32(hival);
+                               break;
+                       default:
+                               elog(ERROR, "intltsel: unsupported type %u", typid);
+                               *result = DEFAULT_INEQ_SEL;
+                               return result;
+               }
+               if (val < low || val > high)
+               {
+                       /* If given value is outside the statistical range,
+                        * assume we have out-of-date stats and return a default guess.
+                        * We could return a small or large value if we trusted the stats
+                        * more.   XXX change this eventually.
+                        */
+                       *result = DEFAULT_INEQ_SEL;
                }
                else
                {
-                       bottom = high - low;
-                       if (bottom == 0)
-                               ++bottom;
+                       denominator = high - low;
+                       if (denominator <= 0)
+                               denominator = 1;
                        if (flag & SEL_RIGHT)
-                               top = val - low;
+                               numerator = val - low;
                        else
-                               top = high - val;
-                       if (top > bottom)
+                               numerator = high - val;
+                       if (numerator <= 0)     /* never return a zero estimate! */
+                               numerator = 1;
+                       if (numerator >= denominator)
                                *result = 1.0;
                        else
-                       {
-                               if (top == 0)
-                                       ++top;
-                               *result = ((1.0 * top) / bottom);
-                       }
+                               *result = (double) numerator / (double) denominator;
+               }
+               if (! typbyval)
+               {
+                       pfree(DatumGetPointer(hival));
+                       pfree(DatumGetPointer(loval));
                }
        }
        return result;
 }
 
 /*
- *             intgtsel                - Selectivity of ">" for integers.
+ *             intgtsel                - Selectivity of ">" (also ">=") for integers.
  *                                               Should work for both longs and shorts.
  */
 float64
 intgtsel(Oid opid,
                 Oid relid,
                 AttrNumber attno,
-                int32 value,
+                Datum value,
                 int32 flag)
 {
        float64         result;
-       int                     notflag;
 
-       if (flag & 0)
-               notflag = flag & ~SEL_RIGHT;
-       else
-               notflag = flag | SEL_RIGHT;
-       result = intltsel(opid, relid, attno, value, (int32) notflag);
+       /* Compute selectivity of "<", then invert --- but only if we
+        * were able to produce a non-default estimate.
+        */
+       result = intltsel(opid, relid, attno, value, flag);
+       if (*result != DEFAULT_INEQ_SEL)
+               *result = 1.0 - *result;
        return result;
 }
 
@@ -181,7 +367,7 @@ eqjoinsel(Oid opid,
                  AttrNumber attno2)
 {
        float64         result;
-       float32data num1,
+       float64data num1,
                                num2,
                                max;
 
@@ -191,13 +377,13 @@ eqjoinsel(Oid opid,
                *result = 0.1;
        else
        {
-               num1 = getattdisbursion(relid1, (int) attno1);
-               num2 = getattdisbursion(relid2, (int) attno2);
+               num1 = getattdisbursion(relid1, attno1);
+               num2 = getattdisbursion(relid2, attno2);
                max = (num1 > num2) ? num1 : num2;
-               if (max == 0)
+               if (max <= 0)
                        *result = 1.0;
                else
-                       *result = (float64data) max;
+                       *result = max;
        }
        return result;
 }
@@ -220,7 +406,7 @@ neqjoinsel(Oid opid,
 }
 
 /*
- *             intltjoinsel    - Join selectivity of "<"
+ *             intltjoinsel    - Join selectivity of "<" and "<="
  */
 float64
 intltjoinsel(Oid opid,
@@ -232,12 +418,12 @@ intltjoinsel(Oid opid,
        float64         result;
 
        result = (float64) palloc(sizeof(float64data));
-       *result = 1.0 / 3.0;
+       *result = DEFAULT_INEQ_SEL;
        return result;
 }
 
 /*
- *             intgtjoinsel    - Join selectivity of ">"
+ *             intgtjoinsel    - Join selectivity of ">" and ">="
  */
 float64
 intgtjoinsel(Oid opid,
@@ -249,129 +435,230 @@ intgtjoinsel(Oid opid,
        float64         result;
 
        result = (float64) palloc(sizeof(float64data));
-       *result = 1.0 / 3.0;
+       *result = DEFAULT_INEQ_SEL;
        return result;
 }
 
 /*
- *             getattdisbursion                - Retrieves the number of values within an attribute.
- *
- *             Note:
- *                             getattdisbursion and gethilokey both currently use keyed
- *                             relation scans and amgetattr.  Alternatively,
- *                             the relation scan could be non-keyed and the tuple
- *                             returned could be cast (struct X *) tuple + tuple->t_hoff.
- *                             The first method is good for testing the implementation,
- *                             but the second may ultimately be faster?!?      In any case,
- *                             using the cast instead of amgetattr would be
- *                             more efficient.  However, the cast will not work
- *                             for gethilokey which accesses stahikey in struct statistic.
+ * getattproperties
+ *       Retrieve pg_attribute properties for an attribute,
+ *       including type OID, type len, type byval flag, typmod.
  */
-static float32data
-getattdisbursion(Oid relid, AttrNumber attnum)
+static void
+getattproperties(Oid relid, AttrNumber attnum,
+                                Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
 {
        HeapTuple       atp;
-       float32data nvals;
-       int32           ntuples;
+       Form_pg_attribute att_tup;
 
        atp = SearchSysCacheTuple(ATTNUM,
                                                          ObjectIdGetDatum(relid),
                                                          Int16GetDatum(attnum),
                                                          0, 0);
-       if (!HeapTupleIsValid(atp))
-       {
-               elog(ERROR, "getattdisbursion: no attribute tuple %u %d",
-                        relid, attnum);
-               return 0;
-       }
-       nvals = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
-       if (nvals > 0)
-               return nvals;
-
-       atp = SearchSysCacheTuple(RELOID,
-                                                         ObjectIdGetDatum(relid),
-                                                         0, 0, 0);
-
-       /*
-        * XXX -- use number of tuples as number of distinctive values just
-        * for now, in case number of distinctive values is not cached
-        */
-       if (!HeapTupleIsValid(atp))
-       {
-               elog(ERROR, "getattdisbursion: no relation tuple %u", relid);
-               return 0;
-       }
-       ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-       /* Look above how nvals is used.        - vadim 04/09/97 */
-       if (ntuples > 0)
-               nvals = 1.0 / ntuples;
-
-       return nvals;
+       if (! HeapTupleIsValid(atp))
+               elog(ERROR, "getattproperties: no attribute tuple %u %d",
+                        relid, (int) attnum);
+       att_tup = (Form_pg_attribute) GETSTRUCT(atp);
+
+       *typid = att_tup->atttypid;
+       *typlen = att_tup->attlen;
+       *typbyval = att_tup->attbyval;
+       *typmod = att_tup->atttypmod;
 }
 
 /*
- *             gethilokey              - Returns a pointer to strings containing
- *                                               the high and low keys within an attribute.
+ * getattstatistics
+ *       Retrieve the pg_statistic data for an attribute.
+ *       Returns 'false' if no stats are available.
+ *
+ * Inputs:
+ * 'relid' and 'attnum' are the relation and attribute number.
+ * 'typid' and 'typmod' are the type and typmod of the column,
+ * which the caller must already have looked up.
  *
- *             Currently returns "0", and "0" in high and low if the statistic
- *             catalog does not contain the proper tuple.      Eventually, the
- *             statistic demon should have the tuple maintained, and it should
- *             elog() if the tuple is missing.
+ * Outputs:
+ * The available stats are nullfrac, commonfrac, commonval, loval, hival.
+ * The caller need not retrieve all five --- pass NULL pointers for the
+ * unwanted values.
  *
- *             XXX Question: is this worth sticking in the catalog caches,
- *                     or will this get invalidated too often?
+ * commonval, loval, hival are returned as Datums holding the internal
+ * representation of the values (pfree them after use if the data type is not
+ * by-value).  A null catalog field is returned as PointerGetDatum(NULL).
+ *
+ * XXX currently, this does a linear search of pg_statistic because there
+ * is neither an index nor a syscache for pg_statistic.  FIX THIS!
  */
-static void
-gethilokey(Oid relid,
-                  AttrNumber attnum,
-                  Oid opid,
-                  char **high,
-                  char **low)
+static bool
+getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod,
+                                double *nullfrac,
+                                double *commonfrac,
+                                Datum *commonval,
+                                Datum *loval,
+                                Datum *hival)
 {
        Relation        rel;
        HeapScanDesc scan;
-       static ScanKeyData key[3] = {
+       static ScanKeyData key[2] = {
                {0, Anum_pg_statistic_starelid, F_OIDEQ, {0, 0, F_OIDEQ}},
-               {0, Anum_pg_statistic_staattnum, F_INT2EQ, {0, 0, F_INT2EQ}},
-               {0, Anum_pg_statistic_staop, F_OIDEQ, {0, 0, F_OIDEQ}}
+               {0, Anum_pg_statistic_staattnum, F_INT2EQ, {0, 0, F_INT2EQ}}
        };
        bool            isnull;
        HeapTuple       tuple;
+       HeapTuple       typeTuple;
+       FmgrInfo        inputproc;
 
        rel = heap_openr(StatisticRelationName);
 
        key[0].sk_argument = ObjectIdGetDatum(relid);
        key[1].sk_argument = Int16GetDatum((int16) attnum);
-       key[2].sk_argument = ObjectIdGetDatum(opid);
-       scan = heap_beginscan(rel, 0, SnapshotNow, 3, key);
+
+       scan = heap_beginscan(rel, 0, SnapshotNow, 2, key);
        tuple = heap_getnext(scan, 0);
        if (!HeapTupleIsValid(tuple))
        {
-               *high = "n";
-               *low = "n";
+               /* no such stats entry */
+               heap_endscan(scan);
+               heap_close(rel);
+               return false;
+       }
 
-               /*
-                * XXX                  elog(ERROR, "gethilokey: statistic tuple not
-                * found");
-                */
-               return;
+       /* We assume that there will only be one entry in pg_statistic
+        * for the given rel/att.  Someday, VACUUM might store more than one...
+        */
+       if (nullfrac)
+               *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
+       if (commonfrac)
+               *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
+
+       /* Get the type input proc for the column datatype */
+       typeTuple = SearchSysCacheTuple(TYPOID,
+                                                                       ObjectIdGetDatum(typid),
+                                                                       0, 0, 0);
+       if (! HeapTupleIsValid(typeTuple))
+               elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
+                        typid);
+       fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+
+       /* Values are variable-length fields, so cannot access as struct fields.
+        * Must do it the hard way with heap_getattr.
+        */
+       if (commonval)
+       {
+               text *val = (text *) heap_getattr(tuple,
+                                                                                 Anum_pg_statistic_stacommonval,
+                                                                                 RelationGetDescr(rel),
+                                                                                 &isnull);
+               if (isnull)
+               {
+                       elog(DEBUG, "getattstatistics: stacommonval is null");
+                       *commonval = PointerGetDatum(NULL);
+               }
+               else
+               {
+                       char *strval = textout(val);
+                       *commonval = (Datum)
+                               (*fmgr_faddr(&inputproc)) (strval, typid, typmod);
+                       pfree(strval);
+               }
        }
-       *high = textout((struct varlena *)
-                                       heap_getattr(tuple,
-                                                                Anum_pg_statistic_stahikey,
-                                                                RelationGetDescr(rel),
-                                                                &isnull));
-       if (isnull)
-               elog(DEBUG, "gethilokey: high key is null");
-       *low = textout((struct varlena *)
-                                  heap_getattr(tuple,
-                                                               Anum_pg_statistic_stalokey,
-                                                               RelationGetDescr(rel),
-                                                               &isnull));
-       if (isnull)
-               elog(DEBUG, "gethilokey: low key is null");
+
+       if (loval)
+       {
+               text *val = (text *) heap_getattr(tuple,
+                                                                                 Anum_pg_statistic_staloval,
+                                                                                 RelationGetDescr(rel),
+                                                                                 &isnull);
+               if (isnull)
+               {
+                       elog(DEBUG, "getattstatistics: staloval is null");
+                       *loval = PointerGetDatum(NULL);
+               }
+               else
+               {
+                       char *strval = textout(val);
+                       *loval = (Datum)
+                               (*fmgr_faddr(&inputproc)) (strval, typid, typmod);
+                       pfree(strval);
+               }
+       }
+
+       if (hival)
+       {
+               text *val = (text *) heap_getattr(tuple,
+                                                                                 Anum_pg_statistic_stahival,
+                                                                                 RelationGetDescr(rel),
+                                                                                 &isnull);
+               if (isnull)
+               {
+                       elog(DEBUG, "getattstatistics: stahival is null");
+                       *hival = PointerGetDatum(NULL);
+               }
+               else
+               {
+                       char *strval = textout(val);
+                       *hival = (Datum)
+                               (*fmgr_faddr(&inputproc)) (strval, typid, typmod);
+                       pfree(strval);
+               }
+       }
+
        heap_endscan(scan);
        heap_close(rel);
+       return true;
+}
+
+/*
+ * getattdisbursion
+ *       Retrieve the disbursion statistic for an attribute,
+ *       or produce an estimate if no info is available.
+ */
+static double
+getattdisbursion(Oid relid, AttrNumber attnum)
+{
+       HeapTuple       atp;
+       double          disbursion;
+       int32           ntuples;
+
+       atp = SearchSysCacheTuple(ATTNUM,
+                                                         ObjectIdGetDatum(relid),
+                                                         Int16GetDatum(attnum),
+                                                         0, 0);
+       if (!HeapTupleIsValid(atp))
+       {
+               /* this should not happen */
+               elog(ERROR, "getattdisbursion: no attribute tuple %u %d",
+                        relid, attnum);
+               return 0.1;
+       }
+
+       disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
+       if (disbursion > 0.0)
+               return disbursion;
+
+       /* VACUUM ANALYZE has not stored a disbursion statistic for us.
+        * Produce an estimate = 1/numtuples.  This may produce
+        * unreasonably small estimates for large tables, so limit
+        * the estimate to no less than 0.01.
+        */
+       atp = SearchSysCacheTuple(RELOID,
+                                                         ObjectIdGetDatum(relid),
+                                                         0, 0, 0);
+       if (!HeapTupleIsValid(atp))
+       {
+               /* this should not happen */
+               elog(ERROR, "getattdisbursion: no relation tuple %u", relid);
+               return 0.1;
+       }
+
+       ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
+
+       if (ntuples > 0)
+               disbursion = 1.0 / (double) ntuples;
+
+       if (disbursion < 0.01)
+               disbursion = 0.01;
+
+       return disbursion;
 }
 
 float64
index 19b87b68b1b09c37d5fa8fe876308bdb239fd3ae..1c719443282f433c295918d23d9c91695e745f2b 100644 (file)
@@ -7,7 +7,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_statistic.h,v 1.6 1999/02/13 23:21:15 momjian Exp $
+ * $Id: pg_statistic.h,v 1.7 1999/08/01 04:54:21 tgl Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
  */
 CATALOG(pg_statistic)
 {
-       Oid                     starelid;
-       int2            staattnum;
-       Oid                     staop;
-       text            stalokey;               /* VARIABLE LENGTH FIELD */
-       text            stahikey;               /* VARIABLE LENGTH FIELD */
+       /* These fields form the unique key for the entry: */
+       Oid                     starelid;               /* relation containing attribute */
+       int2            staattnum;              /* attribute (column) stats are for */
+       Oid                     staop;                  /* '<' comparison op used for lo/hi vals */
+       /* Note: the current VACUUM code will never produce more than one entry
+        * per column, but in theory there could be multiple entries if a datatype
+        * has more than one useful ordering operator.  Also, the current code
+        * will not write an entry unless it found at least one non-NULL value
+        * in the column; so the remaining fields will never be NULL.
+        */
+
+       /* These fields contain the stats about the column indicated by the key */
+       float4          stanullfrac;    /* the fraction of the entries that are NULL */
+       float4          stacommonfrac;  /* the fraction that are the most common val */
+
+       /* THE REST OF THESE ARE VARIABLE LENGTH FIELDS.
+        * They cannot be accessed as C struct entries; you have to use the
+        * full field access machinery (heap_getattr) for them.
+        *
+        * All three of these are text representations of data values of the
+        * column's data type.  To re-create the actual Datum, do
+        * datatypein(textout(givenvalue)).
+        */
+       text            stacommonval;   /* most common non-null value in column */
+       text            staloval;               /* smallest non-null value in column */
+       text            stahival;               /* largest non-null value in column */
 } FormData_pg_statistic;
 
 /* ----------------
@@ -50,11 +71,14 @@ typedef FormData_pg_statistic *Form_pg_statistic;
  *             compiler constants for pg_statistic
  * ----------------
  */
-#define Natts_pg_statistic                             5
+#define Natts_pg_statistic                             8
 #define Anum_pg_statistic_starelid             1
 #define Anum_pg_statistic_staattnum            2
 #define Anum_pg_statistic_staop                        3
-#define Anum_pg_statistic_stalokey             4
-#define Anum_pg_statistic_stahikey             5
+#define Anum_pg_statistic_stanullfrac  4
+#define Anum_pg_statistic_stacommonfrac        5
+#define Anum_pg_statistic_stacommonval 6
+#define Anum_pg_statistic_staloval             7
+#define Anum_pg_statistic_stahival             8
 
 #endif  /* PG_STATISTIC_H */
index 59a72bbb15b949228ed7b059964f61f419a44027..59a7fe4a5063e4e82e7a0ee8c854b4f4238b5cc1 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.22 1999/07/15 15:21:03 momjian Exp $
+ * $Id: vacuum.h,v 1.23 1999/08/01 04:54:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -67,22 +67,23 @@ typedef struct
                                guess2,
                                max,
                                min;
-       int16           best_len,
+       int                     best_len,
                                guess1_len,
                                guess2_len,
                                max_len,
                                min_len;
-       int32           best_cnt,
+       long            best_cnt,
                                guess1_cnt,
                                guess1_hits,
                                guess2_hits,
                                null_cnt,
-                               nonnull_cnt;
-       int32           max_cnt,
+                               nonnull_cnt,
+                               max_cnt,
                                min_cnt;
        FmgrInfo        f_cmpeq,
                                f_cmplt,
                                f_cmpgt;
+       Oid                     op_cmplt;
        regproc         outfunc;
        bool            initialized;
 } VacAttrStats;
index e6a0b4157d5ea5ce013ed614513925f0975bbd8b..dfe1897cbe22e7d364972c0c4cbcafafd86ec260 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.84 1999/07/16 17:07:39 momjian Exp $
+ * $Id: builtins.h,v 1.85 1999/08/01 04:54:20 tgl Exp $
  *
  * NOTES
  *       This should normally only be included by fmgr.h.
@@ -372,10 +372,10 @@ extern Oid        regproctooid(RegProcedure rp);
 #define RegprocToOid(rp) regproctooid(rp)
 
 /* selfuncs.c */
-extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag);
-extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, char *value, int32 flag);
-extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag);
-extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, int32 value, int32 flag);
+extern float64 eqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
+extern float64 neqsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
+extern float64 intltsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
+extern float64 intgtsel(Oid opid, Oid relid, AttrNumber attno, Datum value, int32 flag);
 extern float64 eqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
 extern float64 neqjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);
 extern float64 intltjoinsel(Oid opid, Oid relid1, AttrNumber attno1, Oid relid2, AttrNumber attno2);