granicus.if.org Git - postgresql/commitdiff
Store -1 in attdisbursion to signal 'no duplicates in column'.
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 9 Aug 1999 03:16:47 +0000 (03:16 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 9 Aug 1999 03:16:47 +0000 (03:16 +0000)
Centralize att_disbursion readout logic.

src/backend/commands/vacuum.c
src/backend/optimizer/path/joinpath.c
src/backend/utils/adt/selfuncs.c

index 0418a8d3a3891e85c2bc03cc8d74c9ceba1aad1d..b480b44f34700f56f4c4aa63779b11962bbb92b1 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.117 1999/08/08 17:13:10 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.118 1999/08/09 03:16:47 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2346,36 +2346,46 @@ vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *
                                }
                                else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
                                {
-                                       /* looks like we have a unique-key attribute */
-                                       double          total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
-
-                                       selratio = 1.0 / total;
-                               }
-                               else if (VacAttrStatsLtGtValid(stats) && stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
-                               {
-                                       /* exact result when there are just 1 or 2 values... */
-                                       double          min_cnt_d = stats->min_cnt,
-                                                               max_cnt_d = stats->max_cnt,
-                                                               null_cnt_d = stats->null_cnt;
-                                       double          total = ((double) stats->nonnull_cnt) + null_cnt_d;
-
-                                       selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
+                                       /* looks like we have a unique-key attribute ---
+                                        * flag this with special -1.0 flag value.
+                                        *
+                                        * The correct disbursion is 1.0/numberOfRows, but since
+                                        * the relation row count can get updated without
+                                        * recomputing disbursion, we want to store a "symbolic"
+                                        * value and figure 1.0/numberOfRows on the fly.
+                                        */
+                                       selratio = -1;
                                }
                                else
                                {
-                                       double          most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-                                       double          total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+                                       if (VacAttrStatsLtGtValid(stats) &&
+                                               stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+                                       {
+                                               /* exact result when there are just 1 or 2 values... */
+                                               double          min_cnt_d = stats->min_cnt,
+                                                                       max_cnt_d = stats->max_cnt,
+                                                                       null_cnt_d = stats->null_cnt;
+                                               double          total = ((double) stats->nonnull_cnt) + null_cnt_d;
 
-                                       /*
-                                        * we assume count of other values are 20% of best
-                                        * count in table
-                                        */
-                                       selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
+                                               selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
+                                       }
+                                       else
+                                       {
+                                               double          most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
+                                               double          total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+
+                                               /*
+                                                * we assume count of other values are 20% of best
+                                                * count in table
+                                                */
+                                               selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
+                                       }
+                                       /* Make sure calculated values are in-range */
+                                       if (selratio < 0.0)
+                                               selratio = 0.0;
+                                       else if (selratio > 1.0)
+                                               selratio = 1.0;
                                }
-                               if (selratio < 0.0)
-                                       selratio = 0.0;
-                               else if (selratio > 1.0)
-                                       selratio = 1.0;
                                attp->attdisbursion = selratio;
 
                                /*
index 57688deeb8506adab8824b2be05b72c2b568cad1..4a7018aa64ac1ba1cbbdc1946ac4b53552c37eb3 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.43 1999/08/06 04:00:15 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.44 1999/08/09 03:16:43 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -23,7 +23,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "parser/parsetree.h"
-#include "utils/syscache.h"
+#include "utils/lsyscache.h"
 
 static Path *best_innerjoin(List *join_paths, List *outer_relid);
 static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
@@ -586,7 +586,6 @@ hash_inner_and_outer(Query *root,
 
 /*
  * Estimate disbursion of the specified Var
- *       Generate some kind of estimate, no matter what...
  *
  * We use a default of 0.1 if we can't figure out anything better.
  * This will typically discourage use of a hash rather strongly,
@@ -598,24 +597,11 @@ static Cost
 estimate_disbursion(Query *root, Var *var)
 {
        Oid                     relid;
-       HeapTuple       atp;
-       double          disbursion;
 
        if (! IsA(var, Var))
                return 0.1;
 
        relid = getrelid(var->varno, root->rtable);
 
-       atp = SearchSysCacheTuple(ATTNUM,
-                                                         ObjectIdGetDatum(relid),
-                                                         Int16GetDatum(var->varattno),
-                                                         0, 0);
-       if (! HeapTupleIsValid(atp))
-               return 0.1;
-
-       disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
-       if (disbursion > 0.0)
-               return disbursion;
-
-       return 0.1;
+       return (Cost) get_attdisbursion(relid, var->varattno, 0.1);
 }
index a0e0c7ad7fc345b5490b7d10ec1cb143eedf00a9..298c7aeed1601bdc85fabe1ea69564b2dc569328 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.37 1999/08/02 02:05:41 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.38 1999/08/09 03:16:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -52,7 +52,6 @@ static bool getattstatistics(Oid relid, AttrNumber attnum,
                                                         Datum *commonval,
                                                         Datum *loval,
                                                         Datum *hival);
-static double getattdisbursion(Oid relid, AttrNumber attnum);
 
 
 /*
@@ -172,7 +171,7 @@ eqsel(Oid opid,
                        /* No VACUUM ANALYZE stats available, so make a guess using
                         * the disbursion stat (if we have that, which is unlikely...)
                         */
-                       selec = getattdisbursion(relid, attno);
+                       selec = get_attdisbursion(relid, attno, 0.01);
                }
 
                *result = (float64data) selec;
@@ -374,8 +373,8 @@ eqjoinsel(Oid opid,
                *result = 0.1;
        else
        {
-               num1 = getattdisbursion(relid1, attno1);
-               num2 = getattdisbursion(relid2, attno2);
+               num1 = get_attdisbursion(relid1, attno1, 0.01);
+               num2 = get_attdisbursion(relid2, attno2, 0.01);
                max = (num1 > num2) ? num1 : num2;
                if (max <= 0)
                        *result = 1.0;
@@ -675,60 +674,6 @@ getattstatistics(Oid relid, AttrNumber attnum, Oid typid, int32 typmod,
        return true;
 }
 
-/*
- * getattdisbursion
- *       Retrieve the disbursion statistic for an attribute,
- *       or produce an estimate if no info is available.
- */
-static double
-getattdisbursion(Oid relid, AttrNumber attnum)
-{
-       HeapTuple       atp;
-       double          disbursion;
-       int32           ntuples;
-
-       atp = SearchSysCacheTuple(ATTNUM,
-                                                         ObjectIdGetDatum(relid),
-                                                         Int16GetDatum(attnum),
-                                                         0, 0);
-       if (!HeapTupleIsValid(atp))
-       {
-               /* this should not happen */
-               elog(ERROR, "getattdisbursion: no attribute tuple %u %d",
-                        relid, attnum);
-               return 0.1;
-       }
-
-       disbursion = ((Form_pg_attribute) GETSTRUCT(atp))->attdisbursion;
-       if (disbursion > 0.0)
-               return disbursion;
-
-       /* VACUUM ANALYZE has not stored a disbursion statistic for us.
-        * Produce an estimate = 1/numtuples.  This may produce
-        * unreasonably small estimates for large tables, so limit
-        * the estimate to no less than 0.01.
-        */
-       atp = SearchSysCacheTuple(RELOID,
-                                                         ObjectIdGetDatum(relid),
-                                                         0, 0, 0);
-       if (!HeapTupleIsValid(atp))
-       {
-               /* this should not happen */
-               elog(ERROR, "getattdisbursion: no relation tuple %u", relid);
-               return 0.1;
-       }
-
-       ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-       if (ntuples > 0)
-               disbursion = 1.0 / (double) ntuples;
-
-       if (disbursion < 0.01)
-               disbursion = 0.01;
-
-       return disbursion;
-}
-
 float64
 btreesel(Oid operatorObjectId,
                 Oid indrelid,