Make eqsel produce better results for boolean columns, and adjust scalarltsel's handling of values at or outside the boundaries of the statistical range.

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 16 Feb 2000 00:59:27 +0000 (00:59 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 16 Feb 2000 00:59:27 +0000 (00:59 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 16 Feb 2000 00:59:27 +0000 (00:59 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 16 Feb 2000 00:59:27 +0000 (00:59 +0000)
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 30106744ded9f3e091e5dbbb5260eb3b62f69d3a..6b1f5cde26f1ec44f238985e6825a1d0e4ec2476 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.55 2000/02/15 20:49:21 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.56 2000/02/16 00:59:27 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -144,10 +144,13 @@ eqsel(Oid opid,
                                         selec = 1.0 - commonfrac - nullfrac;
                                         if (selec > commonfrac)
                                                 selec = commonfrac;
-                                       /* and in fact it's probably less, so apply a fudge
-                                        * factor.
+                                       /* and in fact it's probably less, so we should apply
+                                        * a fudge factor.  The only case where we don't is
+                                        * for a boolean column, where indeed we have estimated
+                                        * the less-common value's frequency exactly!
                                          */
-                                       selec *= 0.5;
+                                       if (typid != BOOLOID)
+                                               selec *= 0.5;
                                 }
                         }
                         else
@@ -310,20 +313,20 @@ scalarltsel(Oid opid,
                         /* If we trusted the stats fully, we could return a small or
                          * large selec depending on which side of the single data point
                          * the constant is on.  But it seems better to assume that the
-                        * stats are out of date and return a default...
+                        * stats are wrong and return a default...
                          */
                         *result = DEFAULT_INEQ_SEL;
-       }
-               else if (val <= low || val >= high)
+               }
+               else if (val < low || val > high)
                 {
                         /* If given value is outside the statistical range, return a
                          * small or large value; but not 0.0/1.0 since there is a chance
                          * the stats are out of date.
                          */
                         if (flag & SEL_RIGHT)
-                               *result = (val <= low) ? 0.01 : 0.99;
+                               *result = (val < low) ? 0.001 : 0.999;
                         else
-                               *result = (val <= low) ? 0.99 : 0.01;
+                               *result = (val < low) ? 0.999 : 0.001;
                 }
                 else
                 {
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 16 Feb 2000 00:59:27 +0000 (00:59 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 16 Feb 2000 00:59:27 +0000 (00:59 +0000)