granicus.if.org Git - postgresql/commitdiff
Be more wary of missing statistics in eqjoinsel_semi().
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 12 Apr 2011 05:59:51 +0000 (01:59 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 12 Apr 2011 05:59:51 +0000 (01:59 -0400)
In particular, if we don't have real ndistinct estimates for both sides,
fall back to assuming that half of the left-hand rows have join partners.
This is what was done in 8.2 and 8.3 (cf nulltestsel() in those versions).
It's pretty stupid but it won't lead us to think that an antijoin produces
no rows out, as seen in a recent example from Uwe Schroeder.

src/backend/utils/adt/selfuncs.c

index 38a8a89ad3c4d2b0e790d70cfe7076a1e31110a3..3333b7a8086caa75b24e1258ace5fdb419c37de9 100644 (file)
@@ -2261,7 +2261,9 @@ eqjoinsel_semi(Oid operator,
                bool       *hasmatch1;
                bool       *hasmatch2;
                double          nullfrac1 = stats1->stanullfrac;
-               double          matchfreq1;
+               double          matchfreq1,
+                                       uncertainfrac,
+                                       uncertain;
                int                     i,
                                        nmatches;
 
@@ -2314,18 +2316,26 @@ eqjoinsel_semi(Oid operator,
                 * the uncertain rows that a fraction nd2/nd1 have join partners. We
                 * can discount the known-matched MCVs from the distinct-values counts
                 * before doing the division.
+                *
+                * Crude as the above is, it's completely useless if we don't have
+                * reliable ndistinct values for both sides.  Hence, if either nd1
+                * or nd2 is default, punt and assume half of the uncertain rows
+                * have join partners.
                 */
-               nd1 -= nmatches;
-               nd2 -= nmatches;
-               if (nd1 <= nd2 || nd2 <= 0)
-                       selec = Max(matchfreq1, 1.0 - nullfrac1);
-               else
+               if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
                {
-                       double          uncertain = 1.0 - matchfreq1 - nullfrac1;
-
-                       CLAMP_PROBABILITY(uncertain);
-                       selec = matchfreq1 + (nd2 / nd1) * uncertain;
+                       nd1 -= nmatches;
+                       nd2 -= nmatches;
+                       if (nd1 <= nd2 || nd2 <= 0)
+                               uncertainfrac = 1.0;
+                       else
+                               uncertainfrac = nd2 / nd1;
                }
+               else
+                       uncertainfrac = 0.5;
+               uncertain = 1.0 - matchfreq1 - nullfrac1;
+               CLAMP_PROBABILITY(uncertain);
+               selec = matchfreq1 + uncertainfrac * uncertain;
        }
        else
        {
@@ -2335,15 +2345,20 @@ eqjoinsel_semi(Oid operator,
                 */
                double          nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
 
-               if (vardata1->rel)
-                       nd1 = Min(nd1, vardata1->rel->rows);
-               if (vardata2->rel)
-                       nd2 = Min(nd2, vardata2->rel->rows);
+               if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+               {
+                       if (vardata1->rel)
+                               nd1 = Min(nd1, vardata1->rel->rows);
+                       if (vardata2->rel)
+                               nd2 = Min(nd2, vardata2->rel->rows);
 
-               if (nd1 <= nd2 || nd2 <= 0)
-                       selec = 1.0 - nullfrac1;
+                       if (nd1 <= nd2 || nd2 <= 0)
+                               selec = 1.0 - nullfrac1;
+                       else
+                               selec = (nd2 / nd1) * (1.0 - nullfrac1);
+               }
                else
-                       selec = (nd2 / nd1) * (1.0 - nullfrac1);
+                       selec = 0.5 * (1.0 - nullfrac1);
        }
 
        if (have_mcvs1)