]> granicus.if.org Git - postgresql/commitdiff
Fix FK-based join selectivity estimation for semi/antijoins.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 17 Dec 2016 20:28:54 +0000 (15:28 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 17 Dec 2016 20:28:54 +0000 (15:28 -0500)
This case wasn't thought through sufficiently in commit 100340e2d.
It's true that the FK proves that every outer row has a match in the
inner table, but we forgot that some of the inner rows might be filtered
away by WHERE conditions located within the semijoin's RHS.

If the RHS is just one table, we can reasonably take the semijoin
selectivity as equal to the fraction of the referenced table's rows
that are expected to survive its restriction clauses.

If the RHS is a join, it's not clear how much of the referenced table
might get through the join, so fall back to the same rule we were
already using for other outer-join cases: use the minimum of the
regular per-clause selectivity estimates.  This gives the same result
as if we hadn't considered the FK at all when there's a single FK
column, but it should still help for multi-column FKs, which is the
case that 100340e2d is really meant to help with.

Back-patch to 9.6 where the previous commit came in.

Discussion: https://postgr.es/m/16149.1481835103@sss.pgh.pa.us

src/backend/optimizer/path/costsize.c

index e42895dc31c6ba8e5eb621a72d61e14d5506d56f..415edadd3e079e6e2bf32d4cd5e5241d0ae51a7f 100644 (file)
@@ -4085,6 +4085,7 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
        {
                ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
                bool            ref_is_outer;
+               bool            use_smallest_selectivity = false;
                List       *removedlist;
                ListCell   *cell;
                ListCell   *prev;
@@ -4205,9 +4206,9 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
                 * be double-counting the null fraction, and (2) it's not very clear
                 * how to combine null fractions for multiple referencing columns.
                 *
-                * In the first branch of the logic below, null derating is done
-                * implicitly by relying on clause_selectivity(); in the other two
-                * paths, we do nothing for now about correcting for nulls.
+                * In the use_smallest_selectivity code below, null derating is done
+                * implicitly by relying on clause_selectivity(); in the other cases,
+                * we do nothing for now about correcting for nulls.
                 *
                 * XXX another point here is that if either side of an FK constraint
                 * is an inheritance parent, we estimate as though the constraint
@@ -4230,28 +4231,41 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
                         * the smallest per-column selectivity, instead.  (This should
                         * correspond to the FK column with the most nulls.)
                         */
-                       Selectivity thisfksel = 1.0;
-
-                       foreach(cell, removedlist)
-                       {
-                               RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
-                               Selectivity csel;
-
-                               csel = clause_selectivity(root, (Node *) rinfo,
-                                                                                 0, jointype, sjinfo);
-                               thisfksel = Min(thisfksel, csel);
-                       }
-                       fkselec *= thisfksel;
+                       use_smallest_selectivity = true;
                }
                else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
                {
                        /*
                         * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the
-                        * fraction of LHS rows that have matches.  If the referenced
-                        * table is on the inner side, that means the selectivity is 1.0
-                        * (modulo nulls, which we're ignoring for now).  We already
-                        * covered the other case, so no work here.
+                        * fraction of LHS rows that have matches.  The referenced table
+                        * is on the inner side (we already handled the other case above),
+                        * so the FK implies that every LHS row has a match *in the
+                        * referenced table*.  But any restriction or join clauses below
+                        * here will reduce the number of matches.
                         */
+                       if (bms_membership(inner_relids) == BMS_SINGLETON)
+                       {
+                               /*
+                                * When the inner side of the semi/anti join is just the
+                                * referenced table, we may take the FK selectivity as equal
+                                * to the selectivity of the table's restriction clauses.
+                                */
+                               RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
+                               double          ref_tuples = Max(ref_rel->tuples, 1.0);
+
+                               fkselec *= ref_rel->rows / ref_tuples;
+                       }
+                       else
+                       {
+                               /*
+                                * When the inner side of the semi/anti join is itself a join,
+                                * it's hard to guess what fraction of the referenced table
+                                * will get through the join.  But we still don't want to
+                                * multiply per-column estimates together.  Take the smallest
+                                * per-column selectivity, instead.
+                                */
+                               use_smallest_selectivity = true;
+                       }
                }
                else
                {
@@ -4265,6 +4279,26 @@ get_foreign_key_join_selectivity(PlannerInfo *root,
 
                        fkselec *= 1.0 / ref_tuples;
                }
+
+               /*
+                * Common code for cases where we should use the smallest selectivity
+                * that would be computed for any one of the FK's clauses.
+                */
+               if (use_smallest_selectivity)
+               {
+                       Selectivity thisfksel = 1.0;
+
+                       foreach(cell, removedlist)
+                       {
+                               RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+                               Selectivity csel;
+
+                               csel = clause_selectivity(root, (Node *) rinfo,
+                                                                                 0, jointype, sjinfo);
+                               thisfksel = Min(thisfksel, csel);
+                       }
+                       fkselec *= thisfksel;
+               }
        }
 
        *restrictlist = worklist;