]> granicus.if.org Git - postgresql/commitdiff
Use Foreign Key relationships to infer multi-column join selectivity
authorSimon Riggs <simon@2ndQuadrant.com>
Fri, 8 Apr 2016 01:51:09 +0000 (02:51 +0100)
committerSimon Riggs <simon@2ndQuadrant.com>
Fri, 8 Apr 2016 01:51:09 +0000 (02:51 +0100)
In cases where joins use multiple columns we currently assess each join
separately causing gross mis-estimates for join cardinality.

This patch adds use of FK information for the first time into the
planner. When FKs are present and we have multi-column join information,
plan estimates will be drastically improved. Cases with multiple FKs
are handled, though partial matches are ignored currently.

Net effect is substantial performance improvements for joins in many
common cases. Additional planning time is isolated to cases that are
currently performing poorly, measured at 0.08 - 0.15 ms.

Please watch for planner performance regressions; circumstances seem
unlikely but the law of unintended consequences may apply somewhen.
Additional complex tests welcome to prove this before release.

Tests can be performed using SET enable_fkey_estimates = on | off
using scripts provided during Hackers discussions, message id:
552335D9.3090707@2ndquadrant.com

Authors: Tomas Vondra and David Rowley
Reviewed and tested by Simon Riggs, adding comments only

src/backend/optimizer/path/costsize.c
src/backend/utils/misc/guc.c
src/include/optimizer/cost.h
src/include/optimizer/paths.h
src/test/regress/expected/rangefuncs.out

index 70a4c276e4735c19b3068f44104429c689a9f7d6..8e4caeba0a9b6107d687cd06b9909043309fa6a9 100644 (file)
@@ -126,6 +126,7 @@ bool                enable_nestloop = true;
 bool           enable_material = true;
 bool           enable_mergejoin = true;
 bool           enable_hashjoin = true;
+bool           enable_fkey_estimates = true;
 
 typedef struct
 {
@@ -3887,6 +3888,361 @@ get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
        return nrows;
 }
 
+/*
+ * quals_match_foreign_key
+ *             Determines if the foreign key is matched by joinquals.
+ *
+ * Checks that there are conditions on all columns of the foreign key, matching
+ * the operator used by the foreign key etc. If such complete match is found,
+ * the function returns bitmap identifying the matching quals (0-based).
+ *
+ * Otherwise (no match at all or incomplete match), NULL is returned.
+ *
+ * XXX It seems possible in the future to do something useful when a
+ * partial match occurs between join and FK, but that is less common
+ * and that part isn't worked out yet.
+ */
+static Bitmapset *
+quals_match_foreign_key(PlannerInfo *root, ForeignKeyOptInfo *fkinfo,
+                                               RelOptInfo *fkrel, RelOptInfo *foreignrel,
+                                               List *joinquals)
+{
+       int i;
+       int nkeys = fkinfo->nkeys;
+       Bitmapset *qualmatches = NULL;
+       Bitmapset *fkmatches = NULL;
+
+       /*
+        * Loop over each column of the foreign key and build a bitmapset
+        * of each joinqual which matches. Note that we don't stop when we find
+        * the first match, as the expression could be duplicated in the
+        * joinquals, and we want to generate a bitmapset which has bits set for
+        * every matching join qual.
+        */
+       for (i = 0; i < nkeys; i++)
+       {
+               ListCell *lc;
+               int quallstidx = -1;
+
+               foreach(lc, joinquals)
+               {
+                       RestrictInfo   *rinfo;
+                       OpExpr             *clause;
+                       Var                        *leftvar;
+                       Var                        *rightvar;
+
+                       quallstidx++;
+
+                       /*
+                        * Technically we don't need to, but here we skip this qual if
+                        * we've matched it to part of the foreign key already. This
+                        * should prove to be a useful optimization when the quals appear
+                        * in the same order as the foreign key's keys. We need only bother
+                        * doing this when the foreign key is made up of more than 1 set
+                        * of columns, and we're not testing the first column.
+                        */
+                       if (i > 0 && bms_is_member(quallstidx, qualmatches))
+                               continue;
+
+                       /*
+                        * Here since 'usefulquals' only contains bitmap indexes for quals
+                        * of type "var op var" we can safely skip checking this.
+                        */
+                       rinfo = (RestrictInfo *) lfirst(lc);
+                       clause = (OpExpr *) rinfo->clause;
+
+                       /*
+                        * If the operator does not match then there's little point in
+                        * checking the operands.
+                        */
+                       if (clause->opno != fkinfo->conpfeqop[i])
+                               continue;
+
+                       leftvar = (Var *) get_leftop((Expr *) clause);
+                       rightvar = (Var *) get_rightop((Expr *) clause);
+
+                       /* Foreign keys only support Vars, so ignore anything more complex */
+                       if (!IsA(leftvar, Var) || !IsA(rightvar, Var))
+                               continue;
+
+                       /*
+                        * For RestrictInfos built from an eclass we must consider each
+                        * member of the eclass as rinfo's operands may not belong to the
+                        * foreign key. For efficient tracking of which Vars we've found,
+                        * since we're only tracking 2 Vars, we use a bitmask. We can
+                        * safely finish searching when both of the least significant bits
+                        * are set.
+                        */
+                       if (rinfo->parent_ec)
+                       {
+                               EquivalenceClass   *ec = rinfo->parent_ec;
+                               ListCell                   *lc2;
+                               int                                     foundvarmask = 0;
+
+                               foreach(lc2, ec->ec_members)
+                               {
+                                       EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+                                       Var *var = (Var *) em->em_expr;
+
+                                       if (!IsA(var, Var))
+                                               continue;
+
+                                       if (foreignrel->relid == var->varno &&
+                                               fkinfo->confkeys[i] == var->varattno)
+                                               foundvarmask |= 1;
+
+                                       else if (fkrel->relid == var->varno &&
+                                               fkinfo->conkeys[i] == var->varattno)
+                                               foundvarmask |= 2;
+
+                                       /*
+                                        * Check if we've found both matches. If found we add
+                                        * this qual to the matched list and mark this key as
+                                        * matched too.
+                                        */
+                                       if (foundvarmask == 3)
+                                       {
+                                               qualmatches = bms_add_member(qualmatches, quallstidx);
+                                               fkmatches = bms_add_member(fkmatches, i);
+                                               break;
+                                       }
+                               }
+                       }
+                       else
+                       {
+                               /*
+                                * In this non eclass RestrictInfo case we'll check if the left
+                                * and right Vars match to this part of the foreign key.
+                                * Remember that this could be written with the Vars in either
+                                * order, so we test both permutations of the expression.
+                                */
+                               if ((foreignrel->relid == leftvar->varno) &&
+                                       (fkrel->relid == rightvar->varno) &&
+                                       (fkinfo->confkeys[i] == leftvar->varattno) &&
+                                       (fkinfo->conkeys[i] == rightvar->varattno))
+                               {
+                                       qualmatches = bms_add_member(qualmatches, quallstidx);
+                                       fkmatches = bms_add_member(fkmatches, i);
+                               }
+                               else if ((foreignrel->relid == rightvar->varno) &&
+                                                (fkrel->relid == leftvar->varno) &&
+                                                (fkinfo->confkeys[i] == rightvar->varattno) &&
+                                                (fkinfo->conkeys[i] == leftvar->varattno))
+                               {
+                                       qualmatches = bms_add_member(qualmatches, quallstidx);
+                                       fkmatches = bms_add_member(fkmatches, i);
+                               }
+                       }
+               }
+       }
+
+       /* can't find more matches than columns in the foreign key */
+       Assert(bms_num_members(fkmatches) <= nkeys);
+
+       /* Only return the matches if the foreign key is matched fully. */
+       if (bms_num_members(fkmatches) == nkeys)
+       {
+               bms_free(fkmatches);
+               return qualmatches;
+       }
+
+       bms_free(fkmatches);
+       bms_free(qualmatches);
+
+       return NULL;
+}
+
+/*
+ * find_best_foreign_key_quals
+ *             Finds the foreign key best matching the joinquals.
+ *
+ * Analyzes joinquals to determine if any quals match foreign keys defined the
+ * two relations (fkrel referencing foreignrel). When multiple foreign keys
+ * match, we choose the one with the most keys as the best one because of the
+ * way estimation occurs in clauselist_join_selectivity().  We could choose
+ * the FK matching the most quals, however we assume the quals may be duplicated.
+ *
+ * We also track which joinquals match the current foreign key, so that we can
+ * easily skip then when computing the selectivity.
+ *
+ * When no matching foreign key is found we return 0, otherwise we return the
+ * number of keys in the foreign key.
+ *
+ * Foreign keys matched only partially are currently ignored.
+ */
+static int
+find_best_foreign_key_quals(PlannerInfo *root, RelOptInfo *fkrel,
+                                                       RelOptInfo *foreignrel, List *joinquals,
+                                                       Bitmapset **joinqualsbitmap)
+{
+       Bitmapset          *qualbestmatch;
+       ListCell           *lc;
+       int                             bestmatchnkeys;
+
+       /*
+        * fast path out when there's no foreign keys on fkrel, or when use of
+        * foreign keys for estimation is disabled by GUC
+        */
+       if ((fkrel->fkeylist == NIL) || (!enable_fkey_estimates))
+       {
+               *joinqualsbitmap = NULL;
+               return 0;
+       }
+
+       qualbestmatch = NULL;
+       bestmatchnkeys = 0;
+
+       /* now check the matches for each foreign key defined on the fkrel */
+       foreach(lc, fkrel->fkeylist)
+       {
+               ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
+               Bitmapset *qualsmatched;
+
+               /*
+                * We make no attempt in checking that this foreign key actually
+                * references 'foreignrel', the reasoning here is that we may be able
+                * to match the foreign key to an eclass member Var of a RestrictInfo
+                * that's in qualslist, this Var may belong to some other relation.
+                *
+                * XXX Is this assumption safe in all cases? Maybe not, but does
+                * it lead to a worse estimate than the previous approach? Doubt it.
+                */
+               qualsmatched = quals_match_foreign_key(root, fkinfo, fkrel, foreignrel,
+                                                                                          joinquals);
+
+               /* Did we get a match? And is that match better than a previous one? */
+               if (qualsmatched != NULL && fkinfo->nkeys > bestmatchnkeys)
+               {
+                       /* save the new best match */
+                       bms_free(qualbestmatch);
+                       qualbestmatch = qualsmatched;
+                       bestmatchnkeys = fkinfo->nkeys;
+               }
+       }
+
+       *joinqualsbitmap = qualbestmatch;
+       return bestmatchnkeys;
+}
+
+/*
+ * clauselist_join_selectivity
+ *             Estimate selectivity of join clauses either by using foreign key info
+ *             or by using the regular clauselist_selectivity().
+ *
+ * Since selectivity estimates for each joinqual are multiplied together, this
+ * can cause significant underestimates on the number of join tuples in cases
+ * where there's more than 1 clause in the join condition. To help ease the
+ * pain here we make use of foreign keys, and we assume that 1 row will match
+ * when *all* of the foreign key columns are present in the join condition, any
+ * additional clauses are estimated using clauselist_selectivity().
+ *
+ * Note this ignores whether the FK is invalid or currently deferred; we don't
+ * rely on this assumption for correctness of the query, so it is a reasonable
+ * and safe assumption for planning purposes.
+ */
+static Selectivity
+clauselist_join_selectivity(PlannerInfo *root, List *joinquals,
+                                                       JoinType jointype, SpecialJoinInfo *sjinfo)
+{
+       int                             outerid;
+       int                             innerid;
+       Selectivity             sel = 1.0;
+       Bitmapset          *foundfkquals = NULL;
+
+       innerid = -1;
+       while ((innerid = bms_next_member(sjinfo->min_righthand, innerid)) >= 0)
+       {
+               RelOptInfo *innerrel = find_base_rel(root, innerid);
+
+               outerid = -1;
+               while ((outerid = bms_next_member(sjinfo->min_lefthand, outerid)) >= 0)
+               {
+                       RelOptInfo         *outerrel = find_base_rel(root, outerid);
+                       Bitmapset          *outer2inner;
+                       Bitmapset          *inner2outer;
+                       int                             innermatches;
+                       int                             outermatches;
+
+                       /*
+                        * check which quals are matched by a foreign key referencing the
+                        * innerrel.
+                        */
+                       outermatches = find_best_foreign_key_quals(root, outerrel,
+                                                                                       innerrel, joinquals, &outer2inner);
+
+                       /* do the same, but with relations swapped */
+                       innermatches = find_best_foreign_key_quals(root, innerrel,
+                                                                                       outerrel, joinquals, &inner2outer);
+
+                       /*
+                        * did we find any matches at all? If so we need to see which one is
+                        * the best/longest match
+                        */
+                       if (outermatches != 0 || innermatches != 0)
+                       {
+                               double  referenced_tuples;
+                               bool overlap;
+
+                               /* either could be zero, but not both. */
+                               if (outermatches < innermatches)
+                               {
+                                       overlap = bms_overlap(foundfkquals, inner2outer);
+
+                                       foundfkquals = bms_add_members(foundfkquals, inner2outer);
+                                       referenced_tuples = Max(outerrel->tuples, 1.0);
+                               }
+                               else
+                               {
+                                       overlap = bms_overlap(foundfkquals, outer2inner);
+
+                                       foundfkquals = bms_add_members(foundfkquals, outer2inner);
+                                       referenced_tuples = Max(innerrel->tuples, 1.0);
+                               }
+
+                               /*
+                                * XXX should we ignore these overlapping matches?
+                                * Or perhaps take the Max() or Min()?
+                                */
+                               if (overlap)
+                               {
+                                       if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+                                               sel = Min(sel,Min(1.0 / (outerrel->tuples / Max(innerrel->tuples, 1.0)), 1.0));
+                                       else
+                                               sel = Min(sel, 1.0 / referenced_tuples);
+                               }
+                               else
+                               {
+                                       if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+                                               sel *= Min(1.0 / (outerrel->tuples / Max(innerrel->tuples, 1.0)), 1.0);
+                                       else
+                                               sel *= 1.0 / referenced_tuples;
+                               }
+                       }
+               }
+       }
+
+       /*
+        * If any non matched quals exist then we build a list of the non-matches
+        * and use clauselist_selectivity() to estimate the selectivity of these.
+        */
+       if (bms_num_members(foundfkquals) < list_length(joinquals))
+       {
+               ListCell *lc;
+               int lstidx = 0;
+               List *nonfkeyclauses = NIL;
+
+               foreach (lc, joinquals)
+               {
+                       if (!bms_is_member(lstidx, foundfkquals))
+                               nonfkeyclauses = lappend(nonfkeyclauses, lfirst(lc));
+                       lstidx++;
+               }
+               sel *= clauselist_selectivity(root, nonfkeyclauses, 0, jointype, sjinfo);
+       }
+
+       return sel;
+}
+
 /*
  * calc_joinrel_size_estimate
  *             Workhorse for set_joinrel_size_estimates and
@@ -3933,11 +4289,11 @@ calc_joinrel_size_estimate(PlannerInfo *root,
                }
 
                /* Get the separate selectivities */
-               jselec = clauselist_selectivity(root,
-                                                                               joinquals,
-                                                                               0,
-                                                                               jointype,
-                                                                               sjinfo);
+               jselec = clauselist_join_selectivity(root,
+                                                                                        joinquals,
+                                                                                        jointype,
+                                                                                        sjinfo);
+
                pselec = clauselist_selectivity(root,
                                                                                pushedquals,
                                                                                0,
@@ -3950,11 +4306,10 @@ calc_joinrel_size_estimate(PlannerInfo *root,
        }
        else
        {
-               jselec = clauselist_selectivity(root,
-                                                                               restrictlist,
-                                                                               0,
-                                                                               jointype,
-                                                                               sjinfo);
+               jselec = clauselist_join_selectivity(root,
+                                                                                        restrictlist,
+                                                                                        jointype,
+                                                                                        sjinfo);
                pselec = 0.0;                   /* not used, keep compiler quiet */
        }
 
index b7afe6b39c4a91f4b49163a47ec1d60d15a8ebe6..a07050017ee0a73e1a6dfddaef56811a61ebf5ee 100644 (file)
@@ -877,6 +877,15 @@ static struct config_bool ConfigureNamesBool[] =
                true,
                NULL, NULL, NULL
        },
+       {
+               {"enable_fkey_estimates", PGC_USERSET, QUERY_TUNING_METHOD,
+                       gettext_noop("Enables use of foreign keys for estimating joins."),
+                       NULL
+               },
+               &enable_fkey_estimates,
+               true,
+               NULL, NULL, NULL
+       },
 
        {
                {"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
index d4adca6836a30ead68c5299bd77da66d3ba34129..58ac1638ec1a27b9161a2effb370ef9b4480c502 100644 (file)
@@ -66,6 +66,7 @@ extern bool enable_nestloop;
 extern bool enable_material;
 extern bool enable_mergejoin;
 extern bool enable_hashjoin;
+extern bool enable_fkey_estimates;
 extern int     constraint_exclusion;
 
 extern double clamp_row_est(double nrows);
index 7ad4f026a36eae30150c9d76547673cfaab688af..d9a3c1f2326082df98b4dcb3622c1713f7b78916 100644 (file)
@@ -76,6 +76,8 @@ extern Expr *adjust_rowcompare_for_index(RowCompareExpr *clause,
                                                        int indexcol,
                                                        List **indexcolnos,
                                                        bool *var_on_left_p);
+extern bool has_matching_fkey(RelOptInfo *rel, RelOptInfo *frel, List *clauses,
+                                                         bool reverse);
 
 /*
  * tidpath.h
index 00ef4210549879668afa69344e8b51bccbfb3afb..abe0dc983ee3fac4262c7a8a56762db6a26e02bb 100644 (file)
@@ -1,18 +1,19 @@
 SELECT name, setting FROM pg_settings WHERE name LIKE 'enable%';
-         name         | setting 
-----------------------+---------
- enable_bitmapscan    | on
- enable_hashagg       | on
- enable_hashjoin      | on
- enable_indexonlyscan | on
- enable_indexscan     | on
- enable_material      | on
- enable_mergejoin     | on
- enable_nestloop      | on
- enable_seqscan       | on
- enable_sort          | on
- enable_tidscan       | on
-(11 rows)
+         name          | setting 
+-----------------------+---------
+ enable_bitmapscan     | on
+ enable_fkey_estimates | on
+ enable_hashagg        | on
+ enable_hashjoin       | on
+ enable_indexonlyscan  | on
+ enable_indexscan      | on
+ enable_material       | on
+ enable_mergejoin      | on
+ enable_nestloop       | on
+ enable_seqscan        | on
+ enable_sort           | on
+ enable_tidscan        | on
+(12 rows)
 
 CREATE TABLE foo2(fooid int, f2 int);
 INSERT INTO foo2 VALUES(1, 11);