]> granicus.if.org Git - postgresql/commitdiff
Weaken the planner's tests for relevant joinclauses.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 13 Apr 2012 19:32:34 +0000 (15:32 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 13 Apr 2012 20:07:17 +0000 (16:07 -0400)
We should be willing to cross-join two small relations if that allows us
to use an inner indexscan on a large relation (that is, the potential
indexqual for the large table requires both smaller relations).  This
worked in simple cases but fell apart as soon as there was a join clause
to a fourth relation, because the existence of any two-relation join clause
caused the planner to not consider clauseless joins between other base
relations.  The added regression test shows an example case adapted from
a recent complaint from Benoit Delbosc.

Adjust have_relevant_joinclause, have_relevant_eclass_joinclause, and
has_relevant_eclass_joinclause to consider that a join clause mentioning
three or more relations is sufficient grounds for joining any subset of
those relations, even if we have to do so via a cartesian join.  Since such
clauses are relatively uncommon, this shouldn't affect planning speed on
typical queries; in fact it should help a bit, because the latter two
functions in particular get significantly simpler.

Although this is arguably a bug fix, I'm not going to risk back-patching
it, since it might have currently-unforeseen consequences.

src/backend/optimizer/path/equivclass.c
src/backend/optimizer/path/joinrels.c
src/backend/optimizer/util/joininfo.c
src/test/regress/expected/join.out
src/test/regress/sql/join.sql

index c115c2148db109476dd607f99a178e25a5f76a24..5cdfec542cde91ded3271a62b9da53b63cabc20c 100644 (file)
@@ -2035,7 +2035,7 @@ get_parent_relid(PlannerInfo *root, RelOptInfo *rel)
 /*
  * have_relevant_eclass_joinclause
  *             Detect whether there is an EquivalenceClass that could produce
- *             a joinclause between the two given relations.
+ *             a joinclause involving the two given relations.
  *
  * This is essentially a very cut-down version of
  * generate_join_implied_equalities(). Note it's OK to occasionally say "yes"
@@ -2051,9 +2051,6 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
        foreach(lc1, root->eq_classes)
        {
                EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
-               bool            has_rel1;
-               bool            has_rel2;
-               ListCell   *lc2;
 
                /*
                 * Won't generate joinclauses if single-member (this test covers the
@@ -2063,9 +2060,18 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
                        continue;
 
                /*
+                * We do not need to examine the individual members of the EC, because
+                * all that we care about is whether each rel overlaps the relids of
+                * at least one member, and a test on ec_relids is sufficient to prove
+                * that.  (As with have_relevant_joinclause(), it is not necessary
+                * that the EC be able to form a joinclause relating exactly the two
+                * given rels, only that it be able to form a joinclause mentioning
+                * both, and this will surely be true if both of them overlap
+                * ec_relids.)
+                *
                 * Note we don't test ec_broken; if we did, we'd need a separate code
-                * path to look through ec_sources.  Checking the members anyway is OK
-                * as a possibly-overoptimistic heuristic.
+                * path to look through ec_sources.  Checking the membership anyway is
+                * OK as a possibly-overoptimistic heuristic.
                 *
                 * We don't test ec_has_const either, even though a const eclass won't
                 * generate real join clauses.  This is because if we had "WHERE a.x =
@@ -2073,35 +2079,8 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
                 * since the join result is likely to be small even though it'll end
                 * up being an unqualified nestloop.
                 */
-
-               /* Needn't scan if it couldn't contain members from each rel */
-               if (!bms_overlap(rel1->relids, ec->ec_relids) ||
-                       !bms_overlap(rel2->relids, ec->ec_relids))
-                       continue;
-
-               /* Scan the EC to see if it has member(s) in each rel */
-               has_rel1 = has_rel2 = false;
-               foreach(lc2, ec->ec_members)
-               {
-                       EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
-
-                       if (cur_em->em_is_const || cur_em->em_is_child)
-                               continue;               /* ignore consts and children here */
-                       if (bms_is_subset(cur_em->em_relids, rel1->relids))
-                       {
-                               has_rel1 = true;
-                               if (has_rel2)
-                                       break;
-                       }
-                       if (bms_is_subset(cur_em->em_relids, rel2->relids))
-                       {
-                               has_rel2 = true;
-                               if (has_rel1)
-                                       break;
-                       }
-               }
-
-               if (has_rel1 && has_rel2)
+               if (bms_overlap(rel1->relids, ec->ec_relids) &&
+                       bms_overlap(rel2->relids, ec->ec_relids))
                        return true;
        }
 
@@ -2112,7 +2091,7 @@ have_relevant_eclass_joinclause(PlannerInfo *root,
 /*
  * has_relevant_eclass_joinclause
  *             Detect whether there is an EquivalenceClass that could produce
- *             a joinclause between the given relation and anything else.
+ *             a joinclause involving the given relation and anything else.
  *
  * This is the same as have_relevant_eclass_joinclause with the other rel
  * implicitly defined as "everything else in the query".
@@ -2125,9 +2104,6 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
        foreach(lc1, root->eq_classes)
        {
                EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
-               bool            has_rel1;
-               bool            has_rel2;
-               ListCell   *lc2;
 
                /*
                 * Won't generate joinclauses if single-member (this test covers the
@@ -2137,45 +2113,11 @@ has_relevant_eclass_joinclause(PlannerInfo *root, RelOptInfo *rel1)
                        continue;
 
                /*
-                * Note we don't test ec_broken; if we did, we'd need a separate code
-                * path to look through ec_sources.  Checking the members anyway is OK
-                * as a possibly-overoptimistic heuristic.
-                *
-                * We don't test ec_has_const either, even though a const eclass won't
-                * generate real join clauses.  This is because if we had "WHERE a.x =
-                * b.y and a.x = 42", it is worth considering a join between a and b,
-                * since the join result is likely to be small even though it'll end
-                * up being an unqualified nestloop.
+                * Per the comment in have_relevant_eclass_joinclause, it's sufficient
+                * to find an EC that mentions both this rel and some other rel.
                 */
-
-               /* Needn't scan if it couldn't contain members from each rel */
-               if (!bms_overlap(rel1->relids, ec->ec_relids) ||
-                       bms_is_subset(ec->ec_relids, rel1->relids))
-                       continue;
-
-               /* Scan the EC to see if it has member(s) in each rel */
-               has_rel1 = has_rel2 = false;
-               foreach(lc2, ec->ec_members)
-               {
-                       EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
-
-                       if (cur_em->em_is_const || cur_em->em_is_child)
-                               continue;               /* ignore consts and children here */
-                       if (bms_is_subset(cur_em->em_relids, rel1->relids))
-                       {
-                               has_rel1 = true;
-                               if (has_rel2)
-                                       break;
-                       }
-                       if (!bms_overlap(cur_em->em_relids, rel1->relids))
-                       {
-                               has_rel2 = true;
-                               if (has_rel1)
-                                       break;
-                       }
-               }
-
-               if (has_rel1 && has_rel2)
+               if (bms_overlap(rel1->relids, ec->ec_relids) &&
+                       !bms_is_subset(ec->ec_relids, rel1->relids))
                        return true;
        }
 
index 4a35d8d3a48d07dec2085e10783b690eb917886f..2ad0b969d2510d59a698699ad3e161f6309fec2e 100644 (file)
@@ -88,13 +88,9 @@ join_search_one_level(PlannerInfo *root, int level)
                        has_join_restriction(root, old_rel))
                {
                        /*
-                        * Note that if all available join clauses for this rel require
-                        * more than one other rel, we will fail to make any joins against
-                        * it here.  In most cases that's OK; it'll be considered by
-                        * "bushy plan" join code in a higher-level pass where we have
-                        * those other rels collected into a join rel.
-                        *
-                        * See also the last-ditch case below.
+                        * There are relevant join clauses or join order restrictions,
+                        * so consider joins between this rel and (only) those rels it is
+                        * linked to by a clause or restriction.
                         */
                        make_rels_by_clause_joins(root,
                                                                          old_rel,
@@ -160,8 +156,8 @@ join_search_one_level(PlannerInfo *root, int level)
                                {
                                        /*
                                         * OK, we can build a rel of the right level from this
-                                        * pair of rels.  Do so if there is at least one usable
-                                        * join clause or a relevant join restriction.
+                                        * pair of rels.  Do so if there is at least one relevant
+                                        * join clause or join order restriction.
                                         */
                                        if (have_relevant_joinclause(root, old_rel, new_rel) ||
                                                have_join_order_restriction(root, old_rel, new_rel))
index b582ccdc0042a7acf1ddcec2b32347747bc5c64f..20d57c514039592be75bf851da5558a2c7b488cd 100644 (file)
 
 /*
  * have_relevant_joinclause
- *             Detect whether there is a joinclause that can be used to join
+ *             Detect whether there is a joinclause that involves
  *             the two given relations.
+ *
+ * Note: the joinclause does not have to be evaluatable with only these two
+ * relations.  This is intentional.  For example consider
+ *             SELECT * FROM a, b, c WHERE a.x = (b.y + c.z)
+ * If a is much larger than the other tables, it may be worthwhile to
+ * cross-join b and c and then use an inner indexscan on a.x.  Therefore
+ * we should consider this joinclause as reason to join b to c, even though
+ * it can't be applied at that join step.
  */
 bool
 have_relevant_joinclause(PlannerInfo *root,
                                                 RelOptInfo *rel1, RelOptInfo *rel2)
 {
        bool            result = false;
-       Relids          join_relids;
        List       *joininfo;
+       Relids          other_relids;
        ListCell   *l;
 
-       join_relids = bms_union(rel1->relids, rel2->relids);
-
        /*
         * We could scan either relation's joininfo list; may as well use the
         * shorter one.
         */
        if (list_length(rel1->joininfo) <= list_length(rel2->joininfo))
+       {
                joininfo = rel1->joininfo;
+               other_relids = rel2->relids;
+       }
        else
+       {
                joininfo = rel2->joininfo;
+               other_relids = rel1->relids;
+       }
 
        foreach(l, joininfo)
        {
                RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
 
-               if (bms_is_subset(rinfo->required_relids, join_relids))
+               if (bms_overlap(other_relids, rinfo->required_relids))
                {
                        result = true;
                        break;
@@ -62,8 +74,6 @@ have_relevant_joinclause(PlannerInfo *root,
        if (!result && rel1->has_eclass_joins && rel2->has_eclass_joins)
                result = have_relevant_eclass_joinclause(root, rel1, rel2);
 
-       bms_free(join_relids);
-
        return result;
 }
 
index c0c72833339c65af560a55781758b4f5ac0f2079..e5dceff76586503b818e89bfa0b20466e68f5ae4 100644 (file)
@@ -2666,6 +2666,57 @@ select * from int4_tbl a full join int4_tbl b on false;
  -2147483647 |            
 (10 rows)
 
+--
+-- test for ability to use a cartesian join when necessary
+--
+explain (costs off)
+select * from
+  tenk1 join int4_tbl on f1 = twothousand,
+  int4(sin(1)) q1,
+  int4(sin(0)) q2
+where q1 = thousand or q2 = thousand;
+                                 QUERY PLAN                                  
+-----------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (tenk1.twothousand = int4_tbl.f1)
+   ->  Nested Loop
+         Join Filter: ((q1.q1 = tenk1.thousand) OR (q2.q2 = tenk1.thousand))
+         ->  Nested Loop
+               ->  Function Scan on q1
+               ->  Function Scan on q2
+         ->  Bitmap Heap Scan on tenk1
+               Recheck Cond: ((q1.q1 = thousand) OR (q2.q2 = thousand))
+               ->  BitmapOr
+                     ->  Bitmap Index Scan on tenk1_thous_tenthous
+                           Index Cond: (q1.q1 = thousand)
+                     ->  Bitmap Index Scan on tenk1_thous_tenthous
+                           Index Cond: (q2.q2 = thousand)
+   ->  Hash
+         ->  Seq Scan on int4_tbl
+(16 rows)
+
+explain (costs off)
+select * from
+  tenk1 join int4_tbl on f1 = twothousand,
+  int4(sin(1)) q1,
+  int4(sin(0)) q2
+where thousand = (q1 + q2);
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Hash Join
+   Hash Cond: (tenk1.twothousand = int4_tbl.f1)
+   ->  Nested Loop
+         ->  Nested Loop
+               ->  Function Scan on q1
+               ->  Function Scan on q2
+         ->  Bitmap Heap Scan on tenk1
+               Recheck Cond: (thousand = (q1.q1 + q2.q2))
+               ->  Bitmap Index Scan on tenk1_thous_tenthous
+                     Index Cond: (thousand = (q1.q1 + q2.q2))
+   ->  Hash
+         ->  Seq Scan on int4_tbl
+(12 rows)
+
 --
 -- test join removal
 --
index 2d53cf1725bd02c131ff3eb45f4b5eba00f64a35..bc9b7812cf845d844e61c77a82ec981214df6cd0 100644 (file)
@@ -689,6 +689,24 @@ order by 1,2;
 select * from int4_tbl a full join int4_tbl b on true;
 select * from int4_tbl a full join int4_tbl b on false;
 
+--
+-- test for ability to use a cartesian join when necessary
+--
+
+explain (costs off)
+select * from
+  tenk1 join int4_tbl on f1 = twothousand,
+  int4(sin(1)) q1,
+  int4(sin(0)) q2
+where q1 = thousand or q2 = thousand;
+
+explain (costs off)
+select * from
+  tenk1 join int4_tbl on f1 = twothousand,
+  int4(sin(1)) q1,
+  int4(sin(0)) q2
+where thousand = (q1 + q2);
+
 --
 -- test join removal
 --