Reduce semijoins with unique inner relations to plain inner joins.

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index 39e2ddda906f5f7fc3e4dad17ca89013f24ff934..c130d2f17f2ece03afeb8bb9e54fefc4f36d9ac5 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -126,13 +126,15 @@ add_paths_to_joinrel(PlannerInfo *root,
          *
          * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
          * matter since the executor can make the equivalent optimization anyway;
-        * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, if
-        * the LHS covers all of the associated semijoin's min_lefthand, then it's
-        * appropriate to set inner_unique because the path produced by
-        * create_unique_path will be unique relative to the LHS.  (If we have an
-        * LHS that's only part of the min_lefthand, that is *not* true.)  For
-        * JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid letting that value escape
-        * this module.
+        * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
+        * must be considering a semijoin whose inner side is not provably unique
+        * (else reduce_unique_semijoins would've simplified it), so there's no
+        * point in calling innerrel_is_unique.  However, if the LHS covers all of
+        * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+        * because the path produced by create_unique_path will be unique relative
+        * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
+        * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
+        * letting that value escape this module.
          */
         switch (jointype)
         {
@@ -145,12 +147,20 @@ add_paths_to_joinrel(PlannerInfo *root,
                                                                                            outerrel->relids);
                         break;
                 case JOIN_UNIQUE_OUTER:
-                       extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
-                                                                                                       JOIN_INNER, restrictlist);
+                       extra.inner_unique = innerrel_is_unique(root,
+                                                                                                       outerrel->relids,
+                                                                                                       innerrel,
+                                                                                                       JOIN_INNER,
+                                                                                                       restrictlist,
+                                                                                                       false);
                         break;
                 default:
-                       extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
-                                                                                                       jointype, restrictlist);
+                       extra.inner_unique = innerrel_is_unique(root,
+                                                                                                       outerrel->relids,
+                                                                                                       innerrel,
+                                                                                                       jointype,
+                                                                                                       restrictlist,
+                                                                                                       false);
                         break;
         }
  
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c

index 69b9be4d76b34b04e03dc752c6ad0934526d64e0..34317fe7782b58f04d4030494069a836dcd5f2d4 100644 (file)
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
                                         List *clause_list);
  static Oid     distinct_col_search(int colno, List *colnos, List *opids);
  static bool is_innerrel_unique_for(PlannerInfo *root,
-                                          RelOptInfo *outerrel,
+                                          Relids outerrelids,
                                            RelOptInfo *innerrel,
                                            JoinType jointype,
                                            List *restrictlist);
@@ -495,6 +495,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
  }
  
  
+/*
+ * reduce_unique_semijoins
+ *             Check for semijoins that can be simplified to plain inner joins
+ *             because the inner relation is provably unique for the join clauses.
+ *
+ * Ideally this would happen during reduce_outer_joins, but we don't have
+ * enough information at that point.
+ *
+ * To perform the strength reduction when applicable, we need only delete
+ * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
+ * bother fixing the join type attributed to it in the query jointree,
+ * since that won't be consulted again.)
+ */
+void
+reduce_unique_semijoins(PlannerInfo *root)
+{
+       ListCell   *lc;
+       ListCell   *next;
+
+       /*
+        * Scan the join_info_list to find semijoins.  We can't use foreach
+        * because we may delete the current cell, so "next" is fetched early.
+        */
+       for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
+       {
+               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+               int                     innerrelid;
+               RelOptInfo *innerrel;
+               Relids          joinrelids;
+               List       *restrictlist;
+
+               next = lnext(lc);
+
+               /*
+                * Must be a non-delaying semijoin to a single baserel, else we aren't
+                * going to be able to do anything with it.  (It's probably not
+                * possible for delay_upper_joins to be set on a semijoin, but we
+                * might as well check.)
+                */
+               if (sjinfo->jointype != JOIN_SEMI ||
+                       sjinfo->delay_upper_joins)
+                       continue;
+
+               if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+                       continue;
+
+               innerrel = find_base_rel(root, innerrelid);
+
+               /*
+                * Before we trouble to run generate_join_implied_equalities, make a
+                * quick check to eliminate cases in which we will surely be unable to
+                * prove uniqueness of the innerrel.
+                */
+               if (!rel_supports_distinctness(root, innerrel))
+                       continue;
+
+               /* Relid set of the join considered: min_lefthand + the single RHS rel */
+               joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+               /*
+                * Since we're only considering a single-rel RHS, any join clauses it
+                * has must be clauses linking it to the semijoin's min_lefthand.  We
+                * can also consider EC-derived join clauses.
+                */
+               restrictlist =
+                       list_concat(generate_join_implied_equalities(root,
+                                                                                                                joinrelids,
+                                                                                                                sjinfo->min_lefthand,
+                                                                                                                innerrel),
+                                               innerrel->joininfo);
+
+               /* Test uniqueness for those clauses; "true" is force_cache. */
+               if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
+                                                               JOIN_SEMI, restrictlist, true))
+                       continue;
+
+               /* OK, drop the SpecialJoinInfo; that reduces it to a plain inner join. */
+               root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
+       }
+}
+
+
  /*
   * rel_supports_distinctness
   *             Could the relation possibly be proven distinct on some set of columns?
@@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
   *       Check if the innerrel provably contains at most one tuple matching any
   *       tuple from the outerrel, based on join clauses in the 'restrictlist'.
   *
+ * We need an actual RelOptInfo for the innerrel, but it's sufficient to
+ * identify the outerrel by its Relids.  This asymmetry supports use of this
+ * function before joinrels have been built.
+ *
   * The proof must be made based only on clauses that will be "joinquals"
   * rather than "otherquals" at execution.  For an inner join there's no
   * difference; but if the join is outer, we must ignore pushed-down quals,
@@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids)
   *
   * The actual proof is undertaken by is_innerrel_unique_for(); this function
   * is a frontend that is mainly concerned with caching the answers.
+ * In particular, the force_cache argument allows overriding the internal
+ * heuristic about whether to cache negative answers; it should be "true"
+ * if making an inquiry that is not part of the normal bottom-up join search
+ * sequence.
   */
  bool
  innerrel_is_unique(PlannerInfo *root,
-                                  RelOptInfo *outerrel,
+                                  Relids outerrelids,
                                    RelOptInfo *innerrel,
                                    JoinType jointype,
-                                  List *restrictlist)
+                                  List *restrictlist,
+                                  bool force_cache)
  {
         MemoryContext old_context;
         ListCell   *lc;
@@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root,
         {
                 Relids          unique_for_rels = (Relids) lfirst(lc);
  
-               if (bms_is_subset(unique_for_rels, outerrel->relids))
+               if (bms_is_subset(unique_for_rels, outerrelids))
                         return true;            /* Success! */
         }
  
@@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root,
         {
                 Relids          unique_for_rels = (Relids) lfirst(lc);
  
-               if (bms_is_subset(outerrel->relids, unique_for_rels))
+               if (bms_is_subset(outerrelids, unique_for_rels))
                         return false;
         }
  
         /* No cached information, so try to make the proof. */
-       if (is_innerrel_unique_for(root, outerrel, innerrel,
+       if (is_innerrel_unique_for(root, outerrelids, innerrel,
                                                            jointype, restrictlist))
         {
                 /*
@@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root,
                  */
                 old_context = MemoryContextSwitchTo(root->planner_cxt);
                 innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
-                                                                                       bms_copy(outerrel->relids));
+                                                                                       bms_copy(outerrelids));
                 MemoryContextSwitchTo(old_context);
  
                 return true;                    /* Success! */
@@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root,
                  * from smaller to larger.  It is useful in GEQO mode, where the
                  * knowledge can be carried across successive planning attempts; and
                  * it's likely to be useful when using join-search plugins, too. Hence
-                * cache only when join_search_private is non-NULL.  (Yeah, that's a
-                * hack, but it seems reasonable.)
+                * cache when join_search_private is non-NULL.  (Yeah, that's a hack,
+                * but it seems reasonable.)
+                *
+                * Also, allow callers to override that heuristic and force caching;
+                * that's useful for reduce_unique_semijoins, which calls here before
+                * the normal join search starts.
                  */
-               if (root->join_search_private)
+               if (force_cache || root->join_search_private)
                 {
                         old_context = MemoryContextSwitchTo(root->planner_cxt);
                         innerrel->non_unique_for_rels =
                                 lappend(innerrel->non_unique_for_rels,
-                                               bms_copy(outerrel->relids));
+                                               bms_copy(outerrelids));
                         MemoryContextSwitchTo(old_context);
                 }
  
@@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root,
   */
  static bool
  is_innerrel_unique_for(PlannerInfo *root,
-                                          RelOptInfo *outerrel,
+                                          Relids outerrelids,
                                            RelOptInfo *innerrel,
                                            JoinType jointype,
                                            List *restrictlist)
@@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root,
                  * Check if clause has the form "outer op inner" or "inner op outer",
                  * and if so mark which side is inner.
                  */
-               if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+               if (!clause_sides_match_join(restrictinfo, outerrelids,
                                                                          innerrel->relids))
                         continue;                       /* no good for these input relations */
  
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index ef0de3fb1a99917f02e51a5568b7904e0f8464a8..74de3b818f7ec0bbd28c3da2198f51f7aaec80fb 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -192,6 +192,12 @@ query_planner(PlannerInfo *root, List *tlist,
          */
         joinlist = remove_useless_joins(root, joinlist);
  
+       /*
+        * Also, reduce any semijoins with unique inner rels to plain inner joins.
+        * Likewise, this can't be done until now for lack of needed info.
+        */
+       reduce_unique_semijoins(root);
+
         /*
          * Now distribute "placeholders" to base rels as needed.  This has to be
          * done after join removal because removal could change whether a
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h

index 5df68a22a6014ffe1af6fef730a4b9124573b8c5..e773c0f7edacd267d7d348012381f418064d5a7a 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -103,11 +103,12 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
   * prototypes for plan/analyzejoins.c
   */
  extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern void reduce_unique_semijoins(PlannerInfo *root);
  extern bool query_supports_distinctness(Query *query);
  extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
  extern bool innerrel_is_unique(PlannerInfo *root,
-                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
-                                  JoinType jointype, List *restrictlist);
+                                  Relids outerrelids, RelOptInfo *innerrel,
+                                  JoinType jointype, List *restrictlist, bool force_cache);
  
  /*
   * prototypes for plan/setrefs.c
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index 87ff3657a342c8061f8ad8c2656744f3d5b3b130..d08b1e1ae5377471f2ea53c673169d8e02f6fe19 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -5663,3 +5663,31 @@ where exists (select 1 from tenk1 t3
           Index Cond: (t2.hundred = t3.tenthous)
  (18 rows)
  
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+              where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+      and t1.unique1 < 1;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop
+   Output: t1.unique1, t2.hundred
+   ->  Nested Loop
+         Output: t1.unique1, j3.tenthous
+         ->  Index Only Scan using onek_unique1 on public.onek t1
+               Output: t1.unique1
+               Index Cond: (t1.unique1 < 1)
+         ->  Index Only Scan using j3_unique1_tenthous_idx on public.j3
+               Output: j3.unique1, j3.tenthous
+               Index Cond: (j3.unique1 = t1.unique1)
+   ->  Index Only Scan using tenk1_hundred on public.tenk1 t2
+         Output: t2.hundred
+         Index Cond: (t2.hundred = j3.tenthous)
+(13 rows)
+
+drop table j3;
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out

index aa06d1d454ed264290e402e55fb385ef0de33ab0..f6b51a54c31fd32ea9cbe1f4ab553f4113c810a9 100644 (file)
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -1673,7 +1673,7 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
                             QUERY PLAN                            
  -----------------------------------------------------------------
   Update on base_tbl b
-   ->  Hash Semi Join
+   ->  Hash Join
           Hash Cond: (b.a = r.a)
           ->  Seq Scan on base_tbl b
           ->  Hash
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index a36e29f462e16d854418eef9aff8b2d7ec26484c..c3994ea531ce9a5492709362fd342b9565cf9cb1 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1864,3 +1864,17 @@ from onek t1, tenk1 t2
  where exists (select 1 from tenk1 t3
                where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred)
        and t1.unique1 < 1;
+
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+              where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+      and t1.unique1 < 1;
+
+drop table j3;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)
src/backend/optimizer/path/joinpath.c		patch \| blob \| history
src/backend/optimizer/plan/analyzejoins.c		patch \| blob \| history
src/backend/optimizer/plan/planmain.c		patch \| blob \| history
src/include/optimizer/planmain.h		patch \| blob \| history
src/test/regress/expected/join.out		patch \| blob \| history
src/test/regress/expected/updatable_views.out		patch \| blob \| history
src/test/regress/sql/join.sql		patch \| blob \| history