granicus.if.org Git - postgresql/commitdiff
Reduce semijoins with unique inner relations to plain inner joins.
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 1 May 2017 18:53:42 +0000 (14:53 -0400)
If the inner relation can be proven unique, that is, it can have no more
than one matching row for any row of the outer query, then we might as
well implement the semijoin as a plain inner join, allowing substantially
more freedom to the planner.  This is a form of outer join strength
reduction, but it can't be implemented in reduce_outer_joins() because
we don't have enough info about the individual relations at that stage.
Instead do it much like remove_useless_joins(): once we've built base
relations, we can make another pass over the SpecialJoinInfo list and
get rid of any entries representing reducible semijoins.

This is essentially a follow-on to the inner-unique patch (commit 9c7f5229a)
and makes use of the proof machinery that that patch created.  We need only
minor refactoring of innerrel_is_unique's API to support this usage.

Per performance complaint from Teodor Sigaev.

Discussion: https://postgr.es/m/f994fc98-389f-4a46-d1bc-c42e05cb43ed@sigaev.ru

src/backend/optimizer/path/joinpath.c
src/backend/optimizer/plan/analyzejoins.c
src/backend/optimizer/plan/planmain.c
src/include/optimizer/planmain.h
src/test/regress/expected/join.out
src/test/regress/expected/updatable_views.out
src/test/regress/sql/join.sql

index 39e2ddda906f5f7fc3e4dad17ca89013f24ff934..c130d2f17f2ece03afeb8bb9e54fefc4f36d9ac5 100644 (file)
@@ -126,13 +126,15 @@ add_paths_to_joinrel(PlannerInfo *root,
         *
         * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
         * matter since the executor can make the equivalent optimization anyway;
-        * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, if
-        * the LHS covers all of the associated semijoin's min_lefthand, then it's
-        * appropriate to set inner_unique because the path produced by
-        * create_unique_path will be unique relative to the LHS.  (If we have an
-        * LHS that's only part of the min_lefthand, that is *not* true.)  For
-        * JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid letting that value escape
-        * this module.
+        * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
+        * must be considering a semijoin whose inner side is not provably unique
+        * (else reduce_unique_semijoins would've simplified it), so there's no
+        * point in calling innerrel_is_unique.  However, if the LHS covers all of
+        * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+        * because the path produced by create_unique_path will be unique relative
+        * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
+        * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
+        * letting that value escape this module.
         */
        switch (jointype)
        {
@@ -145,12 +147,20 @@ add_paths_to_joinrel(PlannerInfo *root,
                                                                                           outerrel->relids);
                        break;
                case JOIN_UNIQUE_OUTER:
-                       extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
-                                                                                                       JOIN_INNER, restrictlist);
+                       extra.inner_unique = innerrel_is_unique(root,
+                                                                                                       outerrel->relids,
+                                                                                                       innerrel,
+                                                                                                       JOIN_INNER,
+                                                                                                       restrictlist,
+                                                                                                       false);
                        break;
                default:
-                       extra.inner_unique = innerrel_is_unique(root, outerrel, innerrel,
-                                                                                                       jointype, restrictlist);
+                       extra.inner_unique = innerrel_is_unique(root,
+                                                                                                       outerrel->relids,
+                                                                                                       innerrel,
+                                                                                                       jointype,
+                                                                                                       restrictlist,
+                                                                                                       false);
                        break;
        }
 
index 69b9be4d76b34b04e03dc752c6ad0934526d64e0..34317fe7782b58f04d4030494069a836dcd5f2d4 100644 (file)
@@ -42,7 +42,7 @@ static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
                                        List *clause_list);
 static Oid     distinct_col_search(int colno, List *colnos, List *opids);
 static bool is_innerrel_unique_for(PlannerInfo *root,
-                                          RelOptInfo *outerrel,
+                                          Relids outerrelids,
                                           RelOptInfo *innerrel,
                                           JoinType jointype,
                                           List *restrictlist);
@@ -495,6 +495,88 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
 }
 
 
+/*
+ * reduce_unique_semijoins
+ *             Check for semijoins that can be simplified to plain inner joins
+ *             because the inner relation is provably unique for the join clauses.
+ *
+ * Ideally this would happen during reduce_outer_joins, but we don't have
+ * enough information at that point.
+ *
+ * To perform the strength reduction when applicable, we need only delete
+ * the semijoin's SpecialJoinInfo from root->join_info_list.  (We don't
+ * bother fixing the join type attributed to it in the query jointree,
+ * since that won't be consulted again.)
+ */
+void
+reduce_unique_semijoins(PlannerInfo *root)
+{
+       ListCell   *lc;
+       ListCell   *next;
+
+       /*
+        * Scan the join_info_list to find semijoins.  We can't use foreach
+        * because we may delete the current cell.
+        */
+       for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
+       {
+               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+               int                     innerrelid;
+               RelOptInfo *innerrel;
+               Relids          joinrelids;
+               List       *restrictlist;
+
+               next = lnext(lc); /* fetch successor now; lc's cell may be deleted below */
+
+               /*
+                * Must be a non-delaying semijoin to a single baserel, else we aren't
+                * going to be able to do anything with it.  (It's probably not
+                * possible for delay_upper_joins to be set on a semijoin, but we
+                * might as well check.)
+                */
+               if (sjinfo->jointype != JOIN_SEMI ||
+                       sjinfo->delay_upper_joins)
+                       continue;
+
+               if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
+                       continue; /* min_righthand isn't a single rel */
+
+               innerrel = find_base_rel(root, innerrelid);
+
+               /*
+                * Before we trouble to run generate_join_implied_equalities, make a
+                * quick check to eliminate cases in which we will surely be unable to
+                * prove uniqueness of the innerrel.
+                */
+               if (!rel_supports_distinctness(root, innerrel))
+                       continue;
+
+               /* Compute the relid set for the join we are considering */
+               joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+               /*
+                * Since we're only considering a single-rel RHS, any join clauses it
+                * has must be clauses linking it to the semijoin's min_lefthand.  We
+                * can also consider EC-derived join clauses.
+                */
+               restrictlist =
+                       list_concat(generate_join_implied_equalities(root,
+                                                                                                                joinrelids,
+                                                                                                                sjinfo->min_lefthand,
+                                                                                                                innerrel),
+                                               innerrel->joininfo);
+
+               /* Test whether the innerrel is unique for those clauses. */
+               if (!innerrel_is_unique(root, sjinfo->min_lefthand, innerrel,
+                                                               JOIN_SEMI, restrictlist, true)) /* true = force_cache: this call precedes the normal join search */
+                       continue; /* not provably unique; leave the semijoin in place */
+
+               /* OK, remove the SpecialJoinInfo from the list. */
+               root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
+       }
+}
+
+
 /*
  * rel_supports_distinctness
  *             Could the relation possibly be proven distinct on some set of columns?
@@ -857,6 +939,10 @@ distinct_col_search(int colno, List *colnos, List *opids)
  *       Check if the innerrel provably contains at most one tuple matching any
  *       tuple from the outerrel, based on join clauses in the 'restrictlist'.
  *
+ * We need an actual RelOptInfo for the innerrel, but it's sufficient to
+ * identify the outerrel by its Relids.  This asymmetry supports use of this
+ * function before joinrels have been built.
+ *
  * The proof must be made based only on clauses that will be "joinquals"
  * rather than "otherquals" at execution.  For an inner join there's no
  * difference; but if the join is outer, we must ignore pushed-down quals,
@@ -867,13 +953,18 @@ distinct_col_search(int colno, List *colnos, List *opids)
  *
  * The actual proof is undertaken by is_innerrel_unique_for(); this function
  * is a frontend that is mainly concerned with caching the answers.
+ * In particular, the force_cache argument allows overriding the internal
+ * heuristic about whether to cache negative answers; it should be "true"
+ * if making an inquiry that is not part of the normal bottom-up join search
+ * sequence.
  */
 bool
 innerrel_is_unique(PlannerInfo *root,
-                                  RelOptInfo *outerrel,
+                                  Relids outerrelids,
                                   RelOptInfo *innerrel,
                                   JoinType jointype,
-                                  List *restrictlist)
+                                  List *restrictlist,
+                                  bool force_cache)
 {
        MemoryContext old_context;
        ListCell   *lc;
@@ -900,7 +991,7 @@ innerrel_is_unique(PlannerInfo *root,
        {
                Relids          unique_for_rels = (Relids) lfirst(lc);
 
-               if (bms_is_subset(unique_for_rels, outerrel->relids))
+               if (bms_is_subset(unique_for_rels, outerrelids))
                        return true;            /* Success! */
        }
 
@@ -912,12 +1003,12 @@ innerrel_is_unique(PlannerInfo *root,
        {
                Relids          unique_for_rels = (Relids) lfirst(lc);
 
-               if (bms_is_subset(outerrel->relids, unique_for_rels))
+               if (bms_is_subset(outerrelids, unique_for_rels))
                        return false;
        }
 
        /* No cached information, so try to make the proof. */
-       if (is_innerrel_unique_for(root, outerrel, innerrel,
+       if (is_innerrel_unique_for(root, outerrelids, innerrel,
                                                           jointype, restrictlist))
        {
                /*
@@ -932,7 +1023,7 @@ innerrel_is_unique(PlannerInfo *root,
                 */
                old_context = MemoryContextSwitchTo(root->planner_cxt);
                innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
-                                                                                       bms_copy(outerrel->relids));
+                                                                                       bms_copy(outerrelids));
                MemoryContextSwitchTo(old_context);
 
                return true;                    /* Success! */
@@ -949,15 +1040,19 @@ innerrel_is_unique(PlannerInfo *root,
                 * from smaller to larger.  It is useful in GEQO mode, where the
                 * knowledge can be carried across successive planning attempts; and
                 * it's likely to be useful when using join-search plugins, too. Hence
-                * cache only when join_search_private is non-NULL.  (Yeah, that's a
-                * hack, but it seems reasonable.)
+                * cache when join_search_private is non-NULL.  (Yeah, that's a hack,
+                * but it seems reasonable.)
+                *
+                * Also, allow callers to override that heuristic and force caching;
+                * that's useful for reduce_unique_semijoins, which calls here before
+                * the normal join search starts.
                 */
-               if (root->join_search_private)
+               if (force_cache || root->join_search_private)
                {
                        old_context = MemoryContextSwitchTo(root->planner_cxt);
                        innerrel->non_unique_for_rels =
                                lappend(innerrel->non_unique_for_rels,
-                                               bms_copy(outerrel->relids));
+                                               bms_copy(outerrelids));
                        MemoryContextSwitchTo(old_context);
                }
 
@@ -972,7 +1067,7 @@ innerrel_is_unique(PlannerInfo *root,
  */
 static bool
 is_innerrel_unique_for(PlannerInfo *root,
-                                          RelOptInfo *outerrel,
+                                          Relids outerrelids,
                                           RelOptInfo *innerrel,
                                           JoinType jointype,
                                           List *restrictlist)
@@ -1007,7 +1102,7 @@ is_innerrel_unique_for(PlannerInfo *root,
                 * Check if clause has the form "outer op inner" or "inner op outer",
                 * and if so mark which side is inner.
                 */
-               if (!clause_sides_match_join(restrictinfo, outerrel->relids,
+               if (!clause_sides_match_join(restrictinfo, outerrelids,
                                                                         innerrel->relids))
                        continue;                       /* no good for these input relations */
 
index ef0de3fb1a99917f02e51a5568b7904e0f8464a8..74de3b818f7ec0bbd28c3da2198f51f7aaec80fb 100644 (file)
@@ -192,6 +192,12 @@ query_planner(PlannerInfo *root, List *tlist,
         */
        joinlist = remove_useless_joins(root, joinlist);
 
+       /*
+        * Also, reduce any semijoins with unique inner rels to plain inner joins.
+        * Likewise, this can't be done until now for lack of needed info.
+        */
+       reduce_unique_semijoins(root);
+
        /*
         * Now distribute "placeholders" to base rels as needed.  This has to be
         * done after join removal because removal could change whether a
index 5df68a22a6014ffe1af6fef730a4b9124573b8c5..e773c0f7edacd267d7d348012381f418064d5a7a 100644 (file)
@@ -103,11 +103,12 @@ extern void match_foreign_keys_to_quals(PlannerInfo *root);
  * prototypes for plan/analyzejoins.c
  */
 extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern void reduce_unique_semijoins(PlannerInfo *root);
 extern bool query_supports_distinctness(Query *query);
 extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 extern bool innerrel_is_unique(PlannerInfo *root,
-                                  RelOptInfo *outerrel, RelOptInfo *innerrel,
-                                  JoinType jointype, List *restrictlist);
+                                  Relids outerrelids, RelOptInfo *innerrel,
+                                  JoinType jointype, List *restrictlist, bool force_cache);
 
 /*
  * prototypes for plan/setrefs.c
index 87ff3657a342c8061f8ad8c2656744f3d5b3b130..d08b1e1ae5377471f2ea53c673169d8e02f6fe19 100644 (file)
@@ -5663,3 +5663,31 @@ where exists (select 1 from tenk1 t3
          Index Cond: (t2.hundred = t3.tenthous)
 (18 rows)
 
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+              where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+      and t1.unique1 < 1;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Nested Loop
+   Output: t1.unique1, t2.hundred
+   ->  Nested Loop
+         Output: t1.unique1, j3.tenthous
+         ->  Index Only Scan using onek_unique1 on public.onek t1
+               Output: t1.unique1
+               Index Cond: (t1.unique1 < 1)
+         ->  Index Only Scan using j3_unique1_tenthous_idx on public.j3
+               Output: j3.unique1, j3.tenthous
+               Index Cond: (j3.unique1 = t1.unique1)
+   ->  Index Only Scan using tenk1_hundred on public.tenk1 t2
+         Output: t2.hundred
+         Index Cond: (t2.hundred = j3.tenthous)
+(13 rows)
+
+drop table j3;
index aa06d1d454ed264290e402e55fb385ef0de33ab0..f6b51a54c31fd32ea9cbe1f4ab553f4113c810a9 100644 (file)
@@ -1673,7 +1673,7 @@ EXPLAIN (costs off) UPDATE rw_view1 SET a = a + 5;
                            QUERY PLAN                            
 -----------------------------------------------------------------
  Update on base_tbl b
-   ->  Hash Semi Join
+   ->  Hash Join
          Hash Cond: (b.a = r.a)
          ->  Seq Scan on base_tbl b
          ->  Hash
index a36e29f462e16d854418eef9aff8b2d7ec26484c..c3994ea531ce9a5492709362fd342b9565cf9cb1 100644 (file)
@@ -1864,3 +1864,17 @@ from onek t1, tenk1 t2
 where exists (select 1 from tenk1 t3
               where t3.thousand = t1.unique1 and t3.tenthous = t2.hundred)
       and t1.unique1 < 1;
+
+-- ... unless it actually is unique
+create table j3 as select unique1, tenthous from onek;
+vacuum analyze j3;
+create unique index on j3(unique1, tenthous);
+
+explain (verbose, costs off)
+select t1.unique1, t2.hundred
+from onek t1, tenk1 t2
+where exists (select 1 from j3
+              where j3.unique1 = t1.unique1 and j3.tenthous = t2.hundred)
+      and t1.unique1 < 1;
+
+drop table j3;