Allow join removal in some cases involving a left join to a subquery.

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 16 Jul 2014 01:12:43 +0000 (21:12 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 16 Jul 2014 01:12:43 +0000 (21:12 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 16 Jul 2014 01:12:43 +0000 (21:12 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 16 Jul 2014 01:12:43 +0000 (21:12 -0400)
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c

index 129fc3dfae6cd2937a8e0a21003f314d8d034853..773f8a458e22319656e6c9a54b88a0e954708cc9 100644 (file)
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,17 +22,21 @@
   */
  #include "postgres.h"
  
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
  #include "optimizer/joininfo.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
  #include "optimizer/planmain.h"
-#include "optimizer/var.h"
+#include "optimizer/tlist.h"
+#include "utils/lsyscache.h"
  
  /* local functions */
  static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
  static void remove_rel_from_query(PlannerInfo *root, int relid,
                                           Relids joinrelids);
  static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
+static Oid     distinct_col_search(int colno, List *colnos, List *opids);
  
  
  /*
@@ -147,18 +151,15 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
  {
         int                     innerrelid;
         RelOptInfo *innerrel;
+       Query      *subquery = NULL;
         Relids          joinrelids;
         List       *clause_list = NIL;
         ListCell   *l;
         int                     attroff;
  
         /*
-        * Currently, we only know how to remove left joins to a baserel with
-        * unique indexes.  We can check most of these criteria pretty trivially
-        * to avoid doing useless extra work.  But checking whether any of the
-        * indexes are unique would require iterating over the indexlist, so for
-        * now we just make sure there are indexes of some sort or other.  If none
-        * of them are unique, join removal will still fail, just slightly later.
+        * Must be a non-delaying left join to a single baserel, else we aren't
+        * going to be able to do anything with it.
          */
         if (sjinfo->jointype != JOIN_LEFT ||
                 sjinfo->delay_upper_joins ||
@@ -168,11 +169,39 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
         innerrelid = bms_singleton_member(sjinfo->min_righthand);
         innerrel = find_base_rel(root, innerrelid);
  
-       if (innerrel->reloptkind != RELOPT_BASEREL ||
-               innerrel->rtekind != RTE_RELATION ||
-               innerrel->indexlist == NIL)
+       if (innerrel->reloptkind != RELOPT_BASEREL)
                 return false;
  
+       /*
+        * Before we go to the effort of checking whether any innerrel variables
+        * are needed above the join, make a quick check to eliminate cases in
+        * which we will surely be unable to prove uniqueness of the innerrel.
+        */
+       if (innerrel->rtekind == RTE_RELATION)
+       {
+               /*
+                * For a plain-relation innerrel, we only know how to prove uniqueness
+                * by reference to unique indexes.  If there are no indexes then
+                * there's certainly no unique indexes so there's no point in going
+                * further.
+                */
+               if (innerrel->indexlist == NIL)
+                       return false;
+       }
+       else if (innerrel->rtekind == RTE_SUBQUERY)
+       {
+               subquery = root->simple_rte_array[innerrelid]->subquery;
+
+               /*
+                * If the subquery has no qualities that support distinctness proofs
+                * then there's no point in going further.
+                */
+               if (!query_supports_distinctness(subquery))
+                       return false;
+       }
+       else
+               return false;                   /* unsupported rtekind */
+
         /* Compute the relid set for the join we are considering */
         joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
  
@@ -272,12 +301,64 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
  
         /*
          * relation_has_unique_index_for automatically adds any usable restriction
-        * clauses for the innerrel, so we needn't do that here.
+        * clauses for the innerrel, so we needn't do that here.  (XXX we are not
+        * considering restriction clauses for subqueries; is that worth doing?)
          */
  
-       /* Now examine the indexes to see if we have a matching unique index */
-       if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
-               return true;
+       if (innerrel->rtekind == RTE_RELATION)
+       {
+               /* Now examine the indexes to see if we have a matching unique index */
+               if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
+                       return true;
+       }
+       else    /* innerrel->rtekind == RTE_SUBQUERY */
+       {
+               List       *colnos = NIL;
+               List       *opids = NIL;
+
+               /*
+                * Build the argument lists for query_is_distinct_for: a list of
+                * output column numbers that the query needs to be distinct over, and
+                * a list of equality operators that the output columns need to be
+                * distinct according to.
+                */
+               foreach(l, clause_list)
+               {
+                       RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+                       Oid                     op;
+                       Var                *var;
+
+                       /*
+                        * Get the equality operator we need uniqueness according to.
+                        * (This might be a cross-type operator and thus not exactly the
+                        * same operator the subquery would consider; that's all right
+                        * since query_is_distinct_for can resolve such cases.)  The
+                        * mergejoinability test above should have selected only OpExprs.
+                        */
+                       Assert(IsA(rinfo->clause, OpExpr));
+                       op = ((OpExpr *) rinfo->clause)->opno;
+
+                       /* clause_sides_match_join identified the inner side for us */
+                       if (rinfo->outer_is_left)
+                               var = (Var *) get_rightop(rinfo->clause);
+                       else
+                               var = (Var *) get_leftop(rinfo->clause);
+
+                       /*
+                        * If inner side isn't a Var referencing a subquery output column,
+                        * this clause doesn't help us.
+                        */
+                       if (!var || !IsA(var, Var) ||
+                               var->varno != innerrelid || var->varlevelsup != 0)
+                               continue;
+
+                       colnos = lappend_int(colnos, var->varattno);
+                       opids = lappend_oid(opids, op);
+               }
+
+               if (query_is_distinct_for(subquery, colnos, opids))
+                       return true;
+       }
  
         /*
          * Some day it would be nice to check for other methods of establishing
@@ -481,3 +562,189 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
  
         return result;
  }
+
+
+/*
+ * query_supports_distinctness - could the query possibly be proven distinct
+ *             on some set of output columns?
+ *
+ * This is effectively a pre-checking function for query_is_distinct_for().
+ * It must return TRUE if query_is_distinct_for() could possibly return TRUE
+ * with this query, but it should not expend a lot of cycles.  The idea is
+ * that callers can avoid doing possibly-expensive processing to compute
+ * query_is_distinct_for()'s argument lists if the call could not possibly
+ * succeed.
+ */
+bool
+query_supports_distinctness(Query *query)
+{
+       if (query->distinctClause != NIL ||
+               query->groupClause != NIL ||
+               query->hasAggs ||
+               query->havingQual ||
+               query->setOperations)
+               return true;
+
+       return false;
+}
+
+/*
+ * query_is_distinct_for - does query never return duplicates of the
+ *             specified columns?
+ *
+ * query is a not-yet-planned subquery (in current usage, it's always from
+ * a subquery RTE, which the planner avoids scribbling on).
+ *
+ * colnos is an integer list of output column numbers (resno's).  We are
+ * interested in whether rows consisting of just these columns are certain
+ * to be distinct.  "Distinctness" is defined according to whether the
+ * corresponding upper-level equality operators listed in opids would think
+ * the values are distinct.  (Note: the opids entries could be cross-type
+ * operators, and thus not exactly the equality operators that the subquery
+ * would use itself.  We use equality_ops_are_compatible() to check
+ * compatibility.  That looks at btree or hash opfamily membership, and so
+ * should give trustworthy answers for all operators that we might need
+ * to deal with here.)
+ */
+bool
+query_is_distinct_for(Query *query, List *colnos, List *opids)
+{
+       ListCell   *l;
+       Oid                     opid;
+
+       Assert(list_length(colnos) == list_length(opids));
+
+       /*
+        * A set-returning function in the query's targetlist can result in
+        * returning duplicate rows, if the SRF is evaluated after the
+        * de-duplication step; so we play it safe and say "no" if there are any
+        * SRFs.  (We could be certain that it's okay if SRFs appear only in the
+        * specified columns, since those must be evaluated before de-duplication;
+        * but it doesn't presently seem worth the complication to check that.)
+        */
+       if (expression_returns_set((Node *) query->targetList))
+               return false;
+
+       /*
+        * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
+        * columns in the DISTINCT clause appear in colnos and operator semantics
+        * match.
+        */
+       if (query->distinctClause)
+       {
+               foreach(l, query->distinctClause)
+               {
+                       SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
+                       TargetEntry *tle = get_sortgroupclause_tle(sgc,
+                                                                                                          query->targetList);
+
+                       opid = distinct_col_search(tle->resno, colnos, opids);
+                       if (!OidIsValid(opid) ||
+                               !equality_ops_are_compatible(opid, sgc->eqop))
+                               break;                  /* exit early if no match */
+               }
+               if (l == NULL)                  /* had matches for all? */
+                       return true;
+       }
+
+       /*
+        * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
+        * appear in colnos and operator semantics match.
+        */
+       if (query->groupClause)
+       {
+               foreach(l, query->groupClause)
+               {
+                       SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
+                       TargetEntry *tle = get_sortgroupclause_tle(sgc,
+                                                                                                          query->targetList);
+
+                       opid = distinct_col_search(tle->resno, colnos, opids);
+                       if (!OidIsValid(opid) ||
+                               !equality_ops_are_compatible(opid, sgc->eqop))
+                               break;                  /* exit early if no match */
+               }
+               if (l == NULL)                  /* had matches for all? */
+                       return true;
+       }
+       else
+       {
+               /*
+                * If we have no GROUP BY, but do have aggregates or HAVING, then the
+                * result is at most one row so it's surely unique, for any operators.
+                */
+               if (query->hasAggs || query->havingQual)
+                       return true;
+       }
+
+       /*
+        * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
+        * except with ALL.
+        */
+       if (query->setOperations)
+       {
+               SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
+
+               Assert(IsA(topop, SetOperationStmt));
+               Assert(topop->op != SETOP_NONE);
+
+               if (!topop->all)
+               {
+                       ListCell   *lg;
+
+                       /* We're good if all the nonjunk output columns are in colnos */
+                       lg = list_head(topop->groupClauses);
+                       foreach(l, query->targetList)
+                       {
+                               TargetEntry *tle = (TargetEntry *) lfirst(l);
+                               SortGroupClause *sgc;
+
+                               if (tle->resjunk)
+                                       continue;       /* ignore resjunk columns */
+
+                               /* non-resjunk columns should have grouping clauses */
+                               Assert(lg != NULL);
+                               sgc = (SortGroupClause *) lfirst(lg);
+                               lg = lnext(lg);
+
+                               opid = distinct_col_search(tle->resno, colnos, opids);
+                               if (!OidIsValid(opid) ||
+                                       !equality_ops_are_compatible(opid, sgc->eqop))
+                                       break;          /* exit early if no match */
+                       }
+                       if (l == NULL)          /* had matches for all? */
+                               return true;
+               }
+       }
+
+       /*
+        * XXX Are there any other cases in which we can easily see the result
+        * must be distinct?
+        *
+        * If you do add more smarts to this function, be sure to update
+        * query_supports_distinctness() to match.
+        */
+
+       return false;
+}
+
+/*
+ * distinct_col_search - subroutine for query_is_distinct_for
+ *
+ * If colno is in colnos, return the corresponding element of opids,
+ * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
+ * but if it does, we arbitrarily select the first match.)
+ */
+static Oid
+distinct_col_search(int colno, List *colnos, List *opids)
+{
+       ListCell   *lc1,
+                          *lc2;
+
+       forboth(lc1, colnos, lc2, opids)
+       {
+               if (colno == lfirst_int(lc1))
+                       return lfirst_oid(lc2);
+       }
+       return InvalidOid;
+}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index d129f8d65ea7f556ff14853d3d6d85a1805886a9..319e8b2c37968882cdc70d465e17f9042a572d40 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -22,8 +22,9 @@
  #include "optimizer/cost.h"
  #include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
+#include "optimizer/planmain.h"
  #include "optimizer/restrictinfo.h"
-#include "optimizer/tlist.h"
+#include "optimizer/var.h"
  #include "parser/parsetree.h"
  #include "utils/lsyscache.h"
  #include "utils/selfuncs.h"
@@ -38,8 +39,6 @@ typedef enum
  } PathCostComparison;
  
  static List *translate_sub_tlist(List *tlist, int relid);
-static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
-static Oid     distinct_col_search(int colno, List *colnos, List *opids);
  
  
  /*****************************************************************************
@@ -1312,25 +1311,29 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
         if (rel->rtekind == RTE_SUBQUERY)
         {
                 RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
-               List       *sub_tlist_colnos;
  
-               sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
-
-               if (sub_tlist_colnos &&
-                       query_is_distinct_for(rte->subquery,
-                                                                 sub_tlist_colnos, in_operators))
+               if (query_supports_distinctness(rte->subquery))
                 {
-                       pathnode->umethod = UNIQUE_PATH_NOOP;
-                       pathnode->path.rows = rel->rows;
-                       pathnode->path.startup_cost = subpath->startup_cost;
-                       pathnode->path.total_cost = subpath->total_cost;
-                       pathnode->path.pathkeys = subpath->pathkeys;
+                       List       *sub_tlist_colnos;
+
+                       sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
+
+                       if (sub_tlist_colnos &&
+                               query_is_distinct_for(rte->subquery,
+                                                                         sub_tlist_colnos, in_operators))
+                       {
+                               pathnode->umethod = UNIQUE_PATH_NOOP;
+                               pathnode->path.rows = rel->rows;
+                               pathnode->path.startup_cost = subpath->startup_cost;
+                               pathnode->path.total_cost = subpath->total_cost;
+                               pathnode->path.pathkeys = subpath->pathkeys;
  
-                       rel->cheapest_unique_path = (Path *) pathnode;
+                               rel->cheapest_unique_path = (Path *) pathnode;
  
-                       MemoryContextSwitchTo(oldcontext);
+                               MemoryContextSwitchTo(oldcontext);
  
-                       return pathnode;
+                               return pathnode;
+                       }
                 }
         }
  
@@ -1450,161 +1453,6 @@ translate_sub_tlist(List *tlist, int relid)
         return result;
  }
  
-/*
- * query_is_distinct_for - does query never return duplicates of the
- *             specified columns?
- *
- * colnos is an integer list of output column numbers (resno's).  We are
- * interested in whether rows consisting of just these columns are certain
- * to be distinct.  "Distinctness" is defined according to whether the
- * corresponding upper-level equality operators listed in opids would think
- * the values are distinct.  (Note: the opids entries could be cross-type
- * operators, and thus not exactly the equality operators that the subquery
- * would use itself.  We use equality_ops_are_compatible() to check
- * compatibility.  That looks at btree or hash opfamily membership, and so
- * should give trustworthy answers for all operators that we might need
- * to deal with here.)
- */
-static bool
-query_is_distinct_for(Query *query, List *colnos, List *opids)
-{
-       ListCell   *l;
-       Oid                     opid;
-
-       Assert(list_length(colnos) == list_length(opids));
-
-       /*
-        * A set-returning function in the query's targetlist can result in
-        * returning duplicate rows, if the SRF is evaluated after the
-        * de-duplication step; so we play it safe and say "no" if there are any
-        * SRFs.  (We could be certain that it's okay if SRFs appear only in the
-        * specified columns, since those must be evaluated before de-duplication;
-        * but it doesn't presently seem worth the complication to check that.)
-        */
-       if (expression_returns_set((Node *) query->targetList))
-               return false;
-
-       /*
-        * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
-        * columns in the DISTINCT clause appear in colnos and operator semantics
-        * match.
-        */
-       if (query->distinctClause)
-       {
-               foreach(l, query->distinctClause)
-               {
-                       SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
-                       TargetEntry *tle = get_sortgroupclause_tle(sgc,
-                                                                                                          query->targetList);
-
-                       opid = distinct_col_search(tle->resno, colnos, opids);
-                       if (!OidIsValid(opid) ||
-                               !equality_ops_are_compatible(opid, sgc->eqop))
-                               break;                  /* exit early if no match */
-               }
-               if (l == NULL)                  /* had matches for all? */
-                       return true;
-       }
-
-       /*
-        * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
-        * appear in colnos and operator semantics match.
-        */
-       if (query->groupClause)
-       {
-               foreach(l, query->groupClause)
-               {
-                       SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
-                       TargetEntry *tle = get_sortgroupclause_tle(sgc,
-                                                                                                          query->targetList);
-
-                       opid = distinct_col_search(tle->resno, colnos, opids);
-                       if (!OidIsValid(opid) ||
-                               !equality_ops_are_compatible(opid, sgc->eqop))
-                               break;                  /* exit early if no match */
-               }
-               if (l == NULL)                  /* had matches for all? */
-                       return true;
-       }
-       else
-       {
-               /*
-                * If we have no GROUP BY, but do have aggregates or HAVING, then the
-                * result is at most one row so it's surely unique, for any operators.
-                */
-               if (query->hasAggs || query->havingQual)
-                       return true;
-       }
-
-       /*
-        * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
-        * except with ALL.
-        */
-       if (query->setOperations)
-       {
-               SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
-
-               Assert(IsA(topop, SetOperationStmt));
-               Assert(topop->op != SETOP_NONE);
-
-               if (!topop->all)
-               {
-                       ListCell   *lg;
-
-                       /* We're good if all the nonjunk output columns are in colnos */
-                       lg = list_head(topop->groupClauses);
-                       foreach(l, query->targetList)
-                       {
-                               TargetEntry *tle = (TargetEntry *) lfirst(l);
-                               SortGroupClause *sgc;
-
-                               if (tle->resjunk)
-                                       continue;       /* ignore resjunk columns */
-
-                               /* non-resjunk columns should have grouping clauses */
-                               Assert(lg != NULL);
-                               sgc = (SortGroupClause *) lfirst(lg);
-                               lg = lnext(lg);
-
-                               opid = distinct_col_search(tle->resno, colnos, opids);
-                               if (!OidIsValid(opid) ||
-                                       !equality_ops_are_compatible(opid, sgc->eqop))
-                                       break;          /* exit early if no match */
-                       }
-                       if (l == NULL)          /* had matches for all? */
-                               return true;
-               }
-       }
-
-       /*
-        * XXX Are there any other cases in which we can easily see the result
-        * must be distinct?
-        */
-
-       return false;
-}
-
-/*
- * distinct_col_search - subroutine for query_is_distinct_for
- *
- * If colno is in colnos, return the corresponding element of opids,
- * else return InvalidOid.  (We expect colnos does not contain duplicates,
- * so the result is well-defined.)
- */
-static Oid
-distinct_col_search(int colno, List *colnos, List *opids)
-{
-       ListCell   *lc1,
-                          *lc2;
-
-       forboth(lc1, colnos, lc2, opids)
-       {
-               if (colno == lfirst_int(lc1))
-                       return lfirst_oid(lc2);
-       }
-       return InvalidOid;
-}
-
  /*
   * create_subqueryscan_path
   *       Creates a path corresponding to a sequential scan of a subquery,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h

index 8bdb7dbeb08177fb02e0a8e041be1905befed615..450425046a50b38993f57cddf73a185ed848c069 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -122,6 +122,8 @@ extern RestrictInfo *build_implied_join_equality(Oid opno,
   * prototypes for plan/analyzejoins.c
   */
  extern List *remove_useless_joins(PlannerInfo *root, List *joinlist);
+extern bool query_supports_distinctness(Query *query);
+extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
  
  /*
   * prototypes for plan/setrefs.c
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index c62a63f6740ebbf9dd4194121dbf7bec8f899f4e..1cb1c51a6ee6d09b3b18614dced6224be7a33e61 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3131,9 +3131,11 @@ begin;
  CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
  CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
  CREATE TEMP TABLE c (id int PRIMARY KEY);
+CREATE TEMP TABLE d (a int, b int);
  INSERT INTO a VALUES (0, 0), (1, NULL);
  INSERT INTO b VALUES (0, 0), (1, NULL);
  INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3), (2,2), (3,1);
  -- all three cases should be optimizable into a simple seqscan
  explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
    QUERY PLAN   
@@ -3169,6 +3171,83 @@ select id from a where id in (
           ->  Seq Scan on b
  (5 rows)
  
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by its GROUP BY clause
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id and d.b = s.c_id;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id and d.b = s.c_id;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+-- join removal is not possible when the GROUP BY contains a column that is
+-- not in the join condition
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id;
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
+   ->  Sort
+         Sort Key: d.a
+         ->  Seq Scan on d
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id;
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
+   ->  Sort
+         Sort Key: d.a
+         ->  Seq Scan on d
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b
+(11 rows)
+
+-- check join removal works when uniqueness of the join condition is enforced
+-- by a UNION
+explain (costs off)
+select d.* from d left join (select id from a union select id from b) s
+  on d.a = s.id;
+  QUERY PLAN   
+---------------
+ Seq Scan on d
+(1 row)
+
+-- check join removal with a cross-type comparison operator
+explain (costs off)
+select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4
+  on i8.q1 = i4.f1;
+       QUERY PLAN        
+-------------------------
+ Seq Scan on int8_tbl i8
+(1 row)
+
  rollback;
  create temp table parent (k int primary key, pd int);
  create temp table child (k int unique, cd int);
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 1031f26b31430c8ec1f0ad3024cd406830e8e95f..fa3e0686261a080c92cda09f2e7f43dc32a577ae 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -919,9 +919,11 @@ begin;
  CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
  CREATE TEMP TABLE b (id int PRIMARY KEY, c_id int);
  CREATE TEMP TABLE c (id int PRIMARY KEY);
+CREATE TEMP TABLE d (a int, b int);
  INSERT INTO a VALUES (0, 0), (1, NULL);
  INSERT INTO b VALUES (0, 0), (1, NULL);
  INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3), (2,2), (3,1);
  
  -- all three cases should be optimizable into a simple seqscan
  explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
@@ -936,6 +938,39 @@ select id from a where id in (
         select b.id from b left join c on b.id = c.id
  );
  
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by its GROUP BY clause
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id and d.b = s.c_id;
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id and d.b = s.c_id;
+
+-- join removal is not possible when the GROUP BY contains a column that is
+-- not in the join condition
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id;
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id;
+
+-- check join removal works when uniqueness of the join condition is enforced
+-- by a UNION
+explain (costs off)
+select d.* from d left join (select id from a union select id from b) s
+  on d.a = s.id;
+
+-- check join removal with a cross-type comparison operator
+explain (costs off)
+select i8.* from int8_tbl i8 left join (select f1 from int4_tbl group by f1) i4
+  on i8.q1 = i4.f1;
+
  rollback;
  
  create temp table parent (k int primary key, pd int);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 16 Jul 2014 01:12:43 +0000 (21:12 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 16 Jul 2014 01:12:43 +0000 (21:12 -0400)
src/backend/optimizer/plan/analyzejoins.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/include/optimizer/planmain.h		patch \| blob \| history
src/test/regress/expected/join.out		patch \| blob \| history
src/test/regress/sql/join.sql		patch \| blob \| history