]> granicus.if.org Git - postgresql/commitdiff
Switch the planner over to treating qualifications of a JOIN_SEMI join as
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 22 Nov 2008 22:47:06 +0000 (22:47 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 22 Nov 2008 22:47:06 +0000 (22:47 +0000)
though it is an inner rather than outer join type.  This essentially means
that we don't bother to separate "pushed down" qual conditions from actual
join quals at a semijoin plan node; which is okay because the restrictions of
SQL syntax make it impossible to have a pushed-down qual that references the
inner side of a semijoin.  This allows noticeably better optimization of
IN/EXISTS cases than we had before, since the equivalence-class machinery can
now use those quals.  Also fix a couple of other mistakes that had essentially
disabled the ability to unique-ify the inner relation and then join it to just
a subset of the left-hand relations.  An example case using the regression
database is

select * from tenk1 a, tenk1 b
where (a.unique1,b.unique2) in (select unique1,unique2 from tenk1 c);

which is planned reasonably well by 8.3 and earlier but had been forcing a
cartesian join of a/b in CVS HEAD.

src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/path/joinrels.c
src/backend/optimizer/plan/initsplan.c
src/include/nodes/nodes.h

index ba33da829a957553da31d1fc3a6e41f79766dfc9..0b9c5819820274de0fc52a30487de106c7f6c05f 100644 (file)
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.200 2008/10/21 20:42:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.201 2008/11/22 22:47:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2481,7 +2481,7 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                        break;
                case JOIN_SEMI:
                        nrows = outer_rel->rows * jselec;
-                       nrows *= pselec;
+                       /* pselec not used */
                        break;
                case JOIN_ANTI:
                        nrows = outer_rel->rows * (1.0 - jselec);
index 297c50389fb4bc5dcfebbcb582d849bfc7b32655..5d1e31549238af8eacc89ba2fad315b43e99ea8a 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.233 2008/09/12 14:56:13 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.234 2008/11/22 22:47:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1647,10 +1647,10 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
        switch (jointype)
        {
                case JOIN_INNER:
+               case JOIN_SEMI:
                        isouterjoin = false;
                        break;
                case JOIN_LEFT:
-               case JOIN_SEMI:
                case JOIN_ANTI:
                        isouterjoin = true;
                        break;
index eaf0ffa4276e87a1a28d3eb126597b56ff14cf31..fe70df49ad4d02eb75f49970ddf88a514f4d9dfe 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.118 2008/10/04 21:56:53 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.119 2008/11/22 22:47:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -744,12 +744,12 @@ hash_inner_and_outer(PlannerInfo *root,
        switch (jointype)
        {
                case JOIN_INNER:
+               case JOIN_SEMI:
                case JOIN_UNIQUE_OUTER:
                case JOIN_UNIQUE_INNER:
                        isouterjoin = false;
                        break;
                case JOIN_LEFT:
-               case JOIN_SEMI:
                case JOIN_ANTI:
                        isouterjoin = true;
                        break;
index 9ac0cc9743195f8813f0492f353c7a66ac599cb2..5f49ee7e290136b631face5fd5ed16bd538b827b 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.94 2008/08/17 19:40:11 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.95 2008/11/22 22:47:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -419,6 +419,27 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                        match_sjinfo = sjinfo;
                        reversed = true;
                }
+               else if (sjinfo->jointype == JOIN_SEMI &&
+                                bms_equal(sjinfo->syn_righthand, rel2->relids))
+               {
+                       /*
+                        * For a semijoin, we can join the RHS to anything else by
+                        * unique-ifying the RHS.
+                        */
+                       if (match_sjinfo)
+                               return false;   /* invalid join path */
+                       match_sjinfo = sjinfo;
+                       reversed = false;
+               }
+               else if (sjinfo->jointype == JOIN_SEMI &&
+                                bms_equal(sjinfo->syn_righthand, rel1->relids))
+               {
+                       /* Reversed semijoin case */
+                       if (match_sjinfo)
+                               return false;   /* invalid join path */
+                       match_sjinfo = sjinfo;
+                       reversed = true;
+               }
                else
                {
                        /*----------
@@ -444,14 +465,24 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                         * We assume that make_outerjoininfo() set things up correctly
                         * so that we'll only match to some SJ if the join is valid.
                         * Set flag here to check at bottom of loop.
+                        *
+                        * For a semijoin, assume it's okay if either side fully contains
+                        * the RHS (per the unique-ification case above).
                         *----------
                         */
-                       if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+                       if (sjinfo->jointype != JOIN_SEMI &&
+                               bms_overlap(rel1->relids, sjinfo->min_righthand) &&
                                bms_overlap(rel2->relids, sjinfo->min_righthand))
                        {
                                /* seems OK */
                                Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
                        }
+                       else if (sjinfo->jointype == JOIN_SEMI &&
+                                        (bms_is_subset(sjinfo->syn_righthand, rel1->relids) ||
+                                         bms_is_subset(sjinfo->syn_righthand, rel2->relids)))
+                       {
+                               /* seems OK */
+                       }
                        else
                                is_valid_inner = false;
                }
@@ -612,15 +643,23 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                                                                 restrictlist);
                        break;
                case JOIN_SEMI:
-                       if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
-                               restriction_is_constant_false(restrictlist))
+                       /*
+                        * Do these steps only if we actually have a regular semijoin,
+                        * as opposed to a case where we should unique-ify the RHS.
+                        */
+                       if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+                               bms_is_subset(sjinfo->min_righthand, rel2->relids))
                        {
-                               mark_dummy_rel(joinrel);
-                               break;
+                               if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
+                                       restriction_is_constant_false(restrictlist))
+                               {
+                                       mark_dummy_rel(joinrel);
+                                       break;
+                               }
+                               add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                        JOIN_SEMI, sjinfo,
+                                                                        restrictlist);
                        }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2,
-                                                                JOIN_SEMI, sjinfo,
-                                                                restrictlist);
 
                        /*
                         * If we know how to unique-ify the RHS and one input rel is
index d1b590d063120ed6c4645e5afc448ed3cac083c8..1764656da3806a84b5c33d92e9dab20688fd2a27 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.144 2008/10/25 19:51:32 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.145 2008/11/22 22:47:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -851,16 +851,11 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
                maybe_equivalence = false;
                maybe_outer_join = false;
        }
-       else if (bms_overlap(relids, outerjoin_nonnullable) &&
-                        (jointype != JOIN_SEMI ||
-                         bms_nonempty_difference(relids, outerjoin_nonnullable)))
+       else if (bms_overlap(relids, outerjoin_nonnullable))
        {
                /*
                 * The qual is attached to an outer join and mentions (some of the)
-                * rels on the nonnullable side, so it's not degenerate.  (For a
-                * JOIN_SEMI qual, we consider it non-degenerate only if it mentions
-                * both sides of the join --- if it mentions only one side, it can
-                * be pushed down.)
+                * rels on the nonnullable side, so it's not degenerate.
                 *
                 * We can't use such a clause to deduce equivalence (the left and
                 * right sides might be unequal above the join because one of them has
@@ -1062,6 +1057,7 @@ distribute_sublink_quals_to_rels(PlannerInfo *root,
        SpecialJoinInfo *sjinfo;
        Relids          qualscope;
        Relids          ojscope;
+       Relids          outerjoin_nonnullable;
        ListCell   *l;
 
        /*
@@ -1076,17 +1072,27 @@ distribute_sublink_quals_to_rels(PlannerInfo *root,
                                                                fslink->jointype,
                                                                quals);
 
+       /* Treat as inner join if SEMI, outer join if ANTI */
        qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand);
-       ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+       if (fslink->jointype == JOIN_SEMI)
+       {
+               ojscope = outerjoin_nonnullable = NULL;
+       }
+       else
+       {
+               Assert(fslink->jointype == JOIN_ANTI);
+               ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+               outerjoin_nonnullable = fslink->lefthand;
+       }
 
-       /* Distribute the join quals much as for a regular LEFT JOIN */
+       /* Distribute the join quals much as for a regular JOIN node */
        foreach(l, quals)
        {
                Node   *qual = (Node *) lfirst(l);
 
                distribute_qual_to_rels(root, qual,
                                                                false, below_outer_join, fslink->jointype,
-                                                               qualscope, ojscope, fslink->lefthand);
+                                                               qualscope, ojscope, outerjoin_nonnullable);
        }
 
        /* Now we can add the SpecialJoinInfo to join_info_list */
index ddd9934cf1ed70b20bb9879ceb264cf03675f202..4161c08ec54da18cb2a680669a06d867d737aa31 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.214 2008/10/21 20:42:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.215 2008/11/22 22:47:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -542,15 +542,23 @@ typedef enum JoinType
 
 /*
  * OUTER joins are those for which pushed-down quals must behave differently
- * from the join's own quals.  This is in fact everything except INNER joins.
- * However, this macro must also exclude the JOIN_UNIQUE symbols since those
- * are temporary proxies for what will eventually be an INNER join.
+ * from the join's own quals.  This is in fact everything except INNER and
+ * SEMI joins.  However, this macro must also exclude the JOIN_UNIQUE symbols
+ * since those are temporary proxies for what will eventually be an INNER
+ * join.
  *
- * Note: in some places it is preferable to treat JOIN_SEMI as not being
- * an outer join, since it doesn't produce null-extended rows.  Be aware
- * of that distinction when deciding whether to use this macro.
+ * Note: semijoins are a hybrid case, but we choose to treat them as not
+ * being outer joins.  This is okay principally because the SQL syntax makes
+ * it impossible to have a pushed-down qual that refers to the inner relation
+ * of a semijoin; so there is no strong need to distinguish join quals from
+ * pushed-down quals.  This is convenient because for almost all purposes,
+ * quals attached to a semijoin can be treated the same as innerjoin quals.
  */
 #define IS_OUTER_JOIN(jointype) \
-       ((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER)
+       (((1 << (jointype)) & \
+         ((1 << JOIN_LEFT) | \
+          (1 << JOIN_FULL) | \
+          (1 << JOIN_RIGHT) | \
+          (1 << JOIN_ANTI))) != 0)
 
 #endif   /* NODES_H */