granicus.if.org Git - postgresql/commitdiff
Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 14 Aug 2008 18:48:00 +0000 (18:48 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 14 Aug 2008 18:48:00 +0000 (18:48 +0000)
the old JOIN_IN code, but antijoins are new functionality.)  Teach the planner
to convert appropriate EXISTS and NOT EXISTS subqueries into semi and anti
joins respectively.  Also, LEFT JOINs with suitable upper-level IS NULL
filters are recognized as being anti joins.  Unify the InClauseInfo and
OuterJoinInfo infrastructure into "SpecialJoinInfo".  With that change,
it becomes possible to associate a SpecialJoinInfo with every join attempt,
which permits some cleanup of join selectivity estimation.  That needs to be
taken much further than this patch does, but the next step is to change the
API for oprjoin selectivity functions, which seems like material for a
separate patch.  So for the moment the output size estimates for semi and
especially anti joins are quite bogus.

40 files changed:
doc/src/sgml/indexam.sgml
src/backend/commands/explain.c
src/backend/executor/nodeHashjoin.c
src/backend/executor/nodeMergejoin.c
src/backend/executor/nodeNestloop.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/nodes/list.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/README
src/backend/optimizer/path/clausesel.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/path/joinrels.c
src/backend/optimizer/path/orindxpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/initsplan.c
src/backend/optimizer/plan/planmain.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/plan/subselect.c
src/backend/optimizer/prep/prepjointree.c
src/backend/optimizer/prep/prepunion.c
src/backend/optimizer/util/clauses.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/relnode.c
src/backend/optimizer/util/var.c
src/backend/rewrite/rewriteManip.c
src/backend/utils/adt/selfuncs.c
src/include/nodes/nodes.h
src/include/nodes/pg_list.h
src/include/nodes/relation.h
src/include/optimizer/clauses.h
src/include/optimizer/cost.h
src/include/optimizer/pathnode.h
src/include/optimizer/paths.h
src/include/optimizer/planmain.h
src/include/optimizer/prep.h
src/include/optimizer/subselect.h
src/include/utils/selfuncs.h

index 760d8d1a206da059f88b0beb84e36b962f212485..393ccfa2dd15d3e6be9f466f4bdb0264bf486935 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.26 2008/04/14 17:05:32 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.27 2008/08/14 18:47:58 tgl Exp $ -->
 
 <chapter id="indexam">
  <title>Index Access Method Interface Definition</title>
@@ -879,7 +879,8 @@ amcostestimate (PlannerInfo *root,
 
 <programlisting>
 *indexSelectivity = clauselist_selectivity(root, indexQuals,
-                                           index-&gt;rel-&gt;relid, JOIN_INNER);
+                                           index-&gt;rel-&gt;relid,
+                                           JOIN_INNER, NULL);
 </programlisting>
     </para>
    </step>
index 1fd6f0cb331ed7a427f878c6f20f715f662a75dc..d0033f436620e6b86f35c6d29938b33d88cecb21 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994-5, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.176 2008/08/07 03:04:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.177 2008/08/14 18:47:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -450,8 +450,11 @@ explain_outNode(StringInfo str,
                                case JOIN_RIGHT:
                                        pname = "Nested Loop Right Join";
                                        break;
-                               case JOIN_IN:
-                                       pname = "Nested Loop IN Join";
+                               case JOIN_SEMI:
+                                       pname = "Nested Loop Semi Join";
+                                       break;
+                               case JOIN_ANTI:
+                                       pname = "Nested Loop Anti Join";
                                        break;
                                default:
                                        pname = "Nested Loop ??? Join";
@@ -473,8 +476,11 @@ explain_outNode(StringInfo str,
                                case JOIN_RIGHT:
                                        pname = "Merge Right Join";
                                        break;
-                               case JOIN_IN:
-                                       pname = "Merge IN Join";
+                               case JOIN_SEMI:
+                                       pname = "Merge Semi Join";
+                                       break;
+                               case JOIN_ANTI:
+                                       pname = "Merge Anti Join";
                                        break;
                                default:
                                        pname = "Merge ??? Join";
@@ -496,8 +502,11 @@ explain_outNode(StringInfo str,
                                case JOIN_RIGHT:
                                        pname = "Hash Right Join";
                                        break;
-                               case JOIN_IN:
-                                       pname = "Hash IN Join";
+                               case JOIN_SEMI:
+                                       pname = "Hash Semi Join";
+                                       break;
+                               case JOIN_ANTI:
+                                       pname = "Hash Anti Join";
                                        break;
                                default:
                                        pname = "Hash ??? Join";
index bfe5be7c33c83ec185faac703070c4fec9f95280..837837bece0c69c53576b014048d79aa2a784266 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.93 2008/01/01 19:45:49 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.94 2008/08/14 18:47:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,9 @@
 #include "utils/memutils.h"
 
 
+/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
+#define HASHJOIN_IS_OUTER(hjstate)  ((hjstate)->hj_NullInnerTupleSlot != NULL)
+
 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
                                                  HashJoinState *hjstate,
                                                  uint32 *hashvalue);
@@ -89,14 +92,6 @@ ExecHashJoin(HashJoinState *node)
                node->js.ps.ps_TupFromTlist = false;
        }
 
-       /*
-        * If we're doing an IN join, we want to return at most one row per outer
-        * tuple; so we can stop scanning the inner scan if we matched on the
-        * previous try.
-        */
-       if (node->js.jointype == JOIN_IN && node->hj_MatchedOuter)
-               node->hj_NeedNewOuter = true;
-
        /*
         * Reset per-tuple memory context to free any expression evaluation
         * storage allocated in the previous tuple cycle.  Note this can't happen
@@ -129,7 +124,7 @@ ExecHashJoin(HashJoinState *node)
                 * outer plan node.  If we succeed, we have to stash it away for later
                 * consumption by ExecHashJoinOuterGetTuple.
                 */
-               if (node->js.jointype == JOIN_LEFT ||
+               if (HASHJOIN_IS_OUTER(node) ||
                        (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
                         !node->hj_OuterNotEmpty))
                {
@@ -162,7 +157,7 @@ ExecHashJoin(HashJoinState *node)
                 * If the inner relation is completely empty, and we're not doing an
                 * outer join, we can quit without scanning the outer relation.
                 */
-               if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT)
+               if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
                        return NULL;
 
                /*
@@ -263,27 +258,41 @@ ExecHashJoin(HashJoinState *node)
                        {
                                node->hj_MatchedOuter = true;
 
-                               if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+                               /* In an antijoin, we never return a matched tuple */
+                               if (node->js.jointype == JOIN_ANTI)
+                               {
+                                       node->hj_NeedNewOuter = true;
+                                       break;          /* out of loop over hash bucket */
+                               }
+                               else
                                {
-                                       TupleTableSlot *result;
+                                       /*
+                                        * In a semijoin, we'll consider returning the first match,
+                                        * but after that we're done with this outer tuple.
+                                        */
+                                       if (node->js.jointype == JOIN_SEMI)
+                                               node->hj_NeedNewOuter = true;
+
+                                       if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+                                       {
+                                               TupleTableSlot *result;
 
-                                       result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+                                               result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
 
-                                       if (isDone != ExprEndResult)
-                                       {
-                                               node->js.ps.ps_TupFromTlist =
-                                                       (isDone == ExprMultipleResult);
-                                               return result;
+                                               if (isDone != ExprEndResult)
+                                               {
+                                                       node->js.ps.ps_TupFromTlist =
+                                                               (isDone == ExprMultipleResult);
+                                                       return result;
+                                               }
                                        }
-                               }
 
-                               /*
-                                * If we didn't return a tuple, may need to set NeedNewOuter
-                                */
-                               if (node->js.jointype == JOIN_IN)
-                               {
-                                       node->hj_NeedNewOuter = true;
-                                       break;          /* out of loop over hash bucket */
+                                       /*
+                                        * If semijoin and we didn't return the tuple, we're still
+                                        * done with this outer tuple.
+                                        */
+                                       if (node->js.jointype == JOIN_SEMI)
+                                               break;          /* out of loop over hash bucket */
                                }
                        }
                }
@@ -296,7 +305,7 @@ ExecHashJoin(HashJoinState *node)
                node->hj_NeedNewOuter = true;
 
                if (!node->hj_MatchedOuter &&
-                       node->js.jointype == JOIN_LEFT)
+                       HASHJOIN_IS_OUTER(node))
                {
                        /*
                         * We are doing an outer join and there were no join matches for
@@ -305,7 +314,7 @@ ExecHashJoin(HashJoinState *node)
                         */
                        econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
 
-                       if (ExecQual(otherqual, econtext, false))
+                       if (otherqual == NIL || ExecQual(otherqual, econtext, false))
                        {
                                /*
                                 * qualification was satisfied so we project and return the
@@ -398,12 +407,14 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
        ExecInitResultTupleSlot(estate, &hjstate->js.ps);
        hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
 
+       /* note: HASHJOIN_IS_OUTER macro depends on this initialization */
        switch (node->join.jointype)
        {
                case JOIN_INNER:
-               case JOIN_IN:
+               case JOIN_SEMI:
                        break;
                case JOIN_LEFT:
+               case JOIN_ANTI:
                        hjstate->hj_NullInnerTupleSlot =
                                ExecInitNullTupleSlot(estate,
                                                                 ExecGetResultType(innerPlanState(hjstate)));
@@ -570,7 +581,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
                        if (ExecHashGetHashValue(hashtable, econtext,
                                                                         hjstate->hj_OuterHashKeys,
                                                                         true,          /* outer tuple */
-                                                                        (hjstate->js.jointype == JOIN_LEFT),
+                                                                        HASHJOIN_IS_OUTER(hjstate),
                                                                         hashvalue))
                        {
                                /* remember outer relation is not empty for possible rescan */
@@ -650,7 +661,7 @@ start_over:
         * sides.  We can sometimes skip over batches that are empty on only one
         * side, but there are exceptions:
         *
-        * 1. In a LEFT JOIN, we have to process outer batches even if the inner
+        * 1. In an outer join, we have to process outer batches even if the inner
         * batch is empty.
         *
         * 2. If we have increased nbatch since the initial estimate, we have to
@@ -667,7 +678,7 @@ start_over:
                        hashtable->innerBatchFile[curbatch] == NULL))
        {
                if (hashtable->outerBatchFile[curbatch] &&
-                       hjstate->js.jointype == JOIN_LEFT)
+                       HASHJOIN_IS_OUTER(hjstate))
                        break;                          /* must process due to rule 1 */
                if (hashtable->innerBatchFile[curbatch] &&
                        nbatch != hashtable->nbatch_original)
index cf1bb7d0a69471d6a503ac523741a3009e6fae47..e9deb5c8da7ca01e0ff8457ea2d7252545144797 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.91 2008/04/13 20:51:20 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.92 2008/08/14 18:47:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -757,7 +757,7 @@ ExecMergeJoin(MergeJoinState *node)
                                innerTupleSlot = node->mj_InnerTupleSlot;
                                econtext->ecxt_innertuple = innerTupleSlot;
 
-                               if (node->js.jointype == JOIN_IN &&
+                               if (node->js.jointype == JOIN_SEMI &&
                                        node->mj_MatchedOuter)
                                        qualResult = false;
                                else
@@ -772,6 +772,10 @@ ExecMergeJoin(MergeJoinState *node)
                                        node->mj_MatchedOuter = true;
                                        node->mj_MatchedInner = true;
 
+                                       /* In an antijoin, we never return a matched tuple */
+                                       if (node->js.jointype == JOIN_ANTI)
+                                               break;
+
                                        qualResult = (otherqual == NIL ||
                                                                  ExecQual(otherqual, econtext, false));
                                        MJ_DEBUG_QUAL(otherqual, qualResult);
@@ -1472,11 +1476,12 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
        switch (node->join.jointype)
        {
                case JOIN_INNER:
-               case JOIN_IN:
+               case JOIN_SEMI:
                        mergestate->mj_FillOuter = false;
                        mergestate->mj_FillInner = false;
                        break;
                case JOIN_LEFT:
+               case JOIN_ANTI:
                        mergestate->mj_FillOuter = true;
                        mergestate->mj_FillInner = false;
                        mergestate->mj_NullInnerTupleSlot =
index 78216212ff58437fa421ffa9b870f12537843d86..c6a33228582c24e138a7b2e534b3fa853fa29524 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeNestloop.c,v 1.46 2008/01/01 19:45:49 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeNestloop.c,v 1.47 2008/08/14 18:47:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -101,15 +101,6 @@ ExecNestLoop(NestLoopState *node)
                node->js.ps.ps_TupFromTlist = false;
        }
 
-       /*
-        * If we're doing an IN join, we want to return at most one row per outer
-        * tuple; so we can stop scanning the inner scan if we matched on the
-        * previous try.
-        */
-       if (node->js.jointype == JOIN_IN &&
-               node->nl_MatchedOuter)
-               node->nl_NeedNewOuter = true;
-
        /*
         * Reset per-tuple memory context to free any expression evaluation
         * storage allocated in the previous tuple cycle.  Note this can't happen
@@ -177,7 +168,8 @@ ExecNestLoop(NestLoopState *node)
                        node->nl_NeedNewOuter = true;
 
                        if (!node->nl_MatchedOuter &&
-                               node->js.jointype == JOIN_LEFT)
+                               (node->js.jointype == JOIN_LEFT ||
+                                node->js.jointype == JOIN_ANTI))
                        {
                                /*
                                 * We are doing an outer join and there were no join matches
@@ -189,7 +181,7 @@ ExecNestLoop(NestLoopState *node)
 
                                ENL1_printf("testing qualification for outer-join tuple");
 
-                               if (ExecQual(otherqual, econtext, false))
+                               if (otherqual == NIL || ExecQual(otherqual, econtext, false))
                                {
                                        /*
                                         * qualification was satisfied so we project and return
@@ -232,30 +224,39 @@ ExecNestLoop(NestLoopState *node)
                {
                        node->nl_MatchedOuter = true;
 
-                       if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+                       /* In an antijoin, we never return a matched tuple */
+                       if (node->js.jointype == JOIN_ANTI)
+                               node->nl_NeedNewOuter = true;
+                       else
                        {
                                /*
-                                * qualification was satisfied so we project and return the
-                                * slot containing the result tuple using ExecProject().
+                                * In a semijoin, we'll consider returning the first match,
+                                * but after that we're done with this outer tuple.
                                 */
-                               TupleTableSlot *result;
-                               ExprDoneCond isDone;
+                               if (node->js.jointype == JOIN_SEMI)
+                                       node->nl_NeedNewOuter = true;
+                               if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+                               {
+                                       /*
+                                        * qualification was satisfied so we project and return
+                                        * the slot containing the result tuple using
+                                        * ExecProject().
+                                        */
+                                       TupleTableSlot *result;
+                                       ExprDoneCond isDone;
 
-                               ENL1_printf("qualification succeeded, projecting tuple");
+                                       ENL1_printf("qualification succeeded, projecting tuple");
 
-                               result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+                                       result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
 
-                               if (isDone != ExprEndResult)
-                               {
-                                       node->js.ps.ps_TupFromTlist =
-                                               (isDone == ExprMultipleResult);
-                                       return result;
+                                       if (isDone != ExprEndResult)
+                                       {
+                                               node->js.ps.ps_TupFromTlist =
+                                                       (isDone == ExprMultipleResult);
+                                               return result;
+                                       }
                                }
                        }
-
-                       /* If we didn't return a tuple, may need to set NeedNewOuter */
-                       if (node->js.jointype == JOIN_IN)
-                               node->nl_NeedNewOuter = true;
                }
 
                /*
@@ -333,9 +334,10 @@ ExecInitNestLoop(NestLoop *node, EState *estate, int eflags)
        switch (node->join.jointype)
        {
                case JOIN_INNER:
-               case JOIN_IN:
+               case JOIN_SEMI:
                        break;
                case JOIN_LEFT:
+               case JOIN_ANTI:
                        nlstate->nl_NullInnerTupleSlot =
                                ExecInitNullTupleSlot(estate,
                                                                 ExecGetResultType(innerPlanState(nlstate)));
index 90ebd7819a7d1c8c5b3c6f1591aa57451d8460c6..83322320b091511d64c8ab5838a84ea45e38ec41 100644 (file)
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.399 2008/08/07 19:35:02 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.400 2008/08/14 18:47:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1444,36 +1444,37 @@ _copyRestrictInfo(RestrictInfo *from)
 }
 
 /*
- * _copyOuterJoinInfo
+ * _copyFlattenedSubLink
  */
-static OuterJoinInfo *
-_copyOuterJoinInfo(OuterJoinInfo *from)
+static FlattenedSubLink *
+_copyFlattenedSubLink(FlattenedSubLink *from)
 {
-       OuterJoinInfo *newnode = makeNode(OuterJoinInfo);
+       FlattenedSubLink *newnode = makeNode(FlattenedSubLink);
 
-       COPY_BITMAPSET_FIELD(min_lefthand);
-       COPY_BITMAPSET_FIELD(min_righthand);
-       COPY_BITMAPSET_FIELD(syn_lefthand);
-       COPY_BITMAPSET_FIELD(syn_righthand);
-       COPY_SCALAR_FIELD(is_full_join);
-       COPY_SCALAR_FIELD(lhs_strict);
-       COPY_SCALAR_FIELD(delay_upper_joins);
+       COPY_SCALAR_FIELD(jointype);
+       COPY_BITMAPSET_FIELD(lefthand);
+       COPY_BITMAPSET_FIELD(righthand);
+       COPY_NODE_FIELD(quals);
 
        return newnode;
 }
 
 /*
- * _copyInClauseInfo
+ * _copySpecialJoinInfo
  */
-static InClauseInfo *
-_copyInClauseInfo(InClauseInfo *from)
+static SpecialJoinInfo *
+_copySpecialJoinInfo(SpecialJoinInfo *from)
 {
-       InClauseInfo *newnode = makeNode(InClauseInfo);
+       SpecialJoinInfo *newnode = makeNode(SpecialJoinInfo);
 
-       COPY_BITMAPSET_FIELD(lefthand);
-       COPY_BITMAPSET_FIELD(righthand);
-       COPY_NODE_FIELD(sub_targetlist);
-       COPY_NODE_FIELD(in_operators);
+       COPY_BITMAPSET_FIELD(min_lefthand);
+       COPY_BITMAPSET_FIELD(min_righthand);
+       COPY_BITMAPSET_FIELD(syn_lefthand);
+       COPY_BITMAPSET_FIELD(syn_righthand);
+       COPY_SCALAR_FIELD(jointype);
+       COPY_SCALAR_FIELD(lhs_strict);
+       COPY_SCALAR_FIELD(delay_upper_joins);
+       COPY_NODE_FIELD(join_quals);
 
        return newnode;
 }
@@ -3233,11 +3234,11 @@ copyObject(void *from)
                case T_RestrictInfo:
                        retval = _copyRestrictInfo(from);
                        break;
-               case T_OuterJoinInfo:
-                       retval = _copyOuterJoinInfo(from);
+               case T_FlattenedSubLink:
+                       retval = _copyFlattenedSubLink(from);
                        break;
-               case T_InClauseInfo:
-                       retval = _copyInClauseInfo(from);
+               case T_SpecialJoinInfo:
+                       retval = _copySpecialJoinInfo(from);
                        break;
                case T_AppendRelInfo:
                        retval = _copyAppendRelInfo(from);
index dc0b2cca3734e7a4eb0ba4ab2592a1f5dc109af4..831a5fe19bec1fd33af859d99832a9e2d005d215 100644 (file)
@@ -18,7 +18,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.326 2008/08/07 01:11:47 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.327 2008/08/14 18:47:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -702,26 +702,27 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
 }
 
 static bool
-_equalOuterJoinInfo(OuterJoinInfo *a, OuterJoinInfo *b)
+_equalFlattenedSubLink(FlattenedSubLink *a, FlattenedSubLink *b)
 {
-       COMPARE_BITMAPSET_FIELD(min_lefthand);
-       COMPARE_BITMAPSET_FIELD(min_righthand);
-       COMPARE_BITMAPSET_FIELD(syn_lefthand);
-       COMPARE_BITMAPSET_FIELD(syn_righthand);
-       COMPARE_SCALAR_FIELD(is_full_join);
-       COMPARE_SCALAR_FIELD(lhs_strict);
-       COMPARE_SCALAR_FIELD(delay_upper_joins);
+       COMPARE_SCALAR_FIELD(jointype);
+       COMPARE_BITMAPSET_FIELD(lefthand);
+       COMPARE_BITMAPSET_FIELD(righthand);
+       COMPARE_NODE_FIELD(quals);
 
        return true;
 }
 
 static bool
-_equalInClauseInfo(InClauseInfo *a, InClauseInfo *b)
+_equalSpecialJoinInfo(SpecialJoinInfo *a, SpecialJoinInfo *b)
 {
-       COMPARE_BITMAPSET_FIELD(lefthand);
-       COMPARE_BITMAPSET_FIELD(righthand);
-       COMPARE_NODE_FIELD(sub_targetlist);
-       COMPARE_NODE_FIELD(in_operators);
+       COMPARE_BITMAPSET_FIELD(min_lefthand);
+       COMPARE_BITMAPSET_FIELD(min_righthand);
+       COMPARE_BITMAPSET_FIELD(syn_lefthand);
+       COMPARE_BITMAPSET_FIELD(syn_righthand);
+       COMPARE_SCALAR_FIELD(jointype);
+       COMPARE_SCALAR_FIELD(lhs_strict);
+       COMPARE_SCALAR_FIELD(delay_upper_joins);
+       COMPARE_NODE_FIELD(join_quals);
 
        return true;
 }
@@ -2185,11 +2186,11 @@ equal(void *a, void *b)
                case T_RestrictInfo:
                        retval = _equalRestrictInfo(a, b);
                        break;
-               case T_OuterJoinInfo:
-                       retval = _equalOuterJoinInfo(a, b);
+               case T_FlattenedSubLink:
+                       retval = _equalFlattenedSubLink(a, b);
                        break;
-               case T_InClauseInfo:
-                       retval = _equalInClauseInfo(a, b);
+               case T_SpecialJoinInfo:
+                       retval = _equalSpecialJoinInfo(a, b);
                        break;
                case T_AppendRelInfo:
                        retval = _equalAppendRelInfo(a, b);
index ab54d9aa5954133e67ed0a9013f991faa8ac7e38..4f1819b987459daa01736e4b3457bea54afb84d9 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/list.c,v 1.69 2008/01/01 19:45:50 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/list.c,v 1.70 2008/08/14 18:47:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -783,6 +783,42 @@ list_union_oid(List *list1, List *list2)
        return result;
 }
 
+/*
+ * Return a list that contains all the cells that are in both list1 and
+ * list2.  The returned list is freshly allocated via palloc(), but the
+ * cells themselves point to the same objects as the cells of the
+ * input lists.
+ *
+ * Duplicate entries in list1 will not be suppressed, so it's only a true
+ * "intersection" if list1 is known unique beforehand.
+ *
+ * This variant works on lists of pointers, and determines list
+ * membership via equal().  Note that the list1 member will be pointed
+ * to in the result.
+ */
+List *
+list_intersection(List *list1, List *list2)
+{
+       List       *result;
+       ListCell   *cell;
+
+       if (list1 == NIL || list2 == NIL)
+               return NIL;
+
+       Assert(IsPointerList(list1));
+       Assert(IsPointerList(list2));
+
+       result = NIL;
+       foreach(cell, list1)
+       {
+               if (list_member(list2, lfirst(cell)))
+                       result = lappend(result, lfirst(cell));
+       }
+
+       check_list_invariants(result);
+       return result;
+}
+
 /*
  * Return a list that contains all the cells in list1 that are not in
  * list2. The returned list is freshly allocated via palloc(), but the
index 4728117be24982677aeec51af1bf5946dc6c2486..13c824389ef581c42bdab7c52d4b4488d5a5e033 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.333 2008/08/07 19:35:02 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.334 2008/08/14 18:47:58 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1267,6 +1267,8 @@ _outUniquePath(StringInfo str, UniquePath *node)
 
        WRITE_NODE_FIELD(subpath);
        WRITE_ENUM_FIELD(umethod, UniquePathMethod);
+       WRITE_NODE_FIELD(in_operators);
+       WRITE_NODE_FIELD(uniq_exprs);
        WRITE_FLOAT_FIELD(rows, "%.0f");
 }
 
@@ -1332,8 +1334,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
        WRITE_NODE_FIELD(left_join_clauses);
        WRITE_NODE_FIELD(right_join_clauses);
        WRITE_NODE_FIELD(full_join_clauses);
-       WRITE_NODE_FIELD(oj_info_list);
-       WRITE_NODE_FIELD(in_info_list);
+       WRITE_NODE_FIELD(join_info_list);
        WRITE_NODE_FIELD(append_rel_list);
        WRITE_NODE_FIELD(query_pathkeys);
        WRITE_NODE_FIELD(group_pathkeys);
@@ -1342,7 +1343,6 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
        WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
        WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
        WRITE_BOOL_FIELD(hasJoinRTEs);
-       WRITE_BOOL_FIELD(hasOuterJoins);
        WRITE_BOOL_FIELD(hasHavingQual);
        WRITE_BOOL_FIELD(hasPseudoConstantQuals);
 }
@@ -1479,28 +1479,29 @@ _outInnerIndexscanInfo(StringInfo str, InnerIndexscanInfo *node)
 }
 
 static void
-_outOuterJoinInfo(StringInfo str, OuterJoinInfo *node)
+_outFlattenedSubLink(StringInfo str, FlattenedSubLink *node)
+{
+       WRITE_NODE_TYPE("FLATTENEDSUBLINK");
+
+       WRITE_ENUM_FIELD(jointype, JoinType);
+       WRITE_BITMAPSET_FIELD(lefthand);
+       WRITE_BITMAPSET_FIELD(righthand);
+       WRITE_NODE_FIELD(quals);
+}
+
+static void
+_outSpecialJoinInfo(StringInfo str, SpecialJoinInfo *node)
 {
-       WRITE_NODE_TYPE("OUTERJOININFO");
+       WRITE_NODE_TYPE("SPECIALJOININFO");
 
        WRITE_BITMAPSET_FIELD(min_lefthand);
        WRITE_BITMAPSET_FIELD(min_righthand);
        WRITE_BITMAPSET_FIELD(syn_lefthand);
        WRITE_BITMAPSET_FIELD(syn_righthand);
-       WRITE_BOOL_FIELD(is_full_join);
+       WRITE_ENUM_FIELD(jointype, JoinType);
        WRITE_BOOL_FIELD(lhs_strict);
        WRITE_BOOL_FIELD(delay_upper_joins);
-}
-
-static void
-_outInClauseInfo(StringInfo str, InClauseInfo *node)
-{
-       WRITE_NODE_TYPE("INCLAUSEINFO");
-
-       WRITE_BITMAPSET_FIELD(lefthand);
-       WRITE_BITMAPSET_FIELD(righthand);
-       WRITE_NODE_FIELD(sub_targetlist);
-       WRITE_NODE_FIELD(in_operators);
+       WRITE_NODE_FIELD(join_quals);
 }
 
 static void
@@ -2352,11 +2353,11 @@ _outNode(StringInfo str, void *obj)
                        case T_InnerIndexscanInfo:
                                _outInnerIndexscanInfo(str, obj);
                                break;
-                       case T_OuterJoinInfo:
-                               _outOuterJoinInfo(str, obj);
+                       case T_FlattenedSubLink:
+                               _outFlattenedSubLink(str, obj);
                                break;
-                       case T_InClauseInfo:
-                               _outInClauseInfo(str, obj);
+                       case T_SpecialJoinInfo:
+                               _outSpecialJoinInfo(str, obj);
                                break;
                        case T_AppendRelInfo:
                                _outAppendRelInfo(str, obj);
index ba18db0dd97db2494367f901b3bc2ea07954f5eb..18763841ae07e999be5d08a12ab2d76b05d7ef00 100644 (file)
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.47 2008/08/02 21:31:59 tgl Exp $
+$PostgreSQL: pgsql/src/backend/optimizer/README,v 1.48 2008/08/14 18:47:59 tgl Exp $
 
 Optimizer
 =========
@@ -114,9 +114,8 @@ no choice but to generate a clauseless Cartesian-product join; so we
 consider joining that rel to each other available rel.  But in the presence
 of join clauses we will only consider joins that use available join
 clauses.  Note that join-order restrictions induced by outer joins and
-IN clauses are treated as if they were real join clauses, to ensure that
-we find a workable join order in cases where those restrictions force a
-clauseless join to be done.)
+IN/EXISTS clauses are also checked, to ensure that we find a workable join
+order in cases where those restrictions force a clauseless join to be done.)
 
 If we only had two relations in the list, we are done: we just pick
 the cheapest path for the join RelOptInfo.  If we had more than two, we now
@@ -174,9 +173,9 @@ for it or the cheapest path with the desired ordering (if that's cheaper
 than applying a sort to the cheapest other path).
 
 If the query contains one-sided outer joins (LEFT or RIGHT joins), or
-"IN (sub-select)" WHERE clauses that were converted to joins, then some of
+IN or EXISTS WHERE clauses that were converted to joins, then some of
 the possible join orders may be illegal.  These are excluded by having
-join_is_legal consult side lists of outer joins and IN joins to see
+join_is_legal consult a side list of such "special" joins to see
 whether a proposed join is illegal.  (The same consultation allows it
 to see which join style should be applied for a valid join, ie,
 JOIN_INNER, JOIN_LEFT, etc.)
@@ -219,10 +218,10 @@ FULL JOIN ordering is enforced by not collapsing FULL JOIN nodes when
 translating the jointree to "joinlist" representation.  LEFT and RIGHT
 JOIN nodes are normally collapsed so that they participate fully in the
 join order search.  To avoid generating illegal join orders, the planner
-creates an OuterJoinInfo node for each outer join, and join_is_legal
+creates a SpecialJoinInfo node for each outer join, and join_is_legal
 checks this list to decide if a proposed join is legal.
 
-What we store in OuterJoinInfo nodes are the minimum sets of Relids
+What we store in SpecialJoinInfo nodes are the minimum sets of Relids
 required on each side of the join to form the outer join.  Note that
 these are minimums; there's no explicit maximum, since joining other
 rels to the OJ's syntactic rels may be legal.  Per identities 1 and 2,
@@ -273,7 +272,7 @@ planner()
  set up for recursive handling of subqueries
  do final cleanup after planning
 -subquery_planner()
- pull up subqueries from rangetable, if possible
+ pull up sublinks and subqueries from rangetable, if possible
  canonicalize qual
      Attempt to simplify WHERE clause to the most useful form; this includes
      flattening nested AND/ORs and detecting clauses that are duplicated in
index e358c6990e248e0c3d895c57baf40679b0c60e07..e788f2787310cd3d8f119738f5a503c149e6329e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.90 2008/01/11 17:00:45 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.91 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -94,7 +94,8 @@ Selectivity
 clauselist_selectivity(PlannerInfo *root,
                                           List *clauses,
                                           int varRelid,
-                                          JoinType jointype)
+                                          JoinType jointype,
+                                          SpecialJoinInfo *sjinfo)
 {
        Selectivity s1 = 1.0;
        RangeQueryClause *rqlist = NULL;
@@ -106,7 +107,7 @@ clauselist_selectivity(PlannerInfo *root,
         */
        if (list_length(clauses) == 1)
                return clause_selectivity(root, (Node *) linitial(clauses),
-                                                                 varRelid, jointype);
+                                                                 varRelid, jointype, sjinfo);
 
        /*
         * Initial scan over clauses.  Anything that doesn't look like a potential
@@ -120,7 +121,7 @@ clauselist_selectivity(PlannerInfo *root,
                Selectivity s2;
 
                /* Always compute the selectivity using clause_selectivity */
-               s2 = clause_selectivity(root, clause, varRelid, jointype);
+               s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
 
                /*
                 * Check for being passed a RestrictInfo.
@@ -227,9 +228,8 @@ clauselist_selectivity(PlannerInfo *root,
                                s2 = rqlist->hibound + rqlist->lobound - 1.0;
 
                                /* Adjust for double-exclusion of NULLs */
-                               /* HACK: disable nulltestsel's special outer-join logic */
                                s2 += nulltestsel(root, IS_NULL, rqlist->var,
-                                                                 varRelid, JOIN_INNER);
+                                                                 varRelid, jointype, sjinfo);
 
                                /*
                                 * A zero or slightly negative s2 should be converted into a
@@ -420,13 +420,32 @@ bms_is_subset_singleton(const Bitmapset *s, int x)
  * is appropriate for ordinary join clauses and restriction clauses.
  *
  * jointype is the join type, if the clause is a join clause.  Pass JOIN_INNER
- * if the clause isn't a join clause or the context is uncertain.
+ * if the clause isn't a join clause.
+ *
+ * sjinfo is NULL for a non-join clause, otherwise it provides additional
+ * context information about the join being performed.  There are some
+ * special cases:
+ *     1. For a special (not INNER) join, sjinfo is always a member of
+ *        root->join_info_list.
+ *     2. For an INNER join, sjinfo is just a transient struct, and only the
+ *        relids and jointype fields in it can be trusted.
+ *     3. XXX sjinfo might be NULL even though it really is a join.  This case
+ *        will go away soon, but fixing it requires API changes for oprjoin and
+ *        amcostestimate functions.
+ * It is possible for jointype to be different from sjinfo->jointype.
+ * This indicates we are considering a variant join: either with
+ * the LHS and RHS switched, or with one input unique-ified.
+ *
+ * Note: when passing nonzero varRelid, it's normally appropriate to set
+ * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
+ * join clause, because we aren't treating it as a join clause.
  */
 Selectivity
 clause_selectivity(PlannerInfo *root,
                                   Node *clause,
                                   int varRelid,
-                                  JoinType jointype)
+                                  JoinType jointype,
+                                  SpecialJoinInfo *sjinfo)
 {
        Selectivity s1 = 0.5;           /* default for any unhandled clause type */
        RestrictInfo *rinfo = NULL;
@@ -457,36 +476,15 @@ clause_selectivity(PlannerInfo *root,
                 * If possible, cache the result of the selectivity calculation for
                 * the clause.  We can cache if varRelid is zero or the clause
                 * contains only vars of that relid --- otherwise varRelid will affect
-                * the result, so mustn't cache.  We also have to be careful about the
-                * jointype.  It's OK to cache when jointype is JOIN_INNER or one of
-                * the outer join types (any given outer-join clause should always be
-                * examined with the same jointype, so result won't change). It's not
-                * OK to cache when jointype is one of the special types associated
-                * with IN processing, because the same clause may be examined with
-                * different jointypes and the result should vary.
+                * the result, so mustn't cache.
                 */
                if (varRelid == 0 ||
                        bms_is_subset_singleton(rinfo->clause_relids, varRelid))
                {
-                       switch (jointype)
-                       {
-                               case JOIN_INNER:
-                               case JOIN_LEFT:
-                               case JOIN_FULL:
-                               case JOIN_RIGHT:
-                                       /* Cacheable --- do we already have the result? */
-                                       if (rinfo->this_selec >= 0)
-                                               return rinfo->this_selec;
-                                       cacheable = true;
-                                       break;
-
-                               case JOIN_IN:
-                               case JOIN_REVERSE_IN:
-                               case JOIN_UNIQUE_OUTER:
-                               case JOIN_UNIQUE_INNER:
-                                       /* unsafe to cache */
-                                       break;
-                       }
+                       /* Cacheable --- do we already have the result? */
+                       if (rinfo->this_selec >= 0)
+                               return rinfo->this_selec;
+                       cacheable = true;
                }
 
                /*
@@ -568,7 +566,8 @@ clause_selectivity(PlannerInfo *root,
                s1 = 1.0 - clause_selectivity(root,
                                                                  (Node *) get_notclausearg((Expr *) clause),
                                                                          varRelid,
-                                                                         jointype);
+                                                                         jointype,
+                                                                         sjinfo);
        }
        else if (and_clause(clause))
        {
@@ -576,7 +575,8 @@ clause_selectivity(PlannerInfo *root,
                s1 = clauselist_selectivity(root,
                                                                        ((BoolExpr *) clause)->args,
                                                                        varRelid,
-                                                                       jointype);
+                                                                       jointype,
+                                                                       sjinfo);
        }
        else if (or_clause(clause))
        {
@@ -594,7 +594,8 @@ clause_selectivity(PlannerInfo *root,
                        Selectivity s2 = clause_selectivity(root,
                                                                                                (Node *) lfirst(arg),
                                                                                                varRelid,
-                                                                                               jointype);
+                                                                                               jointype,
+                                                                                               sjinfo);
 
                        s1 = s1 + s2 - s1 * s2;
                }
@@ -700,7 +701,8 @@ clause_selectivity(PlannerInfo *root,
                                                        (ScalarArrayOpExpr *) clause,
                                                        is_join_clause,
                                                        varRelid,
-                                                       jointype);
+                                                       jointype,
+                                                       sjinfo);
        }
        else if (IsA(clause, RowCompareExpr))
        {
@@ -708,7 +710,8 @@ clause_selectivity(PlannerInfo *root,
                s1 = rowcomparesel(root,
                                                   (RowCompareExpr *) clause,
                                                   varRelid,
-                                                  jointype);
+                                                  jointype,
+                                                  sjinfo);
        }
        else if (IsA(clause, NullTest))
        {
@@ -717,7 +720,8 @@ clause_selectivity(PlannerInfo *root,
                                                 ((NullTest *) clause)->nulltesttype,
                                                 (Node *) ((NullTest *) clause)->arg,
                                                 varRelid,
-                                                jointype);
+                                                jointype,
+                                                sjinfo);
        }
        else if (IsA(clause, BooleanTest))
        {
@@ -726,7 +730,8 @@ clause_selectivity(PlannerInfo *root,
                                                 ((BooleanTest *) clause)->booltesttype,
                                                 (Node *) ((BooleanTest *) clause)->arg,
                                                 varRelid,
-                                                jointype);
+                                                jointype,
+                                                sjinfo);
        }
        else if (IsA(clause, CurrentOfExpr))
        {
@@ -743,7 +748,8 @@ clause_selectivity(PlannerInfo *root,
                s1 = clause_selectivity(root,
                                                                (Node *) ((RelabelType *) clause)->arg,
                                                                varRelid,
-                                                               jointype);
+                                                               jointype,
+                                                               sjinfo);
        }
        else if (IsA(clause, CoerceToDomain))
        {
@@ -751,7 +757,8 @@ clause_selectivity(PlannerInfo *root,
                s1 = clause_selectivity(root,
                                                                (Node *) ((CoerceToDomain *) clause)->arg,
                                                                varRelid,
-                                                               jointype);
+                                                               jointype,
+                                                               sjinfo);
        }
 
        /* Cache the result if possible */
index bbb1d3a1adfbd2a1e15082d35e8475ce00546523..d9d8c89864064adbdc43ebdf9690f96c47253cfc 100644 (file)
@@ -54,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.192 2008/03/24 21:53:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.193 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -119,8 +119,9 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
                           PathKey *pathkey);
 static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
 static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
-                                  JoinType jointype);
-static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root);
+                                  SpecialJoinInfo *sjinfo);
+static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root,
+                                                                          SpecialJoinInfo *sjinfo);
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
@@ -1273,9 +1274,10 @@ nestloop_inner_path_rows(Path *path)
  *       nested loop algorithm.
  *
  * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
  */
 void
-cost_nestloop(NestPath *path, PlannerInfo *root)
+cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
        Path       *outer_path = path->outerjoinpath;
        Path       *inner_path = path->innerjoinpath;
@@ -1298,7 +1300,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
         * selectivity.  (This assumes that all the quals attached to the join are
         * IN quals, which should be true.)
         */
-       joininfactor = join_in_selectivity(path, root);
+       joininfactor = join_in_selectivity(path, root, sjinfo);
 
        /* cost of source data */
 
@@ -1349,6 +1351,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
  *       merge join algorithm.
  *
  * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
  *
  * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
  * outersortkeys and innersortkeys are lists of the keys to be used
@@ -1356,7 +1359,7 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
  * sort is needed because the source path is already ordered.
  */
 void
-cost_mergejoin(MergePath *path, PlannerInfo *root)
+cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
        Path       *outer_path = path->jpath.outerjoinpath;
        Path       *inner_path = path->jpath.innerjoinpath;
@@ -1402,8 +1405,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
         * Note: it's probably bogus to use the normal selectivity calculation
         * here when either the outer or inner path is a UniquePath.
         */
-       merge_selec = approx_selectivity(root, mergeclauses,
-                                                                        path->jpath.jointype);
+       merge_selec = approx_selectivity(root, mergeclauses, sjinfo);
        cost_qual_eval(&merge_qual_cost, mergeclauses, root);
        cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
        qp_qual_cost.startup -= merge_qual_cost.startup;
@@ -1605,7 +1607,7 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
         * output size.  (This assumes that all the quals attached to the join are
         * IN quals, which should be true.)
         */
-       joininfactor = join_in_selectivity(&path->jpath, root);
+       joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
 
        /*
         * The number of tuple comparisons needed is approximately number of outer
@@ -1696,11 +1698,12 @@ cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey)
  *       hash join algorithm.
  *
  * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
  *
  * Note: path's hashclauses should be a subset of the joinrestrictinfo list
  */
 void
-cost_hashjoin(HashPath *path, PlannerInfo *root)
+cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
        Path       *outer_path = path->jpath.outerjoinpath;
        Path       *inner_path = path->jpath.innerjoinpath;
@@ -1733,8 +1736,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
         * Note: it's probably bogus to use the normal selectivity calculation
         * here when either the outer or inner path is a UniquePath.
         */
-       hash_selec = approx_selectivity(root, hashclauses,
-                                                                       path->jpath.jointype);
+       hash_selec = approx_selectivity(root, hashclauses, sjinfo);
        cost_qual_eval(&hash_qual_cost, hashclauses, root);
        cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
        qp_qual_cost.startup -= hash_qual_cost.startup;
@@ -1863,7 +1865,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
         * output size.  (This assumes that all the quals attached to the join are
         * IN quals, which should be true.)
         */
-       joininfactor = join_in_selectivity(&path->jpath, root);
+       joininfactor = join_in_selectivity(&path->jpath, root, sjinfo);
 
        /*
         * The number of tuple comparisons needed is the number of outer tuples
@@ -2216,6 +2218,9 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
  *             The input can be either an implicitly-ANDed list of boolean
  *             expressions, or a list of RestrictInfo nodes (typically the latter).
  *
+ * Currently this is only used in join estimation, so sjinfo should never
+ * be NULL.
+ *
  * This is quick-and-dirty because we bypass clauselist_selectivity, and
  * simply multiply the independent clause selectivities together.  Now
  * clauselist_selectivity often can't do any better than that anyhow, but
@@ -2228,7 +2233,7 @@ get_initplan_cost(PlannerInfo *root, SubPlan *subplan)
  * seems OK to live with the approximation.
  */
 static Selectivity
-approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype)
+approx_selectivity(PlannerInfo *root, List *quals, SpecialJoinInfo *sjinfo)
 {
        Selectivity total = 1.0;
        ListCell   *l;
@@ -2238,7 +2243,7 @@ approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype)
                Node       *qual = (Node *) lfirst(l);
 
                /* Note that clause_selectivity will be able to cache its result */
-               total *= clause_selectivity(root, qual, 0, jointype);
+               total *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
        }
        return total;
 }
@@ -2269,7 +2274,8 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
                clauselist_selectivity(root,
                                                           rel->baserestrictinfo,
                                                           0,
-                                                          JOIN_INNER);
+                                                          JOIN_INNER,
+                                                          NULL);
 
        rel->rows = clamp_row_est(nrows);
 
@@ -2295,11 +2301,6 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
  * calculations for each pair of input rels that's encountered, and somehow
  * average the results?  Probably way more trouble than it's worth.)
  *
- * It's important that the results for symmetric JoinTypes be symmetric,
- * eg, (rel1, rel2, JOIN_LEFT) should produce the same result as (rel2,
- * rel1, JOIN_RIGHT).  Also, JOIN_IN should produce the same result as
- * JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER.
- *
  * We set only the rows field here.  The width field was already set by
  * build_joinrel_tlist, and baserestrictcost is not used for join rels.
  */
@@ -2307,9 +2308,10 @@ void
 set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                                                   RelOptInfo *outer_rel,
                                                   RelOptInfo *inner_rel,
-                                                  JoinType jointype,
+                                                  SpecialJoinInfo *sjinfo,
                                                   List *restrictlist)
 {
+       JoinType        jointype = sjinfo->jointype;
        Selectivity jselec;
        Selectivity pselec;
        double          nrows;
@@ -2347,11 +2349,13 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                jselec = clauselist_selectivity(root,
                                                                                joinquals,
                                                                                0,
-                                                                               jointype);
+                                                                               jointype,
+                                                                               sjinfo);
                pselec = clauselist_selectivity(root,
                                                                                pushedquals,
                                                                                0,
-                                                                               jointype);
+                                                                               jointype,
+                                                                               sjinfo);
 
                /* Avoid leaking a lot of ListCells */
                list_free(joinquals);
@@ -2362,7 +2366,8 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                jselec = clauselist_selectivity(root,
                                                                                restrictlist,
                                                                                0,
-                                                                               jointype);
+                                                                               jointype,
+                                                                               sjinfo);
                pselec = 0.0;                   /* not used, keep compiler quiet */
        }
 
@@ -2390,12 +2395,6 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                                nrows = outer_rel->rows;
                        nrows *= pselec;
                        break;
-               case JOIN_RIGHT:
-                       nrows = outer_rel->rows * inner_rel->rows * jselec;
-                       if (nrows < inner_rel->rows)
-                               nrows = inner_rel->rows;
-                       nrows *= pselec;
-                       break;
                case JOIN_FULL:
                        nrows = outer_rel->rows * inner_rel->rows * jselec;
                        if (nrows < outer_rel->rows)
@@ -2404,23 +2403,27 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                                nrows = inner_rel->rows;
                        nrows *= pselec;
                        break;
-               case JOIN_IN:
-               case JOIN_UNIQUE_INNER:
+               case JOIN_SEMI:
+                       /* XXX this is unsafe, could Assert? */
                        upath = create_unique_path(root, inner_rel,
-                                                                          inner_rel->cheapest_total_path);
-                       nrows = outer_rel->rows * upath->rows * jselec;
+                                                                          inner_rel->cheapest_total_path,
+                                                                          sjinfo);
+                       if (upath)
+                               nrows = outer_rel->rows * upath->rows * jselec;
+                       else
+                               nrows = outer_rel->rows * inner_rel->rows * jselec;
                        if (nrows > outer_rel->rows)
                                nrows = outer_rel->rows;
                        break;
-               case JOIN_REVERSE_IN:
-               case JOIN_UNIQUE_OUTER:
-                       upath = create_unique_path(root, outer_rel,
-                                                                          outer_rel->cheapest_total_path);
-                       nrows = upath->rows * inner_rel->rows * jselec;
-                       if (nrows > inner_rel->rows)
-                               nrows = inner_rel->rows;
+               case JOIN_ANTI:
+                       /* XXX this is utterly wrong */
+                       nrows = outer_rel->rows * inner_rel->rows * jselec;
+                       if (nrows < outer_rel->rows)
+                               nrows = outer_rel->rows;
+                       nrows *= pselec;
                        break;
                default:
+                       /* other values not expected here */
                        elog(ERROR, "unrecognized join type: %d", (int) jointype);
                        nrows = 0;                      /* keep compiler quiet */
                        break;
@@ -2435,9 +2438,10 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
  *       to be smaller than an ordinary inner join.
  *
  * 'path' is already filled in except for the cost fields
+ * 'sjinfo' must be the JOIN_SEMI SpecialJoinInfo when path is a JOIN_SEMI join
  */
 static Selectivity
-join_in_selectivity(JoinPath *path, PlannerInfo *root)
+join_in_selectivity(JoinPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
        RelOptInfo *innerrel;
        UniquePath *innerunique;
@@ -2445,8 +2449,9 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
        double          nrows;
 
        /* Return 1.0 whenever it's not JOIN_IN */
-       if (path->jointype != JOIN_IN)
+       if (path->jointype != JOIN_SEMI)
                return 1.0;
+       Assert(sjinfo && sjinfo->jointype == JOIN_SEMI);
 
        /*
         * Return 1.0 if the inner side is already known unique.  The case where
@@ -2458,10 +2463,12 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
        if (IsA(path->innerjoinpath, UniquePath))
                return 1.0;
        innerrel = path->innerjoinpath->parent;
+       /* XXX might assert if sjinfo doesn't exactly match innerrel? */
        innerunique = create_unique_path(root,
                                                                         innerrel,
-                                                                        innerrel->cheapest_total_path);
-       if (innerunique->rows >= innerrel->rows)
+                                                                        innerrel->cheapest_total_path,
+                                                                        sjinfo);
+       if (innerunique && innerunique->rows >= innerrel->rows)
                return 1.0;
 
        /*
@@ -2473,7 +2480,8 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
        selec = clauselist_selectivity(root,
                                                                   path->joinrestrictinfo,
                                                                   0,
-                                                                  JOIN_INNER);
+                                                                  JOIN_INNER,
+                                                                  NULL);
        nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec;
 
        nrows = clamp_row_est(nrows);
index e0bd5480083b8d2aa17bfd1598ae4265a3fd229c..92a3fb59d1b33b252186d74ae16f560f2b213d5c 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.231 2008/05/27 00:13:09 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.232 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1631,16 +1631,16 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
        *cheapest_startup = *cheapest_total = NULL;
 
        /*
-        * Nestloop only supports inner, left, and IN joins.
+        * Nestloop only supports inner, left, semi, and anti joins.
         */
        switch (jointype)
        {
                case JOIN_INNER:
-               case JOIN_IN:
-               case JOIN_UNIQUE_OUTER:
                        isouterjoin = false;
                        break;
                case JOIN_LEFT:
+               case JOIN_SEMI:
+               case JOIN_ANTI:
                        isouterjoin = true;
                        break;
                default:
index 3e646d103251dbbb86ec8c12cbf874e628b0ad40..845f429a4b4865fc55a7f7e24c0b03c10c9dd4b3 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.116 2008/03/24 21:53:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.117 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
                                         RelOptInfo *outerrel, RelOptInfo *innerrel,
                                         List *restrictlist, List *mergeclause_list,
-                                        JoinType jointype);
+                                        JoinType jointype, SpecialJoinInfo *sjinfo);
 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
                                         RelOptInfo *outerrel, RelOptInfo *innerrel,
                                         List *restrictlist, List *mergeclause_list,
-                                        JoinType jointype);
+                                        JoinType jointype, SpecialJoinInfo *sjinfo);
 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
                                         RelOptInfo *outerrel, RelOptInfo *innerrel,
-                                        List *restrictlist, JoinType jointype);
+                                        List *restrictlist,
+                                        JoinType jointype, SpecialJoinInfo *sjinfo);
 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
                                                 RelOptInfo *outer_rel, JoinType jointype);
 static List *select_mergejoin_clauses(PlannerInfo *root,
@@ -52,6 +53,18 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
  *
  * Modifies the pathlist field of the joinrel node to contain the best
  * paths found so far.
+ *
+ * jointype is not necessarily the same as sjinfo->jointype; it might be
+ * "flipped around" if we are considering joining the rels in the opposite
+ * direction from what's indicated in sjinfo.
+ *
+ * Also, this routine and others in this module accept the special JoinTypes
+ * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
+ * unique-ify the outer or inner relation and then apply a regular inner
+ * join.  These values are not allowed to propagate outside this module,
+ * however.  Path cost estimation code may need to recognize that it's
+ * dealing with such a case --- the combination of nominal jointype INNER
+ * with sjinfo->jointype == JOIN_SEMI indicates that.
  */
 void
 add_paths_to_joinrel(PlannerInfo *root,
@@ -59,6 +72,7 @@ add_paths_to_joinrel(PlannerInfo *root,
                                         RelOptInfo *outerrel,
                                         RelOptInfo *innerrel,
                                         JoinType jointype,
+                                        SpecialJoinInfo *sjinfo,
                                         List *restrictlist)
 {
        List       *mergeclause_list = NIL;
@@ -82,7 +96,7 @@ add_paths_to_joinrel(PlannerInfo *root,
         * sorted.
         */
        sort_inner_and_outer(root, joinrel, outerrel, innerrel,
-                                                restrictlist, mergeclause_list, jointype);
+                                                restrictlist, mergeclause_list, jointype, sjinfo);
 
        /*
         * 2. Consider paths where the outer relation need not be explicitly
@@ -90,7 +104,7 @@ add_paths_to_joinrel(PlannerInfo *root,
         * path is already ordered.
         */
        match_unsorted_outer(root, joinrel, outerrel, innerrel,
-                                                restrictlist, mergeclause_list, jointype);
+                                                restrictlist, mergeclause_list, jointype, sjinfo);
 
 #ifdef NOT_USED
 
@@ -106,7 +120,7 @@ add_paths_to_joinrel(PlannerInfo *root,
         * invoked with the two rels given in the other order.
         */
        match_unsorted_inner(root, joinrel, outerrel, innerrel,
-                                                restrictlist, mergeclause_list, jointype);
+                                                restrictlist, mergeclause_list, jointype, sjinfo);
 #endif
 
        /*
@@ -115,7 +129,7 @@ add_paths_to_joinrel(PlannerInfo *root,
         */
        if (enable_hashjoin)
                hash_inner_and_outer(root, joinrel, outerrel, innerrel,
-                                                        restrictlist, jointype);
+                                                        restrictlist, jointype, sjinfo);
 }
 
 /*
@@ -131,6 +145,7 @@ add_paths_to_joinrel(PlannerInfo *root,
  * 'mergeclause_list' is a list of RestrictInfo nodes for available
  *             mergejoin clauses in this join
  * 'jointype' is the type of join to do
+ * 'sjinfo' is extra info about the join for selectivity estimation
  */
 static void
 sort_inner_and_outer(PlannerInfo *root,
@@ -139,7 +154,8 @@ sort_inner_and_outer(PlannerInfo *root,
                                         RelOptInfo *innerrel,
                                         List *restrictlist,
                                         List *mergeclause_list,
-                                        JoinType jointype)
+                                        JoinType jointype,
+                                        SpecialJoinInfo *sjinfo)
 {
        bool            useallclauses;
        Path       *outer_path;
@@ -155,7 +171,8 @@ sort_inner_and_outer(PlannerInfo *root,
        {
                case JOIN_INNER:
                case JOIN_LEFT:
-               case JOIN_IN:
+               case JOIN_SEMI:
+               case JOIN_ANTI:
                case JOIN_UNIQUE_OUTER:
                case JOIN_UNIQUE_INNER:
                        useallclauses = false;
@@ -184,12 +201,16 @@ sort_inner_and_outer(PlannerInfo *root,
        inner_path = innerrel->cheapest_total_path;
        if (jointype == JOIN_UNIQUE_OUTER)
        {
-               outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
+               outer_path = (Path *) create_unique_path(root, outerrel,
+                                                                                                outer_path, sjinfo);
+               Assert(outer_path);
                jointype = JOIN_INNER;
        }
        else if (jointype == JOIN_UNIQUE_INNER)
        {
-               inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
+               inner_path = (Path *) create_unique_path(root, innerrel,
+                                                                                                inner_path, sjinfo);
+               Assert(inner_path);
                jointype = JOIN_INNER;
        }
 
@@ -270,6 +291,7 @@ sort_inner_and_outer(PlannerInfo *root,
                                 create_mergejoin_path(root,
                                                                           joinrel,
                                                                           jointype,
+                                                                          sjinfo,
                                                                           outer_path,
                                                                           inner_path,
                                                                           restrictlist,
@@ -312,6 +334,7 @@ sort_inner_and_outer(PlannerInfo *root,
  * 'mergeclause_list' is a list of RestrictInfo nodes for available
  *             mergejoin clauses in this join
  * 'jointype' is the type of join to do
+ * 'sjinfo' is extra info about the join for selectivity estimation
  */
 static void
 match_unsorted_outer(PlannerInfo *root,
@@ -320,7 +343,8 @@ match_unsorted_outer(PlannerInfo *root,
                                         RelOptInfo *innerrel,
                                         List *restrictlist,
                                         List *mergeclause_list,
-                                        JoinType jointype)
+                                        JoinType jointype,
+                                        SpecialJoinInfo *sjinfo)
 {
        JoinType        save_jointype = jointype;
        bool            nestjoinOK;
@@ -333,19 +357,18 @@ match_unsorted_outer(PlannerInfo *root,
        ListCell   *l;
 
        /*
-        * Nestloop only supports inner, left, and IN joins.  Also, if we are
-        * doing a right or full join, we must use *all* the mergeclauses as join
-        * clauses, else we will not have a valid plan.  (Although these two flags
-        * are currently inverses, keep them separate for clarity and possible
-        * future changes.)
+        * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
+        * are doing a right or full join, we must use *all* the mergeclauses as
+        * join clauses, else we will not have a valid plan.  (Although these two
+        * flags are currently inverses, keep them separate for clarity and
+        * possible future changes.)
         */
        switch (jointype)
        {
                case JOIN_INNER:
                case JOIN_LEFT:
-               case JOIN_IN:
-               case JOIN_UNIQUE_OUTER:
-               case JOIN_UNIQUE_INNER:
+               case JOIN_SEMI:
+               case JOIN_ANTI:
                        nestjoinOK = true;
                        useallclauses = false;
                        break;
@@ -354,6 +377,12 @@ match_unsorted_outer(PlannerInfo *root,
                        nestjoinOK = false;
                        useallclauses = true;
                        break;
+               case JOIN_UNIQUE_OUTER:
+               case JOIN_UNIQUE_INNER:
+                       jointype = JOIN_INNER;
+                       nestjoinOK = true;
+                       useallclauses = false;
+                       break;
                default:
                        elog(ERROR, "unrecognized join type: %d",
                                 (int) jointype);
@@ -366,12 +395,12 @@ match_unsorted_outer(PlannerInfo *root,
         * If we need to unique-ify the inner path, we will consider only the
         * cheapest inner.
         */
-       if (jointype == JOIN_UNIQUE_INNER)
+       if (save_jointype == JOIN_UNIQUE_INNER)
        {
                inner_cheapest_total = (Path *)
-                       create_unique_path(root, innerrel, inner_cheapest_total);
+                       create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
+               Assert(inner_cheapest_total);
                inner_cheapest_startup = inner_cheapest_total;
-               jointype = JOIN_INNER;
        }
        else if (nestjoinOK)
        {
@@ -424,8 +453,9 @@ match_unsorted_outer(PlannerInfo *root,
                {
                        if (outerpath != outerrel->cheapest_total_path)
                                continue;
-                       outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
-                       jointype = JOIN_INNER;
+                       outerpath = (Path *) create_unique_path(root, outerrel,
+                                                                                                       outerpath, sjinfo);
+                       Assert(outerpath);
                }
 
                /*
@@ -449,6 +479,7 @@ match_unsorted_outer(PlannerInfo *root,
                                         create_nestloop_path(root,
                                                                                  joinrel,
                                                                                  jointype,
+                                                                                 sjinfo,
                                                                                  outerpath,
                                                                                  inner_cheapest_total,
                                                                                  restrictlist,
@@ -458,6 +489,7 @@ match_unsorted_outer(PlannerInfo *root,
                                                 create_nestloop_path(root,
                                                                                          joinrel,
                                                                                          jointype,
+                                                                                         sjinfo,
                                                                                          outerpath,
                                                                                          matpath,
                                                                                          restrictlist,
@@ -467,6 +499,7 @@ match_unsorted_outer(PlannerInfo *root,
                                                 create_nestloop_path(root,
                                                                                          joinrel,
                                                                                          jointype,
+                                                                                         sjinfo,
                                                                                          outerpath,
                                                                                          inner_cheapest_startup,
                                                                                          restrictlist,
@@ -476,6 +509,7 @@ match_unsorted_outer(PlannerInfo *root,
                                                 create_nestloop_path(root,
                                                                                          joinrel,
                                                                                          jointype,
+                                                                                         sjinfo,
                                                                                          outerpath,
                                                                                          index_cheapest_total,
                                                                                          restrictlist,
@@ -486,6 +520,7 @@ match_unsorted_outer(PlannerInfo *root,
                                                 create_nestloop_path(root,
                                                                                          joinrel,
                                                                                          jointype,
+                                                                                         sjinfo,
                                                                                          outerpath,
                                                                                          index_cheapest_startup,
                                                                                          restrictlist,
@@ -536,6 +571,7 @@ match_unsorted_outer(PlannerInfo *root,
                                 create_mergejoin_path(root,
                                                                           joinrel,
                                                                           jointype,
+                                                                          sjinfo,
                                                                           outerpath,
                                                                           inner_cheapest_total,
                                                                           restrictlist,
@@ -604,6 +640,7 @@ match_unsorted_outer(PlannerInfo *root,
                                                 create_mergejoin_path(root,
                                                                                           joinrel,
                                                                                           jointype,
+                                                                                          sjinfo,
                                                                                           outerpath,
                                                                                           innerpath,
                                                                                           restrictlist,
@@ -649,6 +686,7 @@ match_unsorted_outer(PlannerInfo *root,
                                                         create_mergejoin_path(root,
                                                                                                   joinrel,
                                                                                                   jointype,
+                                                                                                  sjinfo,
                                                                                                   outerpath,
                                                                                                   innerpath,
                                                                                                   restrictlist,
@@ -680,6 +718,7 @@ match_unsorted_outer(PlannerInfo *root,
  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
  *             clauses that apply to this join
  * 'jointype' is the type of join to do
+ * 'sjinfo' is extra info about the join for selectivity estimation
  */
 static void
 hash_inner_and_outer(PlannerInfo *root,
@@ -687,24 +726,26 @@ hash_inner_and_outer(PlannerInfo *root,
                                         RelOptInfo *outerrel,
                                         RelOptInfo *innerrel,
                                         List *restrictlist,
-                                        JoinType jointype)
+                                        JoinType jointype,
+                                        SpecialJoinInfo *sjinfo)
 {
        bool            isouterjoin;
        List       *hashclauses;
        ListCell   *l;
 
        /*
-        * Hashjoin only supports inner, left, and IN joins.
+        * Hashjoin only supports inner, left, semi, and anti joins.
         */
        switch (jointype)
        {
                case JOIN_INNER:
-               case JOIN_IN:
                case JOIN_UNIQUE_OUTER:
                case JOIN_UNIQUE_INNER:
                        isouterjoin = false;
                        break;
                case JOIN_LEFT:
+               case JOIN_SEMI:
+               case JOIN_ANTI:
                        isouterjoin = true;
                        break;
                default:
@@ -769,14 +810,18 @@ hash_inner_and_outer(PlannerInfo *root,
                if (jointype == JOIN_UNIQUE_OUTER)
                {
                        cheapest_total_outer = (Path *)
-                               create_unique_path(root, outerrel, cheapest_total_outer);
+                               create_unique_path(root, outerrel,
+                                                                  cheapest_total_outer, sjinfo);
+                       Assert(cheapest_total_outer);
                        cheapest_startup_outer = cheapest_total_outer;
                        jointype = JOIN_INNER;
                }
                else if (jointype == JOIN_UNIQUE_INNER)
                {
                        cheapest_total_inner = (Path *)
-                               create_unique_path(root, innerrel, cheapest_total_inner);
+                               create_unique_path(root, innerrel,
+                                                                  cheapest_total_inner, sjinfo);
+                       Assert(cheapest_total_inner);
                        jointype = JOIN_INNER;
                }
 
@@ -784,6 +829,7 @@ hash_inner_and_outer(PlannerInfo *root,
                                 create_hashjoin_path(root,
                                                                          joinrel,
                                                                          jointype,
+                                                                         sjinfo,
                                                                          cheapest_total_outer,
                                                                          cheapest_total_inner,
                                                                          restrictlist,
@@ -793,6 +839,7 @@ hash_inner_and_outer(PlannerInfo *root,
                                         create_hashjoin_path(root,
                                                                                  joinrel,
                                                                                  jointype,
+                                                                                 sjinfo,
                                                                                  cheapest_startup_outer,
                                                                                  cheapest_total_inner,
                                                                                  restrictlist,
index f63c3ab43ffc81b9ae4a197205a608d0485ba488..b517f09e003de8e20e8c66699d1ca28c284ce469 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.92 2008/03/24 21:53:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.93 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -218,7 +218,7 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
                }
 
                /*----------
-                * When OJs or IN clauses are involved, there may be no legal way
+                * When special joins are involved, there may be no legal way
                 * to make an N-way join for some values of N.  For example consider
                 *
                 * SELECT ... FROM t1 WHERE
@@ -230,12 +230,11 @@ join_search_one_level(PlannerInfo *root, int level, List **joinrels)
                 * to accept failure at level 4 and go on to discover a workable
                 * bushy plan at level 5.
                 *
-                * However, if there are no such clauses then join_is_legal() should
+                * However, if there are no special joins then join_is_legal() should
                 * never fail, and so the following sanity check is useful.
                 *----------
                 */
-               if (result_rels == NIL &&
-                       root->oj_info_list == NIL && root->in_info_list == NIL)
+               if (result_rels == NIL && root->join_info_list == NIL)
                        elog(ERROR, "failed to build any %d-way joins", level);
        }
 
@@ -337,89 +336,98 @@ make_rels_by_clauseless_joins(PlannerInfo *root,
  * (We could simplify the API by computing joinrelids locally, but this
  * would be redundant work in the normal path through make_join_rel.)
  *
- * On success, *jointype_p is set to the required join type.
+ * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
+ * else it's set to point to the associated SpecialJoinInfo node.  Also,
+ * *reversed_p is set TRUE if the given relations need to be swapped to
+ * match the SpecialJoinInfo node.
  */
 static bool
 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
-                         Relids joinrelids, JoinType *jointype_p)
+                         Relids joinrelids,
+                         SpecialJoinInfo **sjinfo_p, bool *reversed_p)
 {
-       JoinType        jointype;
+       SpecialJoinInfo *match_sjinfo;
+       bool            reversed;
        bool            is_valid_inner;
        ListCell   *l;
 
        /*
-        * Ensure *jointype_p is set on failure return.  This is just to suppress
-        * uninitialized-variable warnings from overly anal compilers.
+        * Ensure output params are set on failure return.  This is just to
+        * suppress uninitialized-variable warnings from overly anal compilers.
         */
-       *jointype_p = JOIN_INNER;
+       *sjinfo_p = NULL;
+       *reversed_p = false;
 
        /*
-        * If we have any outer joins, the proposed join might be illegal; and in
-        * any case we have to determine its join type.  Scan the OJ list for
-        * conflicts.
+        * If we have any special joins, the proposed join might be illegal; and
+        * in any case we have to determine its join type.  Scan the join info
+        * list for conflicts.
         */
-       jointype = JOIN_INNER;          /* default if no match to an OJ */
+       match_sjinfo = NULL;
+       reversed = false;
        is_valid_inner = true;
 
-       foreach(l, root->oj_info_list)
+       foreach(l, root->join_info_list)
        {
-               OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
 
                /*
-                * This OJ is not relevant unless its RHS overlaps the proposed join.
-                * (Check this first as a fast path for dismissing most irrelevant OJs
-                * quickly.)
+                * This special join is not relevant unless its RHS overlaps the
+                * proposed join.  (Check this first as a fast path for dismissing
+                * most irrelevant SJs quickly.)
                 */
-               if (!bms_overlap(ojinfo->min_righthand, joinrelids))
+               if (!bms_overlap(sjinfo->min_righthand, joinrelids))
                        continue;
 
                /*
                 * Also, not relevant if proposed join is fully contained within RHS
                 * (ie, we're still building up the RHS).
                 */
-               if (bms_is_subset(joinrelids, ojinfo->min_righthand))
+               if (bms_is_subset(joinrelids, sjinfo->min_righthand))
                        continue;
 
                /*
-                * Also, not relevant if OJ is already done within either input.
+                * Also, not relevant if SJ is already done within either input.
                 */
-               if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
-                       bms_is_subset(ojinfo->min_righthand, rel1->relids))
+               if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+                       bms_is_subset(sjinfo->min_righthand, rel1->relids))
                        continue;
-               if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
-                       bms_is_subset(ojinfo->min_righthand, rel2->relids))
+               if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+                       bms_is_subset(sjinfo->min_righthand, rel2->relids))
                        continue;
 
                /*
                 * If one input contains min_lefthand and the other contains
-                * min_righthand, then we can perform the OJ at this join.
+                * min_righthand, then we can perform the SJ at this join.
                 *
-                * Barf if we get matches to more than one OJ (is that possible?)
+                * Barf if we get matches to more than one SJ (is that possible?)
                 */
-               if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
-                       bms_is_subset(ojinfo->min_righthand, rel2->relids))
+               if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+                       bms_is_subset(sjinfo->min_righthand, rel2->relids))
                {
-                       if (jointype != JOIN_INNER)
+                       if (match_sjinfo)
                                return false;   /* invalid join path */
-                       jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_LEFT;
+                       match_sjinfo = sjinfo;
+                       reversed = false;
                }
-               else if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
-                                bms_is_subset(ojinfo->min_righthand, rel1->relids))
+               else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+                                bms_is_subset(sjinfo->min_righthand, rel1->relids))
                {
-                       if (jointype != JOIN_INNER)
+                       if (match_sjinfo)
                                return false;   /* invalid join path */
-                       jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_RIGHT;
+                       match_sjinfo = sjinfo;
+                       reversed = true;
                }
                else
                {
                        /*----------
                         * Otherwise, the proposed join overlaps the RHS but isn't
-                        * a valid implementation of this OJ.  It might still be
+                        * a valid implementation of this SJ.  It might still be
                         * a legal join, however.  If both inputs overlap the RHS,
                         * assume that it's OK.  Since the inputs presumably got past
                         * this function's checks previously, they can't overlap the
                         * LHS and their violations of the RHS boundary must represent
-                        * OJs that have been determined to commute with this one.
+                        * SJs that have been determined to commute with this one.
                         * We have to allow this to work correctly in cases like
                         *              (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
                         * when the c/d join has been determined to commute with the join
@@ -428,105 +436,33 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                         * as a violation of the upper join's RHS.
                         * Furthermore, if one input overlaps the RHS and the other does
                         * not, we should still allow the join if it is a valid
-                        * implementation of some other OJ.  We have to allow this to
+                        * implementation of some other SJ.  We have to allow this to
                         * support the associative identity
                         *              (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
-                        * since joining B directly to C violates the lower OJ's RHS.
+                        * since joining B directly to C violates the lower SJ's RHS.
                         * We assume that make_outerjoininfo() set things up correctly
-                        * so that we'll only match to some OJ if the join is valid.
+                        * so that we'll only match to some SJ if the join is valid.
                         * Set flag here to check at bottom of loop.
                         *----------
                         */
-                       if (bms_overlap(rel1->relids, ojinfo->min_righthand) &&
-                               bms_overlap(rel2->relids, ojinfo->min_righthand))
+                       if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+                               bms_overlap(rel2->relids, sjinfo->min_righthand))
                        {
                                /* seems OK */
-                               Assert(!bms_overlap(joinrelids, ojinfo->min_lefthand));
+                               Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
                        }
                        else
                                is_valid_inner = false;
                }
        }
 
-       /* Fail if violated some OJ's RHS and didn't match to another OJ */
-       if (jointype == JOIN_INNER && !is_valid_inner)
+       /* Fail if violated some SJ's RHS and didn't match to another SJ */
+       if (match_sjinfo == NULL && !is_valid_inner)
                return false;                   /* invalid join path */
 
-       /*
-        * Similarly, if we are implementing IN clauses as joins, check for
-        * illegal join path and detect whether we need a non-default join type.
-        */
-       foreach(l, root->in_info_list)
-       {
-               InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-               /*
-                * This IN clause is not relevant unless its RHS overlaps the proposed
-                * join.  (Check this first as a fast path for dismissing most
-                * irrelevant INs quickly.)
-                */
-               if (!bms_overlap(ininfo->righthand, joinrelids))
-                       continue;
-
-               /*
-                * If we are still building the IN clause's RHS, then this IN clause
-                * isn't relevant yet.
-                */
-               if (bms_is_subset(joinrelids, ininfo->righthand))
-                       continue;
-
-               /*
-                * Cannot join if proposed join contains rels not in the RHS *and*
-                * contains only part of the RHS.  We must build the complete RHS
-                * (subselect's join) before it can be joined to rels outside the
-                * subselect.
-                */
-               if (!bms_is_subset(ininfo->righthand, joinrelids))
-                       return false;
-
-               /*
-                * At this point we are considering a join of the IN's RHS to some
-                * other rel(s).
-                *
-                * If we already joined IN's RHS to any other rels in either input
-                * path, then this join is not constrained (the necessary work was
-                * done at the lower level where that join occurred).
-                */
-               if (bms_is_subset(ininfo->righthand, rel1->relids) &&
-                       !bms_equal(ininfo->righthand, rel1->relids))
-                       continue;
-               if (bms_is_subset(ininfo->righthand, rel2->relids) &&
-                       !bms_equal(ininfo->righthand, rel2->relids))
-                       continue;
-
-               /*
-                * JOIN_IN technique will work if outerrel includes LHS and innerrel
-                * is exactly RHS; conversely JOIN_REVERSE_IN handles RHS/LHS.
-                *
-                * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS; conversely
-                * JOIN_UNIQUE_INNER will work if innerrel is exactly RHS.
-                *
-                * But none of these will work if we already found an OJ or another IN
-                * that needs to trigger here.
-                */
-               if (jointype != JOIN_INNER)
-                       return false;
-               if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
-                       bms_equal(ininfo->righthand, rel2->relids))
-                       jointype = JOIN_IN;
-               else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
-                                bms_equal(ininfo->righthand, rel1->relids))
-                       jointype = JOIN_REVERSE_IN;
-               else if (bms_equal(ininfo->righthand, rel1->relids))
-                       jointype = JOIN_UNIQUE_OUTER;
-               else if (bms_equal(ininfo->righthand, rel2->relids))
-                       jointype = JOIN_UNIQUE_INNER;
-               else
-                       return false;           /* invalid join path */
-       }
-
-       /* Join is valid */
-       *jointype_p = jointype;
+       /* Otherwise, it's a valid join */
+       *sjinfo_p = match_sjinfo;
+       *reversed_p = reversed;
        return true;
 }
 
@@ -540,14 +476,16 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
  *        pairs of rels that add up to the same set of base rels.)
  *
  * NB: will return NULL if attempted join is not valid.  This can happen
- * when working with outer joins, or with IN clauses that have been turned
- * into joins.
+ * when working with outer joins, or with IN or EXISTS clauses that have been
+ * turned into joins.
  */
 RelOptInfo *
 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 {
        Relids          joinrelids;
-       JoinType        jointype;
+       SpecialJoinInfo *sjinfo;
+       bool            reversed;
+       SpecialJoinInfo sjinfo_data;
        RelOptInfo *joinrel;
        List       *restrictlist;
 
@@ -558,18 +496,48 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
        joinrelids = bms_union(rel1->relids, rel2->relids);
 
        /* Check validity and determine join type. */
-       if (!join_is_legal(root, rel1, rel2, joinrelids, &jointype))
+       if (!join_is_legal(root, rel1, rel2, joinrelids,
+                                          &sjinfo, &reversed))
        {
                /* invalid join path */
                bms_free(joinrelids);
                return NULL;
        }
 
+       /* Swap rels if needed to match the join info. */
+       if (reversed)
+       {
+               RelOptInfo *trel = rel1;
+
+               rel1 = rel2;
+               rel2 = trel;
+       }
+
+       /*
+        * If it's a plain inner join, then we won't have found anything in
+        * join_info_list.  Make up a SpecialJoinInfo so that selectivity
+        * estimation functions will know what's being joined.
+        */
+       if (sjinfo == NULL)
+       {
+               sjinfo = &sjinfo_data;
+               sjinfo->type = T_SpecialJoinInfo;
+               sjinfo->min_lefthand = rel1->relids;
+               sjinfo->min_righthand = rel2->relids;
+               sjinfo->syn_lefthand = rel1->relids;
+               sjinfo->syn_righthand = rel2->relids;
+               sjinfo->jointype = JOIN_INNER;
+               /* we don't bother trying to make the remaining fields valid */
+               sjinfo->lhs_strict = false;
+               sjinfo->delay_upper_joins = false;
+               sjinfo->join_quals = NIL;
+       }
+
        /*
         * Find or build the join RelOptInfo, and compute the restrictlist that
         * goes with this particular joining.
         */
-       joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
+       joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
                                                         &restrictlist);
 
        /*
@@ -589,8 +557,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
         * previously computed paths and mark the join as dummy.  (We do it
         * this way since it's conceivable that dummy-ness of a multi-element
         * join might only be noticeable for certain construction paths.)
+        *
+        * We need only consider the jointypes that appear in join_info_list,
+        * plus JOIN_INNER.
         */
-       switch (jointype)
+       switch (sjinfo->jointype)
        {
                case JOIN_INNER:
                        if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
@@ -598,9 +569,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                                mark_dummy_join(joinrel);
                                break;
                        }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER,
+                       add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                JOIN_INNER, sjinfo,
                                                                 restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER,
+                       add_paths_to_joinrel(root, joinrel, rel2, rel1,
+                                                                JOIN_INNER, sjinfo,
                                                                 restrictlist);
                        break;
                case JOIN_LEFT:
@@ -609,9 +582,11 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                                mark_dummy_join(joinrel);
                                break;
                        }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT,
+                       add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                JOIN_LEFT, sjinfo,
                                                                 restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT,
+                       add_paths_to_joinrel(root, joinrel, rel2, rel1,
+                                                                JOIN_RIGHT, sjinfo,
                                                                 restrictlist);
                        break;
                case JOIN_FULL:
@@ -620,75 +595,53 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                                mark_dummy_join(joinrel);
                                break;
                        }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL,
-                                                                restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL,
-                                                                restrictlist);
-                       break;
-               case JOIN_RIGHT:
-                       if (is_dummy_rel(rel2))
-                       {
-                               mark_dummy_join(joinrel);
-                               break;
-                       }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT,
-                                                                restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
-                                                                restrictlist);
-                       break;
-               case JOIN_IN:
-                       if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
-                       {
-                               mark_dummy_join(joinrel);
-                               break;
-                       }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
+                       add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                JOIN_FULL, sjinfo,
                                                                 restrictlist);
-                       /* REVERSE_IN isn't supported by joinpath.c */
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
-                                                                restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+                       add_paths_to_joinrel(root, joinrel, rel2, rel1,
+                                                                JOIN_FULL, sjinfo,
                                                                 restrictlist);
                        break;
-               case JOIN_REVERSE_IN:
+               case JOIN_SEMI:
                        if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
                        {
                                mark_dummy_join(joinrel);
                                break;
                        }
-                       /* REVERSE_IN isn't supported by joinpath.c */
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
-                                                                restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
+                       add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                JOIN_SEMI, sjinfo,
                                                                 restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
-                                                                restrictlist);
-                       break;
-               case JOIN_UNIQUE_OUTER:
-                       if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
+
+                       /*
+                        * If we know how to unique-ify the RHS and one input rel is
+                        * exactly the RHS (not a superset), we can consider unique-ifying
+                        * it and then doing a regular join.
+                        */
+                       if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
+                               create_unique_path(root, rel2, rel2->cheapest_total_path,
+                                                                  sjinfo) != NULL)
                        {
-                               mark_dummy_join(joinrel);
-                               break;
+                               add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                        JOIN_UNIQUE_INNER, sjinfo,
+                                                                        restrictlist);
+                               add_paths_to_joinrel(root, joinrel, rel2, rel1,
+                                                                        JOIN_UNIQUE_OUTER, sjinfo,
+                                                                        restrictlist);
                        }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
-                                                                restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
-                                                                restrictlist);
                        break;
-               case JOIN_UNIQUE_INNER:
-                       if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
+               case JOIN_ANTI:
+                       if (is_dummy_rel(rel1))
                        {
                                mark_dummy_join(joinrel);
                                break;
                        }
-                       add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
-                                                                restrictlist);
-                       add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
+                       add_paths_to_joinrel(root, joinrel, rel1, rel2,
+                                                                JOIN_ANTI, sjinfo,
                                                                 restrictlist);
                        break;
                default:
-                       elog(ERROR, "unrecognized join type: %d",
-                                (int) jointype);
+                       /* other values not expected here */
+                       elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
                        break;
        }
 
@@ -701,7 +654,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
 /*
  * have_join_order_restriction
  *             Detect whether the two relations should be joined to satisfy
- *             a join-order restriction arising from outer joins or IN clauses.
+ *             a join-order restriction arising from special joins.
  *
  * In practice this is always used with have_relevant_joinclause(), and so
  * could be merged with that function, but it seems clearer to separate the
@@ -709,8 +662,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
  * a clauseless join must be performed to satisfy join-order restrictions.
  *
  * Note: this is only a problem if one side of a degenerate outer join
- * contains multiple rels, or a clauseless join is required within an IN's
- * RHS; else we will find a join path via the "last ditch" case in
+ * contains multiple rels, or a clauseless join is required within an
+ * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
  * join_search_one_level().  We could dispense with this test if we were
  * willing to try bushy plans in the "last ditch" case, but that seems much
  * less efficient.
@@ -730,23 +683,23 @@ have_join_order_restriction(PlannerInfo *root,
         * Also, the two rels could represent a clauseless join that has to be
         * completed to build up the LHS or RHS of an outer join.
         */
-       foreach(l, root->oj_info_list)
+       foreach(l, root->join_info_list)
        {
-               OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
 
                /* ignore full joins --- other mechanisms handle them */
-               if (ojinfo->is_full_join)
+               if (sjinfo->jointype == JOIN_FULL)
                        continue;
 
-               /* Can we perform the OJ with these rels? */
-               if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
-                       bms_is_subset(ojinfo->min_righthand, rel2->relids))
+               /* Can we perform the SJ with these rels? */
+               if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
+                       bms_is_subset(sjinfo->min_righthand, rel2->relids))
                {
                        result = true;
                        break;
                }
-               if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
-                       bms_is_subset(ojinfo->min_righthand, rel1->relids))
+               if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
+                       bms_is_subset(sjinfo->min_righthand, rel1->relids))
                {
                        result = true;
                        break;
@@ -754,63 +707,19 @@ have_join_order_restriction(PlannerInfo *root,
 
                /*
                 * Might we need to join these rels to complete the RHS?  We have to
-                * use "overlap" tests since either rel might include a lower OJ that
+                * use "overlap" tests since either rel might include a lower SJ that
                 * has been proven to commute with this one.
                 */
-               if (bms_overlap(ojinfo->min_righthand, rel1->relids) &&
-                       bms_overlap(ojinfo->min_righthand, rel2->relids))
+               if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
+                       bms_overlap(sjinfo->min_righthand, rel2->relids))
                {
                        result = true;
                        break;
                }
 
                /* Likewise for the LHS. */
-               if (bms_overlap(ojinfo->min_lefthand, rel1->relids) &&
-                       bms_overlap(ojinfo->min_lefthand, rel2->relids))
-               {
-                       result = true;
-                       break;
-               }
-       }
-
-       /*
-        * Similarly, we need to allow a join that completes a degenerate
-        * IN-clause, or one that builds up its LHS or RHS.
-        */
-       foreach(l, root->in_info_list)
-       {
-               InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-               /* Can we perform the IN with these rels? */
-               if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
-                       bms_is_subset(ininfo->righthand, rel2->relids))
-               {
-                       result = true;
-                       break;
-               }
-               if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
-                       bms_is_subset(ininfo->righthand, rel1->relids))
-               {
-                       result = true;
-                       break;
-               }
-
-               /*
-                * Might we need to join these rels to complete the RHS?  It's
-                * probably overkill to test "overlap", since we never join part of an
-                * IN's RHS to anything else, but may as well keep the coding similar
-                * to the OJ case.
-                */
-               if (bms_overlap(ininfo->righthand, rel1->relids) &&
-                       bms_overlap(ininfo->righthand, rel2->relids))
-               {
-                       result = true;
-                       break;
-               }
-
-               /* Likewise for the LHS. */
-               if (bms_overlap(ininfo->lefthand, rel1->relids) &&
-                       bms_overlap(ininfo->lefthand, rel2->relids))
+               if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
+                       bms_overlap(sjinfo->min_lefthand, rel2->relids))
                {
                        result = true;
                        break;
@@ -852,37 +761,22 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
        ListCell   *l;
 
-       foreach(l, root->oj_info_list)
+       foreach(l, root->join_info_list)
        {
-               OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
 
                /* ignore full joins --- other mechanisms preserve their ordering */
-               if (ojinfo->is_full_join)
-                       continue;
-
-               /* ignore if OJ is already contained in rel */
-               if (bms_is_subset(ojinfo->min_lefthand, rel->relids) &&
-                       bms_is_subset(ojinfo->min_righthand, rel->relids))
+               if (sjinfo->jointype == JOIN_FULL)
                        continue;
 
-               /* restricted if it overlaps LHS or RHS, but doesn't contain OJ */
-               if (bms_overlap(ojinfo->min_lefthand, rel->relids) ||
-                       bms_overlap(ojinfo->min_righthand, rel->relids))
-                       return true;
-       }
-
-       foreach(l, root->in_info_list)
-       {
-               InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-               /* ignore if IN is already contained in rel */
-               if (bms_is_subset(ininfo->lefthand, rel->relids) &&
-                       bms_is_subset(ininfo->righthand, rel->relids))
+               /* ignore if SJ is already contained in rel */
+               if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
+                       bms_is_subset(sjinfo->min_righthand, rel->relids))
                        continue;
 
-               /* restricted if it overlaps LHS or RHS, but doesn't contain IN */
-               if (bms_overlap(ininfo->lefthand, rel->relids) ||
-                       bms_overlap(ininfo->righthand, rel->relids))
+               /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
+               if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
+                       bms_overlap(sjinfo->min_righthand, rel->relids))
                        return true;
        }
 
@@ -922,12 +816,14 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
                if (have_relevant_joinclause(root, rel, rel2))
                {
                        Relids          joinrelids;
-                       JoinType        jointype;
+                       SpecialJoinInfo *sjinfo;
+                       bool            reversed;
 
                        /* join_is_legal needs relids of the union */
                        joinrelids = bms_union(rel->relids, rel2->relids);
 
-                       if (join_is_legal(root, rel, rel2, joinrelids, &jointype))
+                       if (join_is_legal(root, rel, rel2, joinrelids,
+                                                         &sjinfo, &reversed))
                        {
                                /* Yes, this will work */
                                bms_free(joinrelids);
index 1556bf74df659ad560305ab513730302f9dc606d..28d7818aa24bae146d63bd24d5d3dece57d94445 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.84 2008/01/09 20:42:27 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.85 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -169,11 +169,11 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
         * selectivity will stay cached ...)
         */
        or_selec = clause_selectivity(root, (Node *) or_rinfo,
-                                                                 0, JOIN_INNER);
+                                                                 0, JOIN_INNER, NULL);
        if (or_selec > 0 && or_selec < 1)
        {
                orig_selec = clause_selectivity(root, (Node *) bestrinfo,
-                                                                               0, JOIN_INNER);
+                                                                               0, JOIN_INNER, NULL);
                bestrinfo->this_selec = orig_selec / or_selec;
                /* clamp result to sane range */
                if (bestrinfo->this_selec > 1)
index 0c200e05795f2085384e5d8d9d84c6360255fb7c..ea85fe016e46ba150f09406dffb5f58d9b693ad6 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.244 2008/08/07 19:35:02 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.245 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -595,8 +595,8 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
 {
        Plan       *plan;
        Plan       *subplan;
-       List       *uniq_exprs;
        List       *in_operators;
+       List       *uniq_exprs;
        List       *newtlist;
        int                     nextresno;
        bool            newitems;
@@ -611,7 +611,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
        if (best_path->umethod == UNIQUE_PATH_NOOP)
                return subplan;
 
-       /*----------
+       /*
         * As constructed, the subplan has a "flat" tlist containing just the
         * Vars needed here and at upper levels.  The values we are supposed
         * to unique-ify may be expressions in these variables.  We have to
@@ -626,29 +626,9 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path)
         * Therefore newtlist starts from build_relation_tlist() not just a
         * copy of the subplan's tlist; and we don't install it into the subplan
         * unless we are sorting or stuff has to be added.
-        *
-        * To find the correct list of values to unique-ify, we look in the
-        * information saved for IN expressions.  If this code is ever used in
-        * other scenarios, some other way of finding what to unique-ify will
-        * be needed.  The IN clause's operators are needed too, since they
-        * determine what the meaning of "unique" is in this context.
-        *----------
         */
-       uniq_exprs = NIL;                       /* just to keep compiler quiet */
-       in_operators = NIL;
-       foreach(l, root->in_info_list)
-       {
-               InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-               if (bms_equal(ininfo->righthand, best_path->path.parent->relids))
-               {
-                       uniq_exprs = ininfo->sub_targetlist;
-                       in_operators = ininfo->in_operators;
-                       break;
-               }
-       }
-       if (l == NULL)                          /* fell out of loop? */
-               elog(ERROR, "could not find UniquePath in in_info_list");
+       in_operators = best_path->in_operators;
+       uniq_exprs = best_path->uniq_exprs;
 
        /* initialize modified subplan tlist as just the "required" vars */
        newtlist = build_relation_tlist(best_path->path.parent);
index 6cfd2ab4c3877338cd25241d076454d53d9426c1..3c08d0def91c89888a63b85fdfe60ec3848a2f5b 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.140 2008/06/27 20:54:37 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/initsplan.c,v 1.141 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -40,18 +40,22 @@ int                 join_collapse_limit;
 static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
                                        bool below_outer_join,
                                        Relids *qualscope, Relids *inner_join_rels);
-static OuterJoinInfo *make_outerjoininfo(PlannerInfo *root,
+static SpecialJoinInfo *make_outerjoininfo(PlannerInfo *root,
                                   Relids left_rels, Relids right_rels,
                                   Relids inner_join_rels,
-                                  bool is_full_join, Node *clause);
+                                  JoinType jointype, List *clause);
 static void distribute_qual_to_rels(PlannerInfo *root, Node *clause,
                                                bool is_deduced,
                                                bool below_outer_join,
                                                Relids qualscope,
                                                Relids ojscope,
                                                Relids outerjoin_nonnullable);
+static void distribute_sublink_quals_to_rels(PlannerInfo *root,
+                                                                FlattenedSubLink *fslink,
+                                                                bool below_outer_join);
 static bool check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
                                          bool is_pushed_down);
+static bool check_redundant_nullability_qual(PlannerInfo *root, Node *clause);
 static void check_mergejoinable(RestrictInfo *restrictinfo);
 static void check_hashjoinable(RestrictInfo *restrictinfo);
 
@@ -136,40 +140,6 @@ build_base_rel_tlists(PlannerInfo *root, List *final_tlist)
        }
 }
 
-/*
- * add_IN_vars_to_tlists
- *       Add targetlist entries for each var needed in InClauseInfo entries
- *       to the appropriate base relations.
- *
- * Normally this is a waste of time because scanning of the WHERE clause
- * will have added them.  But it is possible that eval_const_expressions()
- * simplified away all references to the vars after the InClauseInfos were
- * made.  We need the IN's righthand-side vars to be available at the join
- * anyway, in case we try to unique-ify the subselect's outputs.  (The only
- * known case that provokes this is "WHERE false AND foo IN (SELECT ...)".
- * We don't try to be very smart about such cases, just correct.)
- */
-void
-add_IN_vars_to_tlists(PlannerInfo *root)
-{
-       ListCell   *l;
-
-       foreach(l, root->in_info_list)
-       {
-               InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-               List       *in_vars;
-
-               in_vars = pull_var_clause((Node *) ininfo->sub_targetlist, false);
-               if (in_vars != NIL)
-               {
-                       add_vars_to_targetlist(root, in_vars,
-                                                                  bms_union(ininfo->lefthand,
-                                                                                        ininfo->righthand));
-                       list_free(in_vars);
-               }
-       }
-}
-
 /*
  * add_vars_to_targetlist
  *       For each variable appearing in the list, add it to the owning
@@ -214,15 +184,15 @@ add_vars_to_targetlist(PlannerInfo *root, List *vars, Relids where_needed)
  * deconstruct_jointree
  *       Recursively scan the query's join tree for WHERE and JOIN/ON qual
  *       clauses, and add these to the appropriate restrictinfo and joininfo
- *       lists belonging to base RelOptInfos.  Also, add OuterJoinInfo nodes
- *       to root->oj_info_list for any outer joins appearing in the query tree.
+ *       lists belonging to base RelOptInfos.  Also, add SpecialJoinInfo nodes
+ *       to root->join_info_list for any outer joins appearing in the query tree.
  *       Return a "joinlist" data structure showing the join order decisions
  *       that need to be made by make_one_rel().
  *
  * The "joinlist" result is a list of items that are either RangeTblRef
  * jointree nodes or sub-joinlists.  All the items at the same level of
  * joinlist must be joined in an order to be determined by make_one_rel()
- * (note that legal orders may be constrained by OuterJoinInfo nodes).
+ * (note that legal orders may be constrained by SpecialJoinInfo nodes).
  * A sub-joinlist represents a subproblem to be planned separately. Currently
  * sub-joinlists arise only from FULL OUTER JOIN or when collapsing of
  * subproblems is stopped by join_collapse_limit or from_collapse_limit.
@@ -261,13 +231,13 @@ deconstruct_jointree(PlannerInfo *root)
  * Outputs:
  *     *qualscope gets the set of base Relids syntactically included in this
  *             jointree node (do not modify or free this, as it may also be pointed
- *             to by RestrictInfo and OuterJoinInfo nodes)
+ *             to by RestrictInfo and SpecialJoinInfo nodes)
  *     *inner_join_rels gets the set of base Relids syntactically included in
  *             inner joins appearing at or below this jointree node (do not modify
  *             or free this, either)
  *     Return value is the appropriate joinlist for this jointree node
  *
- * In addition, entries will be added to root->oj_info_list for outer joins.
+ * In addition, entries will be added to root->join_info_list for outer joins.
  */
 static List *
 deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
@@ -341,9 +311,19 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                 * Now process the top-level quals.
                 */
                foreach(l, (List *) f->quals)
-                       distribute_qual_to_rels(root, (Node *) lfirst(l),
-                                                                       false, below_outer_join,
-                                                                       *qualscope, NULL, NULL);
+               {
+                       Node   *qual = (Node *) lfirst(l);
+
+                       /* FlattenedSubLink wrappers need special processing */
+                       if (qual && IsA(qual, FlattenedSubLink))
+                               distribute_sublink_quals_to_rels(root,
+                                                                                                (FlattenedSubLink *) qual,
+                                                                                                below_outer_join);
+                       else
+                               distribute_qual_to_rels(root, qual,
+                                                                               false, below_outer_join,
+                                                                               *qualscope, NULL, NULL);
+               }
        }
        else if (IsA(jtnode, JoinExpr))
        {
@@ -356,8 +336,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                        ojscope;
                List       *leftjoinlist,
                                   *rightjoinlist;
-               OuterJoinInfo *ojinfo;
-               ListCell   *qual;
+               SpecialJoinInfo *sjinfo;
+               ListCell   *l;
 
                /*
                 * Order of operations here is subtle and critical.  First we recurse
@@ -366,7 +346,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                 * Then we place our own join quals, which are restricted by lower
                 * outer joins in any case, and are forced to this level if this is an
                 * outer join and they mention the outer side.  Finally, if this is an
-                * outer join, we create an oj_info_list entry for the join.  This
+                * outer join, we create a join_info_list entry for the join.  This
                 * will prevent quals above us in the join tree that use those rels
                 * from being pushed down below this level.  (It's okay for upper
                 * quals to be pushed down to the outer side, however.)
@@ -386,6 +366,7 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                nonnullable_rels = NULL;
                                break;
                        case JOIN_LEFT:
+                       case JOIN_ANTI:
                                leftjoinlist = deconstruct_recurse(root, j->larg,
                                                                                                   below_outer_join,
                                                                                                   &leftids, &left_inners);
@@ -408,19 +389,8 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                                /* each side is both outer and inner */
                                nonnullable_rels = *qualscope;
                                break;
-                       case JOIN_RIGHT:
-                               /* notice we switch leftids and rightids */
-                               leftjoinlist = deconstruct_recurse(root, j->larg,
-                                                                                                  true,
-                                                                                                  &rightids, &right_inners);
-                               rightjoinlist = deconstruct_recurse(root, j->rarg,
-                                                                                                       below_outer_join,
-                                                                                                       &leftids, &left_inners);
-                               *qualscope = bms_union(leftids, rightids);
-                               *inner_join_rels = bms_union(left_inners, right_inners);
-                               nonnullable_rels = leftids;
-                               break;
                        default:
+                               /* JOIN_RIGHT was eliminated during reduce_outer_joins() */
                                elog(ERROR, "unrecognized join type: %d",
                                         (int) j->jointype);
                                nonnullable_rels = NULL;                /* keep compiler quiet */
@@ -429,35 +399,46 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
                }
 
                /*
-                * For an OJ, form the OuterJoinInfo now, because we need the OJ's
+                * For an OJ, form the SpecialJoinInfo now, because we need the OJ's
                 * semantic scope (ojscope) to pass to distribute_qual_to_rels.  But
-                * we mustn't add it to oj_info_list just yet, because we don't want
+                * we mustn't add it to join_info_list just yet, because we don't want
                 * distribute_qual_to_rels to think it is an outer join below us.
                 */
                if (j->jointype != JOIN_INNER)
                {
-                       ojinfo = make_outerjoininfo(root,
+                       sjinfo = make_outerjoininfo(root,
                                                                                leftids, rightids,
                                                                                *inner_join_rels,
-                                                                               (j->jointype == JOIN_FULL),
-                                                                               j->quals);
-                       ojscope = bms_union(ojinfo->min_lefthand, ojinfo->min_righthand);
+                                                                               j->jointype,
+                                                                               (List *) j->quals);
+                       ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
                }
                else
                {
-                       ojinfo = NULL;
+                       sjinfo = NULL;
                        ojscope = NULL;
                }
 
                /* Process the qual clauses */
-               foreach(qual, (List *) j->quals)
-                       distribute_qual_to_rels(root, (Node *) lfirst(qual),
-                                                                       false, below_outer_join,
-                                                                       *qualscope, ojscope, nonnullable_rels);
+               foreach(l, (List *) j->quals)
+               {
+                       Node   *qual = (Node *) lfirst(l);
+
+                       /* FlattenedSubLink wrappers need special processing */
+                       if (qual && IsA(qual, FlattenedSubLink))
+                               distribute_sublink_quals_to_rels(root,
+                                                                                                (FlattenedSubLink *) qual,
+                                                                                                below_outer_join);
+                       else
+                               distribute_qual_to_rels(root, qual,
+                                                                               false, below_outer_join,
+                                                                               *qualscope,
+                                                                               ojscope, nonnullable_rels);
+               }
 
-               /* Now we can add the OuterJoinInfo to oj_info_list */
-               if (ojinfo)
-                       root->oj_info_list = lappend(root->oj_info_list, ojinfo);
+               /* Now we can add the SpecialJoinInfo to join_info_list */
+               if (sjinfo)
+                       root->join_info_list = lappend(root->join_info_list, sjinfo);
 
                /*
                 * Finally, compute the output joinlist.  We fold subproblems together
@@ -504,39 +485,42 @@ deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
 
 /*
  * make_outerjoininfo
- *       Build an OuterJoinInfo for the current outer join
+ *       Build a SpecialJoinInfo for the current outer join
  *
  * Inputs:
  *     left_rels: the base Relids syntactically on outer side of join
  *     right_rels: the base Relids syntactically on inner side of join
  *     inner_join_rels: base Relids participating in inner joins below this one
- *     is_full_join: what it says
- *     clause: the outer join's join condition
- *
- * If the join is a RIGHT JOIN, left_rels and right_rels are switched by
- * the caller, so that left_rels is always the nonnullable side.  Hence
- * we need only distinguish the LEFT and FULL cases.
+ *     jointype: what it says (must always be LEFT, FULL, SEMI, or ANTI)
+ *     clause: the outer join's join condition (in implicit-AND format)
  *
- * The node should eventually be appended to root->oj_info_list, but we
+ * The node should eventually be appended to root->join_info_list, but we
  * do not do that here.
  *
  * Note: we assume that this function is invoked bottom-up, so that
- * root->oj_info_list already contains entries for all outer joins that are
+ * root->join_info_list already contains entries for all outer joins that are
  * syntactically below this one.
  */
-static OuterJoinInfo *
+static SpecialJoinInfo *
 make_outerjoininfo(PlannerInfo *root,
                                   Relids left_rels, Relids right_rels,
                                   Relids inner_join_rels,
-                                  bool is_full_join, Node *clause)
+                                  JoinType jointype, List *clause)
 {
-       OuterJoinInfo *ojinfo = makeNode(OuterJoinInfo);
+       SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
        Relids          clause_relids;
        Relids          strict_relids;
        Relids          min_lefthand;
        Relids          min_righthand;
        ListCell   *l;
 
+       /*
+        * We should not see RIGHT JOIN here because left/right were switched
+        * earlier
+        */
+       Assert(jointype != JOIN_INNER);
+       Assert(jointype != JOIN_RIGHT);
+
        /*
         * Presently the executor cannot support FOR UPDATE/SHARE marking of rels
         * appearing on the nullable side of an outer join. (It's somewhat unclear
@@ -554,40 +538,41 @@ make_outerjoininfo(PlannerInfo *root,
                RowMarkClause *rc = (RowMarkClause *) lfirst(l);
 
                if (bms_is_member(rc->rti, right_rels) ||
-                       (is_full_join && bms_is_member(rc->rti, left_rels)))
+                       (jointype == JOIN_FULL && bms_is_member(rc->rti, left_rels)))
                        ereport(ERROR,
                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                         errmsg("SELECT FOR UPDATE/SHARE cannot be applied to the nullable side of an outer join")));
        }
 
+       sjinfo->syn_lefthand = left_rels;
+       sjinfo->syn_righthand = right_rels;
+       sjinfo->jointype = jointype;
        /* this always starts out false */
-       ojinfo->delay_upper_joins = false;
+       sjinfo->delay_upper_joins = false;
+       sjinfo->join_quals = clause;
 
        /* If it's a full join, no need to be very smart */
-       ojinfo->syn_lefthand = left_rels;
-       ojinfo->syn_righthand = right_rels;
-       ojinfo->is_full_join = is_full_join;
-       if (is_full_join)
+       if (jointype == JOIN_FULL)
        {
-               ojinfo->min_lefthand = left_rels;
-               ojinfo->min_righthand = right_rels;
-               ojinfo->lhs_strict = false;             /* don't care about this */
-               return ojinfo;
+               sjinfo->min_lefthand = bms_copy(left_rels);
+               sjinfo->min_righthand = bms_copy(right_rels);
+               sjinfo->lhs_strict = false;             /* don't care about this */
+               return sjinfo;
        }
 
        /*
         * Retrieve all relids mentioned within the join clause.
         */
-       clause_relids = pull_varnos(clause);
+       clause_relids = pull_varnos((Node *) clause);
 
        /*
         * For which relids is the clause strict, ie, it cannot succeed if the
         * rel's columns are all NULL?
         */
-       strict_relids = find_nonnullable_rels(clause);
+       strict_relids = find_nonnullable_rels((Node *) clause);
 
        /* Remember whether the clause is strict for any LHS relations */
-       ojinfo->lhs_strict = bms_overlap(strict_relids, left_rels);
+       sjinfo->lhs_strict = bms_overlap(strict_relids, left_rels);
 
        /*
         * Required LHS always includes the LHS rels mentioned in the clause. We
@@ -602,12 +587,12 @@ make_outerjoininfo(PlannerInfo *root,
        min_righthand = bms_int_members(bms_union(clause_relids, inner_join_rels),
                                                                        right_rels);
 
-       foreach(l, root->oj_info_list)
+       foreach(l, root->join_info_list)
        {
-               OuterJoinInfo *otherinfo = (OuterJoinInfo *) lfirst(l);
+               SpecialJoinInfo *otherinfo = (SpecialJoinInfo *) lfirst(l);
 
                /* ignore full joins --- other mechanisms preserve their ordering */
-               if (otherinfo->is_full_join)
+               if (otherinfo->jointype == JOIN_FULL)
                        continue;
 
                /*
@@ -679,10 +664,10 @@ make_outerjoininfo(PlannerInfo *root,
        /* Shouldn't overlap either */
        Assert(!bms_overlap(min_lefthand, min_righthand));
 
-       ojinfo->min_lefthand = min_lefthand;
-       ojinfo->min_righthand = min_righthand;
+       sjinfo->min_lefthand = min_lefthand;
+       sjinfo->min_righthand = min_righthand;
 
-       return ojinfo;
+       return sjinfo;
 }
 
 
@@ -830,7 +815,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
                /*
                 * If the qual came from implied-equality deduction, it should not be
                 * outerjoin-delayed, else deducer blew it.  But we can't check this
-                * because the ojinfo list may now contain OJs above where the qual
+                * because the join_info_list may now contain OJs above where the qual
                 * belongs.
                 */
                Assert(!ojscope);
@@ -894,6 +879,15 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
                         * we mustn't assume its vars are equal everywhere.
                         */
                        maybe_equivalence = false;
+
+                       /*
+                        * It's possible that this is an IS NULL clause that's redundant
+                        * with a lower antijoin; if so we can just discard it.  We need
+                        * not test in any of the other cases, because this will only
+                        * be possible for pushed-down, delayed clauses.
+                        */
+                       if (check_redundant_nullability_qual(root, clause))
+                               return;
                }
                else
                {
@@ -1021,6 +1015,54 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
        distribute_restrictinfo_to_rels(root, restrictinfo);
 }
 
+/*
+ * distribute_sublink_quals_to_rels
+ *       Pull sublink quals out of a FlattenedSubLink node and distribute
+ *       them appropriately; then add a SpecialJoinInfo node to the query's
+ *       join_info_list.  The FlattenedSubLink node itself is no longer
+ *       needed and does not propagate into further processing.
+ */
+static void
+distribute_sublink_quals_to_rels(PlannerInfo *root,
+                                                                FlattenedSubLink *fslink,
+                                                                bool below_outer_join)
+{
+       List       *quals = make_ands_implicit(fslink->quals);
+       SpecialJoinInfo *sjinfo;
+       Relids          qualscope;      /* union of sjinfo's syn_lefthand and syn_righthand */
+       Relids          ojscope;        /* union of sjinfo's min_lefthand and min_righthand */
+       ListCell   *l;
+
+       /*
+        * Build a suitable SpecialJoinInfo for the sublink.  Note: using
+        * righthand as inner_join_rels is the conservative worst case;
+        * it might be possible to use a smaller set and thereby allow
+        * the sublink join to commute with others inside its RHS.
+        */
+       sjinfo = make_outerjoininfo(root,
+                                                               fslink->lefthand, fslink->righthand,
+                                                               fslink->righthand,
+                                                               fslink->jointype,
+                                                               quals);
+
+       qualscope = bms_union(sjinfo->syn_lefthand, sjinfo->syn_righthand);
+       ojscope = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+
+       /* Distribute the join quals much as for a regular LEFT JOIN */
+       foreach(l, quals)
+       {
+               Node   *qual = (Node *) lfirst(l);
+
+               distribute_qual_to_rels(root, qual,
+                                                               false, below_outer_join,        /* is_deduced = false */
+                                                               qualscope, ojscope,
+                                                               fslink->lefthand);      /* passed as outerjoin_nonnullable */
+       }
+
+       /* Now we can add the SpecialJoinInfo to join_info_list */
+       root->join_info_list = lappend(root->join_info_list, sjinfo);
+}
+
 /*
  * check_outerjoin_delay
  *             Detect whether a qual referencing the given relids must be delayed
@@ -1030,7 +1072,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
  * If the qual must be delayed, add relids to *relids_p to reflect the lowest
  * safe level for evaluating the qual, and return TRUE.  Any extra delay for
  * higher-level joins is reflected by setting delay_upper_joins to TRUE in
- * OuterJoinInfo structs.
+ * SpecialJoinInfo structs.
  *
  * For an is_pushed_down qual, we can evaluate the qual as soon as (1) we have
  * all the rels it mentions, and (2) we are at or above any outer joins that
@@ -1042,9 +1084,9 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
  * only nullable rels is strict, we'd have reduced the outer join to an inner
  * join in reduce_outer_joins().)
  *
- * To enforce (2), scan the oj_info_list and merge the required-relid sets of
+ * To enforce (2), scan the join_info_list and merge the required-relid sets of
  * any such OJs into the clause's own reference list.  At the time we are
- * called, the oj_info_list contains only outer joins below this qual. We
+ * called, the join_info_list contains only outer joins below this qual.  We
  * have to repeat the scan until no new relids get added; this ensures that
  * the qual is suitably delayed regardless of the order in which OJs get
  * executed.  As an example, if we have one OJ with LHS=A, RHS=B, and one with
@@ -1057,7 +1099,7 @@ distribute_qual_to_rels(PlannerInfo *root, Node *clause,
  * in reconsider_outer_join_clauses().
  *
  * Lastly, a pushed-down qual that references the nullable side of any current
- * oj_info_list member and has to be evaluated above that OJ (because its
+ * join_info_list member and has to be evaluated above that OJ (because its
  * required relids overlap the LHS too) causes that OJ's delay_upper_joins
  * flag to be set TRUE.  This will prevent any higher-level OJs from
  * being interchanged with that OJ, which would result in not having any
@@ -1083,31 +1125,31 @@ check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
                ListCell   *l;
 
                found_some = false;
-               foreach(l, root->oj_info_list)
+               foreach(l, root->join_info_list)
                {
-                       OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
+                       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
 
                        /* do we reference any nullable rels of this OJ? */
-                       if (bms_overlap(relids, ojinfo->min_righthand) ||
-                               (ojinfo->is_full_join &&
-                                bms_overlap(relids, ojinfo->min_lefthand)))
+                       if (bms_overlap(relids, sjinfo->min_righthand) ||
+                               (sjinfo->jointype == JOIN_FULL &&
+                                bms_overlap(relids, sjinfo->min_lefthand)))
                        {
                                /* yes, so set the result flag */
                                outerjoin_delayed = true;
                                /* have we included all its rels in relids? */
-                               if (!bms_is_subset(ojinfo->min_lefthand, relids) ||
-                                       !bms_is_subset(ojinfo->min_righthand, relids))
+                               if (!bms_is_subset(sjinfo->min_lefthand, relids) ||
+                                       !bms_is_subset(sjinfo->min_righthand, relids))
                                {
                                        /* no, so add them in */
-                                       relids = bms_add_members(relids, ojinfo->min_lefthand);
-                                       relids = bms_add_members(relids, ojinfo->min_righthand);
+                                       relids = bms_add_members(relids, sjinfo->min_lefthand);
+                                       relids = bms_add_members(relids, sjinfo->min_righthand);
                                        /* we'll need another iteration */
                                        found_some = true;
                                }
                                /* set delay_upper_joins if needed */
-                               if (is_pushed_down && !ojinfo->is_full_join &&
-                                       bms_overlap(relids, ojinfo->min_lefthand))
-                                       ojinfo->delay_upper_joins = true;
+                               if (is_pushed_down && sjinfo->jointype != JOIN_FULL &&
+                                       bms_overlap(relids, sjinfo->min_lefthand))
+                                       sjinfo->delay_upper_joins = true;
                        }
                }
        } while (found_some);
@@ -1116,6 +1158,74 @@ check_outerjoin_delay(PlannerInfo *root, Relids *relids_p,
        return outerjoin_delayed;
 }
 
+/*
+ * check_redundant_nullability_qual
+ *       Check to see if the qual is an IS NULL qual that is redundant with
+ *       a lower JOIN_ANTI join.  Returns true if so, false otherwise.
+ *
+ * We want to suppress redundant IS NULL quals, not so much to save cycles
+ * as to avoid generating bogus selectivity estimates for them.  So if
+ * redundancy is detected here, distribute_qual_to_rels() just throws away
+ * the qual.
+ */
+static bool
+check_redundant_nullability_qual(PlannerInfo *root, Node *clause)
+{
+       Var                *forced_null_var;
+       Index           forced_null_rel;        /* varno of forced_null_var */
+       SpecialJoinInfo *match_sjinfo = NULL;   /* matching antijoin not (yet) found to be masked */
+       ListCell   *lc;
+
+       /* Check for IS NULL, and identify the Var forced to NULL */
+       forced_null_var = find_forced_null_var(clause);
+       if (forced_null_var == NULL)
+               return false;
+       forced_null_rel = forced_null_var->varno;
+
+       /*
+        * Search to see if there's a matching antijoin that is not masked by
+        * a higher outer join.  Because we have to scan the join info bottom-up,
+        * we have to continue looking after finding a match to check for masking
+        * joins.  This logic should agree with reduce_outer_joins's code
+        * to detect antijoins on the basis of IS NULL clauses.  (It's tempting
+        * to consider adding some data structures to avoid redundant work,
+        * but in practice this code shouldn't get executed often enough to
+        * make it worth the trouble.)
+        */
+       foreach(lc, root->join_info_list)
+       {
+               SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+               /* Check for match ... */
+               if (sjinfo->jointype == JOIN_ANTI &&
+                       bms_is_member(forced_null_rel, sjinfo->syn_righthand))
+               {
+                       List   *nonnullable_vars;
+
+                       nonnullable_vars = find_nonnullable_vars((Node *) sjinfo->join_quals);
+                       if (list_member(nonnullable_vars, forced_null_var))
+                       {
+                               match_sjinfo = sjinfo;
+                               continue;       /* skip the masking test for the matching join itself */
+                       }
+               }
+               /*
+                * Else, if we had a lower match, check to see if the target var is
+                * from the nullable side of this OJ.  If so, this OJ masks the
+                * lower one and we can no longer consider the IS NULL as redundant
+                * with the lower antijoin.
+                */
+               if (!match_sjinfo)
+                       continue;
+               if (bms_is_member(forced_null_rel, sjinfo->syn_righthand) ||
+                       (sjinfo->jointype == JOIN_FULL &&
+                        bms_is_member(forced_null_rel, sjinfo->syn_lefthand)))
+                       match_sjinfo = NULL;
+       }
+
+       return (match_sjinfo != NULL);
+}
+
 /*
  * distribute_restrictinfo_to_rels
  *       Push a completed RestrictInfo into the proper restriction or join
index 081a7c9cebd1f37615cd42e2a49d3d29b706e42e..7dcdaf250f9b2ca16a1db527fb3e5ca13be94ae0 100644 (file)
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.109 2008/08/05 02:43:17 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.110 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -131,8 +131,8 @@ query_planner(PlannerInfo *root, List *tlist,
         * Init planner lists to empty, and set up the array to hold RelOptInfos
         * for "simple" rels.
         *
-        * NOTE: in_info_list and append_rel_list were set up by subquery_planner,
-        * do not touch here; eq_classes may contain data already, too.
+        * NOTE: append_rel_list was set up by subquery_planner, so do not touch
+        * here; eq_classes may contain data already, too.
         */
        root->simple_rel_array_size = list_length(parse->rtable) + 1;
        root->simple_rel_array = (RelOptInfo **)
@@ -143,7 +143,7 @@ query_planner(PlannerInfo *root, List *tlist,
        root->left_join_clauses = NIL;
        root->right_join_clauses = NIL;
        root->full_join_clauses = NIL;
-       root->oj_info_list = NIL;
+       root->join_info_list = NIL;
        root->initial_rels = NIL;
 
        /*
@@ -215,13 +215,6 @@ query_planner(PlannerInfo *root, List *tlist,
 
        joinlist = deconstruct_jointree(root);
 
-       /*
-        * Vars mentioned in InClauseInfo items also have to be added to baserel
-        * targetlists.  Nearly always, they'd have got there from the original
-        * WHERE qual, but in corner cases maybe not.
-        */
-       add_IN_vars_to_tlists(root);
-
        /*
         * Reconsider any postponed outer-join quals now that we have built up
         * equivalence classes.  (This could result in further additions or
index c818f0ddf10e20ae0d260bf2bef5dfeaf4082f3a..8b432ba93fbd7d87dcb6e37fc44fd59c982fca7c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.240 2008/08/07 01:11:50 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.241 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -55,8 +55,7 @@ planner_hook_type planner_hook = NULL;
 #define EXPRKIND_RTFUNC                2
 #define EXPRKIND_VALUES                3
 #define EXPRKIND_LIMIT         4
-#define EXPRKIND_ININFO                5
-#define EXPRKIND_APPINFO       6
+#define EXPRKIND_APPINFO       5
 
 
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
@@ -255,6 +254,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
        PlannerInfo *root;
        Plan       *plan;
        List       *newHaving;
+       bool            hasOuterJoins;
        ListCell   *l;
 
        /* Create a PlannerInfo data structure for this subquery */
@@ -265,23 +265,22 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
        root->planner_cxt = CurrentMemoryContext;
        root->init_plans = NIL;
        root->eq_classes = NIL;
-       root->in_info_list = NIL;
        root->append_rel_list = NIL;
 
        /*
-        * Look for IN clauses at the top level of WHERE, and transform them into
-        * joins.  Note that this step only handles IN clauses originally at top
-        * level of WHERE; if we pull up any subqueries below, their INs are
-        * processed just before pulling them up.
+        * Look for ANY and EXISTS SubLinks at the top level of WHERE, and try to
+        * transform them into joins.  Note that this step only handles SubLinks
+        * originally at top level of WHERE; if we pull up any subqueries below,
+        * their SubLinks are processed just before pulling them up.
         */
        if (parse->hasSubLinks)
-               parse->jointree->quals = pull_up_IN_clauses(root,
-                                                                                                       parse->jointree->quals);
+               parse->jointree->quals = pull_up_sublinks(root,
+                                                                                                 parse->jointree->quals);
 
        /*
         * Scan the rangetable for set-returning functions, and inline them
         * if possible (producing subqueries that might get pulled up next).
-        * Recursion issues here are handled in the same way as for IN clauses.
+        * Recursion issues here are handled in the same way as for SubLinks.
         */
        inline_set_returning_functions(root);
 
@@ -295,16 +294,11 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
        /*
         * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
         * avoid the expense of doing flatten_join_alias_vars().  Also check for
-        * outer joins --- if none, we can skip reduce_outer_joins() and some
-        * other processing.  This must be done after we have done
-        * pull_up_subqueries, of course.
-        *
-        * Note: if reduce_outer_joins manages to eliminate all outer joins,
-        * root->hasOuterJoins is not reset currently.  This is OK since its
-        * purpose is merely to suppress unnecessary processing in simple cases.
+        * outer joins --- if none, we can skip reduce_outer_joins().
+        * This must be done after we have done pull_up_subqueries, of course.
         */
        root->hasJoinRTEs = false;
-       root->hasOuterJoins = false;
+       hasOuterJoins = false;
        foreach(l, parse->rtable)
        {
                RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
@@ -314,7 +308,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
                        root->hasJoinRTEs = true;
                        if (IS_OUTER_JOIN(rte->jointype))
                        {
-                               root->hasOuterJoins = true;
+                               hasOuterJoins = true;
                                /* Can quit scanning once we find an outer join */
                                break;
                        }
@@ -362,9 +356,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
        parse->limitCount = preprocess_expression(root, parse->limitCount,
                                                                                          EXPRKIND_LIMIT);
 
-       root->in_info_list = (List *)
-               preprocess_expression(root, (Node *) root->in_info_list,
-                                                         EXPRKIND_ININFO);
        root->append_rel_list = (List *)
                preprocess_expression(root, (Node *) root->append_rel_list,
                                                          EXPRKIND_APPINFO);
@@ -442,7 +433,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
         * This step is most easily done after we've done expression
         * preprocessing.
         */
-       if (root->hasOuterJoins)
+       if (hasOuterJoins)
                reduce_outer_joins(root);
 
        /*
@@ -639,20 +630,15 @@ inheritance_planner(PlannerInfo *root)
                        continue;
 
                /*
-                * Generate modified query with this rel as target.  We have to be
-                * prepared to translate varnos in in_info_list as well as in the
-                * Query proper.
+                * Generate modified query with this rel as target.
                 */
                memcpy(&subroot, root, sizeof(PlannerInfo));
                subroot.parse = (Query *)
                        adjust_appendrel_attrs((Node *) parse,
                                                                   appinfo);
-               subroot.in_info_list = (List *)
-                       adjust_appendrel_attrs((Node *) root->in_info_list,
-                                                                  appinfo);
                subroot.init_plans = NIL;
                /* There shouldn't be any OJ info to translate, as yet */
-               Assert(subroot.oj_info_list == NIL);
+               Assert(subroot.join_info_list == NIL);
 
                /* Generate plan */
                subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );
index 72e951abd05ead554613c12d72eba6295adf045b..1e4e9fe565b74579759e1e256bd3e2a2b869775d 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.132 2008/07/10 02:14:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.133 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,6 +21,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
+#include "optimizer/prep.h"
 #include "optimizer/subselect.h"
 #include "optimizer/var.h"
 #include "parser/parse_expr.h"
@@ -62,6 +63,7 @@ static Node *convert_testexpr_mutator(Node *node,
                                                 convert_testexpr_context *context);
 static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan);
 static bool hash_ok_operator(OpExpr *expr);
+static bool simplify_EXISTS_query(Query *query);
 static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
 static Node *process_sublinks_mutator(Node *node,
                                                 process_sublinks_context *context);
@@ -217,11 +219,16 @@ generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod)
 static Oid
 get_first_col_type(Plan *plan)
 {
-       TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
+       /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
+       if (plan->targetlist)
+       {
+               TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
 
-       Assert(IsA(tent, TargetEntry));
-       Assert(!tent->resjunk);
-       return exprType((Node *) tent->expr);
+               Assert(IsA(tent, TargetEntry));
+               if (!tent->resjunk)
+                       return exprType((Node *) tent->expr);
+       }
+       return VOIDOID;
 }
 
 /*
@@ -258,6 +265,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
         */
        subquery = (Query *) copyObject(subquery);
 
+       /*
+        * If it's an EXISTS subplan, we might be able to simplify it.
+        */
+       if (slink->subLinkType == EXISTS_SUBLINK)
+               (void) simplify_EXISTS_query(subquery);
+
        /*
         * For an EXISTS subplan, tell lower-level planner to expect that only the
         * first tuple will be retrieved.  For ALL and ANY subplans, we will be
@@ -710,80 +723,32 @@ hash_ok_operator(OpExpr *expr)
 }
 
 /*
- * convert_IN_to_join: can we convert an IN SubLink to join style?
+ * convert_ANY_sublink_to_join: can we convert an ANY SubLink to a join?
  *
- * The caller has found a SubLink at the top level of WHERE, but has not
- * checked the properties of the SubLink at all.  Decide whether it is
+ * The caller has found an ANY SubLink at the top level of WHERE, but has not
+ * checked the properties of the SubLink further.  Decide whether it is
  * appropriate to process this SubLink in join style.  If not, return NULL.
  * If so, build the qual clause(s) to replace the SubLink, and return them.
+ * The qual clauses are wrapped in a FlattenedSubLink node to help later
+ * processing place them properly.
  *
  * Side effects of a successful conversion include adding the SubLink's
- * subselect to the query's rangetable and adding an InClauseInfo node to
- * its in_info_list.
+ * subselect to the query's rangetable.
  */
 Node *
-convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
+convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink)
 {
        Query      *parse = root->parse;
        Query      *subselect = (Query *) sublink->subselect;
-       List       *in_operators;
-       List       *left_exprs;
-       List       *right_exprs;
        Relids          left_varnos;
        int                     rtindex;
        RangeTblEntry *rte;
        RangeTblRef *rtr;
        List       *subquery_vars;
-       InClauseInfo *ininfo;
-       Node       *result;
-
-       /*
-        * The sublink type must be "= ANY" --- that is, an IN operator.  We
-        * expect that the test expression will be either a single OpExpr, or an
-        * AND-clause containing OpExprs.  (If it's anything else then the parser
-        * must have determined that the operators have non-equality-like
-        * semantics.  In the OpExpr case we can't be sure what the operator's
-        * semantics are like, and must check for ourselves.)
-        */
-       if (sublink->subLinkType != ANY_SUBLINK)
-               return NULL;
-       if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
-       {
-               OpExpr     *op = (OpExpr *) sublink->testexpr;
-               Oid                     opno = op->opno;
-               List       *opfamilies;
-               List       *opstrats;
-
-               if (list_length(op->args) != 2)
-                       return NULL;                            /* not binary operator? */
-               get_op_btree_interpretation(opno, &opfamilies, &opstrats);
-               if (!list_member_int(opstrats, ROWCOMPARE_EQ))
-                       return NULL;
-               in_operators = list_make1_oid(opno);
-               left_exprs = list_make1(linitial(op->args));
-               right_exprs = list_make1(lsecond(op->args));
-       }
-       else if (and_clause(sublink->testexpr))
-       {
-               ListCell   *lc;
+       Expr       *quals;
+       FlattenedSubLink *fslink;
 
-               /* OK, but we need to extract the per-column info */
-               in_operators = left_exprs = right_exprs = NIL;
-               foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
-               {
-                       OpExpr     *op = (OpExpr *) lfirst(lc);
-
-                       if (!IsA(op, OpExpr))           /* probably shouldn't happen */
-                               return NULL;
-                       if (list_length(op->args) != 2)
-                               return NULL;                    /* not binary operator? */
-                       in_operators = lappend_oid(in_operators, op->opno);
-                       left_exprs = lappend(left_exprs, linitial(op->args));
-                       right_exprs = lappend(right_exprs, lsecond(op->args));
-               }
-       }
-       else
-               return NULL;
+       Assert(sublink->subLinkType == ANY_SUBLINK);
 
        /*
         * The sub-select must not refer to any Vars of the parent query. (Vars of
@@ -793,16 +758,14 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
                return NULL;
 
        /*
-        * The left-hand expressions must contain some Vars of the current query,
-        * else it's not gonna be a join.
+        * The test expression must contain some Vars of the current query,
+        * else it's not gonna be a join.  (Note that it won't have Vars
+        * referring to the subquery, rather Params.)
         */
-       left_varnos = pull_varnos((Node *) left_exprs);
+       left_varnos = pull_varnos(sublink->testexpr);
        if (bms_is_empty(left_varnos))
                return NULL;
 
-       /* ... and the right-hand expressions better not contain Vars at all */
-       Assert(!contain_var_clause((Node *) right_exprs));
-
        /*
         * The combining operators and left-hand expressions mustn't be volatile.
         */
@@ -819,12 +782,19 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
         */
        rte = addRangeTableEntryForSubquery(NULL,
                                                                                subselect,
-                                                                               makeAlias("IN_subquery", NIL),
+                                                                               makeAlias("ANY_subquery", NIL),
                                                                                false);
        parse->rtable = lappend(parse->rtable, rte);
        rtindex = list_length(parse->rtable);
        rtr = makeNode(RangeTblRef);
        rtr->rtindex = rtindex;
+
+       /*
+        * We assume it's okay to add the pulled-up subquery to the topmost FROM
+        * list.  This should be all right for ANY clauses appearing in WHERE
+        * or in upper-level plain JOIN/ON clauses.  ANYs appearing below any
+        * outer joins couldn't be placed there, however.
+        */
        parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
 
        /*
@@ -837,34 +807,232 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
        /*
         * Build the result qual expression, replacing Params with these Vars.
         */
-       result = convert_testexpr(root,
-                                                         sublink->testexpr,
-                                                         subquery_vars);
+       quals = (Expr *) convert_testexpr(root,
+                                                                         sublink->testexpr,
+                                                                         subquery_vars);
+
+       /*
+        * Now build the FlattenedSubLink node.
+        */
+       fslink = makeNode(FlattenedSubLink);
+       fslink->jointype = JOIN_SEMI;
+       fslink->lefthand = left_varnos;
+       fslink->righthand = bms_make_singleton(rtindex);
+       fslink->quals = quals;
+
+       return (Node *) fslink;
+}
+
+/*
+ * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
+ *
+ * The only thing that matters about an EXISTS query is whether it returns
+ * zero or more than zero rows.  Therefore, we can remove certain SQL features
+ * that won't affect that.  The only part that is really likely to matter in
+ * typical usage is simplifying the targetlist: it's a common habit to write
+ * "SELECT * FROM" even though there is no need to evaluate any columns.
+ *
+ * Note: by suppressing the targetlist we could cause an observable behavioral
+ * change, namely that any errors that might occur in evaluating the tlist
+ * won't occur, nor will other side-effects of volatile functions.  This seems
+ * unlikely to bother anyone in practice.
+ *
+ * Returns TRUE if we were able to discard the targetlist, else FALSE.
+ */
+static bool
+simplify_EXISTS_query(Query *query)
+{
+       /*
+        * We don't try to simplify at all if the query uses set operations,
+        * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
+        * seem likely in normal usage and their possible effects are complex.
+        */
+       if (query->commandType != CMD_SELECT ||
+               query->intoClause ||
+               query->setOperations ||
+               query->hasAggs ||
+               query->havingQual ||
+               query->limitOffset ||
+               query->limitCount ||
+               query->rowMarks)
+               return false;
 
        /*
-        * Now build the InClauseInfo node.
+        * Mustn't throw away the targetlist if it contains set-returning
+        * functions; those could affect whether zero rows are returned!
         */
-       ininfo = makeNode(InClauseInfo);
-       ininfo->lefthand = left_varnos;
-       ininfo->righthand = bms_make_singleton(rtindex);
-       ininfo->in_operators = in_operators;
+       if (expression_returns_set((Node *) query->targetList))
+               return false;
 
        /*
-        * ininfo->sub_targetlist must be filled with a list of expressions that
-        * would need to be unique-ified if we try to implement the IN using a
-        * regular join to unique-ified subquery output.  This is most easily done
-        * by applying convert_testexpr to just the RHS inputs of the testexpr
-        * operators.  That handles cases like type coercions of the subquery
-        * outputs, clauses dropped due to const-simplification, etc.
+        * Otherwise, we can throw away the targetlist, as well as any GROUP,
+        * DISTINCT, and ORDER BY clauses; none of those clauses will change
+        * a nonzero-rows result to zero rows or vice versa.  (Furthermore,
+        * since our parsetree representation of these clauses depends on the
+        * targetlist, we'd better throw them away if we drop the targetlist.)
         */
-       ininfo->sub_targetlist = (List *) convert_testexpr(root,
-                                                                                                          (Node *) right_exprs,
-                                                                                                          subquery_vars);
+       query->targetList = NIL;
+       query->groupClause = NIL;
+       query->distinctClause = NIL;
+       query->sortClause = NIL;
+       query->hasDistinctOn = false;
 
-       /* Add the completed node to the query's list */
-       root->in_info_list = lappend(root->in_info_list, ininfo);
+       return true;
+}
 
-       return result;
+/*
+ * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join?
+ *
+ * The caller has found an EXISTS SubLink at the top level of WHERE, or just
+ * underneath a NOT, but has not checked the properties of the SubLink
+ * further.  Decide whether it is appropriate to process this SubLink in join
+ * style.  If not, return NULL.  If so, build the qual clause(s) to replace
+ * the SubLink, and return them.  (In the NOT case, the returned clauses are
+ * intended to replace the NOT as well.)  The qual clauses are wrapped in a
+ * FlattenedSubLink node to help later processing place them properly.
+ *
+ * Side effects of a successful conversion include adding the SubLink's
+ * subselect to the query's rangetable.
+ */
+Node *
+convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
+                                                          bool under_not)
+{
+       Query      *parse = root->parse;
+       Query      *subselect = (Query *) sublink->subselect;
+       Node       *whereClause;
+       int                     rtoffset;
+       int                     varno;
+       Relids          clause_varnos;
+       Relids          left_varnos;
+       Relids          right_varnos;
+       Relids          subselect_varnos;
+       FlattenedSubLink *fslink;
+
+       Assert(sublink->subLinkType == EXISTS_SUBLINK);
+
+       /*
+        * Copy the subquery so we can modify it safely (see comments in
+        * make_subplan).
+        */
+       subselect = (Query *) copyObject(subselect);
+
+       /*
+        * See if the subquery can be simplified based on the knowledge that
+        * it's being used in EXISTS().  If we aren't able to get rid of its
+        * targetlist, we have to fail, because the pullup operation leaves
+        * us with no place to evaluate the targetlist.
+        */
+       if (!simplify_EXISTS_query(subselect))
+               return NULL;
+
+       /*
+        * Separate out the WHERE clause.  (We could theoretically also remove
+        * top-level plain JOIN/ON clauses, but it's probably not worth the
+        * trouble.)
+        */
+       whereClause = subselect->jointree->quals;
+       subselect->jointree->quals = NULL;
+
+       /*
+        * The rest of the sub-select must not refer to any Vars of the parent
+        * query.  (Vars of higher levels should be okay, though.)
+        */
+       if (contain_vars_of_level((Node *) subselect, 1))
+               return NULL;
+
+       /*
+        * On the other hand, the WHERE clause must contain some Vars of the
+        * parent query, else it's not gonna be a join.
+        */
+       if (!contain_vars_of_level(whereClause, 1))
+               return NULL;
+
+       /*
+        * We don't risk optimizing if the WHERE clause is volatile, either.
+        */
+       if (contain_volatile_functions(whereClause))
+               return NULL;
+
+       /*
+        * Also disallow SubLinks within the WHERE clause.  (XXX this could
+        * probably be supported, but it would complicate the transformation
+        * below, and it doesn't seem worth worrying about in a first pass.)
+        */
+       if (contain_subplans(whereClause))
+               return NULL;
+
+       /*
+        * Okay, pull up the sub-select into top range table and jointree.
+        *
+        * We rely here on the assumption that the outer query has no references
+        * to the inner (necessarily true). Therefore this is a lot easier than
+        * what pull_up_subqueries has to go through.
+        *
+        * In fact, it's even easier than what convert_ANY_sublink_to_join has
+        * to do.  The machinations of simplify_EXISTS_query ensured that there
+        * is nothing interesting in the subquery except an rtable and jointree,
+        * and even the jointree FromExpr no longer has quals.  So we can just
+        * append the rtable to our own and append the fromlist to our own.
+        * But first, adjust all level-zero varnos in the subquery to account
+        * for the rtable merger.
+        */
+       rtoffset = list_length(parse->rtable);
+       OffsetVarNodes((Node *) subselect, rtoffset, 0);
+       OffsetVarNodes(whereClause, rtoffset, 0);
+
+       /*
+        * Upper-level vars in subquery will now be one level closer to their
+        * parent than before; in particular, anything that had been level 1
+        * becomes level zero.
+        */
+       IncrementVarSublevelsUp((Node *) subselect, -1, 1);
+       IncrementVarSublevelsUp(whereClause, -1, 1);
+
+       /*
+        * Now that the WHERE clause is adjusted to match the parent query
+        * environment, we can easily identify all the level-zero rels it uses.
+        * The ones <= rtoffset are "left rels" of the join we're forming,
+        * and the ones > rtoffset are "right rels".
+        */
+       clause_varnos = pull_varnos(whereClause);
+       left_varnos = right_varnos = NULL;
+       while ((varno = bms_first_member(clause_varnos)) >= 0)
+       {
+               if (varno <= rtoffset)
+                       left_varnos = bms_add_member(left_varnos, varno);
+               else
+                       right_varnos = bms_add_member(right_varnos, varno);
+       }
+       bms_free(clause_varnos);
+       Assert(!bms_is_empty(left_varnos));
+
+       /* Also identify all the rels syntactically within the subselect */
+       subselect_varnos = get_relids_in_jointree((Node *) subselect->jointree);
+       Assert(bms_is_subset(right_varnos, subselect_varnos));
+
+       /* Now we can attach the modified subquery rtable to the parent */
+       parse->rtable = list_concat(parse->rtable, subselect->rtable);
+
+       /*
+        * We assume it's okay to add the pulled-up subquery to the topmost FROM
+        * list.  This should be all right for EXISTS clauses appearing in WHERE
+        * or in upper-level plain JOIN/ON clauses.  EXISTS appearing below any
+        * outer joins couldn't be placed there, however.
+        */
+       parse->jointree->fromlist = list_concat(parse->jointree->fromlist,
+                                                                                       subselect->jointree->fromlist);
+
+       /*
+        * Now build the FlattenedSubLink node.
+        */
+       fslink = makeNode(FlattenedSubLink);
+       fslink->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+       fslink->lefthand = left_varnos;
+       fslink->righthand = subselect_varnos;
+       fslink->quals = (Expr *) whereClause;
+
+       return (Node *) fslink;
 }
 
 /*
index 755bed363b12af6757910beb1b4dcf8d737bb1a5..b4be3d8d0c1b0b07797602697e5c728c6def7472 100644 (file)
@@ -4,7 +4,7 @@
  *       Planner preprocessing for subqueries and join tree manipulation.
  *
  * NOTE: the intended sequence for invoking these operations is
- *             pull_up_IN_clauses
+ *             pull_up_sublinks
  *             inline_set_returning_functions
  *             pull_up_subqueries
  *             do expression preprocessing (including flattening JOIN alias vars)
@@ -16,7 +16,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.50 2008/03/18 22:04:14 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepjointree.c,v 1.51 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,26 +63,32 @@ static reduce_outer_joins_state *reduce_outer_joins_pass1(Node *jtnode);
 static void reduce_outer_joins_pass2(Node *jtnode,
                                                 reduce_outer_joins_state *state,
                                                 PlannerInfo *root,
-                                                Relids nonnullable_rels);
-static void fix_in_clause_relids(List *in_info_list, int varno,
-                                        Relids subrelids);
+                                                Relids nonnullable_rels,
+                                                List *nonnullable_vars,
+                                                List *forced_null_vars);
+static void fix_flattened_sublink_relids(Node *node,
+                                                                                int varno, Relids subrelids);
 static void fix_append_rel_relids(List *append_rel_list, int varno,
                                          Relids subrelids);
 static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
 
 
 /*
- * pull_up_IN_clauses
- *             Attempt to pull up top-level IN clauses to be treated like joins.
+ * pull_up_sublinks
+ *             Attempt to pull up top-level ANY and EXISTS SubLinks to be treated
+ *             as semijoins or anti-semijoins.
  *
- * A clause "foo IN (sub-SELECT)" appearing at the top level of WHERE can
- * be processed by pulling the sub-SELECT up to become a rangetable entry
- * and handling the implied equality comparisons as join operators (with
- * special join rules).
+ * A clause "foo op ANY (sub-SELECT)" appearing at the top level of WHERE
+ * can be processed by pulling the sub-SELECT up to become a rangetable entry
+ * and handling the implied comparisons as quals of a semijoin.
  * This optimization *only* works at the top level of WHERE, because
- * it cannot distinguish whether the IN ought to return FALSE or NULL in
- * cases involving NULL inputs.  This routine searches for such clauses
- * and does the necessary parsetree transformations if any are found.
+ * it cannot distinguish whether the ANY ought to return FALSE or NULL in
+ * cases involving NULL inputs.  Similarly, EXISTS and NOT EXISTS clauses
+ * can be handled by pulling up the sub-SELECT and creating a semijoin
+ * or anti-semijoin respectively.
+ *
+ * This routine searches for such clauses and does the necessary parsetree
+ * transformations if any are found.
  *
  * This routine has to run before preprocess_expression(), so the WHERE
  * clause is not yet reduced to implicit-AND format.  That means we need
@@ -90,9 +96,10 @@ static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
  * probably only binary ANDs.  We stop as soon as we hit a non-AND item.
  *
  * Returns the possibly-modified version of the given qual-tree node.
+ * There may be side-effects on the query's rtable and jointree, too.
  */
 Node *
-pull_up_IN_clauses(PlannerInfo *root, Node *node)
+pull_up_sublinks(PlannerInfo *root, Node *node)
 {
        if (node == NULL)
                return NULL;
@@ -101,11 +108,39 @@ pull_up_IN_clauses(PlannerInfo *root, Node *node)
                SubLink    *sublink = (SubLink *) node;
                Node       *subst;
 
-               /* Is it a convertible IN clause?  If not, return it as-is */
-               subst = convert_IN_to_join(root, sublink);
-               if (subst == NULL)
-                       return node;
-               return subst;
+               /* Is it a convertible ANY or EXISTS clause? */
+               if (sublink->subLinkType == ANY_SUBLINK)
+               {
+                       subst = convert_ANY_sublink_to_join(root, sublink);
+                       if (subst)
+                               return subst;
+               }
+               else if (sublink->subLinkType == EXISTS_SUBLINK)
+               {
+                       subst = convert_EXISTS_sublink_to_join(root, sublink, false);
+                       if (subst)
+                               return subst;
+               }
+               /* Else return it unmodified */
+               return node;
+       }
+       if (not_clause(node))
+       {
+               /* If the immediate argument of NOT is EXISTS, try to convert */
+               SubLink    *sublink = (SubLink *) get_notclausearg((Expr *) node);
+               Node       *subst;
+
+               if (sublink && IsA(sublink, SubLink))
+               {
+                       if (sublink->subLinkType == EXISTS_SUBLINK)
+                       {
+                               subst = convert_EXISTS_sublink_to_join(root, sublink, true);
+                               if (subst)
+                                       return subst;
+                       }
+               }
+               /* Else return it unmodified */
+               return node;
        }
        if (and_clause(node))
        {
@@ -117,7 +152,7 @@ pull_up_IN_clauses(PlannerInfo *root, Node *node)
                        Node       *oldclause = (Node *) lfirst(l);
 
                        newclauses = lappend(newclauses,
-                                                                pull_up_IN_clauses(root, oldclause));
+                                                                pull_up_sublinks(root, oldclause));
                }
                return (Node *) make_andclause(newclauses);
        }
@@ -137,8 +172,8 @@ pull_up_IN_clauses(PlannerInfo *root, Node *node)
  *
  * This has to be done before we have started to do any optimization of
  * subqueries, else any such steps wouldn't get applied to subqueries
- * obtained via inlining.  However, we do it after pull_up_IN_clauses
- * so that we can inline any functions used in IN subselects.
+ * obtained via inlining.  However, we do it after pull_up_sublinks
+ * so that we can inline any functions used in SubLink subselects.
  *
  * Like most of the planner, this feels free to scribble on its input data
  * structure.
@@ -344,15 +379,14 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
        subroot->planner_cxt = CurrentMemoryContext;
        subroot->init_plans = NIL;
        subroot->eq_classes = NIL;
-       subroot->in_info_list = NIL;
        subroot->append_rel_list = NIL;
 
        /*
-        * Pull up any IN clauses within the subquery's WHERE, so that we don't
-        * leave unoptimized INs behind.
+        * Pull up any SubLinks within the subquery's WHERE, so that we don't
+        * leave unoptimized SubLinks behind.
         */
        if (subquery->hasSubLinks)
-               subquery->jointree->quals = pull_up_IN_clauses(subroot,
+               subquery->jointree->quals = pull_up_sublinks(subroot,
                                                                                                  subquery->jointree->quals);
 
        /*
@@ -402,12 +436,11 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 
        /*
         * Adjust level-0 varnos in subquery so that we can append its rangetable
-        * to upper query's.  We have to fix the subquery's in_info_list and
-        * append_rel_list, as well.
+        * to upper query's.  We have to fix the subquery's append_rel_list
+        * as well.
         */
        rtoffset = list_length(parse->rtable);
        OffsetVarNodes((Node *) subquery, rtoffset, 0);
-       OffsetVarNodes((Node *) subroot->in_info_list, rtoffset, 0);
        OffsetVarNodes((Node *) subroot->append_rel_list, rtoffset, 0);
 
        /*
@@ -415,7 +448,6 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
         * than before.
         */
        IncrementVarSublevelsUp((Node *) subquery, -1, 1);
-       IncrementVarSublevelsUp((Node *) subroot->in_info_list, -1, 1);
        IncrementVarSublevelsUp((Node *) subroot->append_rel_list, -1, 1);
 
        /*
@@ -440,10 +472,6 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
                ResolveNew(parse->havingQual,
                                   varno, 0, rte,
                                   subtlist, CMD_SELECT, 0);
-       root->in_info_list = (List *)
-               ResolveNew((Node *) root->in_info_list,
-                                  varno, 0, rte,
-                                  subtlist, CMD_SELECT, 0);
        root->append_rel_list = (List *)
                ResolveNew((Node *) root->append_rel_list,
                                   varno, 0, rte,
@@ -474,29 +502,27 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
        parse->rowMarks = list_concat(parse->rowMarks, subquery->rowMarks);
 
        /*
-        * We also have to fix the relid sets of any parent InClauseInfo nodes.
-        * (This could perhaps be done by ResolveNew, but it would clutter that
-        * routine's API unreasonably.)
+        * We also have to fix the relid sets of any FlattenedSubLink nodes in
+        * the parent query.  (This could perhaps be done by ResolveNew, but it
+        * would clutter that routine's API unreasonably.)
         *
         * Likewise, relids appearing in AppendRelInfo nodes have to be fixed (but
         * we took care of their translated_vars lists above).  We already checked
         * that this won't require introducing multiple subrelids into the
         * single-slot AppendRelInfo structs.
         */
-       if (root->in_info_list || root->append_rel_list)
+       if (parse->hasSubLinks || root->append_rel_list)
        {
                Relids          subrelids;
 
                subrelids = get_relids_in_jointree((Node *) subquery->jointree);
-               fix_in_clause_relids(root->in_info_list, varno, subrelids);
+               fix_flattened_sublink_relids((Node *) parse, varno, subrelids);
                fix_append_rel_relids(root->append_rel_list, varno, subrelids);
        }
 
        /*
-        * And now add any subquery InClauseInfos and AppendRelInfos to our lists.
+        * And now add subquery's AppendRelInfos to our list.
         */
-       root->in_info_list = list_concat(root->in_info_list,
-                                                                        subroot->in_info_list);
        root->append_rel_list = list_concat(root->append_rel_list,
                                                                                subroot->append_rel_list);
 
@@ -504,8 +530,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
         * We don't have to do the equivalent bookkeeping for outer-join info,
         * because that hasn't been set up yet.
         */
-       Assert(root->oj_info_list == NIL);
-       Assert(subroot->oj_info_list == NIL);
+       Assert(root->join_info_list == NIL);
+       Assert(subroot->join_info_list == NIL);
 
        /*
         * Miscellaneous housekeeping.
@@ -966,6 +992,20 @@ resolvenew_in_jointree(Node *jtnode, int varno,
  * nullable side of the join to be non-null.  (For FULL joins this applies
  * to each side separately.)
  *
+ * Another transformation we apply here is to recognize cases like
+ *             SELECT ... FROM a LEFT JOIN b ON (a.x = b.y) WHERE b.y IS NULL;
+ * If the join clause is strict for b.y, then only null-extended rows could
+ * pass the upper WHERE, and we can conclude that what the query is really
+ * specifying is an anti-semijoin.  We change the join type from JOIN_LEFT
+ * to JOIN_ANTI.  The IS NULL clause then becomes redundant, and must be
+ * removed to prevent bogus selectivity calculations, but we leave it to
+ * distribute_qual_to_rels to get rid of such clauses.
+ *
+ * Also, we get rid of JOIN_RIGHT cases by flipping them around to become
+ * JOIN_LEFT.  This saves some code here and in some later planner routines,
+ * but the main reason to do it is to not need to invent a JOIN_REVERSE_ANTI
+ * join type.
+ *
  * To ease recognition of strict qual clauses, we require this routine to be
  * run after expression preprocessing (i.e., qual canonicalization and JOIN
  * alias-var expansion).
@@ -991,7 +1031,7 @@ reduce_outer_joins(PlannerInfo *root)
                elog(ERROR, "so where are the outer joins?");
 
        reduce_outer_joins_pass2((Node *) root->parse->jointree,
-                                                        state, root, NULL);
+                                                        state, root, NULL, NIL, NIL);
 }
 
 /*
@@ -1068,12 +1108,16 @@ reduce_outer_joins_pass1(Node *jtnode)
  *     state: state data collected by phase 1 for this node
  *     root: toplevel planner state
  *     nonnullable_rels: set of base relids forced non-null by upper quals
+ *     nonnullable_vars: list of Vars forced non-null by upper quals
+ *     forced_null_vars: list of Vars forced null by upper quals
  */
 static void
 reduce_outer_joins_pass2(Node *jtnode,
                                                 reduce_outer_joins_state *state,
                                                 PlannerInfo *root,
-                                                Relids nonnullable_rels)
+                                                Relids nonnullable_rels,
+                                                List *nonnullable_vars,
+                                                List *forced_null_vars)
 {
        /*
         * pass 2 should never descend as far as an empty subnode or base rel,
@@ -1088,12 +1132,21 @@ reduce_outer_joins_pass2(Node *jtnode,
                FromExpr   *f = (FromExpr *) jtnode;
                ListCell   *l;
                ListCell   *s;
-               Relids          pass_nonnullable;
-
-               /* Scan quals to see if we can add any nonnullability constraints */
-               pass_nonnullable = find_nonnullable_rels(f->quals);
-               pass_nonnullable = bms_add_members(pass_nonnullable,
-                                                                                  nonnullable_rels);
+               Relids          pass_nonnullable_rels;
+               List       *pass_nonnullable_vars;
+               List       *pass_forced_null_vars;
+
+               /* Scan quals to see if we can add any constraints */
+               pass_nonnullable_rels = find_nonnullable_rels(f->quals);
+               pass_nonnullable_rels = bms_add_members(pass_nonnullable_rels,
+                                                                                               nonnullable_rels);
+               /* NB: we rely on list_concat to not damage its second argument */
+               pass_nonnullable_vars = find_nonnullable_vars(f->quals);
+               pass_nonnullable_vars = list_concat(pass_nonnullable_vars,
+                                                                                       nonnullable_vars);
+               pass_forced_null_vars = find_forced_null_vars(f->quals);
+               pass_forced_null_vars = list_concat(pass_forced_null_vars,
+                                                                                       forced_null_vars);
                /* And recurse --- but only into interesting subtrees */
                Assert(list_length(f->fromlist) == list_length(state->sub_states));
                forboth(l, f->fromlist, s, state->sub_states)
@@ -1102,9 +1155,12 @@ reduce_outer_joins_pass2(Node *jtnode,
 
                        if (sub_state->contains_outer)
                                reduce_outer_joins_pass2(lfirst(l), sub_state, root,
-                                                                                pass_nonnullable);
+                                                                                pass_nonnullable_rels,
+                                                                                pass_nonnullable_vars,
+                                                                                pass_forced_null_vars);
                }
-               bms_free(pass_nonnullable);
+               bms_free(pass_nonnullable_rels);
+               /* can't so easily clean up var lists, unfortunately */
        }
        else if (IsA(jtnode, JoinExpr))
        {
@@ -1113,10 +1169,14 @@ reduce_outer_joins_pass2(Node *jtnode,
                JoinType        jointype = j->jointype;
                reduce_outer_joins_state *left_state = linitial(state->sub_states);
                reduce_outer_joins_state *right_state = lsecond(state->sub_states);
+               List       *local_nonnullable_vars = NIL;
+               bool            computed_local_nonnullable_vars = false;
 
                /* Can we simplify this join? */
                switch (jointype)
                {
+                       case JOIN_INNER:
+                               break;
                        case JOIN_LEFT:
                                if (bms_overlap(nonnullable_rels, right_state->relids))
                                        jointype = JOIN_INNER;
@@ -1140,11 +1200,63 @@ reduce_outer_joins_pass2(Node *jtnode,
                                }
                                break;
                        default:
+                               elog(ERROR, "unrecognized join type: %d",
+                                        (int) jointype);
                                break;
                }
+
+               /*
+                * Convert JOIN_RIGHT to JOIN_LEFT.  Note that in the case where we
+                * reduced JOIN_FULL to JOIN_RIGHT, this will mean the JoinExpr no
+                * longer matches the internal ordering of any CoalesceExpr's built to
+                * represent merged join variables.  We don't care about that at
+                * present, but be wary of it ...
+                */
+               if (jointype == JOIN_RIGHT)
+               {
+                       Node       *tmparg;
+
+                       tmparg = j->larg;
+                       j->larg = j->rarg;
+                       j->rarg = tmparg;
+                       jointype = JOIN_LEFT;
+                       right_state = linitial(state->sub_states);
+                       left_state = lsecond(state->sub_states);
+               }
+
+               /*
+                * See if we can reduce JOIN_LEFT to JOIN_ANTI.  This is the case
+                * if the join's own quals are strict for any var that was forced
+                * null by higher qual levels.  NOTE: there are other ways that we
+                * could detect an anti-join, in particular if we were to check
+                * whether Vars coming from the RHS must be non-null because of
+                * table constraints.  That seems complicated and expensive though
+                * (in particular, one would have to be wary of lower outer joins).
+                * For the moment this seems sufficient.
+                */
+               if (jointype == JOIN_LEFT)
+               {
+                       List       *overlap;
+
+                       local_nonnullable_vars = find_nonnullable_vars(j->quals);
+                       computed_local_nonnullable_vars = true;
+
+                       /*
+                        * It's not sufficient to check whether local_nonnullable_vars
+                        * and forced_null_vars overlap: we need to know if the overlap
+                        * includes any RHS variables.
+                        */
+                       overlap = list_intersection(local_nonnullable_vars,
+                                                                               forced_null_vars);
+                       if (overlap != NIL &&
+                               bms_overlap(pull_varnos((Node *) overlap),
+                                                       right_state->relids))
+                               jointype = JOIN_ANTI;
+               }
+
+               /* Apply the jointype change, if any, to both jointree node and RTE */
                if (jointype != j->jointype)
                {
-                       /* apply the change to both jointree node and RTE */
                        RangeTblEntry *rte = rt_fetch(rtindex, root->parse->rtable);
 
                        Assert(rte->rtekind == RTE_JOIN);
@@ -1155,45 +1267,103 @@ reduce_outer_joins_pass2(Node *jtnode,
                /* Only recurse if there's more to do below here */
                if (left_state->contains_outer || right_state->contains_outer)
                {
-                       Relids          local_nonnullable;
-                       Relids          pass_nonnullable;
+                       Relids          local_nonnullable_rels;
+                       List       *local_forced_null_vars;
+                       Relids          pass_nonnullable_rels;
+                       List       *pass_nonnullable_vars;
+                       List       *pass_forced_null_vars;
 
                        /*
-                        * If this join is (now) inner, we can add any nonnullability
-                        * constraints its quals provide to those we got from above. But
-                        * if it is outer, we can only pass down the local constraints
-                        * into the nullable side, because an outer join never eliminates
-                        * any rows from its non-nullable side.  If it's a FULL join then
-                        * it doesn't eliminate anything from either side.
+                        * If this join is (now) inner, we can add any constraints its
+                        * quals provide to those we got from above.  But if it is outer,
+                        * we can pass down the local constraints only into the nullable
+                        * side, because an outer join never eliminates any rows from its
+                        * non-nullable side.  Also, there is no point in passing upper
+                        * constraints into the nullable side, since if there were any
+                        * we'd have been able to reduce the join.  (In the case of
+                        * upper forced-null constraints, we *must not* pass them into
+                        * the nullable side --- they either applied here, or not.)
+                        * The upshot is that we pass either the local or the upper
+                        * constraints, never both, to the children of an outer join.
+                        *
+                        * At a FULL join we just punt and pass nothing down --- is it
+                        * possible to be smarter?
                         */
                        if (jointype != JOIN_FULL)
                        {
-                               local_nonnullable = find_nonnullable_rels(j->quals);
-                               local_nonnullable = bms_add_members(local_nonnullable,
-                                                                                                       nonnullable_rels);
+                               local_nonnullable_rels = find_nonnullable_rels(j->quals);
+                               if (!computed_local_nonnullable_vars)
+                                       local_nonnullable_vars = find_nonnullable_vars(j->quals);
+                               local_forced_null_vars = find_forced_null_vars(j->quals);
+                               if (jointype == JOIN_INNER)
+                               {
+                                       /* OK to merge upper and local constraints */
+                                       local_nonnullable_rels = bms_add_members(local_nonnullable_rels,
+                                                                                                                        nonnullable_rels);
+                                       local_nonnullable_vars = list_concat(local_nonnullable_vars,
+                                                                                                                nonnullable_vars);
+                                       local_forced_null_vars = list_concat(local_forced_null_vars,
+                                                                                                                forced_null_vars);
+                               }
                        }
                        else
-                               local_nonnullable = NULL;               /* no use in calculating it */
+                       {
+                               /* no use in calculating these */
+                               local_nonnullable_rels = NULL;
+                               local_forced_null_vars = NIL;
+                       }
 
                        if (left_state->contains_outer)
                        {
-                               if (jointype == JOIN_INNER || jointype == JOIN_RIGHT)
-                                       pass_nonnullable = local_nonnullable;
+                               if (jointype == JOIN_INNER)
+                               {
+                                       /* pass union of local and upper constraints */
+                                       pass_nonnullable_rels = local_nonnullable_rels;
+                                       pass_nonnullable_vars = local_nonnullable_vars;
+                                       pass_forced_null_vars = local_forced_null_vars;
+                               }
+                               else if (jointype != JOIN_FULL)         /* ie, LEFT or ANTI */
+                               {
+                                       /* can't pass local constraints to non-nullable side */
+                                       pass_nonnullable_rels = nonnullable_rels;
+                                       pass_nonnullable_vars = nonnullable_vars;
+                                       pass_forced_null_vars = forced_null_vars;
+                               }
                                else
-                                       pass_nonnullable = nonnullable_rels;
+                               {
+                                       /* no constraints pass through JOIN_FULL */
+                                       pass_nonnullable_rels = NULL;
+                                       pass_nonnullable_vars = NIL;
+                                       pass_forced_null_vars = NIL;
+                               }
                                reduce_outer_joins_pass2(j->larg, left_state, root,
-                                                                                pass_nonnullable);
+                                                                                pass_nonnullable_rels,
+                                                                                pass_nonnullable_vars,
+                                                                                pass_forced_null_vars);
                        }
+
                        if (right_state->contains_outer)
                        {
-                               if (jointype == JOIN_INNER || jointype == JOIN_LEFT)
-                                       pass_nonnullable = local_nonnullable;
+                               if (jointype != JOIN_FULL)              /* ie, INNER, LEFT or ANTI */
+                               {
+                                       /* pass appropriate constraints, per comment above */
+                                       pass_nonnullable_rels = local_nonnullable_rels;
+                                       pass_nonnullable_vars = local_nonnullable_vars;
+                                       pass_forced_null_vars = local_forced_null_vars;
+                               }
                                else
-                                       pass_nonnullable = nonnullable_rels;
+                               {
+                                       /* no constraints pass through JOIN_FULL */
+                                       pass_nonnullable_rels = NULL;
+                                       pass_nonnullable_vars = NIL;
+                                       pass_forced_null_vars = NIL;
+                               }
                                reduce_outer_joins_pass2(j->rarg, right_state, root,
-                                                                                pass_nonnullable);
+                                                                                pass_nonnullable_rels,
+                                                                                pass_nonnullable_vars,
+                                                                                pass_forced_null_vars);
                        }
-                       bms_free(local_nonnullable);
+                       bms_free(local_nonnullable_rels);
                }
        }
        else
@@ -1202,33 +1372,70 @@ reduce_outer_joins_pass2(Node *jtnode,
 }
 
 /*
- * fix_in_clause_relids: update RT-index sets of InClauseInfo nodes
+ * fix_flattened_sublink_relids - adjust FlattenedSubLink nodes after
+ * pulling up a subquery
  *
- * When we pull up a subquery, any InClauseInfo references to the subquery's
- * RT index have to be replaced by the set of substituted relids.
+ * Find any FlattenedSubLink nodes in the given tree that reference the
+ * pulled-up relid, and change them to reference the replacement relid(s).
+ * We do not need to recurse into subqueries, since no subquery of the
+ * current top query could contain such a reference.
  *
- * We assume we may modify the InClauseInfo nodes in-place.
+ * NOTE: although this has the form of a walker, we cheat and modify the
+ * nodes in-place.  This should be OK since the tree was copied by ResolveNew
+ * earlier.
  */
-static void
-fix_in_clause_relids(List *in_info_list, int varno, Relids subrelids)
+
+typedef struct
 {
-       ListCell   *l;
+       int                     varno;          /* RT index of the pulled-up relation */
+       Relids          subrelids;      /* relid set that replaces varno */
+} fix_flattened_sublink_relids_context;
 
-       foreach(l, in_info_list)
+static bool
+fix_flattened_sublink_relids_walker(Node *node,
+                                                          fix_flattened_sublink_relids_context *context)
+{
+       if (node == NULL)
+               return false;           /* empty subtree: nothing to fix */
+       if (IsA(node, FlattenedSubLink))
        {
-               InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+               FlattenedSubLink *fslink = (FlattenedSubLink *) node;
 
-               if (bms_is_member(varno, ininfo->lefthand))
+               if (bms_is_member(context->varno, fslink->lefthand))       /* lefthand mentions varno? */
                {
-                       ininfo->lefthand = bms_del_member(ininfo->lefthand, varno);
-                       ininfo->lefthand = bms_add_members(ininfo->lefthand, subrelids);
+                       fslink->lefthand = bms_del_member(fslink->lefthand,
+                                                                                         context->varno);      /* drop the old index ... */
+                       fslink->lefthand = bms_add_members(fslink->lefthand,
+                                                                                          context->subrelids); /* ... and add its replacements */
                }
-               if (bms_is_member(varno, ininfo->righthand))
+               if (bms_is_member(context->varno, fslink->righthand))      /* same fix-up for righthand */
                {
-                       ininfo->righthand = bms_del_member(ininfo->righthand, varno);
-                       ininfo->righthand = bms_add_members(ininfo->righthand, subrelids);
+                       fslink->righthand = bms_del_member(fslink->righthand,
+                                                                                          context->varno);
+                       fslink->righthand = bms_add_members(fslink->righthand,
+                                                                                               context->subrelids);
                }
+               /* fall through to examine children */
        }
+       return expression_tree_walker(node, fix_flattened_sublink_relids_walker,
+                                                                 (void *) context);    /* visit child nodes with the same context */
+}
+
+static void
+fix_flattened_sublink_relids(Node *node, int varno, Relids subrelids)
+{
+       fix_flattened_sublink_relids_context context;
+
+       context.varno = varno;                  /* RT index being replaced */
+       context.subrelids = subrelids;  /* relids to substitute for it */
+
+       /*
+        * Must be prepared to start with a Query or a bare expression tree.
+        */
+       query_or_expression_tree_walker(node,
+                                                                       fix_flattened_sublink_relids_walker,
+                                                                       (void *) &context,
+                                                                       0);     /* walker result is ignored; 0 is presumably "no flags" -- confirm */
+}
 
 /*
index dda41770c8d9dd198d7cc8fbe5e8a3e7b99a84f3..2fcdf0592fc3a56f7b0062052b3a8a1f3d3ca24e 100644 (file)
@@ -22,7 +22,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.152 2008/08/07 19:35:02 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.153 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1465,25 +1465,25 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context)
                        j->rtindex = context->child_relid;
                return (Node *) j;
        }
-       if (IsA(node, InClauseInfo))
+       if (IsA(node, FlattenedSubLink))
        {
-               /* Copy the InClauseInfo node with correct mutation of subnodes */
-               InClauseInfo *ininfo;
+               /* Copy the FlattenedSubLink node with correct mutation of subnodes */
+               FlattenedSubLink *fslink;
 
-               ininfo = (InClauseInfo *) expression_tree_mutator(node,
+               fslink = (FlattenedSubLink *) expression_tree_mutator(node,
                                                                                          adjust_appendrel_attrs_mutator,
-                                                                                                                 (void *) context);
-               /* now fix InClauseInfo's relid sets */
-               ininfo->lefthand = adjust_relid_set(ininfo->lefthand,
+                                                                                                                        (void *) context);
+               /* now fix FlattenedSubLink's relid sets */
+               fslink->lefthand = adjust_relid_set(fslink->lefthand,
                                                                                        context->parent_relid,
                                                                                        context->child_relid);
-               ininfo->righthand = adjust_relid_set(ininfo->righthand,
+               fslink->righthand = adjust_relid_set(fslink->righthand,
                                                                                         context->parent_relid,
                                                                                         context->child_relid);
-               return (Node *) ininfo;
+               return (Node *) fslink;
        }
-       /* Shouldn't need to handle OuterJoinInfo or AppendRelInfo here */
-       Assert(!IsA(node, OuterJoinInfo));
+       /* Shouldn't need to handle SpecialJoinInfo or AppendRelInfo here */
+       Assert(!IsA(node, SpecialJoinInfo));
        Assert(!IsA(node, AppendRelInfo));
 
        /*
index 858d4abcbd87c1680e14c6cda67075cb94767c40..ffe31bdff28876f6cbf0a470dc14bfc1f2cd8bbe 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.261 2008/08/07 01:11:50 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.262 2008/08/14 18:47:59 tgl Exp $
  *
  * HISTORY
  *       AUTHOR                        DATE                    MAJOR EVENT
@@ -32,6 +32,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/planmain.h"
 #include "optimizer/planner.h"
+#include "optimizer/prep.h"
 #include "optimizer/var.h"
 #include "parser/analyze.h"
 #include "parser/parse_clause.h"
@@ -79,6 +80,7 @@ static bool contain_mutable_functions_walker(Node *node, void *context);
 static bool contain_volatile_functions_walker(Node *node, void *context);
 static bool contain_nonstrict_functions_walker(Node *node, void *context);
 static Relids find_nonnullable_rels_walker(Node *node, bool top_level);
+static List *find_nonnullable_vars_walker(Node *node, bool top_level);
 static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK);
 static bool set_coercionform_dontcare_walker(Node *node, void *context);
 static Node *eval_const_expressions_mutator(Node *node,
@@ -1054,6 +1056,13 @@ contain_nonstrict_functions_walker(Node *node, void *context)
  * the expression to have been AND/OR flattened and converted to implicit-AND
  * format.
  *
+ * Note: this function is largely duplicative of find_nonnullable_vars().
+ * The reason not to simplify this function into a thin wrapper around
+ * find_nonnullable_vars() is that the tested conditions really are different:
+ * a clause like "t1.v1 IS NOT NULL OR t1.v2 IS NOT NULL" does not prove
+ * that either v1 or v2 can't be NULL, but it does prove that the t1 row
+ * as a whole can't be all-NULL.
+ *
  * top_level is TRUE while scanning top-level AND/OR structure; here, showing
  * the result is either FALSE or NULL is good enough.  top_level is FALSE when
  * we have descended below a NOT or a strict function: now we must be able to
@@ -1228,9 +1237,330 @@ find_nonnullable_rels_walker(Node *node, bool top_level)
                         expr->booltesttype == IS_NOT_UNKNOWN))
                        result = find_nonnullable_rels_walker((Node *) expr->arg, false);
        }
+       else if (IsA(node, FlattenedSubLink))
+       {
+               /* JOIN_SEMI sublinks preserve strictness, but JOIN_ANTI ones don't */
+               FlattenedSubLink *expr = (FlattenedSubLink *) node;
+
+               if (expr->jointype == JOIN_SEMI)
+                       result = find_nonnullable_rels_walker((Node *) expr->quals,
+                                                                                                 top_level);
+       }
+       return result;
+}
+
+/*
+ * find_nonnullable_vars
+ *             Determine which Vars are forced nonnullable by the given clause.
+ *
+ * Returns a list of all level-zero Vars (varlevelsup == 0) referenced in the
+ * clause in such a way that the clause cannot possibly return TRUE if any of
+ * these Vars is NULL.  (It is OK to err on the side of conservatism; hence
+ * the analysis here is simplistic.)
+ *
+ * The semantics here are subtly different from contain_nonstrict_functions:
+ * that function is concerned with NULL results from arbitrary expressions,
+ * but here we assume that the input is a Boolean expression, and wish to
+ * see if NULL inputs will provably cause a FALSE-or-NULL result.  We expect
+ * the expression to have been AND/OR flattened and converted to implicit-AND
+ * format.
+ *
+ * The result is a palloc'd List, but we have not copied the member Var nodes.
+ * Also, we don't bother trying to eliminate duplicate entries.
+ *
+ * The walker's top_level flag (this entry point starts it at TRUE) is TRUE
+ * while scanning top-level AND/OR structure; there, showing the result is
+ * either FALSE or NULL is good enough.  It is FALSE once we have descended
+ * below a NOT or a strict function: there we must prove the result is NULL.
+ *
+ * We don't use expression_tree_walker here because we don't want to descend
+ * through very many kinds of nodes; only the ones we can be sure are strict.
+ */
+List *
+find_nonnullable_vars(Node *clause)
+{
+       return find_nonnullable_vars_walker(clause, true);
+}
+
+static List *
+find_nonnullable_vars_walker(Node *node, bool top_level)
+{
+       List       *result = NIL;       /* Vars this node proves nonnullable */
+       ListCell   *l;
+
+       if (node == NULL)
+               return NIL;
+       if (IsA(node, Var))
+       {
+               Var                *var = (Var *) node;
+
+               if (var->varlevelsup == 0)      /* outer-level Vars are ignored */
+                       result = list_make1(var);
+       }
+       else if (IsA(node, List))
+       {
+               /*
+                * At top level, we are examining an implicit-AND list: if any of the
+                * arms produces FALSE-or-NULL then the result is FALSE-or-NULL. If
+                * not at top level, we are examining the arguments of a strict
+                * function: if any of them produce NULL then the result of the
+                * function must be NULL.  So in both cases, the set of nonnullable
+                * vars is the union of those found in the arms, and we pass down the
+                * top_level flag unmodified.
+                */
+               foreach(l, (List *) node)
+               {
+                       result = list_concat(result,
+                                                                find_nonnullable_vars_walker(lfirst(l),
+                                                                                                                         top_level));
+               }
+       }
+       else if (IsA(node, FuncExpr))
+       {
+               FuncExpr   *expr = (FuncExpr *) node;
+
+               if (func_strict(expr->funcid))
+                       result = find_nonnullable_vars_walker((Node *) expr->args, false);
+       }
+       else if (IsA(node, OpExpr))
+       {
+               OpExpr     *expr = (OpExpr *) node;
+
+               set_opfuncid(expr);
+               if (func_strict(expr->opfuncid))
+                       result = find_nonnullable_vars_walker((Node *) expr->args, false);
+       }
+       else if (IsA(node, ScalarArrayOpExpr))
+       {
+               ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
+
+               if (is_strict_saop(expr, true))
+                       result = find_nonnullable_vars_walker((Node *) expr->args, false);
+       }
+       else if (IsA(node, BoolExpr))
+       {
+               BoolExpr   *expr = (BoolExpr *) node;
+
+               switch (expr->boolop)
+               {
+                       case AND_EXPR:
+                               /* At top level we can just recurse (to the List case) */
+                               if (top_level)
+                               {
+                                       result = find_nonnullable_vars_walker((Node *) expr->args,
+                                                                                                                 top_level);
+                                       break;
+                               }
+
+                               /*
+                                * Below top level, even if one arm produces NULL, the result
+                                * could be FALSE (hence not NULL).  However, if *all* the
+                                * arms produce NULL then the result is NULL, so we can take
+                                * the intersection of the sets of nonnullable vars, just as
+                                * for OR.  Fall through to share code.
+                                */
+                               /* FALL THRU */
+                       case OR_EXPR:
+
+                               /*
+                                * OR is strict if all of its arms are, so we can take the
+                                * intersection of the sets of nonnullable vars for each arm.
+                                * This works for both values of top_level.
+                                */
+                               foreach(l, expr->args)
+                               {
+                                       List       *subresult;
+
+                                       subresult = find_nonnullable_vars_walker(lfirst(l),
+                                                                                                                        top_level);
+                                       if (result == NIL)      /* first subresult? */
+                                               result = subresult;
+                                       else
+                                               result = list_intersection(result, subresult);
+
+                                       /*
+                                        * If the intersection is empty, we can stop looking. This
+                                        * also justifies the test for first-subresult above.
+                                        */
+                                       if (result == NIL)
+                                               break;
+                               }
+                               break;
+                       case NOT_EXPR:
+                               /* NOT returns null if its arg is null, so the arg must go to NULL */
+                               result = find_nonnullable_vars_walker((Node *) expr->args,
+                                                                                                         false);
+                               break;
+                       default:
+                               elog(ERROR, "unrecognized boolop: %d", (int) expr->boolop);
+                               break;
+               }
+       }
+       else if (IsA(node, RelabelType))
+       {
+               RelabelType *expr = (RelabelType *) node;
+
+               result = find_nonnullable_vars_walker((Node *) expr->arg, top_level);
+       }
+       else if (IsA(node, CoerceViaIO))
+       {
+               /* handled as strict; not clear this is useful, but it can't hurt */
+               CoerceViaIO *expr = (CoerceViaIO *) node;
+
+               result = find_nonnullable_vars_walker((Node *) expr->arg, false);
+       }
+       else if (IsA(node, ArrayCoerceExpr))
+       {
+               /* ArrayCoerceExpr is strict at the array level */
+               ArrayCoerceExpr *expr = (ArrayCoerceExpr *) node;
+
+               result = find_nonnullable_vars_walker((Node *) expr->arg, top_level);
+       }
+       else if (IsA(node, ConvertRowtypeExpr))
+       {
+               /* not clear this is useful, but it can't hurt */
+               ConvertRowtypeExpr *expr = (ConvertRowtypeExpr *) node;
+
+               result = find_nonnullable_vars_walker((Node *) expr->arg, top_level);
+       }
+       else if (IsA(node, NullTest))
+       {
+               /* IS NOT NULL can be considered strict, but only at top level */
+               NullTest   *expr = (NullTest *) node;
+
+               if (top_level && expr->nulltesttype == IS_NOT_NULL)
+                       result = find_nonnullable_vars_walker((Node *) expr->arg, false);
+       }
+       else if (IsA(node, BooleanTest))
+       {
+               /* Boolean tests that reject NULL are strict at top level */
+               BooleanTest *expr = (BooleanTest *) node;
+
+               if (top_level &&
+                       (expr->booltesttype == IS_TRUE ||
+                        expr->booltesttype == IS_FALSE ||
+                        expr->booltesttype == IS_NOT_UNKNOWN))
+                       result = find_nonnullable_vars_walker((Node *) expr->arg, false);
+       }
+       else if (IsA(node, FlattenedSubLink))
+       {
+               /* JOIN_SEMI sublinks preserve strictness, but JOIN_ANTI ones don't */
+               FlattenedSubLink *expr = (FlattenedSubLink *) node;
+
+               if (expr->jointype == JOIN_SEMI)
+                       result = find_nonnullable_vars_walker((Node *) expr->quals,
+                                                                                                 top_level);
+       }
        return result;
 }
 
+/*
+ * find_forced_null_vars
+ *             Determine which Vars must be NULL for the given clause to return TRUE.
+ *
+ * This is the complement of find_nonnullable_vars: find the level-zero Vars
+ * that must be NULL for the clause to return TRUE.  (It is OK to err on the
+ * side of conservatism; hence the analysis here is simplistic.  In fact, we
+ * only detect "var IS NULL" and "var IS UNKNOWN" tests, alone or AND'ed.)
+ *
+ * The result is a palloc'd List, but we have not copied the member Var nodes.
+ * Also, we don't bother trying to eliminate duplicate entries.
+ */
+List *
+find_forced_null_vars(Node *node)
+{
+       List       *result = NIL;
+       Var                *var;
+       ListCell   *l;
+
+       if (node == NULL)
+               return NIL;
+       /* Check single-clause cases using subroutine */
+       var = find_forced_null_var(node);
+       if (var)
+       {
+               result = list_make1(var);
+       }
+       /* Otherwise, handle AND-conditions */
+       else if (IsA(node, List))
+       {
+               /*
+                * We are examining an implicit-AND list: every arm must be TRUE
+                * for the list to be TRUE, so a Var forced null by any arm counts.
+                */
+               foreach(l, (List *) node)
+               {
+                       result = list_concat(result,
+                                                                find_forced_null_vars(lfirst(l)));
+               }
+       }
+       else if (IsA(node, BoolExpr))
+       {
+               BoolExpr   *expr = (BoolExpr *) node;
+
+               /*
+                * We don't bother considering the OR case, because it's fairly
+                * unlikely anyone would write "v1 IS NULL OR v1 IS NULL".
+                * Likewise, the NOT case isn't worth expending code on.
+                */
+               if (expr->boolop == AND_EXPR)
+               {
+                       /* Explicit AND: same logic, so just recurse (to the List case) */
+                       result = find_forced_null_vars((Node *) expr->args);
+               }
+       }
+       return result;
+}
+
+/*
+ * find_forced_null_var
+ *             Return the level-zero Var forced null by the given clause, or
+ *             NULL if it's not a bare "var IS NULL" or "var IS UNKNOWN" test.
+ *             The clause must enforce *only* nullness of that Var, nothing else.
+ *
+ * This is just the single-clause case of find_forced_null_vars(), without
+ * any allowance for AND conditions.  It's used by initsplan.c on individual
+ * qual clauses.  The reason for not just applying find_forced_null_vars()
+ * is that if an AND of an IS NULL clause with something else were to somehow
+ * survive AND/OR flattening, initsplan.c might get fooled into discarding
+ * the whole clause when only the IS NULL part of it had been proved redundant.
+ */
+Var *
+find_forced_null_var(Node *node)
+{
+       if (node == NULL)
+               return NULL;
+       if (IsA(node, NullTest))
+       {
+               /* check for var IS NULL */
+               NullTest   *expr = (NullTest *) node;
+
+               if (expr->nulltesttype == IS_NULL)
+               {
+                       Var        *var = (Var *) expr->arg;
+
+                       if (var && IsA(var, Var) &&
+                               var->varlevelsup == 0)
+                               return var;
+               }
+       }
+       else if (IsA(node, BooleanTest))
+       {
+               /* var IS UNKNOWN is equivalent to var IS NULL */
+               BooleanTest *expr = (BooleanTest *) node;
+
+               if (expr->booltesttype == IS_UNKNOWN)
+               {
+                       Var        *var = (Var *) expr->arg;
+
+                       if (var && IsA(var, Var) &&
+                               var->varlevelsup == 0)
+                               return var;
+               }
+       }
+       return NULL;
+}
+
 /*
  * Can we treat a ScalarArrayOpExpr as strict?
  *
@@ -2479,6 +2809,24 @@ eval_const_expressions_mutator(Node *node,
                newbtest->booltesttype = btest->booltesttype;
                return (Node *) newbtest;
        }
+       if (IsA(node, FlattenedSubLink))
+       {
+               FlattenedSubLink *fslink = (FlattenedSubLink *) node;
+               FlattenedSubLink *newfslink;
+               Expr       *quals;
+
+               /* Simplify and also canonicalize the arguments */
+               quals = (Expr *) eval_const_expressions_mutator((Node *) fslink->quals,
+                                                                                                               context);
+               quals = canonicalize_qual(quals);
+
+               newfslink = makeNode(FlattenedSubLink);
+               newfslink->jointype = fslink->jointype;
+               newfslink->lefthand = fslink->lefthand;
+               newfslink->righthand = fslink->righthand;
+               newfslink->quals = quals;
+               return (Node *) newfslink;
+       }
 
        /*
         * For any node type not handled above, we recurse using
@@ -3706,7 +4054,6 @@ expression_tree_walker(Node *node,
                case T_SetToDefault:
                case T_CurrentOfExpr:
                case T_RangeTblRef:
-               case T_OuterJoinInfo:
                        /* primitive node types with no expression subnodes */
                        break;
                case T_Aggref:
@@ -3937,11 +4284,11 @@ expression_tree_walker(Node *node,
                                /* groupClauses are deemed uninteresting */
                        }
                        break;
-               case T_InClauseInfo:
+               case T_FlattenedSubLink:
                        {
-                               InClauseInfo *ininfo = (InClauseInfo *) node;
+                               FlattenedSubLink *fslink = (FlattenedSubLink *) node;
 
-                               if (expression_tree_walker((Node *) ininfo->sub_targetlist,
+                               if (expression_tree_walker((Node *) fslink->quals,
                                                                                   walker, context))
                                        return true;
                        }
@@ -4175,7 +4522,6 @@ expression_tree_mutator(Node *node,
                case T_SetToDefault:
                case T_CurrentOfExpr:
                case T_RangeTblRef:
-               case T_OuterJoinInfo:
                        return (Node *) copyObject(node);
                case T_Aggref:
                        {
@@ -4541,14 +4887,14 @@ expression_tree_mutator(Node *node,
                                return (Node *) newnode;
                        }
                        break;
-               case T_InClauseInfo:
+               case T_FlattenedSubLink:
                        {
-                               InClauseInfo *ininfo = (InClauseInfo *) node;
-                               InClauseInfo *newnode;
+                               FlattenedSubLink *fslink = (FlattenedSubLink *) node;
+                               FlattenedSubLink *newnode;
 
-                               FLATCOPY(newnode, ininfo, InClauseInfo);
-                               MUTATE(newnode->sub_targetlist, ininfo->sub_targetlist, List *);
-                               /* Assume we need not make a copy of in_operators list */
+                               FLATCOPY(newnode, fslink, FlattenedSubLink);
+                               /* Assume we need not copy the relids bitmapsets */
+                               MUTATE(newnode->quals, fslink->quals, Expr *);
                                return (Node *) newnode;
                        }
                        break;
index 655443e9efe03872fa51249005dad923d141e4ba..5996af2b5d548bcbf53589e73fab88e60254d459 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.145 2008/08/07 01:11:50 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.146 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "catalog/pg_operator.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
+#include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
+#include "optimizer/var.h"
 #include "parser/parse_expr.h"
 #include "parser/parsetree.h"
 #include "utils/selfuncs.h"
@@ -33,7 +35,6 @@
 static List *translate_sub_tlist(List *tlist, int relid);
 static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
 static Oid     distinct_col_search(int colno, List *colnos, List *opids);
-static bool hash_safe_operators(List *opids);
 
 
 /*****************************************************************************
@@ -481,15 +482,16 @@ create_index_path(PlannerInfo *root,
                 * into different lists, it should be sufficient to use pointer
                 * comparison to remove duplicates.)
                 *
-                * Always assume the join type is JOIN_INNER; even if some of the join
-                * clauses come from other contexts, that's not our problem.
+                * Note that we force the clauses to be treated as non-join clauses
+                * during selectivity estimation.
                 */
                allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
                pathnode->rows = rel->tuples *
                        clauselist_selectivity(root,
                                                                   allclauses,
                                                                   rel->relid,  /* do not use 0! */
-                                                                  JOIN_INNER);
+                                                                  JOIN_INNER,
+                                                                  NULL);
                /* Like costsize.c, force estimate to be at least one row */
                pathnode->rows = clamp_row_est(pathnode->rows);
        }
@@ -719,42 +721,141 @@ create_material_path(RelOptInfo *rel, Path *subpath)
 /*
  * create_unique_path
  *       Creates a path representing elimination of distinct rows from the
- *       input data.
+ *       input data.  Distinct-ness is defined according to the needs of the
+ *       semijoin represented by sjinfo.  If it is not possible to identify
+ *       how to make the data unique, NULL is returned.
  *
  * If used at all, this is likely to be called repeatedly on the same rel;
  * and the input subpath should always be the same (the cheapest_total path
  * for the rel).  So we cache the result.
  */
 UniquePath *
-create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
+create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
+                                  SpecialJoinInfo *sjinfo)
 {
        UniquePath *pathnode;
        Path            sort_path;              /* dummy for result of cost_sort */
        Path            agg_path;               /* dummy for result of cost_agg */
        MemoryContext oldcontext;
-       List       *sub_targetlist;
        List       *in_operators;
-       ListCell   *l;
+       List       *uniq_exprs;
+       bool            all_btree;
+       bool            all_hash;
        int                     numCols;
+       ListCell   *lc;
 
-       /* Caller made a mistake if subpath isn't cheapest_total */
+       /* Caller made a mistake if subpath isn't cheapest_total ... */
        Assert(subpath == rel->cheapest_total_path);
+       /* ... or if SpecialJoinInfo is the wrong one */
+       Assert(sjinfo->jointype == JOIN_SEMI);
+       Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
 
        /* If result already cached, return it */
        if (rel->cheapest_unique_path)
                return (UniquePath *) rel->cheapest_unique_path;
 
+       /* If we previously failed, return NULL quickly */
+       if (sjinfo->join_quals == NIL)
+               return NULL;
+
        /*
-        * We must ensure path struct is allocated in main planning context;
-        * otherwise GEQO memory management causes trouble.  (Compare
-        * best_inner_indexscan().)
+        * We must ensure path struct and subsidiary data are allocated in main
+        * planning context; otherwise GEQO memory management causes trouble.
+        * (Compare best_inner_indexscan().)
         */
        oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 
-       pathnode = makeNode(UniquePath);
+       /*
+        * Look to see whether the semijoin's join quals consist of AND'ed
+        * equality operators, with (only) RHS variables on only one side of
+        * each one.  If so, we can figure out how to enforce uniqueness for
+        * the RHS.
+        *
+        * Note that the in_operators list consists of the joinqual operators
+        * themselves (but commuted if needed to put the RHS value on the right).
+        * These could be cross-type operators, in which case the operator
+        * actually needed for uniqueness is a related single-type operator.
+        * We assume here that that operator will be available from the btree
+        * or hash opclass when the time comes ... if not, create_unique_plan()
+        * will fail.
+        */
+       in_operators = NIL;
+       uniq_exprs = NIL;
+       all_btree = true;
+       all_hash = enable_hashagg;              /* don't consider hash if not enabled */
+       foreach(lc, sjinfo->join_quals)
+       {
+               OpExpr     *op = (OpExpr *) lfirst(lc);
+               Oid                     opno;
+               Node       *left_expr;
+               Node       *right_expr;
+               Relids          left_varnos;
+               Relids          right_varnos;
+
+               /* must be binary opclause... */
+               if (!IsA(op, OpExpr))
+                       goto no_unique_path;
+               if (list_length(op->args) != 2)
+                       goto no_unique_path;
+               opno = op->opno;
+               left_expr = linitial(op->args);
+               right_expr = lsecond(op->args);
+
+               /* check rel membership of arguments */
+               left_varnos = pull_varnos(left_expr);
+               right_varnos = pull_varnos(right_expr);
+               if (!bms_is_empty(right_varnos) &&
+                       bms_is_subset(right_varnos, sjinfo->syn_righthand) &&
+                       !bms_overlap(left_varnos, sjinfo->syn_righthand))
+               {
+                       /* typical case, right_expr is RHS variable */
+               }
+               else if (!bms_is_empty(left_varnos) &&
+                                bms_is_subset(left_varnos, sjinfo->syn_righthand) &&
+                                !bms_overlap(right_varnos, sjinfo->syn_righthand))
+               {
+                       /* flipped case, left_expr is RHS variable */
+                       opno = get_commutator(opno);
+                       if (!OidIsValid(opno))
+                               goto no_unique_path;
+                       right_expr = left_expr;
+               }
+               else
+                       goto no_unique_path;
 
-       /* There is no substructure to allocate, so can switch back right away */
-       MemoryContextSwitchTo(oldcontext);
+               /* all operators must be btree equality or hash equality */
+               if (all_btree)
+               {
+                       /* oprcanmerge is considered a hint... */
+                       if (!op_mergejoinable(opno) ||
+                               get_mergejoin_opfamilies(opno) == NIL)
+                               all_btree = false;
+               }
+               if (all_hash)
+               {
+                       /* ... but oprcanhash had better be correct */
+                       if (!op_hashjoinable(opno))
+                               all_hash = false;
+               }
+               if (!(all_btree || all_hash))
+                       goto no_unique_path;
+
+               /* so far so good, keep building lists */
+               in_operators = lappend_oid(in_operators, opno);
+               uniq_exprs = lappend(uniq_exprs, copyObject(right_expr));
+       }
+
+       /*
+        * The expressions we'd need to unique-ify mustn't be volatile.
+        */
+       if (contain_volatile_functions((Node *) uniq_exprs))
+               goto no_unique_path;
+
+       /*
+        * If we get here, we can unique-ify using at least one of sorting
+        * and hashing.  Start building the result Path object.
+        */
+       pathnode = makeNode(UniquePath);
 
        pathnode->path.pathtype = T_Unique;
        pathnode->path.parent = rel;
@@ -766,43 +867,24 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
        pathnode->path.pathkeys = NIL;
 
        pathnode->subpath = subpath;
-
-       /*
-        * Try to identify the targetlist that will actually be unique-ified. In
-        * current usage, this routine is only used for sub-selects of IN clauses,
-        * so we should be able to find the tlist in in_info_list.      Get the IN
-        * clause's operators, too, because they determine what "unique" means.
-        */
-       sub_targetlist = NIL;
-       in_operators = NIL;
-       foreach(l, root->in_info_list)
-       {
-               InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
-
-               if (bms_equal(ininfo->righthand, rel->relids))
-               {
-                       sub_targetlist = ininfo->sub_targetlist;
-                       in_operators = ininfo->in_operators;
-                       break;
-               }
-       }
+       pathnode->in_operators = in_operators;
+       pathnode->uniq_exprs = uniq_exprs;
 
        /*
         * If the input is a subquery whose output must be unique already, then we
         * don't need to do anything.  The test for uniqueness has to consider
         * exactly which columns we are extracting; for example "SELECT DISTINCT
         * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
-        * this optimization unless we found our own targetlist above, and it
-        * consists only of simple Vars referencing subquery outputs.  (Possibly
-        * we could do something with expressions in the subquery outputs, too,
-        * but for now keep it simple.)
+        * this optimization unless uniq_exprs consists only of simple Vars
+        * referencing subquery outputs.  (Possibly we could do something with
+        * expressions in the subquery outputs, too, but for now keep it simple.)
         */
-       if (sub_targetlist && rel->rtekind == RTE_SUBQUERY)
+       if (rel->rtekind == RTE_SUBQUERY)
        {
                RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
                List       *sub_tlist_colnos;
 
-               sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
+               sub_tlist_colnos = translate_sub_tlist(uniq_exprs, rel->relid);
 
                if (sub_tlist_colnos &&
                        query_is_distinct_for(rte->subquery,
@@ -816,48 +898,37 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 
                        rel->cheapest_unique_path = (Path *) pathnode;
 
+                       MemoryContextSwitchTo(oldcontext);
+
                        return pathnode;
                }
        }
 
-       /*
-        * If we know the targetlist, try to estimate number of result rows;
-        * otherwise punt.
-        */
-       if (sub_targetlist)
-       {
-               pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows);
-               numCols = list_length(sub_targetlist);
-       }
-       else
-       {
-               pathnode->rows = rel->rows;
-               numCols = list_length(rel->reltargetlist);
-       }
+       /* Estimate number of output rows */
+       pathnode->rows = estimate_num_groups(root, uniq_exprs, rel->rows);
+       numCols = list_length(uniq_exprs);
 
-       /*
-        * Estimate cost for sort+unique implementation
-        */
-       cost_sort(&sort_path, root, NIL,
-                         subpath->total_cost,
-                         rel->rows,
-                         rel->width,
-                         -1.0);
+       if (all_btree)
+       {
+               /*
+                * Estimate cost for sort+unique implementation
+                */
+               cost_sort(&sort_path, root, NIL,
+                                 subpath->total_cost,
+                                 rel->rows,
+                                 rel->width,
+                                 -1.0);
 
-       /*
-        * Charge one cpu_operator_cost per comparison per input tuple. We assume
-        * all columns get compared at most of the tuples.      (XXX probably this is
-        * an overestimate.)  This should agree with make_unique.
-        */
-       sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
+               /*
+                * Charge one cpu_operator_cost per comparison per input tuple.
+                * We assume all columns get compared at most of the tuples. (XXX
+                * probably this is an overestimate.)  This should agree with
+                * make_unique.
+                */
+               sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
+       }
 
-       /*
-        * Is it safe to use a hashed implementation?  If so, estimate and compare
-        * costs.  We only try this if we know the IN operators, else we can't
-        * check their hashability.
-        */
-       pathnode->umethod = UNIQUE_PATH_SORT;
-       if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
+       if (all_hash)
        {
                /*
                 * Estimate the overhead per hashtable entry at 64 bytes (same as in
@@ -865,19 +936,31 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
                 */
                int                     hashentrysize = rel->width + 64;
 
-               if (hashentrysize * pathnode->rows <= work_mem * 1024L)
-               {
+               if (hashentrysize * pathnode->rows > work_mem * 1024L)
+                       all_hash = false;       /* don't try to hash */
+               else
                        cost_agg(&agg_path, root,
                                         AGG_HASHED, 0,
                                         numCols, pathnode->rows,
                                         subpath->startup_cost,
                                         subpath->total_cost,
                                         rel->rows);
-                       if (agg_path.total_cost < sort_path.total_cost)
-                               pathnode->umethod = UNIQUE_PATH_HASH;
-               }
        }
 
+       if (all_btree && all_hash)
+       {
+               if (agg_path.total_cost < sort_path.total_cost)
+                       pathnode->umethod = UNIQUE_PATH_HASH;
+               else
+                       pathnode->umethod = UNIQUE_PATH_SORT;
+       }
+       else if (all_btree)
+               pathnode->umethod = UNIQUE_PATH_SORT;
+       else if (all_hash)
+               pathnode->umethod = UNIQUE_PATH_HASH;
+       else
+               goto no_unique_path;
+
        if (pathnode->umethod == UNIQUE_PATH_HASH)
        {
                pathnode->path.startup_cost = agg_path.startup_cost;
@@ -891,7 +974,18 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
 
        rel->cheapest_unique_path = (Path *) pathnode;
 
+       MemoryContextSwitchTo(oldcontext);
+
        return pathnode;
+
+no_unique_path:                                        /* failure exit */
+
+       /* Mark the SpecialJoinInfo as not unique-able */
+       sjinfo->join_quals = NIL;
+
+       MemoryContextSwitchTo(oldcontext);
+
+       return NULL;
 }
 
 /*
@@ -1068,31 +1162,6 @@ distinct_col_search(int colno, List *colnos, List *opids)
        return InvalidOid;
 }
 
-/*
- * hash_safe_operators - can all the specified IN operators be hashed?
- *
- * We assume hashed aggregation will work if each IN operator is marked
- * hashjoinable.  If the IN operators are cross-type, this could conceivably
- * fail: the aggregation will need a hashable equality operator for the RHS
- * datatype --- but it's pretty hard to conceive of a hash opfamily that has
- * cross-type hashing without support for hashing the individual types, so
- * we don't expend cycles here to support the case.  We could check
- * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
- * former is a significantly more expensive test.
- */
-static bool
-hash_safe_operators(List *opids)
-{
-       ListCell   *lc;
-
-       foreach(lc, opids)
-       {
-               if (!op_hashjoinable(lfirst_oid(lc)))
-                       return false;
-       }
-       return true;
-}
-
 /*
  * create_subqueryscan_path
  *       Creates a path corresponding to a sequential scan of a subquery,
@@ -1157,6 +1226,7 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
  *
  * 'joinrel' is the join relation.
  * 'jointype' is the type of join required
+ * 'sjinfo' is extra info about the join for selectivity estimation
  * 'outer_path' is the outer path
  * 'inner_path' is the inner path
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@@ -1168,6 +1238,7 @@ NestPath *
 create_nestloop_path(PlannerInfo *root,
                                         RelOptInfo *joinrel,
                                         JoinType jointype,
+                                        SpecialJoinInfo *sjinfo,
                                         Path *outer_path,
                                         Path *inner_path,
                                         List *restrict_clauses,
@@ -1183,7 +1254,7 @@ create_nestloop_path(PlannerInfo *root,
        pathnode->joinrestrictinfo = restrict_clauses;
        pathnode->path.pathkeys = pathkeys;
 
-       cost_nestloop(pathnode, root);
+       cost_nestloop(pathnode, root, sjinfo);
 
        return pathnode;
 }
@@ -1195,6 +1266,7 @@ create_nestloop_path(PlannerInfo *root,
  *
  * 'joinrel' is the join relation
  * 'jointype' is the type of join required
+ * 'sjinfo' is extra info about the join for selectivity estimation
  * 'outer_path' is the outer path
  * 'inner_path' is the inner path
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@@ -1208,6 +1280,7 @@ MergePath *
 create_mergejoin_path(PlannerInfo *root,
                                          RelOptInfo *joinrel,
                                          JoinType jointype,
+                                         SpecialJoinInfo *sjinfo,
                                          Path *outer_path,
                                          Path *inner_path,
                                          List *restrict_clauses,
@@ -1256,7 +1329,7 @@ create_mergejoin_path(PlannerInfo *root,
        pathnode->outersortkeys = outersortkeys;
        pathnode->innersortkeys = innersortkeys;
 
-       cost_mergejoin(pathnode, root);
+       cost_mergejoin(pathnode, root, sjinfo);
 
        return pathnode;
 }
@@ -1267,6 +1340,7 @@ create_mergejoin_path(PlannerInfo *root,
  *
  * 'joinrel' is the join relation
  * 'jointype' is the type of join required
+ * 'sjinfo' is extra info about the join for selectivity estimation
  * 'outer_path' is the cheapest outer path
  * 'inner_path' is the cheapest inner path
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
@@ -1277,6 +1351,7 @@ HashPath *
 create_hashjoin_path(PlannerInfo *root,
                                         RelOptInfo *joinrel,
                                         JoinType jointype,
+                                        SpecialJoinInfo *sjinfo,
                                         Path *outer_path,
                                         Path *inner_path,
                                         List *restrict_clauses,
@@ -1294,7 +1369,7 @@ create_hashjoin_path(PlannerInfo *root,
        pathnode->jpath.path.pathkeys = NIL;
        pathnode->path_hashclauses = hashclauses;
 
-       cost_hashjoin(pathnode, root);
+       cost_hashjoin(pathnode, root, sjinfo);
 
        return pathnode;
 }
index 74c218c23c9191fa0883fa2e73b80826663986a7..f5592d17bb92ad81690384dfecc8f85f3a1f21fc 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.89 2008/01/01 19:45:50 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/relnode.c,v 1.90 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -273,7 +273,7 @@ find_join_rel(PlannerInfo *root, Relids relids)
  * 'joinrelids' is the Relids set that uniquely identifies the join
  * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be
  *             joined
- * 'jointype': type of join (inner/outer)
+ * 'sjinfo': join context info (a SpecialJoinInfo, which includes the join type)
  * 'restrictlist_ptr': result variable.  If not NULL, *restrictlist_ptr
  *             receives the list of RestrictInfo nodes that apply to this
  *             particular pair of joinable relations.
@@ -286,7 +286,7 @@ build_join_rel(PlannerInfo *root,
                           Relids joinrelids,
                           RelOptInfo *outer_rel,
                           RelOptInfo *inner_rel,
-                          JoinType jointype,
+                          SpecialJoinInfo *sjinfo,
                           List **restrictlist_ptr)
 {
        RelOptInfo *joinrel;
@@ -375,7 +375,7 @@ build_join_rel(PlannerInfo *root,
         * Set estimates of the joinrel's size.
         */
        set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
-                                                          jointype, restrictlist);
+                                                          sjinfo, restrictlist);
 
        /*
         * Add the joinrel to the query's joinrel list, and store it into the
index 98440795961797201906e2cc695d4c5130437912..290f9bb64bfbbf3154065fb22311757ff248d1cd 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.74 2008/05/12 00:00:49 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.75 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -644,23 +644,23 @@ flatten_join_alias_vars_mutator(Node *node,
                /* Recurse in case join input is itself a join */
                return flatten_join_alias_vars_mutator(newvar, context);
        }
-       if (IsA(node, InClauseInfo))
+       if (IsA(node, FlattenedSubLink))
        {
-               /* Copy the InClauseInfo node with correct mutation of subnodes */
-               InClauseInfo *ininfo;
+               /* Copy the FlattenedSubLink node with correct mutation of subnodes */
+               FlattenedSubLink *fslink;
 
-               ininfo = (InClauseInfo *) expression_tree_mutator(node,
+               fslink = (FlattenedSubLink *) expression_tree_mutator(node,
                                                                                         flatten_join_alias_vars_mutator,
-                                                                                                                 (void *) context);
-               /* now fix InClauseInfo's relid sets */
+                                                                                                                        (void *) context);
+               /* now fix FlattenedSubLink's relid sets */
                if (context->sublevels_up == 0)
                {
-                       ininfo->lefthand = alias_relid_set(context->root,
-                                                                                          ininfo->lefthand);
-                       ininfo->righthand = alias_relid_set(context->root,
-                                                                                               ininfo->righthand);
+                       fslink->lefthand = alias_relid_set(context->root,
+                                                                                          fslink->lefthand);
+                       fslink->righthand = alias_relid_set(context->root,
+                                                                                               fslink->righthand);
                }
-               return (Node *) ininfo;
+               return (Node *) fslink;
        }
 
        if (IsA(node, Query))
index 63a8ba2bd05ad8ce59225fab8c1fc0e9d9b04b51..60492fe3d091fa52b349f5190a082531ec5277ea 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.107 2008/01/01 19:45:51 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.108 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -176,15 +176,15 @@ OffsetVarNodes_walker(Node *node, OffsetVarNodes_context *context)
                        j->rtindex += context->offset;
                /* fall through to examine children */
        }
-       if (IsA(node, InClauseInfo))
+       if (IsA(node, FlattenedSubLink))
        {
-               InClauseInfo *ininfo = (InClauseInfo *) node;
+               FlattenedSubLink *fslink = (FlattenedSubLink *) node;
 
                if (context->sublevels_up == 0)
                {
-                       ininfo->lefthand = offset_relid_set(ininfo->lefthand,
+                       fslink->lefthand = offset_relid_set(fslink->lefthand,
                                                                                                context->offset);
-                       ininfo->righthand = offset_relid_set(ininfo->righthand,
+                       fslink->righthand = offset_relid_set(fslink->righthand,
                                                                                                 context->offset);
                }
                /* fall through to examine children */
@@ -338,16 +338,16 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context)
                        j->rtindex = context->new_index;
                /* fall through to examine children */
        }
-       if (IsA(node, InClauseInfo))
+       if (IsA(node, FlattenedSubLink))
        {
-               InClauseInfo *ininfo = (InClauseInfo *) node;
+               FlattenedSubLink *fslink = (FlattenedSubLink *) node;
 
                if (context->sublevels_up == 0)
                {
-                       ininfo->lefthand = adjust_relid_set(ininfo->lefthand,
+                       fslink->lefthand = adjust_relid_set(fslink->lefthand,
                                                                                                context->rt_index,
                                                                                                context->new_index);
-                       ininfo->righthand = adjust_relid_set(ininfo->righthand,
+                       fslink->righthand = adjust_relid_set(fslink->righthand,
                                                                                                 context->rt_index,
                                                                                                 context->new_index);
                }
@@ -589,8 +589,8 @@ rangeTableEntry_used_walker(Node *node,
                /* fall through to examine children */
        }
        /* Shouldn't need to handle planner auxiliary nodes here */
-       Assert(!IsA(node, OuterJoinInfo));
-       Assert(!IsA(node, InClauseInfo));
+       Assert(!IsA(node, FlattenedSubLink));
+       Assert(!IsA(node, SpecialJoinInfo));
        Assert(!IsA(node, AppendRelInfo));
 
        if (IsA(node, Query))
index 439c6f3d21a213434ced3465b73a9062823dfce7..7d5609d447ee48568d3881ac84c8e1326df52301 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.250 2008/07/07 20:24:55 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.251 2008/08/14 18:47:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1300,7 +1300,7 @@ icnlikesel(PG_FUNCTION_ARGS)
  */
 Selectivity
 booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
-                       int varRelid, JoinType jointype)
+                       int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 {
        VariableStatData vardata;
        double          selec;
@@ -1436,12 +1436,14 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
                        case IS_TRUE:
                        case IS_NOT_FALSE:
                                selec = (double) clause_selectivity(root, arg,
-                                                                                                       varRelid, jointype);
+                                                                                                       varRelid,
+                                                                                                       jointype, sjinfo);
                                break;
                        case IS_FALSE:
                        case IS_NOT_TRUE:
                                selec = 1.0 - (double) clause_selectivity(root, arg,
-                                                                                                                 varRelid, jointype);
+                                                                                                                 varRelid,
+                                                                                                                 jointype, sjinfo);
                                break;
                        default:
                                elog(ERROR, "unrecognized booltesttype: %d",
@@ -1463,25 +1465,12 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
  *             nulltestsel             - Selectivity of NullTest Node.
  */
 Selectivity
-nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
-                       Node *arg, int varRelid, JoinType jointype)
+nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
+                       int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 {
        VariableStatData vardata;
        double          selec;
 
-       /*
-        * Special hack: an IS NULL test being applied at an outer join should not
-        * be taken at face value, since it's very likely being used to select the
-        * outer-side rows that don't have a match, and thus its selectivity has
-        * nothing whatever to do with the statistics of the original table
-        * column.      We do not have nearly enough context here to determine its
-        * true selectivity, so for the moment punt and guess at 0.5.  Eventually
-        * the planner should be made to provide enough info about the clause's
-        * context to let us do better.
-        */
-       if (IS_OUTER_JOIN(jointype) && nulltesttype == IS_NULL)
-               return (Selectivity) 0.5;
-
        examine_variable(root, arg, varRelid, &vardata);
 
        if (HeapTupleIsValid(vardata.statsTuple))
@@ -1579,7 +1568,9 @@ Selectivity
 scalararraysel(PlannerInfo *root,
                           ScalarArrayOpExpr *clause,
                           bool is_join_clause,
-                          int varRelid, JoinType jointype)
+                          int varRelid,
+                          JoinType jointype,
+                          SpecialJoinInfo *sjinfo)
 {
        Oid                     operator = clause->opno;
        bool            useOr = clause->useOr;
@@ -1802,7 +1793,7 @@ estimate_array_length(Node *arrayexpr)
 Selectivity
 rowcomparesel(PlannerInfo *root,
                          RowCompareExpr *clause,
-                         int varRelid, JoinType jointype)
+                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
 {
        Selectivity s1;
        Oid                     opno = linitial_oid(clause->opnos);
@@ -1942,25 +1933,16 @@ eqjoinsel(PG_FUNCTION_ARGS)
                hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool));
 
                /*
-                * If we are doing any variant of JOIN_IN, pretend all the values of
+                * If we are doing a JOIN_SEMI join, pretend all the values of
                 * the righthand relation are unique (ie, act as if it's been
                 * DISTINCT'd).
                 *
-                * NOTE: it might seem that we should unique-ify the lefthand input
-                * when considering JOIN_REVERSE_IN.  But this is not so, because the
-                * join clause we've been handed has not been commuted from the way
-                * the parser originally wrote it.      We know that the unique side of
-                * the IN clause is *always* on the right.
-                *
                 * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or
                 * JOIN_RIGHT here, because we do not have enough information to
                 * determine which var is really on which side of the join. Perhaps
                 * someday we should pass in more information.
                 */
-               if (jointype == JOIN_IN ||
-                       jointype == JOIN_REVERSE_IN ||
-                       jointype == JOIN_UNIQUE_INNER ||
-                       jointype == JOIN_UNIQUE_OUTER)
+               if (jointype == JOIN_SEMI)
                {
                        float4          oneovern = 1.0 / nd2;
 
@@ -5144,7 +5126,8 @@ genericcostestimate(PlannerInfo *root,
        /* Estimate the fraction of main-table tuples that will be visited */
        *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
                                                                                           index->rel->relid,
-                                                                                          JOIN_INNER);
+                                                                                          JOIN_INNER,
+                                                                                          NULL);
 
        /*
         * If caller didn't give us an estimate, estimate the number of index
@@ -5483,7 +5466,8 @@ btcostestimate(PG_FUNCTION_ARGS)
 
                btreeSelectivity = clauselist_selectivity(root, indexBoundQuals,
                                                                                                  index->rel->relid,
-                                                                                                 JOIN_INNER);
+                                                                                                 JOIN_INNER,
+                                                                                                 NULL);
                numIndexTuples = btreeSelectivity * index->rel->tuples;
 
                /*
index e35356ab55f40472f2fb8fe31749368890344159..5fdd23c5a919233f8504c5f7ee992219a242bfb7 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.207 2008/08/02 21:32:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.208 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -202,8 +202,8 @@ typedef enum NodeTag
        T_PathKey,
        T_RestrictInfo,
        T_InnerIndexscanInfo,
-       T_OuterJoinInfo,
-       T_InClauseInfo,
+       T_FlattenedSubLink,
+       T_SpecialJoinInfo,
        T_AppendRelInfo,
        T_PlannerParamItem,
 
@@ -474,31 +474,49 @@ typedef enum CmdType
 typedef enum JoinType
 {
        /*
-        * The canonical kinds of joins
+        * The canonical kinds of joins according to the SQL JOIN syntax.
+        * Only these codes can appear in parser output (e.g., JoinExpr nodes).
         */
        JOIN_INNER,                                     /* matching tuple pairs only */
-       JOIN_LEFT,                                      /* pairs + unmatched outer tuples */
-       JOIN_FULL,                                      /* pairs + unmatched outer + unmatched inner */
-       JOIN_RIGHT,                                     /* pairs + unmatched inner tuples */
+       JOIN_LEFT,                                      /* pairs + unmatched LHS tuples */
+       JOIN_FULL,                                      /* pairs + unmatched LHS + unmatched RHS */
+       JOIN_RIGHT,                                     /* pairs + unmatched RHS tuples */
 
        /*
-        * These are used for queries like WHERE foo IN (SELECT bar FROM ...).
-        * Only JOIN_IN is actually implemented in the executor; the others are
-        * defined for internal use in the planner.
+        * Semijoins and anti-semijoins (as defined in relational theory) do
+        * not appear in the SQL JOIN syntax, but there are standard idioms for
+        * representing them (e.g., using EXISTS).  The planner recognizes these
+        * cases and converts them to joins.  So the planner and executor must
+        * support these codes.  NOTE: in JOIN_SEMI output, it is unspecified
+        * which matching RHS row is joined to.  In JOIN_ANTI output, the row
+        * is guaranteed to be null-extended.
         */
-       JOIN_IN,                                        /* at most one result per outer row */
-       JOIN_REVERSE_IN,                        /* at most one result per inner row */
-       JOIN_UNIQUE_OUTER,                      /* outer path must be made unique */
-       JOIN_UNIQUE_INNER                       /* inner path must be made unique */
+       JOIN_SEMI,                                      /* 1 copy of each LHS row that has match(es) */
+       JOIN_ANTI,                                      /* 1 copy of each LHS row that has no match */
+
+       /*
+        * These codes are used internally in the planner, but are not supported
+        * by the executor (nor, indeed, by most of the planner).
+        */
+       JOIN_UNIQUE_OUTER,                      /* LHS path must be made unique */
+       JOIN_UNIQUE_INNER                       /* RHS path must be made unique */
 
        /*
         * We might need additional join types someday.
         */
 } JoinType;
 
+/*
+ * OUTER joins are those for which pushed-down quals must behave differently
+ * from the join's own quals.  This is in fact everything except INNER joins.
+ * However, this macro must also exclude the JOIN_UNIQUE symbols since those
+ * are temporary proxies for what will eventually be an INNER join.
+ *
+ * Note: in some places it is preferable to treat JOIN_SEMI as not being
+ * an outer join, since it doesn't produce null-extended rows.  Be aware
+ * of that distinction when deciding whether to use this macro.
+ */
 #define IS_OUTER_JOIN(jointype) \
-       ((jointype) == JOIN_LEFT || \
-        (jointype) == JOIN_FULL || \
-        (jointype) == JOIN_RIGHT)
+       ((jointype) > JOIN_INNER && (jointype) < JOIN_UNIQUE_OUTER)
 
 #endif   /* NODES_H */
index fc9331939d79ddd7ee503ec62619c5bb4f1af267..fc7630b7e3176eac10777a78e30604650ae8dff0 100644 (file)
@@ -30,7 +30,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/pg_list.h,v 1.58 2008/03/17 02:18:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/pg_list.h,v 1.59 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -218,6 +218,9 @@ extern List *list_union_ptr(List *list1, List *list2);
 extern List *list_union_int(List *list1, List *list2);
 extern List *list_union_oid(List *list1, List *list2);
 
+extern List *list_intersection(List *list1, List *list2);
+/* currently, there's no need for list_intersection_int etc */
+
 extern List *list_difference(List *list1, List *list2);
 extern List *list_difference_ptr(List *list1, List *list2);
 extern List *list_difference_int(List *list1, List *list2);
index f8c23071661b50086aa26287915fc5082354a1ae..8fb6861e50c7635643af0c80e2bfb1c93c571fd6 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.157 2008/08/05 02:43:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.158 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -153,9 +153,7 @@ typedef struct PlannerInfo
        List       *full_join_clauses;          /* list of RestrictInfos for
                                                                                 * mergejoinable full join clauses */
 
-       List       *oj_info_list;       /* list of OuterJoinInfos */
-
-       List       *in_info_list;       /* list of InClauseInfos */
+       List       *join_info_list;             /* list of SpecialJoinInfos */
 
        List       *append_rel_list;    /* list of AppendRelInfos */
 
@@ -175,7 +173,6 @@ typedef struct PlannerInfo
        double          tuple_fraction; /* tuple_fraction passed to query_planner */
 
        bool            hasJoinRTEs;    /* true if any RTEs are RTE_JOIN kind */
-       bool            hasOuterJoins;  /* true if any RTEs are outer joins */
        bool            hasHavingQual;  /* true if havingQual was non-null */
        bool            hasPseudoConstantQuals; /* true if any RestrictInfo has
                                                                                 * pseudoconstant = true */
@@ -756,6 +753,8 @@ typedef struct UniquePath
        Path            path;
        Path       *subpath;
        UniquePathMethod umethod;
+       List       *in_operators;       /* equality operators of the IN clause */
+       List       *uniq_exprs;         /* expressions to be made unique */
        double          rows;                   /* estimated number of result tuples */
 } UniquePath;
 
@@ -1053,18 +1052,49 @@ typedef struct InnerIndexscanInfo
 } InnerIndexscanInfo;
 
 /*
- * Outer join info.
+ * "Flattened SubLinks"
+ *
+ * When we pull an IN or EXISTS SubLink up into the parent query, the
+ * join conditions extracted from the IN/EXISTS clause need to be specially
+ * treated in distribute_qual_to_rels processing.  We handle this by
+ * wrapping such expressions in a FlattenedSubLink node that identifies
+ * the join they come from.  The FlattenedSubLink node is discarded after
+ * distribute_qual_to_rels, having served its purpose.
+ *
+ * Although the planner treats this as an expression node type, it is not
+ * recognized by the parser or executor, so we declare it here rather than
+ * in primnodes.h.
+ */
+
+typedef struct FlattenedSubLink
+{
+       Expr            xpr;
+       JoinType        jointype;               /* must be JOIN_SEMI or JOIN_ANTI */
+       Relids          lefthand;               /* base relids treated as syntactic LHS */
+       Relids          righthand;              /* base relids syntactically within RHS */
+       Expr       *quals;                      /* join quals (in explicit-AND format) */
+} FlattenedSubLink;
+
+/*
+ * "Special join" info.
  *
  * One-sided outer joins constrain the order of joining partially but not
  * completely. We flatten such joins into the planner's top-level list of
- * relations to join, but record information about each outer join in an
- * OuterJoinInfo struct.  These structs are kept in the PlannerInfo node's
- * oj_info_list.
+ * relations to join, but record information about each outer join in a
+ * SpecialJoinInfo struct.  These structs are kept in the PlannerInfo node's
+ * join_info_list.
+ *
+ * Similarly, semijoins and antijoins created by flattening IN (subselect)
+ * and EXISTS(subselect) clauses create partial constraints on join order.
+ * These are likewise recorded in SpecialJoinInfo structs.
+ *
+ * We make SpecialJoinInfos for FULL JOINs even though there is no flexibility
+ * of planning for them, because this simplifies make_join_rel()'s API.
  *
  * min_lefthand and min_righthand are the sets of base relids that must be
- * available on each side when performing the outer join.  lhs_strict is
- * true if the outer join's condition cannot succeed when the LHS variables
- * are all NULL (this means that the outer join can commute with upper-level
+ * available on each side when performing the special join.  lhs_strict is
+ * true if the special join's condition cannot succeed when the LHS variables
+ * are all NULL (this means that an outer join can commute with upper-level
  * outer joins even if it appears in their RHS).  We don't bother to set
  * lhs_strict for FULL JOINs, however.
  *
@@ -1072,9 +1102,8 @@ typedef struct InnerIndexscanInfo
  * if they were, this would break the logic that enforces join order.
  *
  * syn_lefthand and syn_righthand are the sets of base relids that are
- * syntactically below this outer join.  (These are needed to help compute
- * min_lefthand and min_righthand for higher joins, but are not used
- * thereafter.)
+ * syntactically below this special join.  (These are needed to help compute
+ * min_lefthand and min_righthand for higher joins.)
  *
  * delay_upper_joins is set TRUE if we detect a pushed-down clause that has
  * to be evaluated after this join is formed (because it references the RHS).
@@ -1082,46 +1111,35 @@ typedef struct InnerIndexscanInfo
  * commute with this join, because that would leave noplace to check the
  * pushed-down clause. (We don't track this for FULL JOINs, either.)
  *
- * Note: OuterJoinInfo directly represents only LEFT JOIN and FULL JOIN;
- * RIGHT JOIN is handled by switching the inputs to make it a LEFT JOIN.
- * We make an OuterJoinInfo for FULL JOINs even though there is no flexibility
- * of planning for them, because this simplifies make_join_rel()'s API.
+ * join_quals is an implicit-AND list of the quals syntactically associated
+ * with the join (they may or may not end up being applied at the join level).
+ * This is just a side list and does not drive actual application of quals.
+ * For JOIN_SEMI joins, this is cleared to NIL in create_unique_path() if
+ * the join is found not to be suitable for a uniqueify-the-RHS plan.
+ *
+ * jointype is never JOIN_RIGHT; a RIGHT JOIN is handled by switching
+ * the inputs to make it a LEFT JOIN.  So the allowed values of jointype
+ * in a join_info_list member are only LEFT, FULL, SEMI, or ANTI.
+ *
+ * For purposes of join selectivity estimation, we create transient
+ * SpecialJoinInfo structures for regular inner joins; so it is possible
+ * to have jointype == JOIN_INNER in such a structure, even though this is
+ * not allowed within join_info_list.  Note that lhs_strict, delay_upper_joins,
+ * and join_quals are not set meaningfully for such structs.
  */
 
-typedef struct OuterJoinInfo
+typedef struct SpecialJoinInfo
 {
        NodeTag         type;
        Relids          min_lefthand;   /* base relids in minimum LHS for join */
        Relids          min_righthand;  /* base relids in minimum RHS for join */
        Relids          syn_lefthand;   /* base relids syntactically within LHS */
        Relids          syn_righthand;  /* base relids syntactically within RHS */
-       bool            is_full_join;   /* it's a FULL OUTER JOIN */
+       JoinType        jointype;               /* always INNER, LEFT, FULL, SEMI, or ANTI */
        bool            lhs_strict;             /* joinclause is strict for some LHS rel */
        bool            delay_upper_joins;              /* can't commute with upper RHS */
-} OuterJoinInfo;
-
-/*
- * IN clause info.
- *
- * When we convert top-level IN quals into join operations, we must restrict
- * the order of joining and use special join methods at some join points.
- * We record information about each such IN clause in an InClauseInfo struct.
- * These structs are kept in the PlannerInfo node's in_info_list.
- *
- * Note: sub_targetlist is a bit misnamed; it is a list of the expressions
- * on the RHS of the IN's join clauses.  (This normally starts out as a list
- * of Vars referencing the subquery outputs, but can get mutated if the
- * subquery is flattened into the main query.)
- */
-
-typedef struct InClauseInfo
-{
-       NodeTag         type;
-       Relids          lefthand;               /* base relids in lefthand expressions */
-       Relids          righthand;              /* base relids coming from the subselect */
-       List       *sub_targetlist; /* RHS expressions of the IN's comparisons */
-       List       *in_operators;       /* OIDs of the IN's equality operators */
-} InClauseInfo;
+       List       *join_quals;         /* join quals, in implicit-AND list format */
+} SpecialJoinInfo;
 
 /*
  * Append-relation info.
index 69a4f4c774ca90a67eb82d0c287b0d6ba3487146..41e52d909aa63a605f23a24c327b52a039d14e52 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.91 2008/08/02 21:32:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.92 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,6 +59,9 @@ extern bool contain_mutable_functions(Node *clause);
 extern bool contain_volatile_functions(Node *clause);
 extern bool contain_nonstrict_functions(Node *clause);
 extern Relids find_nonnullable_rels(Node *clause);
+extern List *find_nonnullable_vars(Node *clause);
+extern List *find_forced_null_vars(Node *clause);
+extern Var *find_forced_null_var(Node *clause);
 
 extern bool is_pseudo_constant_clause(Node *clause);
 extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
index 6780846cb0afe7e7ff950358bd99d124b6f6758a..17caeb4ee4187803a449309be01f54cd94058a3b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.90 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.91 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,9 +87,12 @@ extern void cost_group(Path *path, PlannerInfo *root,
                   int numGroupCols, double numGroups,
                   Cost input_startup_cost, Cost input_total_cost,
                   double input_tuples);
-extern void cost_nestloop(NestPath *path, PlannerInfo *root);
-extern void cost_mergejoin(MergePath *path, PlannerInfo *root);
-extern void cost_hashjoin(HashPath *path, PlannerInfo *root);
+extern void cost_nestloop(NestPath *path, PlannerInfo *root,
+                                                 SpecialJoinInfo *sjinfo);
+extern void cost_mergejoin(MergePath *path, PlannerInfo *root,
+                                                  SpecialJoinInfo *sjinfo);
+extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
+                                                 SpecialJoinInfo *sjinfo);
 extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
 extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
 extern Cost get_initplan_cost(PlannerInfo *root, SubPlan *subplan);
@@ -97,7 +100,7 @@ extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
                                                   RelOptInfo *outer_rel,
                                                   RelOptInfo *inner_rel,
-                                                  JoinType jointype,
+                                                  SpecialJoinInfo *sjinfo,
                                                   List *restrictlist);
 extern void set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel);
 extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel);
@@ -109,10 +112,12 @@ extern void set_values_size_estimates(PlannerInfo *root, RelOptInfo *rel);
 extern Selectivity clauselist_selectivity(PlannerInfo *root,
                                           List *clauses,
                                           int varRelid,
-                                          JoinType jointype);
+                                          JoinType jointype,
+                                          SpecialJoinInfo *sjinfo);
 extern Selectivity clause_selectivity(PlannerInfo *root,
                                   Node *clause,
                                   int varRelid,
-                                  JoinType jointype);
+                                  JoinType jointype,
+                                  SpecialJoinInfo *sjinfo);
 
 #endif   /* COST_H */
index c5a1f93c34797629012304e3b90236a38493f5c1..1db1674e12220d3a2c056dab7e226ff66ba27fd5 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.77 2008/01/01 19:45:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.78 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -50,7 +50,7 @@ extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths);
 extern ResultPath *create_result_path(List *quals);
 extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
 extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
-                                  Path *subpath);
+                                  Path *subpath, SpecialJoinInfo *sjinfo);
 extern Path *create_subqueryscan_path(RelOptInfo *rel, List *pathkeys);
 extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel);
 extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel);
@@ -58,6 +58,7 @@ extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel);
 extern NestPath *create_nestloop_path(PlannerInfo *root,
                                         RelOptInfo *joinrel,
                                         JoinType jointype,
+                                        SpecialJoinInfo *sjinfo,
                                         Path *outer_path,
                                         Path *inner_path,
                                         List *restrict_clauses,
@@ -66,6 +67,7 @@ extern NestPath *create_nestloop_path(PlannerInfo *root,
 extern MergePath *create_mergejoin_path(PlannerInfo *root,
                                          RelOptInfo *joinrel,
                                          JoinType jointype,
+                                         SpecialJoinInfo *sjinfo,
                                          Path *outer_path,
                                          Path *inner_path,
                                          List *restrict_clauses,
@@ -77,6 +79,7 @@ extern MergePath *create_mergejoin_path(PlannerInfo *root,
 extern HashPath *create_hashjoin_path(PlannerInfo *root,
                                         RelOptInfo *joinrel,
                                         JoinType jointype,
+                                        SpecialJoinInfo *sjinfo,
                                         Path *outer_path,
                                         Path *inner_path,
                                         List *restrict_clauses,
@@ -93,7 +96,7 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root,
                           Relids joinrelids,
                           RelOptInfo *outer_rel,
                           RelOptInfo *inner_rel,
-                          JoinType jointype,
+                          SpecialJoinInfo *sjinfo,
                           List **restrictlist_ptr);
 
 #endif   /* PATHNODE_H */
index 81e8089df169812fd758261dc8dbb52e1db9488c..455745e2fa338edc784a334a054711f9552837fe 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.104 2008/03/31 16:59:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.105 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -89,9 +89,8 @@ extern void create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel);
  *        routines to create join paths
  */
 extern void add_paths_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
-                                        RelOptInfo *outerrel,
-                                        RelOptInfo *innerrel,
-                                        JoinType jointype,
+                                        RelOptInfo *outerrel, RelOptInfo *innerrel,
+                                        JoinType jointype, SpecialJoinInfo *sjinfo,
                                         List *restrictlist);
 
 /*
index 0aa1d32f947eae29c38f00806d4d4aee50d6b958..1f15eda941acb3d57cd4979f23960763a98c4906 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.110 2008/08/07 19:35:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.111 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -76,7 +76,6 @@ extern int    join_collapse_limit;
 
 extern void add_base_rels_to_query(PlannerInfo *root, Node *jtnode);
 extern void build_base_rel_tlists(PlannerInfo *root, List *final_tlist);
-extern void add_IN_vars_to_tlists(PlannerInfo *root);
 extern void add_vars_to_targetlist(PlannerInfo *root, List *vars,
                                           Relids where_needed);
 extern List *deconstruct_jointree(PlannerInfo *root);
index 80fa3b515268c564e3c4eab300a7dc89e2e791c8..1a90a13208c5ef130b10ed88f997cb1fb1b98a08 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.60 2008/03/18 22:04:14 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/prep.h,v 1.61 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,7 +21,7 @@
 /*
  * prototypes for prepjointree.c
  */
-extern Node *pull_up_IN_clauses(PlannerInfo *root, Node *node);
+extern Node *pull_up_sublinks(PlannerInfo *root, Node *node);
 extern void inline_set_returning_functions(PlannerInfo *root);
 extern Node *pull_up_subqueries(PlannerInfo *root, Node *jtnode,
                                   bool below_outer_join, bool append_rel_member);
index bb60aac9d4f089301b564761d902fe6410a228d8..b9bd76b07ff4b856276cb8305f6360ec4f37eef5 100644 (file)
@@ -5,7 +5,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.31 2008/07/10 02:14:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/subselect.h,v 1.32 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -15,7 +15,9 @@
 #include "nodes/plannodes.h"
 #include "nodes/relation.h"
 
-extern Node *convert_IN_to_join(PlannerInfo *root, SubLink *sublink);
+extern Node *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink);
+extern Node *convert_EXISTS_sublink_to_join(PlannerInfo *root,
+                                                                                       SubLink *sublink, bool under_not);
 extern Node *SS_replace_correlation_vars(PlannerInfo *root, Node *expr);
 extern Node *SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual);
 extern void SS_finalize_plan(PlannerInfo *root, Plan *plan,
index 808da5129c10dd799b60e12632ce76251658a3be..e94e7e5916582b83da6cfc4ee2f4e85861a7f4e1 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.44 2008/03/09 00:32:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.45 2008/08/14 18:48:00 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -148,17 +148,19 @@ extern Datum nlikejoinsel(PG_FUNCTION_ARGS);
 extern Datum icnlikejoinsel(PG_FUNCTION_ARGS);
 
 extern Selectivity booltestsel(PlannerInfo *root, BoolTestType booltesttype,
-                       Node *arg, int varRelid, JoinType jointype);
+                       Node *arg, int varRelid,
+                       JoinType jointype, SpecialJoinInfo *sjinfo);
 extern Selectivity nulltestsel(PlannerInfo *root, NullTestType nulltesttype,
-                       Node *arg, int varRelid, JoinType jointype);
+                       Node *arg, int varRelid,
+                       JoinType jointype, SpecialJoinInfo *sjinfo);
 extern Selectivity scalararraysel(PlannerInfo *root,
                           ScalarArrayOpExpr *clause,
                           bool is_join_clause,
-                          int varRelid, JoinType jointype);
+                          int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
 extern int     estimate_array_length(Node *arrayexpr);
 extern Selectivity rowcomparesel(PlannerInfo *root,
                          RowCompareExpr *clause,
-                         int varRelid, JoinType jointype);
+                         int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo);
 
 extern void mergejoinscansel(PlannerInfo *root, Node *clause,
                                 Oid opfamily, int strategy, bool nulls_first,