Support RIGHT and FULL OUTER JOIN in hash joins.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 31 Dec 2010 01:24:55 +0000 (20:24 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 31 Dec 2010 01:26:08 +0000 (20:26 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Dec 2010 01:24:55 +0000 (20:24 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 31 Dec 2010 01:26:08 +0000 (20:26 -0500)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index 5cd7332237b55a9733ffc96179e2ef4ca554a21c..934a283b8d2b7c2db8c849759410a7b2e9defced 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -105,7 +105,8 @@ MultiExecHash(HashState *node)
                         break;
                 /* We have to compute the hash value */
                 econtext->ecxt_innertuple = slot;
-               if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
+               if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
+                                                                false, hashtable->keepNulls,
                                                                  &hashvalue))
                 {
                         int                     bucketNumber;
@@ -231,7 +232,7 @@ ExecEndHash(HashState *node)
   * ----------------------------------------------------------------
   */
  HashJoinTable
-ExecHashTableCreate(Hash *node, List *hashOperators)
+ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
  {
         HashJoinTable hashtable;
         Plan       *outerNode;
@@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
         hashtable->nbuckets = nbuckets;
         hashtable->log2_nbuckets = log2_nbuckets;
         hashtable->buckets = NULL;
+       hashtable->keepNulls = keepNulls;
         hashtable->skewEnabled = false;
         hashtable->skewBucket = NULL;
         hashtable->skewBucketLen = 0;
@@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable,
                 HashJoinTuple hashTuple;
                 int                     hashTupleSize;
  
+               /* Create the HashJoinTuple */
                 hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
                 hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
                                                                                                            hashTupleSize);
                 hashTuple->hashvalue = hashvalue;
                 memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+
+               /*
+                * We always reset the tuple-matched flag on insertion.  This is okay
+                * even when reloading a tuple from a batch file, since the tuple
+                * could not possibly have been matched to an outer tuple before it
+                * went into the batch file.
+                */
+               HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
+
+               /* Push it onto the front of the bucket's list */
                 hashTuple->next = hashtable->buckets[bucketno];
                 hashtable->buckets[bucketno] = hashTuple;
+
+               /* Account for space used, and back off if we've used too much */
                 hashtable->spaceUsed += hashTupleSize;
                 if (hashtable->spaceUsed > hashtable->spacePeak)
                         hashtable->spacePeak = hashtable->spaceUsed;
@@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
   *             scan a hash bucket for matches to the current outer tuple
   *
   * The current outer tuple must be stored in econtext->ecxt_outertuple.
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
   */
-HashJoinTuple
+bool
  ExecScanHashBucket(HashJoinState *hjstate,
                                    ExprContext *econtext)
  {
@@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
                         if (ExecQual(hjclauses, econtext, false))
                         {
                                 hjstate->hj_CurTuple = hashTuple;
-                               return hashTuple;
+                               return true;
                         }
                 }
  
@@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate,
         /*
          * no match
          */
-       return NULL;
+       return false;
+}
+
+/*
+ * ExecPrepHashTableForUnmatched
+ *             set up for a series of ExecScanHashTableForUnmatched calls
+ */
+void
+ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
+{
+       /*
+        *----------
+        * During this scan we use the HashJoinState fields as follows:
+        *
+        * hj_CurBucketNo: next regular bucket to scan
+        * hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums)
+        * hj_CurTuple: last tuple returned, or NULL to start next bucket
+        *----------
+        */
+       hjstate->hj_CurBucketNo = 0;
+       hjstate->hj_CurSkewBucketNo = 0;
+       hjstate->hj_CurTuple = NULL;
+}
+
+/*
+ * ExecScanHashTableForUnmatched
+ *             scan the hash table for unmatched inner tuples
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
+ */
+bool
+ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
+{
+       HashJoinTable hashtable = hjstate->hj_HashTable;
+       HashJoinTuple hashTuple = hjstate->hj_CurTuple;
+
+       for (;;)
+       {
+               /*
+                * hj_CurTuple is the address of the tuple last returned from the
+                * current bucket, or NULL if it's time to start scanning a new
+                * bucket.
+                */
+               if (hashTuple != NULL)
+                       hashTuple = hashTuple->next;
+               else if (hjstate->hj_CurBucketNo < hashtable->nbuckets)
+               {
+                       hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
+                       hjstate->hj_CurBucketNo++;
+               }
+               else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets)
+               {
+                       int             j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo];
+
+                       hashTuple = hashtable->skewBucket[j]->tuples;
+                       hjstate->hj_CurSkewBucketNo++;
+               }
+               else
+                       break;                          /* finished all buckets */
+
+               while (hashTuple != NULL)
+               {
+                       if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple)))
+                       {
+                               TupleTableSlot *inntuple;
+
+                               /* insert hashtable's tuple into exec slot */
+                               inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
+                                                                                                hjstate->hj_HashTupleSlot,
+                                                                                                false);        /* do not pfree */
+                               econtext->ecxt_innertuple = inntuple;
+
+                               /*
+                                * Reset temp memory each time; although this function doesn't
+                                * do any qual eval, the caller will, so let's keep it
+                                * parallel to ExecScanHashBucket.
+                                */
+                               ResetExprContext(econtext);
+
+                               hjstate->hj_CurTuple = hashTuple;
+                               return true;
+                       }
+
+                       hashTuple = hashTuple->next;
+               }
+       }
+
+       /*
+        * no more unmatched tuples
+        */
+       return false;
  }
  
  /*
@@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable)
         MemoryContextSwitchTo(oldcxt);
  }
  
+/*
+ * ExecHashTableResetMatchFlags
+ *             Clear all the HeapTupleHeaderHasMatch flags in the table
+ */
+void
+ExecHashTableResetMatchFlags(HashJoinTable hashtable)
+{
+       HashJoinTuple tuple;
+       int                     i;
+
+       /* Reset all flags in the main table ... */
+       for (i = 0; i < hashtable->nbuckets; i++)
+       {
+               for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next)
+                       HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+       }
+
+       /* ... and the same for the skew buckets, if any */
+       for (i = 0; i < hashtable->nSkewBuckets; i++)
+       {
+               int             j = hashtable->skewBucketNums[i];
+               HashSkewBucket *skewBucket = hashtable->skewBucket[j];
+
+               for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next)
+                       HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+       }
+}
+
+
  void
  ExecReScanHash(HashState *node)
  {
@@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
                                                                                                    hashTupleSize);
         hashTuple->hashvalue = hashvalue;
         memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+       HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
  
         /* Push it onto the front of the skew bucket's list */
         hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples;
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c

index dfca8cac5221bf552a153bf58ceae8152fa03a0f..cae31ba43a92a202a2e779401a636e1e26a2a6be 100644 (file)
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -22,8 +22,20 @@
  #include "utils/memutils.h"
  
  
-/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
-#define HASHJOIN_IS_OUTER(hjstate)     ((hjstate)->hj_NullInnerTupleSlot != NULL)
+/*
+ * States of the ExecHashJoin state machine
+ */
+#define HJ_BUILD_HASHTABLE             1
+#define HJ_NEED_NEW_OUTER              2
+#define HJ_SCAN_BUCKET                 3
+#define HJ_FILL_OUTER_TUPLE            4
+#define HJ_FILL_INNER_TUPLES   5
+#define HJ_NEED_NEW_BATCH              6
+
+/* Returns true if doing null-fill on outer relation */
+#define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
+/* Returns true if doing null-fill on inner relation */
+#define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
  
  static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
                                                   HashJoinState *hjstate,
@@ -32,7 +44,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
                                                   BufFile *file,
                                                   uint32 *hashvalue,
                                                   TupleTableSlot *tupleSlot);
-static int     ExecHashJoinNewBatch(HashJoinState *hjstate);
+static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
  
  
  /* ----------------------------------------------------------------
@@ -52,11 +64,9 @@ ExecHashJoin(HashJoinState *node)
         HashState  *hashNode;
         List       *joinqual;
         List       *otherqual;
-       TupleTableSlot *inntuple;
         ExprContext *econtext;
         ExprDoneCond isDone;
         HashJoinTable hashtable;
-       HashJoinTuple curtuple;
         TupleTableSlot *outerTupleSlot;
         uint32          hashvalue;
         int                     batchno;
@@ -69,10 +79,6 @@ ExecHashJoin(HashJoinState *node)
         otherqual = node->js.ps.qual;
         hashNode = (HashState *) innerPlanState(node);
         outerNode = outerPlanState(node);
-
-       /*
-        * get information from HashJoin state
-        */
         hashtable = node->hj_HashTable;
         econtext = node->js.ps.ps_ExprContext;
  
@@ -100,238 +106,300 @@ ExecHashJoin(HashJoinState *node)
         ResetExprContext(econtext);
  
         /*
-        * if this is the first call, build the hash table for inner relation
+        * run the hash join state machine
          */
-       if (hashtable == NULL)
+       for (;;)
         {
-               /*
-                * If the outer relation is completely empty, we can quit without
-                * building the hash table.  However, for an inner join it is only a
-                * win to check this when the outer relation's startup cost is less
-                * than the projected cost of building the hash table.  Otherwise it's
-                * best to build the hash table first and see if the inner relation is
-                * empty.  (When it's an outer join, we should always make this check,
-                * since we aren't going to be able to skip the join on the strength
-                * of an empty inner relation anyway.)
-                *
-                * If we are rescanning the join, we make use of information gained on
-                * the previous scan: don't bother to try the prefetch if the previous
-                * scan found the outer relation nonempty.      This is not 100% reliable
-                * since with new parameters the outer relation might yield different
-                * results, but it's a good heuristic.
-                *
-                * The only way to make the check is to try to fetch a tuple from the
-                * outer plan node.  If we succeed, we have to stash it away for later
-                * consumption by ExecHashJoinOuterGetTuple.
-                */
-               if (HASHJOIN_IS_OUTER(node) ||
-                       (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
-                        !node->hj_OuterNotEmpty))
+               switch (node->hj_JoinState)
                 {
-                       node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
-                       if (TupIsNull(node->hj_FirstOuterTupleSlot))
-                       {
-                               node->hj_OuterNotEmpty = false;
-                               return NULL;
-                       }
-                       else
-                               node->hj_OuterNotEmpty = true;
-               }
-               else
-                       node->hj_FirstOuterTupleSlot = NULL;
+                       case HJ_BUILD_HASHTABLE:
+                               /*
+                                * First time through: build hash table for inner relation.
+                                */
+                               Assert(hashtable == NULL);
  
-               /*
-                * create the hash table
-                */
-               hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
-                                                                               node->hj_HashOperators);
-               node->hj_HashTable = hashtable;
+                               /*
+                                * If the outer relation is completely empty, and it's not
+                                * right/full join, we can quit without building the hash
+                                * table.  However, for an inner join it is only a win to
+                                * check this when the outer relation's startup cost is less
+                                * than the projected cost of building the hash
+                                * table.  Otherwise it's best to build the hash table first
+                                * and see if the inner relation is empty.  (When it's a left
+                                * join, we should always make this check, since we aren't
+                                * going to be able to skip the join on the strength of an
+                                * empty inner relation anyway.)
+                                *
+                                * If we are rescanning the join, we make use of information
+                                * gained on the previous scan: don't bother to try the
+                                * prefetch if the previous scan found the outer relation
+                                * nonempty. This is not 100% reliable since with new
+                                * parameters the outer relation might yield different
+                                * results, but it's a good heuristic.
+                                *
+                                * The only way to make the check is to try to fetch a tuple
+                                * from the outer plan node.  If we succeed, we have to stash
+                                * it away for later consumption by ExecHashJoinOuterGetTuple.
+                                */
+                               if (HJ_FILL_INNER(node))
+                               {
+                                       /* no chance to not build the hash table */
+                                       node->hj_FirstOuterTupleSlot = NULL;
+                               }
+                               else if (HJ_FILL_OUTER(node) ||
+                                                (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
+                                                 !node->hj_OuterNotEmpty))
+                               {
+                                       node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
+                                       if (TupIsNull(node->hj_FirstOuterTupleSlot))
+                                       {
+                                               node->hj_OuterNotEmpty = false;
+                                               return NULL;
+                                       }
+                                       else
+                                               node->hj_OuterNotEmpty = true;
+                               }
+                               else
+                                       node->hj_FirstOuterTupleSlot = NULL;
  
-               /*
-                * execute the Hash node, to build the hash table
-                */
-               hashNode->hashtable = hashtable;
-               (void) MultiExecProcNode((PlanState *) hashNode);
+                               /*
+                                * create the hash table
+                                */
+                               hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
+                                                                                               node->hj_HashOperators,
+                                                                                               HJ_FILL_INNER(node));
+                               node->hj_HashTable = hashtable;
  
-               /*
-                * If the inner relation is completely empty, and we're not doing an
-                * outer join, we can quit without scanning the outer relation.
-                */
-               if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
-                       return NULL;
+                               /*
+                                * execute the Hash node, to build the hash table
+                                */
+                               hashNode->hashtable = hashtable;
+                               (void) MultiExecProcNode((PlanState *) hashNode);
  
-               /*
-                * need to remember whether nbatch has increased since we began
-                * scanning the outer relation
-                */
-               hashtable->nbatch_outstart = hashtable->nbatch;
+                               /*
+                                * If the inner relation is completely empty, and we're not
+                                * doing a left outer join, we can quit without scanning the
+                                * outer relation.
+                                */
+                               if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
+                                       return NULL;
  
-               /*
-                * Reset OuterNotEmpty for scan.  (It's OK if we fetched a tuple
-                * above, because ExecHashJoinOuterGetTuple will immediately set it
-                * again.)
-                */
-               node->hj_OuterNotEmpty = false;
-       }
+                               /*
+                                * need to remember whether nbatch has increased since we began
+                                * scanning the outer relation
+                                */
+                               hashtable->nbatch_outstart = hashtable->nbatch;
  
-       /*
-        * run the hash join process
-        */
-       for (;;)
-       {
-               /*
-                * If we don't have an outer tuple, get the next one
-                */
-               if (node->hj_NeedNewOuter)
-               {
-                       outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
-                                                                                                          node,
-                                                                                                          &hashvalue);
-                       if (TupIsNull(outerTupleSlot))
-                       {
-                               /* end of join */
-                               return NULL;
-                       }
+                               /*
+                                * Reset OuterNotEmpty for scan.  (It's OK if we fetched a
+                                * tuple above, because ExecHashJoinOuterGetTuple will
+                                * immediately set it again.)
+                                */
+                               node->hj_OuterNotEmpty = false;
  
-                       econtext->ecxt_outertuple = outerTupleSlot;
-                       node->hj_NeedNewOuter = false;
-                       node->hj_MatchedOuter = false;
+                               node->hj_JoinState = HJ_NEED_NEW_OUTER;
  
-                       /*
-                        * Now we have an outer tuple; find the corresponding bucket for
-                        * this tuple in the main hash table or skew hash table.
-                        */
-                       node->hj_CurHashValue = hashvalue;
-                       ExecHashGetBucketAndBatch(hashtable, hashvalue,
-                                                                         &node->hj_CurBucketNo, &batchno);
-                       node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
-                                                                                                                        hashvalue);
-                       node->hj_CurTuple = NULL;
+                               /* FALL THRU */
  
-                       /*
-                        * Now we've got an outer tuple and the corresponding hash bucket,
-                        * but it might not belong to the current batch, or it might match
-                        * a skew bucket.
-                        */
-                       if (batchno != hashtable->curbatch &&
-                               node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
-                       {
+                       case HJ_NEED_NEW_OUTER:
                                 /*
-                                * Need to postpone this outer tuple to a later batch. Save it
-                                * in the corresponding outer-batch file.
+                                * We don't have an outer tuple, try to get the next one
                                  */
-                               Assert(batchno > hashtable->curbatch);
-                               ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
-                                                                         hashvalue,
-                                                                         &hashtable->outerBatchFile[batchno]);
-                               node->hj_NeedNewOuter = true;
-                               continue;               /* loop around for a new outer tuple */
-                       }
-               }
+                               outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
+                                                                                                                  node,
+                                                                                                                  &hashvalue);
+                               if (TupIsNull(outerTupleSlot))
+                               {
+                                       /* end of batch, or maybe whole join */
+                                       if (HJ_FILL_INNER(node))
+                                       {
+                                               /* set up to scan for unmatched inner tuples */
+                                               ExecPrepHashTableForUnmatched(node);
+                                               node->hj_JoinState = HJ_FILL_INNER_TUPLES;
+                                       }
+                                       else
+                                               node->hj_JoinState = HJ_NEED_NEW_BATCH;
+                                       continue;
+                               }
  
-               /*
-                * OK, scan the selected hash bucket for matches
-                */
-               for (;;)
-               {
-                       curtuple = ExecScanHashBucket(node, econtext);
-                       if (curtuple == NULL)
-                               break;                  /* out of matches */
+                               econtext->ecxt_outertuple = outerTupleSlot;
+                               node->hj_MatchedOuter = false;
  
-                       /*
-                        * we've got a match, but still need to test non-hashed quals
-                        */
-                       inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
-                                                                                        node->hj_HashTupleSlot,
-                                                                                        false);        /* don't pfree */
-                       econtext->ecxt_innertuple = inntuple;
+                               /*
+                                * Find the corresponding bucket for this tuple in the main
+                                * hash table or skew hash table.
+                                */
+                               node->hj_CurHashValue = hashvalue;
+                               ExecHashGetBucketAndBatch(hashtable, hashvalue,
+                                                                                 &node->hj_CurBucketNo, &batchno);
+                               node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
+                                                                                                                                hashvalue);
+                               node->hj_CurTuple = NULL;
  
-                       /* reset temp memory each time to avoid leaks from qual expr */
-                       ResetExprContext(econtext);
+                               /*
+                                * The tuple might not belong to the current batch (where
+                                * "current batch" includes the skew buckets if any).
+                                */
+                               if (batchno != hashtable->curbatch &&
+                                       node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
+                               {
+                                       /*
+                                        * Need to postpone this outer tuple to a later batch.
+                                        * Save it in the corresponding outer-batch file.
+                                        */
+                                       Assert(batchno > hashtable->curbatch);
+                                       ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
+                                                                                 hashvalue,
+                                                                                 &hashtable->outerBatchFile[batchno]);
+                                       /* Loop around, staying in HJ_NEED_NEW_OUTER state */
+                                       continue;
+                               }
  
-                       /*
-                        * if we pass the qual, then save state for next call and have
-                        * ExecProject form the projection, store it in the tuple table,
-                        * and return the slot.
-                        *
-                        * Only the joinquals determine MatchedOuter status, but all quals
-                        * must pass to actually return the tuple.
-                        */
-                       if (joinqual == NIL || ExecQual(joinqual, econtext, false))
-                       {
-                               node->hj_MatchedOuter = true;
+                               /* OK, let's scan the bucket for matches */
+                               node->hj_JoinState = HJ_SCAN_BUCKET;
+
+                               /* FALL THRU */
  
-                               /* In an antijoin, we never return a matched tuple */
-                               if (node->js.jointype == JOIN_ANTI)
+                       case HJ_SCAN_BUCKET:
+                               /*
+                                * Scan the selected hash bucket for matches to current outer
+                                */
+                               if (!ExecScanHashBucket(node, econtext))
                                 {
-                                       node->hj_NeedNewOuter = true;
-                                       break;          /* out of loop over hash bucket */
+                                       /* out of matches; check for possible outer-join fill */
+                                       node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
+                                       continue;
                                 }
  
                                 /*
-                                * In a semijoin, we'll consider returning the first match,
-                                * but after that we're done with this outer tuple.
+                                * We've got a match, but still need to test non-hashed quals.
+                                * ExecScanHashBucket already set up all the state needed to
+                                * call ExecQual.
+                                *
+                                * If we pass the qual, then save state for next call and have
+                                * ExecProject form the projection, store it in the tuple
+                                * table, and return the slot.
+                                *
+                                * Only the joinquals determine tuple match status, but all
+                                * quals must pass to actually return the tuple.
                                  */
-                               if (node->js.jointype == JOIN_SEMI)
-                                       node->hj_NeedNewOuter = true;
-
-                               if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+                               if (joinqual == NIL || ExecQual(joinqual, econtext, false))
                                 {
-                                       TupleTableSlot *result;
+                                       node->hj_MatchedOuter = true;
+                                       HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
  
-                                       result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+                                       /* In an antijoin, we never return a matched tuple */
+                                       if (node->js.jointype == JOIN_ANTI)
+                                       {
+                                               node->hj_JoinState = HJ_NEED_NEW_OUTER;
+                                               continue;
+                                       }
  
-                                       if (isDone != ExprEndResult)
+                                       /*
+                                        * In a semijoin, we'll consider returning the first match,
+                                        * but after that we're done with this outer tuple.
+                                        */
+                                       if (node->js.jointype == JOIN_SEMI)
+                                               node->hj_JoinState = HJ_NEED_NEW_OUTER;
+
+                                       if (otherqual == NIL ||
+                                               ExecQual(otherqual, econtext, false))
                                         {
-                                               node->js.ps.ps_TupFromTlist =
-                                                       (isDone == ExprMultipleResult);
-                                               return result;
+                                               TupleTableSlot *result;
+
+                                               result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+                                               if (isDone != ExprEndResult)
+                                               {
+                                                       node->js.ps.ps_TupFromTlist =
+                                                               (isDone == ExprMultipleResult);
+                                                       return result;
+                                               }
                                         }
                                 }
+                               break;
  
+                       case HJ_FILL_OUTER_TUPLE:
                                 /*
-                                * If semijoin and we didn't return the tuple, we're still
-                                * done with this outer tuple.
+                                * The current outer tuple has run out of matches, so check
+                                * whether to emit a dummy outer-join tuple.  Whether we
+                                * emit one or not, the next state is NEED_NEW_OUTER.
                                  */
-                               if (node->js.jointype == JOIN_SEMI)
-                                       break;          /* out of loop over hash bucket */
-                       }
-               }
+                               node->hj_JoinState = HJ_NEED_NEW_OUTER;
  
-               /*
-                * Now the current outer tuple has run out of matches, so check
-                * whether to emit a dummy outer-join tuple. If not, loop around to
-                * get a new outer tuple.
-                */
-               node->hj_NeedNewOuter = true;
+                               if (!node->hj_MatchedOuter &&
+                                       HJ_FILL_OUTER(node))
+                               {
+                                       /*
+                                        * Generate a fake join tuple with nulls for the inner
+                                        * tuple, and return it if it passes the non-join quals.
+                                        */
+                                       econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+
+                                       if (otherqual == NIL ||
+                                               ExecQual(otherqual, econtext, false))
+                                       {
+                                               TupleTableSlot *result;
  
-               if (!node->hj_MatchedOuter &&
-                       HASHJOIN_IS_OUTER(node))
-               {
-                       /*
-                        * We are doing an outer join and there were no join matches for
-                        * this outer tuple.  Generate a fake join tuple with nulls for
-                        * the inner tuple, and return it if it passes the non-join quals.
-                        */
-                       econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+                                               result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
  
-                       if (otherqual == NIL || ExecQual(otherqual, econtext, false))
-                       {
+                                               if (isDone != ExprEndResult)
+                                               {
+                                                       node->js.ps.ps_TupFromTlist =
+                                                               (isDone == ExprMultipleResult);
+                                                       return result;
+                                               }
+                                       }
+                               }
+                               break;
+
+                       case HJ_FILL_INNER_TUPLES:
                                 /*
-                                * qualification was satisfied so we project and return the
-                                * slot containing the result tuple using ExecProject().
+                                * We have finished a batch, but we are doing right/full join,
+                                * so any unmatched inner tuples in the hashtable have to be
+                                * emitted before we continue to the next batch.
                                  */
-                               TupleTableSlot *result;
+                               if (!ExecScanHashTableForUnmatched(node, econtext))
+                               {
+                                       /* no more unmatched tuples */
+                                       node->hj_JoinState = HJ_NEED_NEW_BATCH;
+                                       continue;
+                               }
  
-                               result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+                               /*
+                                * Generate a fake join tuple with nulls for the outer tuple,
+                                * and return it if it passes the non-join quals.
+                                */
+                               econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
  
-                               if (isDone != ExprEndResult)
+                               if (otherqual == NIL ||
+                                       ExecQual(otherqual, econtext, false))
                                 {
-                                       node->js.ps.ps_TupFromTlist =
-                                               (isDone == ExprMultipleResult);
-                                       return result;
+                                       TupleTableSlot *result;
+
+                                       result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+                                       if (isDone != ExprEndResult)
+                                       {
+                                               node->js.ps.ps_TupFromTlist =
+                                                       (isDone == ExprMultipleResult);
+                                               return result;
+                                       }
                                 }
-                       }
+                               break;
+
+                       case HJ_NEED_NEW_BATCH:
+                               /*
+                                * Try to advance to next batch.  Done if there are no more.
+                                */
+                               if (!ExecHashJoinNewBatch(node))
+                                       return NULL;                            /* end of join */
+                               node->hj_JoinState = HJ_NEED_NEW_OUTER;
+                               break;
+
+                       default:
+                               elog(ERROR, "unrecognized hashjoin state: %d",
+                                        (int) node->hj_JoinState);
                 }
         }
  }
@@ -406,7 +474,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
         ExecInitResultTupleSlot(estate, &hjstate->js.ps);
         hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
  
-       /* note: HASHJOIN_IS_OUTER macro depends on this initialization */
+       /* set up null tuples for outer joins, if needed */
         switch (node->join.jointype)
         {
                 case JOIN_INNER:
@@ -418,6 +486,19 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
                                 ExecInitNullTupleSlot(estate,
                                                                  ExecGetResultType(innerPlanState(hjstate)));
                         break;
+               case JOIN_RIGHT:
+                       hjstate->hj_NullOuterTupleSlot =
+                               ExecInitNullTupleSlot(estate,
+                                                                ExecGetResultType(outerPlanState(hjstate)));
+                       break;
+               case JOIN_FULL:
+                       hjstate->hj_NullOuterTupleSlot =
+                               ExecInitNullTupleSlot(estate,
+                                                                ExecGetResultType(outerPlanState(hjstate)));
+                       hjstate->hj_NullInnerTupleSlot =
+                               ExecInitNullTupleSlot(estate,
+                                                                ExecGetResultType(innerPlanState(hjstate)));
+                       break;
                 default:
                         elog(ERROR, "unrecognized join type: %d",
                                  (int) node->join.jointype);
@@ -425,7 +506,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
  
         /*
          * now for some voodoo.  our temporary tuple slot is actually the result
-        * tuple slot of the Hash node (which is our inner plan).  we do this
+        * tuple slot of the Hash node (which is our inner plan).  we can do this
          * because Hash nodes don't return tuples via ExecProcNode() -- instead
          * the hash join node uses ExecScanHashBucket() to get at the contents of
          * the hash table.      -cim 6/9/91
@@ -485,7 +566,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
         ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
  
         hjstate->js.ps.ps_TupFromTlist = false;
-       hjstate->hj_NeedNewOuter = true;
+       hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
         hjstate->hj_MatchedOuter = false;
         hjstate->hj_OuterNotEmpty = false;
  
@@ -533,12 +614,13 @@ ExecEndHashJoin(HashJoinState *node)
   * ExecHashJoinOuterGetTuple
   *
   *             get the next outer tuple for hashjoin: either by
- *             executing a plan node in the first pass, or from
+ *             executing the outer plan node in the first pass, or from
   *             the temp files for the hashjoin batches.
   *
- * Returns a null slot if no more outer tuples.  On success, the tuple's
- * hash value is stored at *hashvalue --- this is either originally computed,
- * or re-read from the temp file.
+ * Returns a null slot if no more outer tuples (within the current batch).
+ *
+ * On success, the tuple's hash value is stored at *hashvalue --- this is
+ * either originally computed, or re-read from the temp file.
   */
  static TupleTableSlot *
  ExecHashJoinOuterGetTuple(PlanState *outerNode,
@@ -572,7 +654,7 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
                         if (ExecHashGetHashValue(hashtable, econtext,
                                                                          hjstate->hj_OuterHashKeys,
                                                                          true,          /* outer tuple */
-                                                                        HASHJOIN_IS_OUTER(hjstate),
+                                                                        HJ_FILL_OUTER(hjstate),
                                                                          hashvalue))
                         {
                                 /* remember outer relation is not empty for possible rescan */
@@ -587,31 +669,27 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
                          */
                         slot = ExecProcNode(outerNode);
                 }
+       }
+       else if (curbatch < hashtable->nbatch)
+       {
+               BufFile    *file = hashtable->outerBatchFile[curbatch];
  
                 /*
-                * We have just reached the end of the first pass. Try to switch to a
-                * saved batch.
+                * In outer-join cases, we could get here even though the batch file
+                * is empty.
                  */
-               curbatch = ExecHashJoinNewBatch(hjstate);
-       }
+               if (file == NULL)
+                       return NULL;
  
-       /*
-        * Try to read from a temp file. Loop allows us to advance to new batches
-        * as needed.  NOTE: nbatch could increase inside ExecHashJoinNewBatch, so
-        * don't try to optimize this loop.
-        */
-       while (curbatch < hashtable->nbatch)
-       {
                 slot = ExecHashJoinGetSavedTuple(hjstate,
-                                                                                hashtable->outerBatchFile[curbatch],
+                                                                                file,
                                                                                  hashvalue,
                                                                                  hjstate->hj_OuterTupleSlot);
                 if (!TupIsNull(slot))
                         return slot;
-               curbatch = ExecHashJoinNewBatch(hjstate);
         }
  
-       /* Out of batches... */
+       /* End of this batch */
         return NULL;
  }
  
@@ -619,10 +697,9 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
   * ExecHashJoinNewBatch
   *             switch to a new hashjoin batch
   *
- * Returns the number of the new batch (1..nbatch-1), or nbatch if no more.
- * We will never return a batch number that has an empty outer batch file.
+ * Returns true if successful, false if there are no more batches.
   */
-static int
+static bool
  ExecHashJoinNewBatch(HashJoinState *hjstate)
  {
         HashJoinTable hashtable = hjstate->hj_HashTable;
@@ -632,7 +709,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
         TupleTableSlot *slot;
         uint32          hashvalue;
  
-start_over:
         nbatch = hashtable->nbatch;
         curbatch = hashtable->curbatch;
  
@@ -657,6 +733,7 @@ start_over:
                 hashtable->skewEnabled = false;
                 hashtable->skewBucket = NULL;
                 hashtable->skewBucketNums = NULL;
+               hashtable->nSkewBuckets = 0;
                 hashtable->spaceUsedSkew = 0;
         }
  
@@ -665,8 +742,9 @@ start_over:
          * sides.  We can sometimes skip over batches that are empty on only one
          * side, but there are exceptions:
          *
-        * 1. In an outer join, we have to process outer batches even if the inner
-        * batch is empty.
+        * 1. In a left/full outer join, we have to process outer batches even if
+        * the inner batch is empty.  Similarly, in a right/full outer join, we
+        * have to process inner batches even if the outer batch is empty.
          *
          * 2. If we have increased nbatch since the initial estimate, we have to
          * scan inner batches since they might contain tuples that need to be
@@ -682,7 +760,10 @@ start_over:
                         hashtable->innerBatchFile[curbatch] == NULL))
         {
                 if (hashtable->outerBatchFile[curbatch] &&
-                       HASHJOIN_IS_OUTER(hjstate))
+                       HJ_FILL_OUTER(hjstate))
+                       break;                          /* must process due to rule 1 */
+               if (hashtable->innerBatchFile[curbatch] &&
+                       HJ_FILL_INNER(hjstate))
                         break;                          /* must process due to rule 1 */
                 if (hashtable->innerBatchFile[curbatch] &&
                         nbatch != hashtable->nbatch_original)
@@ -702,7 +783,7 @@ start_over:
         }
  
         if (curbatch >= nbatch)
-               return curbatch;                /* no more batches */
+               return false;           /* no more batches */
  
         hashtable->curbatch = curbatch;
  
@@ -741,20 +822,17 @@ start_over:
         }
  
         /*
-        * If there's no outer batch file, advance to next batch.
+        * Rewind outer batch file (if present), so that we can start reading it.
          */
-       if (hashtable->outerBatchFile[curbatch] == NULL)
-               goto start_over;
-
-       /*
-        * Rewind outer batch file, so that we can start reading it.
-        */
-       if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not rewind hash-join temporary file: %m")));
+       if (hashtable->outerBatchFile[curbatch] != NULL)
+       {
+               if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not rewind hash-join temporary file: %m")));
+       }
  
-       return curbatch;
+       return true;
  }
  
  /*
@@ -857,9 +935,16 @@ ExecReScanHashJoin(HashJoinState *node)
                         node->js.ps.righttree->chgParam == NULL)
                 {
                         /*
-                        * okay to reuse the hash table; needn't rescan inner, either.
+                        * Okay to reuse the hash table; needn't rescan inner, either.
                          *
-                        * What we do need to do is reset our state about the emptiness of
+                        * However, if it's a right/full join, we'd better reset the
+                        * inner-tuple match flags contained in the table.
+                        */
+                       if (HJ_FILL_INNER(node))
+                               ExecHashTableResetMatchFlags(node->hj_HashTable);
+
+                       /*
+                        * Also, we need to reset our state about the emptiness of
                          * the outer relation, so that the new scan of the outer will
                          * update it correctly if it turns out to be empty this time.
                          * (There's no harm in clearing it now because ExecHashJoin won't
@@ -869,12 +954,16 @@ ExecReScanHashJoin(HashJoinState *node)
                          * through.)
                          */
                         node->hj_OuterNotEmpty = false;
+
+                       /* ExecHashJoin can skip the BUILD_HASHTABLE step */
+                       node->hj_JoinState = HJ_NEED_NEW_OUTER;
                 }
                 else
                 {
                         /* must destroy and rebuild hash table */
                         ExecHashTableDestroy(node->hj_HashTable);
                         node->hj_HashTable = NULL;
+                       node->hj_JoinState = HJ_BUILD_HASHTABLE;
  
                         /*
                          * if chgParam of subnode is not null then plan will be re-scanned
@@ -892,7 +981,6 @@ ExecReScanHashJoin(HashJoinState *node)
         node->hj_CurTuple = NULL;
  
         node->js.ps.ps_TupFromTlist = false;
-       node->hj_NeedNewOuter = true;
         node->hj_MatchedOuter = false;
         node->hj_FirstOuterTupleSlot = NULL;
  
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c

index 7dffe18c9730dfa21a6db56b2037105343a8e838..fd38e8bd7a9a103dd5fb6290365be37776dec7dd 100644 (file)
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -889,8 +889,8 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
          * combination for which an opfamily member operator exists.  If we have
          * choices, we prefer simple Var members (possibly with RelabelType) since
          * these are (a) cheapest to compute at runtime and (b) most likely to
-        * have useful statistics.      Also, if enable_hashjoin is on, we prefer
-        * operators that are also hashjoinable.
+        * have useful statistics. Also, prefer operators that are also
+        * hashjoinable.
          */
         if (outer_members && inner_members)
         {
@@ -925,8 +925,7 @@ generate_join_implied_equalities_normal(PlannerInfo *root,
                                         (IsA(inner_em->em_expr, RelabelType) &&
                                          IsA(((RelabelType *) inner_em->em_expr)->arg, Var)))
                                         score++;
-                               if (!enable_hashjoin ||
-                                       op_hashjoinable(eq_op,
+                               if (op_hashjoinable(eq_op,
                                                                         exprType((Node *) outer_em->em_expr)))
                                         score++;
                                 if (score > best_score)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index bea134b3f1a02db8c611a6fc46d5bba041961682..54257c05c50b784e6fe5130183c33b1de8f923d9 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -41,7 +41,8 @@ static List *select_mergejoin_clauses(PlannerInfo *root,
                                                  RelOptInfo *outerrel,
                                                  RelOptInfo *innerrel,
                                                  List *restrictlist,
-                                                JoinType jointype);
+                                                JoinType jointype,
+                                                bool *have_nonmergeable_clause);
  
  
  /*
@@ -77,12 +78,13 @@ add_paths_to_joinrel(PlannerInfo *root,
                                          List *restrictlist)
  {
         List       *mergeclause_list = NIL;
+       bool            have_nonmergeable_clause = false;
  
         /*
          * Find potential mergejoin clauses.  We can skip this if we are not
-        * interested in doing a mergejoin.  However, mergejoin is currently our
-        * only way of implementing full outer joins, so override mergejoin
-        * disable if it's a full join.
+        * interested in doing a mergejoin.  However, mergejoin may be our only
+        * way of implementing a full outer join, so override enable_mergejoin if
+        * it's a full join.
          */
         if (enable_mergejoin || jointype == JOIN_FULL)
                 mergeclause_list = select_mergejoin_clauses(root,
@@ -90,22 +92,27 @@ add_paths_to_joinrel(PlannerInfo *root,
                                                                                                         outerrel,
                                                                                                         innerrel,
                                                                                                         restrictlist,
-                                                                                                       jointype);
+                                                                                                       jointype,
+                                                                                                       &have_nonmergeable_clause);
  
         /*
          * 1. Consider mergejoin paths where both relations must be explicitly
-        * sorted.
+        * sorted.  Skip this if we can't mergejoin.
          */
-       sort_inner_and_outer(root, joinrel, outerrel, innerrel,
-                                                restrictlist, mergeclause_list, jointype, sjinfo);
+       if (!have_nonmergeable_clause)
+               sort_inner_and_outer(root, joinrel, outerrel, innerrel,
+                                                        restrictlist, mergeclause_list, jointype, sjinfo);
  
         /*
          * 2. Consider paths where the outer relation need not be explicitly
          * sorted. This includes both nestloops and mergejoins where the outer
-        * path is already ordered.
+        * path is already ordered.  Again, skip this if we can't mergejoin.
+        * (That's okay because we know that nestloop can't handle right/full
+        * joins at all, so it wouldn't work in those cases either.)
          */
-       match_unsorted_outer(root, joinrel, outerrel, innerrel,
-                                                restrictlist, mergeclause_list, jointype, sjinfo);
+       if (!have_nonmergeable_clause)
+               match_unsorted_outer(root, joinrel, outerrel, innerrel,
+                                                        restrictlist, mergeclause_list, jointype, sjinfo);
  
  #ifdef NOT_USED
  
@@ -120,15 +127,17 @@ add_paths_to_joinrel(PlannerInfo *root,
          * those made by match_unsorted_outer when add_paths_to_joinrel() is
          * invoked with the two rels given in the other order.
          */
-       match_unsorted_inner(root, joinrel, outerrel, innerrel,
-                                                restrictlist, mergeclause_list, jointype, sjinfo);
+       if (!have_nonmergeable_clause)
+               match_unsorted_inner(root, joinrel, outerrel, innerrel,
+                                                        restrictlist, mergeclause_list, jointype, sjinfo);
  #endif
  
         /*
          * 4. Consider paths where both outer and inner relations must be hashed
-        * before being joined.
+        * before being joined.  As above, disregard enable_hashjoin for full
+        * joins, because there may be no other alternative.
          */
-       if (enable_hashjoin)
+       if (enable_hashjoin || jointype == JOIN_FULL)
                 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
                                                          restrictlist, jointype, sjinfo);
  }
@@ -189,37 +198,11 @@ sort_inner_and_outer(PlannerInfo *root,
                                          JoinType jointype,
                                          SpecialJoinInfo *sjinfo)
  {
-       bool            useallclauses;
         Path       *outer_path;
         Path       *inner_path;
         List       *all_pathkeys;
         ListCell   *l;
  
-       /*
-        * If we are doing a right or full join, we must use *all* the
-        * mergeclauses as join clauses, else we will not have a valid plan.
-        */
-       switch (jointype)
-       {
-               case JOIN_INNER:
-               case JOIN_LEFT:
-               case JOIN_SEMI:
-               case JOIN_ANTI:
-               case JOIN_UNIQUE_OUTER:
-               case JOIN_UNIQUE_INNER:
-                       useallclauses = false;
-                       break;
-               case JOIN_RIGHT:
-               case JOIN_FULL:
-                       useallclauses = true;
-                       break;
-               default:
-                       elog(ERROR, "unrecognized join type: %d",
-                                (int) jointype);
-                       useallclauses = false;          /* keep compiler quiet */
-                       break;
-       }
-
         /*
          * We only consider the cheapest-total-cost input paths, since we are
          * assuming here that a sort is required.  We will consider
@@ -390,9 +373,9 @@ match_unsorted_outer(PlannerInfo *root,
  
         /*
          * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
-        * are doing a right or full join, we must use *all* the mergeclauses as
-        * join clauses, else we will not have a valid plan.  (Although these two
-        * flags are currently inverses, keep them separate for clarity and
+        * are doing a right or full mergejoin, we must use *all* the mergeclauses
+        * as join clauses, else we will not have a valid plan.  (Although these
+        * two flags are currently inverses, keep them separate for clarity and
          * possible future changes.)
          */
         switch (jointype)
@@ -574,8 +557,8 @@ match_unsorted_outer(PlannerInfo *root,
                  * Special corner case: for "x FULL JOIN y ON true", there will be no
                  * join clauses at all.  Ordinarily we'd generate a clauseless
                  * nestloop path, but since mergejoin is our only join type that
-                * supports FULL JOIN, it's necessary to generate a clauseless
-                * mergejoin path instead.
+                * supports FULL JOIN without any join clauses, it's necessary to
+                * generate a clauseless mergejoin path instead.
                  */
                 if (mergeclauses == NIL)
                 {
@@ -781,29 +764,10 @@ hash_inner_and_outer(PlannerInfo *root,
                                          JoinType jointype,
                                          SpecialJoinInfo *sjinfo)
  {
-       bool            isouterjoin;
+       bool            isouterjoin = IS_OUTER_JOIN(jointype);
         List       *hashclauses;
         ListCell   *l;
  
-       /*
-        * Hashjoin only supports inner, left, semi, and anti joins.
-        */
-       switch (jointype)
-       {
-               case JOIN_INNER:
-               case JOIN_SEMI:
-               case JOIN_UNIQUE_OUTER:
-               case JOIN_UNIQUE_INNER:
-                       isouterjoin = false;
-                       break;
-               case JOIN_LEFT:
-               case JOIN_ANTI:
-                       isouterjoin = true;
-                       break;
-               default:
-                       return;
-       }
-
         /*
          * We need to build only one hashpath for any given pair of outer and
          * inner relations; all of the hashable clauses will be used as keys.
@@ -963,6 +927,11 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
   *       Select mergejoin clauses that are usable for a particular join.
   *       Returns a list of RestrictInfo nodes for those clauses.
   *
+ * *have_nonmergeable_clause is set TRUE if this is a right/full join and
+ * there are nonmergejoinable join clauses.  The executor's mergejoin
+ * machinery cannot handle such cases, so we have to avoid generating a
+ * mergejoin plan.
+ *
   * We also mark each selected RestrictInfo to show which side is currently
   * being considered as outer.  These are transient markings that are only
   * good for the duration of the current add_paths_to_joinrel() call!
@@ -977,13 +946,15 @@ select_mergejoin_clauses(PlannerInfo *root,
                                                  RelOptInfo *outerrel,
                                                  RelOptInfo *innerrel,
                                                  List *restrictlist,
-                                                JoinType jointype)
+                                                JoinType jointype,
+                                                bool *have_nonmergeable_clause)
  {
         List       *result_list = NIL;
         bool            isouterjoin = IS_OUTER_JOIN(jointype);
-       bool            have_nonmergeable_joinclause = false;
         ListCell   *l;
  
+       *have_nonmergeable_clause = false;
+
         foreach(l, restrictlist)
         {
                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
@@ -991,7 +962,7 @@ select_mergejoin_clauses(PlannerInfo *root,
                 /*
                  * If processing an outer join, only use its own join clauses in the
                  * merge.  For inner joins we can use pushed-down clauses too. (Note:
-                * we don't set have_nonmergeable_joinclause here because pushed-down
+                * we don't set have_nonmergeable_clause here because pushed-down
                  * clauses will become otherquals not joinquals.)
                  */
                 if (isouterjoin && restrictinfo->is_pushed_down)
@@ -1008,7 +979,7 @@ select_mergejoin_clauses(PlannerInfo *root,
                          * FALSE.)
                          */
                         if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
-                               have_nonmergeable_joinclause = true;
+                               *have_nonmergeable_clause = true;
                         continue;                       /* not mergejoinable */
                 }
  
@@ -1017,7 +988,7 @@ select_mergejoin_clauses(PlannerInfo *root,
                  */
                 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
                 {
-                       have_nonmergeable_joinclause = true;
+                       *have_nonmergeable_clause = true;
                         continue;                       /* no good for these input relations */
                 }
  
@@ -1046,7 +1017,7 @@ select_mergejoin_clauses(PlannerInfo *root,
                 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
                         EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
                 {
-                       have_nonmergeable_joinclause = true;
+                       *have_nonmergeable_clause = true;
                         continue;                       /* can't handle redundant eclasses */
                 }
  
@@ -1054,27 +1025,19 @@ select_mergejoin_clauses(PlannerInfo *root,
         }
  
         /*
-        * If it is a right/full join then *all* the explicit join clauses must be
-        * mergejoinable, else the executor will fail. If we are asked for a right
-        * join then just return NIL to indicate no mergejoin is possible (we can
-        * handle it as a left join instead). If we are asked for a full join then
-        * emit an error, because there is no fallback.
+        * If it is not a right/full join then we don't need to insist on all the
+        * joinclauses being mergejoinable, so reset the flag.  This simplifies
+        * the logic in add_paths_to_joinrel.
          */
-       if (have_nonmergeable_joinclause)
+       switch (jointype)
         {
-               switch (jointype)
-               {
-                       case JOIN_RIGHT:
-                               return NIL;             /* not mergejoinable */
-                       case JOIN_FULL:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                                errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
-                               break;
-                       default:
-                               /* otherwise, it's OK to have nonmergeable join quals */
-                               break;
-               }
+               case JOIN_RIGHT:
+               case JOIN_FULL:
+                       break;
+               default:
+                       /* otherwise, it's OK to have nonmergeable join quals */
+                       *have_nonmergeable_clause = false;
+                       break;
         }
  
         return result_list;
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c

index 197c49d774c6067e472e8062ca581950c6777dee..605a32d52d6c44f87f88203b1cc36846af6b96d8 100644 (file)
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -658,6 +658,18 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
                         add_paths_to_joinrel(root, joinrel, rel2, rel1,
                                                                  JOIN_FULL, sjinfo,
                                                                  restrictlist);
+
+                       /*
+                        * If there are join quals that aren't mergeable or hashable, we
+                        * may not be able to build any valid plan.  Complain here so that
+                        * we can give a somewhat-useful error message.  (Since we have no
+                        * flexibility of planning for a full join, there's no chance of
+                        * succeeding later with another pair of input rels.)
+                        */
+                       if (joinrel->pathlist == NIL)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
                         break;
                 case JOIN_SEMI:
  
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 6efb6dc4bcea2ec7e442c4906143fbf5e5699404..c74a91f8a7f72002f8d9c5497855fec220e4ac36 100644 (file)
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -1328,10 +1328,9 @@ distribute_restrictinfo_to_rels(PlannerInfo *root,
  
                         /*
                          * Check for hashjoinable operators.  (We don't bother setting the
-                        * hashjoin info if we're not going to need it.)
+                        * hashjoin info except in true join clauses.)
                          */
-                       if (enable_hashjoin)
-                               check_hashjoinable(restrictinfo);
+                       check_hashjoinable(restrictinfo);
  
                         /*
                          * Add clause to the join lists of all the relevant relations.
@@ -1458,10 +1457,9 @@ build_implied_join_equality(Oid opno,
                                                                          qualscope, /* required_relids */
                                                                          NULL);         /* nullable_relids */
  
-       /* Set mergejoinability info always, and hashjoinability if enabled */
+       /* Set mergejoinability/hashjoinability flags */
         check_mergejoinable(restrictinfo);
-       if (enable_hashjoin)
-               check_hashjoinable(restrictinfo);
+       check_hashjoinable(restrictinfo);
  
         return restrictinfo;
  }
diff --git a/src/include/access/htup.h b/src/include/access/htup.h

index adf1321052cf1013ec647906facfe57a97688c06..f540966d684f73b45d5af2b24dd72d92fe9fbb9a 100644 (file)
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -195,6 +195,14 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
  
  #define HEAP2_XACT_MASK                        0xC000  /* visibility-related bits */
  
+/*
+ * HEAP_TUPLE_HAS_MATCH is a temporary flag used during hash joins.  It is
+ * only used in tuples that are in the hash table, and those don't need
+ * any visibility information, so we can overlay it on a visibility flag
+ * instead of using up a dedicated bit.
+ */
+#define HEAP_TUPLE_HAS_MATCH   HEAP_ONLY_TUPLE /* tuple has a join match */
+
  /*
   * HeapTupleHeader accessor macros
   *
@@ -343,6 +351,21 @@ do { \
    (tup)->t_infomask2 &= ~HEAP_ONLY_TUPLE \
  )
  
+#define HeapTupleHeaderHasMatch(tup) \
+( \
+  (tup)->t_infomask2 & HEAP_TUPLE_HAS_MATCH \
+)
+
+#define HeapTupleHeaderSetMatch(tup) \
+( \
+  (tup)->t_infomask2 |= HEAP_TUPLE_HAS_MATCH \
+)
+
+#define HeapTupleHeaderClearMatch(tup) \
+( \
+  (tup)->t_infomask2 &= ~HEAP_TUPLE_HAS_MATCH \
+)
+
  #define HeapTupleHeaderGetNatts(tup) \
         ((tup)->t_infomask2 & HEAP_NATTS_MASK)
  
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h

index 1b93f58af671e4e40e3991d78702328b5c627972..c4aa0d372682c93acc21a60aff3332a58976a28c 100644 (file)
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -113,6 +113,8 @@ typedef struct HashJoinTableData
         struct HashJoinTupleData **buckets;
         /* buckets array is per-batch storage, as are all the tuples */
  
+       bool            keepNulls;              /* true to store unmatchable NULL tuples */
+
         bool            skewEnabled;    /* are we using skew optimization? */
         HashSkewBucket **skewBucket;    /* hashtable of skew buckets */
         int                     skewBucketLen;  /* size of skewBucket array (a power of 2!) */
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h

index 444a0137a2d7dd6883a207a3cb0632098be9c3b0..8517d1cf2846da5a747e094c4e389440a8e64799 100644 (file)
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -22,7 +22,8 @@ extern Node *MultiExecHash(HashState *node);
  extern void ExecEndHash(HashState *node);
  extern void ExecReScanHash(HashState *node);
  
-extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators);
+extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators,
+                                                                                bool keepNulls);
  extern void ExecHashTableDestroy(HashJoinTable hashtable);
  extern void ExecHashTableInsert(HashJoinTable hashtable,
                                         TupleTableSlot *slot,
@@ -37,9 +38,12 @@ extern void ExecHashGetBucketAndBatch(HashJoinTable hashtable,
                                                   uint32 hashvalue,
                                                   int *bucketno,
                                                   int *batchno);
-extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate,
-                                  ExprContext *econtext);
+extern bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext);
+extern void ExecPrepHashTableForUnmatched(HashJoinState *hjstate);
+extern bool ExecScanHashTableForUnmatched(HashJoinState *hjstate,
+                                                                                 ExprContext *econtext);
  extern void ExecHashTableReset(HashJoinTable hashtable);
+extern void ExecHashTableResetMatchFlags(HashJoinTable hashtable);
  extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
                                                 int *numbuckets,
                                                 int *numbatches,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h

index d669c24b981f11b1ad480c7c56cb22f33e613fac..6af4bb8d76caec8dc701e0b09d3e8c54f1723b61 100644 (file)
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1468,6 +1468,10 @@ typedef struct MergeJoinState
  /* ----------------
   *      HashJoinState information
   *
+ *             hashclauses                             original form of the hashjoin condition
+ *             hj_OuterHashKeys                the outer hash keys in the hashjoin condition
+ *             hj_InnerHashKeys                the inner hash keys in the hashjoin condition
+ *             hj_HashOperators                the join operators in the hashjoin condition
   *             hj_HashTable                    hash table for the hashjoin
   *                                                             (NULL if table not built yet)
   *             hj_CurHashValue                 hash value for current outer tuple
@@ -1477,14 +1481,12 @@ typedef struct MergeJoinState
   *                                                             tuple, or NULL if starting search
   *                                                             (hj_CurXXX variables are undefined if
   *                                                             OuterTupleSlot is empty!)
- *             hj_OuterHashKeys                the outer hash keys in the hashjoin condition
- *             hj_InnerHashKeys                the inner hash keys in the hashjoin condition
- *             hj_HashOperators                the join operators in the hashjoin condition
   *             hj_OuterTupleSlot               tuple slot for outer tuples
- *             hj_HashTupleSlot                tuple slot for hashed tuples
- *             hj_NullInnerTupleSlot   prepared null tuple for left outer joins
+ *             hj_HashTupleSlot                tuple slot for inner (hashed) tuples
+ *             hj_NullOuterTupleSlot   prepared null tuple for right/full outer joins
+ *             hj_NullInnerTupleSlot   prepared null tuple for left/full outer joins
   *             hj_FirstOuterTupleSlot  first tuple retrieved from outer plan
- *             hj_NeedNewOuter                 true if need new outer tuple on next call
+ *             hj_JoinState                    current state of ExecHashJoin state machine
   *             hj_MatchedOuter                 true if found a join match for current outer
   *             hj_OuterNotEmpty                true if outer relation known not empty
   * ----------------
@@ -1498,19 +1500,20 @@ typedef struct HashJoinState
  {
         JoinState       js;                             /* its first field is NodeTag */
         List       *hashclauses;        /* list of ExprState nodes */
+       List       *hj_OuterHashKeys;           /* list of ExprState nodes */
+       List       *hj_InnerHashKeys;           /* list of ExprState nodes */
+       List       *hj_HashOperators;           /* list of operator OIDs */
         HashJoinTable hj_HashTable;
         uint32          hj_CurHashValue;
         int                     hj_CurBucketNo;
         int                     hj_CurSkewBucketNo;
         HashJoinTuple hj_CurTuple;
-       List       *hj_OuterHashKeys;           /* list of ExprState nodes */
-       List       *hj_InnerHashKeys;           /* list of ExprState nodes */
-       List       *hj_HashOperators;           /* list of operator OIDs */
         TupleTableSlot *hj_OuterTupleSlot;
         TupleTableSlot *hj_HashTupleSlot;
+       TupleTableSlot *hj_NullOuterTupleSlot;
         TupleTableSlot *hj_NullInnerTupleSlot;
         TupleTableSlot *hj_FirstOuterTupleSlot;
-       bool            hj_NeedNewOuter;
+       int                     hj_JoinState;
         bool            hj_MatchedOuter;
         bool            hj_OuterNotEmpty;
  } HashJoinState;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 31 Dec 2010 01:24:55 +0000 (20:24 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 31 Dec 2010 01:26:08 +0000 (20:26 -0500)
src/backend/executor/nodeHash.c		patch \| blob \| history
src/backend/executor/nodeHashjoin.c		patch \| blob \| history
src/backend/optimizer/path/equivclass.c		patch \| blob \| history
src/backend/optimizer/path/joinpath.c		patch \| blob \| history
src/backend/optimizer/path/joinrels.c		patch \| blob \| history
src/backend/optimizer/plan/initsplan.c		patch \| blob \| history
src/include/access/htup.h		patch \| blob \| history
src/include/executor/hashjoin.h		patch \| blob \| history
src/include/executor/nodeHash.h		patch \| blob \| history
src/include/nodes/execnodes.h		patch \| blob \| history