break;
/* We have to compute the hash value */
econtext->ecxt_innertuple = slot;
- if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
+ if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
+ false, hashtable->keepNulls,
&hashvalue))
{
int bucketNumber;
* ----------------------------------------------------------------
*/
HashJoinTable
-ExecHashTableCreate(Hash *node, List *hashOperators)
+ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
{
HashJoinTable hashtable;
Plan *outerNode;
hashtable->nbuckets = nbuckets;
hashtable->log2_nbuckets = log2_nbuckets;
hashtable->buckets = NULL;
+ hashtable->keepNulls = keepNulls;
hashtable->skewEnabled = false;
hashtable->skewBucket = NULL;
hashtable->skewBucketLen = 0;
HashJoinTuple hashTuple;
int hashTupleSize;
+ /* Create the HashJoinTuple */
hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
hashTupleSize);
hashTuple->hashvalue = hashvalue;
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+
+ /*
+ * We always reset the tuple-matched flag on insertion. This is okay
+ * even when reloading a tuple from a batch file, since the tuple
+ * could not possibly have been matched to an outer tuple before it
+ * went into the batch file.
+ */
+ HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
+
+ /* Push it onto the front of the bucket's list */
hashTuple->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = hashTuple;
+
+ /* Account for space used, and back off if we've used too much */
hashtable->spaceUsed += hashTupleSize;
if (hashtable->spaceUsed > hashtable->spacePeak)
hashtable->spacePeak = hashtable->spaceUsed;
* scan a hash bucket for matches to the current outer tuple
*
* The current outer tuple must be stored in econtext->ecxt_outertuple.
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
*/
-HashJoinTuple
+bool
ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext)
{
if (ExecQual(hjclauses, econtext, false))
{
hjstate->hj_CurTuple = hashTuple;
- return hashTuple;
+ return true;
}
}
/*
* no match
*/
- return NULL;
+ return false;
+}
+
+/*
+ * ExecPrepHashTableForUnmatched
+ * set up for a series of ExecScanHashTableForUnmatched calls
+ */
+void
+ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
+{
+ /*
+ *----------
+ * During this scan we use the HashJoinState fields as follows:
+ *
+ * hj_CurBucketNo: next regular bucket to scan
+ * hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums)
+ * hj_CurTuple: last tuple returned, or NULL to start next bucket
+ *----------
+ */
+ hjstate->hj_CurBucketNo = 0;
+ hjstate->hj_CurSkewBucketNo = 0;
+ hjstate->hj_CurTuple = NULL;
+}
+
+/*
+ * ExecScanHashTableForUnmatched
+ * scan the hash table for unmatched inner tuples
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
+ */
+bool
+ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
+{
+ HashJoinTable hashtable = hjstate->hj_HashTable;
+ HashJoinTuple hashTuple = hjstate->hj_CurTuple;
+
+ for (;;)
+ {
+ /*
+ * hj_CurTuple is the address of the tuple last returned from the
+ * current bucket, or NULL if it's time to start scanning a new
+ * bucket.
+ */
+ if (hashTuple != NULL)
+ hashTuple = hashTuple->next;
+ else if (hjstate->hj_CurBucketNo < hashtable->nbuckets)
+ {
+ hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
+ hjstate->hj_CurBucketNo++;
+ }
+ else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets)
+ {
+ int j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo];
+
+ hashTuple = hashtable->skewBucket[j]->tuples;
+ hjstate->hj_CurSkewBucketNo++;
+ }
+ else
+ break; /* finished all buckets */
+
+ while (hashTuple != NULL)
+ {
+ if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple)))
+ {
+ TupleTableSlot *inntuple;
+
+ /* insert hashtable's tuple into exec slot */
+ inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
+ hjstate->hj_HashTupleSlot,
+ false); /* do not pfree */
+ econtext->ecxt_innertuple = inntuple;
+
+ /*
+ * Reset temp memory each time; although this function doesn't
+ * do any qual eval, the caller will, so let's keep it
+ * parallel to ExecScanHashBucket.
+ */
+ ResetExprContext(econtext);
+
+ hjstate->hj_CurTuple = hashTuple;
+ return true;
+ }
+
+ hashTuple = hashTuple->next;
+ }
+ }
+
+ /*
+ * no more unmatched tuples
+ */
+ return false;
}
/*
MemoryContextSwitchTo(oldcxt);
}
+/*
+ * ExecHashTableResetMatchFlags
+ * Clear all the HeapTupleHeaderHasMatch flags in the table
+ */
+void
+ExecHashTableResetMatchFlags(HashJoinTable hashtable)
+{
+ HashJoinTuple tuple;
+ int i;
+
+ /* Reset all flags in the main table ... */
+ for (i = 0; i < hashtable->nbuckets; i++)
+ {
+ for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next)
+ HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+ }
+
+ /* ... and the same for the skew buckets, if any */
+ for (i = 0; i < hashtable->nSkewBuckets; i++)
+ {
+ int j = hashtable->skewBucketNums[i];
+ HashSkewBucket *skewBucket = hashtable->skewBucket[j];
+
+ for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next)
+ HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+ }
+}
+
+
void
ExecReScanHash(HashState *node)
{
hashTupleSize);
hashTuple->hashvalue = hashvalue;
memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+ HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
/* Push it onto the front of the skew bucket's list */
hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples;
#include "utils/memutils.h"
-/* Returns true for JOIN_LEFT and JOIN_ANTI jointypes */
-#define HASHJOIN_IS_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
+/*
+ * States of the ExecHashJoin state machine
+ */
+#define HJ_BUILD_HASHTABLE 1
+#define HJ_NEED_NEW_OUTER 2
+#define HJ_SCAN_BUCKET 3
+#define HJ_FILL_OUTER_TUPLE 4
+#define HJ_FILL_INNER_TUPLES 5
+#define HJ_NEED_NEW_BATCH 6
+
+/* Returns true if doing null-fill on outer relation */
+#define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
+/* Returns true if doing null-fill on inner relation */
+#define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
HashJoinState *hjstate,
BufFile *file,
uint32 *hashvalue,
TupleTableSlot *tupleSlot);
-static int ExecHashJoinNewBatch(HashJoinState *hjstate);
+static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
/* ----------------------------------------------------------------
HashState *hashNode;
List *joinqual;
List *otherqual;
- TupleTableSlot *inntuple;
ExprContext *econtext;
ExprDoneCond isDone;
HashJoinTable hashtable;
- HashJoinTuple curtuple;
TupleTableSlot *outerTupleSlot;
uint32 hashvalue;
int batchno;
otherqual = node->js.ps.qual;
hashNode = (HashState *) innerPlanState(node);
outerNode = outerPlanState(node);
-
- /*
- * get information from HashJoin state
- */
hashtable = node->hj_HashTable;
econtext = node->js.ps.ps_ExprContext;
ResetExprContext(econtext);
/*
- * if this is the first call, build the hash table for inner relation
+ * run the hash join state machine
*/
- if (hashtable == NULL)
+ for (;;)
{
- /*
- * If the outer relation is completely empty, we can quit without
- * building the hash table. However, for an inner join it is only a
- * win to check this when the outer relation's startup cost is less
- * than the projected cost of building the hash table. Otherwise it's
- * best to build the hash table first and see if the inner relation is
- * empty. (When it's an outer join, we should always make this check,
- * since we aren't going to be able to skip the join on the strength
- * of an empty inner relation anyway.)
- *
- * If we are rescanning the join, we make use of information gained on
- * the previous scan: don't bother to try the prefetch if the previous
- * scan found the outer relation nonempty. This is not 100% reliable
- * since with new parameters the outer relation might yield different
- * results, but it's a good heuristic.
- *
- * The only way to make the check is to try to fetch a tuple from the
- * outer plan node. If we succeed, we have to stash it away for later
- * consumption by ExecHashJoinOuterGetTuple.
- */
- if (HASHJOIN_IS_OUTER(node) ||
- (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
- !node->hj_OuterNotEmpty))
+ switch (node->hj_JoinState)
{
- node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
- if (TupIsNull(node->hj_FirstOuterTupleSlot))
- {
- node->hj_OuterNotEmpty = false;
- return NULL;
- }
- else
- node->hj_OuterNotEmpty = true;
- }
- else
- node->hj_FirstOuterTupleSlot = NULL;
+ case HJ_BUILD_HASHTABLE:
+ /*
+ * First time through: build hash table for inner relation.
+ */
+ Assert(hashtable == NULL);
- /*
- * create the hash table
- */
- hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
- node->hj_HashOperators);
- node->hj_HashTable = hashtable;
+ /*
+ * If the outer relation is completely empty, and it's not
+ * right/full join, we can quit without building the hash
+ * table. However, for an inner join it is only a win to
+ * check this when the outer relation's startup cost is less
+ * than the projected cost of building the hash
+ * table. Otherwise it's best to build the hash table first
+ * and see if the inner relation is empty. (When it's a left
+ * join, we should always make this check, since we aren't
+ * going to be able to skip the join on the strength of an
+ * empty inner relation anyway.)
+ *
+ * If we are rescanning the join, we make use of information
+ * gained on the previous scan: don't bother to try the
+ * prefetch if the previous scan found the outer relation
+ * nonempty. This is not 100% reliable since with new
+ * parameters the outer relation might yield different
+ * results, but it's a good heuristic.
+ *
+ * The only way to make the check is to try to fetch a tuple
+ * from the outer plan node. If we succeed, we have to stash
+ * it away for later consumption by ExecHashJoinOuterGetTuple.
+ */
+ if (HJ_FILL_INNER(node))
+ {
+ /* no chance to not build the hash table */
+ node->hj_FirstOuterTupleSlot = NULL;
+ }
+ else if (HJ_FILL_OUTER(node) ||
+ (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
+ !node->hj_OuterNotEmpty))
+ {
+ node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
+ if (TupIsNull(node->hj_FirstOuterTupleSlot))
+ {
+ node->hj_OuterNotEmpty = false;
+ return NULL;
+ }
+ else
+ node->hj_OuterNotEmpty = true;
+ }
+ else
+ node->hj_FirstOuterTupleSlot = NULL;
- /*
- * execute the Hash node, to build the hash table
- */
- hashNode->hashtable = hashtable;
- (void) MultiExecProcNode((PlanState *) hashNode);
+ /*
+ * create the hash table
+ */
+ hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
+ node->hj_HashOperators,
+ HJ_FILL_INNER(node));
+ node->hj_HashTable = hashtable;
- /*
- * If the inner relation is completely empty, and we're not doing an
- * outer join, we can quit without scanning the outer relation.
- */
- if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
- return NULL;
+ /*
+ * execute the Hash node, to build the hash table
+ */
+ hashNode->hashtable = hashtable;
+ (void) MultiExecProcNode((PlanState *) hashNode);
- /*
- * need to remember whether nbatch has increased since we began
- * scanning the outer relation
- */
- hashtable->nbatch_outstart = hashtable->nbatch;
+ /*
+ * If the inner relation is completely empty, and we're not
+ * doing a left outer join, we can quit without scanning the
+ * outer relation.
+ */
+ if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
+ return NULL;
- /*
- * Reset OuterNotEmpty for scan. (It's OK if we fetched a tuple
- * above, because ExecHashJoinOuterGetTuple will immediately set it
- * again.)
- */
- node->hj_OuterNotEmpty = false;
- }
+ /*
+ * need to remember whether nbatch has increased since we began
+ * scanning the outer relation
+ */
+ hashtable->nbatch_outstart = hashtable->nbatch;
- /*
- * run the hash join process
- */
- for (;;)
- {
- /*
- * If we don't have an outer tuple, get the next one
- */
- if (node->hj_NeedNewOuter)
- {
- outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
- node,
- &hashvalue);
- if (TupIsNull(outerTupleSlot))
- {
- /* end of join */
- return NULL;
- }
+ /*
+ * Reset OuterNotEmpty for scan. (It's OK if we fetched a
+ * tuple above, because ExecHashJoinOuterGetTuple will
+ * immediately set it again.)
+ */
+ node->hj_OuterNotEmpty = false;
- econtext->ecxt_outertuple = outerTupleSlot;
- node->hj_NeedNewOuter = false;
- node->hj_MatchedOuter = false;
+ node->hj_JoinState = HJ_NEED_NEW_OUTER;
- /*
- * Now we have an outer tuple; find the corresponding bucket for
- * this tuple in the main hash table or skew hash table.
- */
- node->hj_CurHashValue = hashvalue;
- ExecHashGetBucketAndBatch(hashtable, hashvalue,
- &node->hj_CurBucketNo, &batchno);
- node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
- hashvalue);
- node->hj_CurTuple = NULL;
+ /* FALL THRU */
- /*
- * Now we've got an outer tuple and the corresponding hash bucket,
- * but it might not belong to the current batch, or it might match
- * a skew bucket.
- */
- if (batchno != hashtable->curbatch &&
- node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
- {
+ case HJ_NEED_NEW_OUTER:
/*
- * Need to postpone this outer tuple to a later batch. Save it
- * in the corresponding outer-batch file.
+ * We don't have an outer tuple, try to get the next one
*/
- Assert(batchno > hashtable->curbatch);
- ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
- hashvalue,
- &hashtable->outerBatchFile[batchno]);
- node->hj_NeedNewOuter = true;
- continue; /* loop around for a new outer tuple */
- }
- }
+ outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
+ node,
+ &hashvalue);
+ if (TupIsNull(outerTupleSlot))
+ {
+ /* end of batch, or maybe whole join */
+ if (HJ_FILL_INNER(node))
+ {
+ /* set up to scan for unmatched inner tuples */
+ ExecPrepHashTableForUnmatched(node);
+ node->hj_JoinState = HJ_FILL_INNER_TUPLES;
+ }
+ else
+ node->hj_JoinState = HJ_NEED_NEW_BATCH;
+ continue;
+ }
- /*
- * OK, scan the selected hash bucket for matches
- */
- for (;;)
- {
- curtuple = ExecScanHashBucket(node, econtext);
- if (curtuple == NULL)
- break; /* out of matches */
+ econtext->ecxt_outertuple = outerTupleSlot;
+ node->hj_MatchedOuter = false;
- /*
- * we've got a match, but still need to test non-hashed quals
- */
- inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
- node->hj_HashTupleSlot,
- false); /* don't pfree */
- econtext->ecxt_innertuple = inntuple;
+ /*
+ * Find the corresponding bucket for this tuple in the main
+ * hash table or skew hash table.
+ */
+ node->hj_CurHashValue = hashvalue;
+ ExecHashGetBucketAndBatch(hashtable, hashvalue,
+ &node->hj_CurBucketNo, &batchno);
+ node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
+ hashvalue);
+ node->hj_CurTuple = NULL;
- /* reset temp memory each time to avoid leaks from qual expr */
- ResetExprContext(econtext);
+ /*
+ * The tuple might not belong to the current batch (where
+ * "current batch" includes the skew buckets if any).
+ */
+ if (batchno != hashtable->curbatch &&
+ node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
+ {
+ /*
+ * Need to postpone this outer tuple to a later batch.
+ * Save it in the corresponding outer-batch file.
+ */
+ Assert(batchno > hashtable->curbatch);
+ ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
+ hashvalue,
+ &hashtable->outerBatchFile[batchno]);
+ /* Loop around, staying in HJ_NEED_NEW_OUTER state */
+ continue;
+ }
- /*
- * if we pass the qual, then save state for next call and have
- * ExecProject form the projection, store it in the tuple table,
- * and return the slot.
- *
- * Only the joinquals determine MatchedOuter status, but all quals
- * must pass to actually return the tuple.
- */
- if (joinqual == NIL || ExecQual(joinqual, econtext, false))
- {
- node->hj_MatchedOuter = true;
+ /* OK, let's scan the bucket for matches */
+ node->hj_JoinState = HJ_SCAN_BUCKET;
+
+ /* FALL THRU */
- /* In an antijoin, we never return a matched tuple */
- if (node->js.jointype == JOIN_ANTI)
+ case HJ_SCAN_BUCKET:
+ /*
+ * Scan the selected hash bucket for matches to current outer
+ */
+ if (!ExecScanHashBucket(node, econtext))
{
- node->hj_NeedNewOuter = true;
- break; /* out of loop over hash bucket */
+ /* out of matches; check for possible outer-join fill */
+ node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
+ continue;
}
/*
- * In a semijoin, we'll consider returning the first match,
- * but after that we're done with this outer tuple.
+ * We've got a match, but still need to test non-hashed quals.
+ * ExecScanHashBucket already set up all the state needed to
+ * call ExecQual.
+ *
+ * If we pass the qual, then save state for next call and have
+ * ExecProject form the projection, store it in the tuple
+ * table, and return the slot.
+ *
+ * Only the joinquals determine tuple match status, but all
+ * quals must pass to actually return the tuple.
*/
- if (node->js.jointype == JOIN_SEMI)
- node->hj_NeedNewOuter = true;
-
- if (otherqual == NIL || ExecQual(otherqual, econtext, false))
+ if (joinqual == NIL || ExecQual(joinqual, econtext, false))
{
- TupleTableSlot *result;
+ node->hj_MatchedOuter = true;
+ HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
- result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+ /* In an antijoin, we never return a matched tuple */
+ if (node->js.jointype == JOIN_ANTI)
+ {
+ node->hj_JoinState = HJ_NEED_NEW_OUTER;
+ continue;
+ }
- if (isDone != ExprEndResult)
+ /*
+ * In a semijoin, we'll consider returning the first match,
+ * but after that we're done with this outer tuple.
+ */
+ if (node->js.jointype == JOIN_SEMI)
+ node->hj_JoinState = HJ_NEED_NEW_OUTER;
+
+ if (otherqual == NIL ||
+ ExecQual(otherqual, econtext, false))
{
- node->js.ps.ps_TupFromTlist =
- (isDone == ExprMultipleResult);
- return result;
+ TupleTableSlot *result;
+
+ result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+ if (isDone != ExprEndResult)
+ {
+ node->js.ps.ps_TupFromTlist =
+ (isDone == ExprMultipleResult);
+ return result;
+ }
}
}
+ break;
+ case HJ_FILL_OUTER_TUPLE:
/*
- * If semijoin and we didn't return the tuple, we're still
- * done with this outer tuple.
+ * The current outer tuple has run out of matches, so check
+ * whether to emit a dummy outer-join tuple. Whether we
+ * emit one or not, the next state is NEED_NEW_OUTER.
*/
- if (node->js.jointype == JOIN_SEMI)
- break; /* out of loop over hash bucket */
- }
- }
+ node->hj_JoinState = HJ_NEED_NEW_OUTER;
- /*
- * Now the current outer tuple has run out of matches, so check
- * whether to emit a dummy outer-join tuple. If not, loop around to
- * get a new outer tuple.
- */
- node->hj_NeedNewOuter = true;
+ if (!node->hj_MatchedOuter &&
+ HJ_FILL_OUTER(node))
+ {
+ /*
+ * Generate a fake join tuple with nulls for the inner
+ * tuple, and return it if it passes the non-join quals.
+ */
+ econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+
+ if (otherqual == NIL ||
+ ExecQual(otherqual, econtext, false))
+ {
+ TupleTableSlot *result;
- if (!node->hj_MatchedOuter &&
- HASHJOIN_IS_OUTER(node))
- {
- /*
- * We are doing an outer join and there were no join matches for
- * this outer tuple. Generate a fake join tuple with nulls for
- * the inner tuple, and return it if it passes the non-join quals.
- */
- econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
+ result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
- if (otherqual == NIL || ExecQual(otherqual, econtext, false))
- {
+ if (isDone != ExprEndResult)
+ {
+ node->js.ps.ps_TupFromTlist =
+ (isDone == ExprMultipleResult);
+ return result;
+ }
+ }
+ }
+ break;
+
+ case HJ_FILL_INNER_TUPLES:
/*
- * qualification was satisfied so we project and return the
- * slot containing the result tuple using ExecProject().
+ * We have finished a batch, but we are doing right/full join,
+ * so any unmatched inner tuples in the hashtable have to be
+ * emitted before we continue to the next batch.
*/
- TupleTableSlot *result;
+ if (!ExecScanHashTableForUnmatched(node, econtext))
+ {
+ /* no more unmatched tuples */
+ node->hj_JoinState = HJ_NEED_NEW_BATCH;
+ continue;
+ }
- result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+ /*
+ * Generate a fake join tuple with nulls for the outer tuple,
+ * and return it if it passes the non-join quals.
+ */
+ econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
- if (isDone != ExprEndResult)
+ if (otherqual == NIL ||
+ ExecQual(otherqual, econtext, false))
{
- node->js.ps.ps_TupFromTlist =
- (isDone == ExprMultipleResult);
- return result;
+ TupleTableSlot *result;
+
+ result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
+
+ if (isDone != ExprEndResult)
+ {
+ node->js.ps.ps_TupFromTlist =
+ (isDone == ExprMultipleResult);
+ return result;
+ }
}
- }
+ break;
+
+ case HJ_NEED_NEW_BATCH:
+ /*
+ * Try to advance to next batch. Done if there are no more.
+ */
+ if (!ExecHashJoinNewBatch(node))
+ return NULL; /* end of join */
+ node->hj_JoinState = HJ_NEED_NEW_OUTER;
+ break;
+
+ default:
+ elog(ERROR, "unrecognized hashjoin state: %d",
+ (int) node->hj_JoinState);
}
}
}
ExecInitResultTupleSlot(estate, &hjstate->js.ps);
hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
- /* note: HASHJOIN_IS_OUTER macro depends on this initialization */
+ /* set up null tuples for outer joins, if needed */
switch (node->join.jointype)
{
case JOIN_INNER:
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(hjstate)));
break;
+ case JOIN_RIGHT:
+ hjstate->hj_NullOuterTupleSlot =
+ ExecInitNullTupleSlot(estate,
+ ExecGetResultType(outerPlanState(hjstate)));
+ break;
+ case JOIN_FULL:
+ hjstate->hj_NullOuterTupleSlot =
+ ExecInitNullTupleSlot(estate,
+ ExecGetResultType(outerPlanState(hjstate)));
+ hjstate->hj_NullInnerTupleSlot =
+ ExecInitNullTupleSlot(estate,
+ ExecGetResultType(innerPlanState(hjstate)));
+ break;
default:
elog(ERROR, "unrecognized join type: %d",
(int) node->join.jointype);
/*
* now for some voodoo. our temporary tuple slot is actually the result
- * tuple slot of the Hash node (which is our inner plan). we do this
+ * tuple slot of the Hash node (which is our inner plan). we can do this
* because Hash nodes don't return tuples via ExecProcNode() -- instead
* the hash join node uses ExecScanHashBucket() to get at the contents of
* the hash table. -cim 6/9/91
((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
hjstate->js.ps.ps_TupFromTlist = false;
- hjstate->hj_NeedNewOuter = true;
+ hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
hjstate->hj_MatchedOuter = false;
hjstate->hj_OuterNotEmpty = false;
* ExecHashJoinOuterGetTuple
*
* get the next outer tuple for hashjoin: either by
- * executing a plan node in the first pass, or from
+ * executing the outer plan node in the first pass, or from
* the temp files for the hashjoin batches.
*
- * Returns a null slot if no more outer tuples. On success, the tuple's
- * hash value is stored at *hashvalue --- this is either originally computed,
- * or re-read from the temp file.
+ * Returns a null slot if no more outer tuples (within the current batch).
+ *
+ * On success, the tuple's hash value is stored at *hashvalue --- this is
+ * either originally computed, or re-read from the temp file.
*/
static TupleTableSlot *
ExecHashJoinOuterGetTuple(PlanState *outerNode,
if (ExecHashGetHashValue(hashtable, econtext,
hjstate->hj_OuterHashKeys,
true, /* outer tuple */
- HASHJOIN_IS_OUTER(hjstate),
+ HJ_FILL_OUTER(hjstate),
hashvalue))
{
/* remember outer relation is not empty for possible rescan */
*/
slot = ExecProcNode(outerNode);
}
+ }
+ else if (curbatch < hashtable->nbatch)
+ {
+ BufFile *file = hashtable->outerBatchFile[curbatch];
/*
- * We have just reached the end of the first pass. Try to switch to a
- * saved batch.
+ * In outer-join cases, we could get here even though the batch file
+ * is empty.
*/
- curbatch = ExecHashJoinNewBatch(hjstate);
- }
+ if (file == NULL)
+ return NULL;
- /*
- * Try to read from a temp file. Loop allows us to advance to new batches
- * as needed. NOTE: nbatch could increase inside ExecHashJoinNewBatch, so
- * don't try to optimize this loop.
- */
- while (curbatch < hashtable->nbatch)
- {
slot = ExecHashJoinGetSavedTuple(hjstate,
- hashtable->outerBatchFile[curbatch],
+ file,
hashvalue,
hjstate->hj_OuterTupleSlot);
if (!TupIsNull(slot))
return slot;
- curbatch = ExecHashJoinNewBatch(hjstate);
}
- /* Out of batches... */
+ /* End of this batch */
return NULL;
}
* ExecHashJoinNewBatch
* switch to a new hashjoin batch
*
- * Returns the number of the new batch (1..nbatch-1), or nbatch if no more.
- * We will never return a batch number that has an empty outer batch file.
+ * Returns true if successful, false if there are no more batches.
*/
-static int
+static bool
ExecHashJoinNewBatch(HashJoinState *hjstate)
{
HashJoinTable hashtable = hjstate->hj_HashTable;
TupleTableSlot *slot;
uint32 hashvalue;
-start_over:
nbatch = hashtable->nbatch;
curbatch = hashtable->curbatch;
hashtable->skewEnabled = false;
hashtable->skewBucket = NULL;
hashtable->skewBucketNums = NULL;
+ hashtable->nSkewBuckets = 0;
hashtable->spaceUsedSkew = 0;
}
* sides. We can sometimes skip over batches that are empty on only one
* side, but there are exceptions:
*
- * 1. In an outer join, we have to process outer batches even if the inner
- * batch is empty.
+ * 1. In a left/full outer join, we have to process outer batches even if
+ * the inner batch is empty. Similarly, in a right/full outer join, we
+ * have to process inner batches even if the outer batch is empty.
*
* 2. If we have increased nbatch since the initial estimate, we have to
* scan inner batches since they might contain tuples that need to be
hashtable->innerBatchFile[curbatch] == NULL))
{
if (hashtable->outerBatchFile[curbatch] &&
- HASHJOIN_IS_OUTER(hjstate))
+ HJ_FILL_OUTER(hjstate))
+ break; /* must process due to rule 1 */
+ if (hashtable->innerBatchFile[curbatch] &&
+ HJ_FILL_INNER(hjstate))
break; /* must process due to rule 1 */
if (hashtable->innerBatchFile[curbatch] &&
nbatch != hashtable->nbatch_original)
}
if (curbatch >= nbatch)
- return curbatch; /* no more batches */
+ return false; /* no more batches */
hashtable->curbatch = curbatch;
}
/*
- * If there's no outer batch file, advance to next batch.
+ * Rewind outer batch file (if present), so that we can start reading it.
*/
- if (hashtable->outerBatchFile[curbatch] == NULL)
- goto start_over;
-
- /*
- * Rewind outer batch file, so that we can start reading it.
- */
- if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not rewind hash-join temporary file: %m")));
+ if (hashtable->outerBatchFile[curbatch] != NULL)
+ {
+ if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not rewind hash-join temporary file: %m")));
+ }
- return curbatch;
+ return true;
}
/*
node->js.ps.righttree->chgParam == NULL)
{
/*
- * okay to reuse the hash table; needn't rescan inner, either.
+ * Okay to reuse the hash table; needn't rescan inner, either.
*
- * What we do need to do is reset our state about the emptiness of
+ * However, if it's a right/full join, we'd better reset the
+ * inner-tuple match flags contained in the table.
+ */
+ if (HJ_FILL_INNER(node))
+ ExecHashTableResetMatchFlags(node->hj_HashTable);
+
+ /*
+ * Also, we need to reset our state about the emptiness of
* the outer relation, so that the new scan of the outer will
* update it correctly if it turns out to be empty this time.
* (There's no harm in clearing it now because ExecHashJoin won't
* through.)
*/
node->hj_OuterNotEmpty = false;
+
+ /* ExecHashJoin can skip the BUILD_HASHTABLE step */
+ node->hj_JoinState = HJ_NEED_NEW_OUTER;
}
else
{
/* must destroy and rebuild hash table */
ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL;
+ node->hj_JoinState = HJ_BUILD_HASHTABLE;
/*
* if chgParam of subnode is not null then plan will be re-scanned
node->hj_CurTuple = NULL;
node->js.ps.ps_TupFromTlist = false;
- node->hj_NeedNewOuter = true;
node->hj_MatchedOuter = false;
node->hj_FirstOuterTupleSlot = NULL;
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
- JoinType jointype);
+ JoinType jointype,
+ bool *have_nonmergeable_clause);
/*
List *restrictlist)
{
List *mergeclause_list = NIL;
+ bool have_nonmergeable_clause = false;
/*
* Find potential mergejoin clauses. We can skip this if we are not
- * interested in doing a mergejoin. However, mergejoin is currently our
- * only way of implementing full outer joins, so override mergejoin
- * disable if it's a full join.
+ * interested in doing a mergejoin. However, mergejoin may be our only
+ * way of implementing a full outer join, so override enable_mergejoin if
+ * it's a full join.
*/
if (enable_mergejoin || jointype == JOIN_FULL)
mergeclause_list = select_mergejoin_clauses(root,
outerrel,
innerrel,
restrictlist,
- jointype);
+ jointype,
+ &have_nonmergeable_clause);
/*
* 1. Consider mergejoin paths where both relations must be explicitly
- * sorted.
+ * sorted. Skip this if we can't mergejoin.
*/
- sort_inner_and_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ if (!have_nonmergeable_clause)
+ sort_inner_and_outer(root, joinrel, outerrel, innerrel,
+ restrictlist, mergeclause_list, jointype, sjinfo);
/*
* 2. Consider paths where the outer relation need not be explicitly
* sorted. This includes both nestloops and mergejoins where the outer
- * path is already ordered.
+ * path is already ordered. Again, skip this if we can't mergejoin.
+ * (That's okay because we know that nestloop can't handle right/full
+ * joins at all, so it wouldn't work in those cases either.)
*/
- match_unsorted_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ if (!have_nonmergeable_clause)
+ match_unsorted_outer(root, joinrel, outerrel, innerrel,
+ restrictlist, mergeclause_list, jointype, sjinfo);
#ifdef NOT_USED
* those made by match_unsorted_outer when add_paths_to_joinrel() is
* invoked with the two rels given in the other order.
*/
- match_unsorted_inner(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ if (!have_nonmergeable_clause)
+ match_unsorted_inner(root, joinrel, outerrel, innerrel,
+ restrictlist, mergeclause_list, jointype, sjinfo);
#endif
/*
* 4. Consider paths where both outer and inner relations must be hashed
- * before being joined.
+ * before being joined. As above, disregard enable_hashjoin for full
+ * joins, because there may be no other alternative.
*/
- if (enable_hashjoin)
+ if (enable_hashjoin || jointype == JOIN_FULL)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
restrictlist, jointype, sjinfo);
}
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
- bool useallclauses;
Path *outer_path;
Path *inner_path;
List *all_pathkeys;
ListCell *l;
- /*
- * If we are doing a right or full join, we must use *all* the
- * mergeclauses as join clauses, else we will not have a valid plan.
- */
- switch (jointype)
- {
- case JOIN_INNER:
- case JOIN_LEFT:
- case JOIN_SEMI:
- case JOIN_ANTI:
- case JOIN_UNIQUE_OUTER:
- case JOIN_UNIQUE_INNER:
- useallclauses = false;
- break;
- case JOIN_RIGHT:
- case JOIN_FULL:
- useallclauses = true;
- break;
- default:
- elog(ERROR, "unrecognized join type: %d",
- (int) jointype);
- useallclauses = false; /* keep compiler quiet */
- break;
- }
-
/*
* We only consider the cheapest-total-cost input paths, since we are
* assuming here that a sort is required. We will consider
/*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
- * are doing a right or full join, we must use *all* the mergeclauses as
- * join clauses, else we will not have a valid plan. (Although these two
- * flags are currently inverses, keep them separate for clarity and
+ * are doing a right or full mergejoin, we must use *all* the mergeclauses
+ * as join clauses, else we will not have a valid plan. (Although these
+ * two flags are currently inverses, keep them separate for clarity and
* possible future changes.)
*/
switch (jointype)
* Special corner case: for "x FULL JOIN y ON true", there will be no
* join clauses at all. Ordinarily we'd generate a clauseless
* nestloop path, but since mergejoin is our only join type that
- * supports FULL JOIN, it's necessary to generate a clauseless
- * mergejoin path instead.
+ * supports FULL JOIN without any join clauses, it's necessary to
+ * generate a clauseless mergejoin path instead.
*/
if (mergeclauses == NIL)
{
JoinType jointype,
SpecialJoinInfo *sjinfo)
{
- bool isouterjoin;
+ bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
ListCell *l;
- /*
- * Hashjoin only supports inner, left, semi, and anti joins.
- */
- switch (jointype)
- {
- case JOIN_INNER:
- case JOIN_SEMI:
- case JOIN_UNIQUE_OUTER:
- case JOIN_UNIQUE_INNER:
- isouterjoin = false;
- break;
- case JOIN_LEFT:
- case JOIN_ANTI:
- isouterjoin = true;
- break;
- default:
- return;
- }
-
/*
* We need to build only one hashpath for any given pair of outer and
* inner relations; all of the hashable clauses will be used as keys.
* Select mergejoin clauses that are usable for a particular join.
* Returns a list of RestrictInfo nodes for those clauses.
*
+ * *have_nonmergeable_clause is set TRUE if this is a right/full join and
+ * there are nonmergejoinable join clauses. The executor's mergejoin
+ * machinery cannot handle such cases, so we have to avoid generating a
+ * mergejoin plan.
+ *
* We also mark each selected RestrictInfo to show which side is currently
* being considered as outer. These are transient markings that are only
* good for the duration of the current add_paths_to_joinrel() call!
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
- JoinType jointype)
+ JoinType jointype,
+ bool *have_nonmergeable_clause)
{
List *result_list = NIL;
bool isouterjoin = IS_OUTER_JOIN(jointype);
- bool have_nonmergeable_joinclause = false;
ListCell *l;
+ *have_nonmergeable_clause = false;
+
foreach(l, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* If processing an outer join, only use its own join clauses in the
* merge. For inner joins we can use pushed-down clauses too. (Note:
- * we don't set have_nonmergeable_joinclause here because pushed-down
+ * we don't set have_nonmergeable_clause here because pushed-down
* clauses will become otherquals not joinquals.)
*/
if (isouterjoin && restrictinfo->is_pushed_down)
* FALSE.)
*/
if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
- have_nonmergeable_joinclause = true;
+ *have_nonmergeable_clause = true;
continue; /* not mergejoinable */
}
*/
if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
{
- have_nonmergeable_joinclause = true;
+ *have_nonmergeable_clause = true;
continue; /* no good for these input relations */
}
if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
{
- have_nonmergeable_joinclause = true;
+ *have_nonmergeable_clause = true;
continue; /* can't handle redundant eclasses */
}
}
/*
- * If it is a right/full join then *all* the explicit join clauses must be
- * mergejoinable, else the executor will fail. If we are asked for a right
- * join then just return NIL to indicate no mergejoin is possible (we can
- * handle it as a left join instead). If we are asked for a full join then
- * emit an error, because there is no fallback.
+ * If it is not a right/full join then we don't need to insist on all the
+ * joinclauses being mergejoinable, so reset the flag. This simplifies
+ * the logic in add_paths_to_joinrel.
*/
- if (have_nonmergeable_joinclause)
+ switch (jointype)
{
- switch (jointype)
- {
- case JOIN_RIGHT:
- return NIL; /* not mergejoinable */
- case JOIN_FULL:
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
- break;
- default:
- /* otherwise, it's OK to have nonmergeable join quals */
- break;
- }
+ case JOIN_RIGHT:
+ case JOIN_FULL:
+ break;
+ default:
+ /* otherwise, it's OK to have nonmergeable join quals */
+ *have_nonmergeable_clause = false;
+ break;
}
return result_list;