1 /*-------------------------------------------------------------------------
4 * Routines to handle hash join nodes
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/executor/nodeHashjoin.c
13 *-------------------------------------------------------------------------
18 #include "access/htup_details.h"
19 #include "executor/executor.h"
20 #include "executor/hashjoin.h"
21 #include "executor/nodeHash.h"
22 #include "executor/nodeHashjoin.h"
23 #include "miscadmin.h"
24 #include "utils/memutils.h"
28 * States of the ExecHashJoin state machine
30 #define HJ_BUILD_HASHTABLE 1
31 #define HJ_NEED_NEW_OUTER 2
32 #define HJ_SCAN_BUCKET 3
33 #define HJ_FILL_OUTER_TUPLE 4
34 #define HJ_FILL_INNER_TUPLES 5
35 #define HJ_NEED_NEW_BATCH 6
37 /* Returns true if doing null-fill on outer relation */
38 #define HJ_FILL_OUTER(hjstate) ((hjstate)->hj_NullInnerTupleSlot != NULL)
39 /* Returns true if doing null-fill on inner relation */
40 #define HJ_FILL_INNER(hjstate) ((hjstate)->hj_NullOuterTupleSlot != NULL)
42 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
43 HashJoinState *hjstate,
45 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
48 TupleTableSlot *tupleSlot);
49 static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
52 /* ----------------------------------------------------------------
55 * This function implements the Hybrid Hashjoin algorithm.
57 * Note: the relation we build hash table on is the "inner"
58 * the other one is "outer".
59 * ----------------------------------------------------------------
61 TupleTableSlot * /* return: a tuple or NULL */
62 ExecHashJoin(HashJoinState *node)
68 ExprContext *econtext;
69 HashJoinTable hashtable;
70 TupleTableSlot *outerTupleSlot;
75 * get information from HashJoin node
77 joinqual = node->js.joinqual;
78 otherqual = node->js.ps.qual;
79 hashNode = (HashState *) innerPlanState(node);
80 outerNode = outerPlanState(node);
81 hashtable = node->hj_HashTable;
82 econtext = node->js.ps.ps_ExprContext;
85 * Reset per-tuple memory context to free any expression evaluation
86 * storage allocated in the previous tuple cycle.
88 ResetExprContext(econtext);
91 * run the hash join state machine
95 switch (node->hj_JoinState)
97 case HJ_BUILD_HASHTABLE:
100 * First time through: build hash table for inner relation.
102 Assert(hashtable == NULL);
105 * If the outer relation is completely empty, and it's not
106 * right/full join, we can quit without building the hash
107 * table. However, for an inner join it is only a win to
108 * check this when the outer relation's startup cost is less
109 * than the projected cost of building the hash table.
110 * Otherwise it's best to build the hash table first and see
111 * if the inner relation is empty. (When it's a left join, we
112 * should always make this check, since we aren't going to be
113 * able to skip the join on the strength of an empty inner
116 * If we are rescanning the join, we make use of information
117 * gained on the previous scan: don't bother to try the
118 * prefetch if the previous scan found the outer relation
119 * nonempty. This is not 100% reliable since with new
120 * parameters the outer relation might yield different
121 * results, but it's a good heuristic.
123 * The only way to make the check is to try to fetch a tuple
124 * from the outer plan node. If we succeed, we have to stash
125 * it away for later consumption by ExecHashJoinOuterGetTuple.
127 if (HJ_FILL_INNER(node))
129 /* no chance to not build the hash table */
130 node->hj_FirstOuterTupleSlot = NULL;
132 else if (HJ_FILL_OUTER(node) ||
133 (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
134 !node->hj_OuterNotEmpty))
136 node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
137 if (TupIsNull(node->hj_FirstOuterTupleSlot))
139 node->hj_OuterNotEmpty = false;
143 node->hj_OuterNotEmpty = true;
146 node->hj_FirstOuterTupleSlot = NULL;
149 * create the hash table
151 hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
152 node->hj_HashOperators,
153 HJ_FILL_INNER(node));
154 node->hj_HashTable = hashtable;
157 * execute the Hash node, to build the hash table
159 hashNode->hashtable = hashtable;
160 (void) MultiExecProcNode((PlanState *) hashNode);
163 * If the inner relation is completely empty, and we're not
164 * doing a left outer join, we can quit without scanning the
167 if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
171 * need to remember whether nbatch has increased since we
172 * began scanning the outer relation
174 hashtable->nbatch_outstart = hashtable->nbatch;
177 * Reset OuterNotEmpty for scan. (It's OK if we fetched a
178 * tuple above, because ExecHashJoinOuterGetTuple will
179 * immediately set it again.)
181 node->hj_OuterNotEmpty = false;
183 node->hj_JoinState = HJ_NEED_NEW_OUTER;
187 case HJ_NEED_NEW_OUTER:
190 * We don't have an outer tuple, try to get the next one
192 outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
195 if (TupIsNull(outerTupleSlot))
197 /* end of batch, or maybe whole join */
198 if (HJ_FILL_INNER(node))
200 /* set up to scan for unmatched inner tuples */
201 ExecPrepHashTableForUnmatched(node);
202 node->hj_JoinState = HJ_FILL_INNER_TUPLES;
205 node->hj_JoinState = HJ_NEED_NEW_BATCH;
209 econtext->ecxt_outertuple = outerTupleSlot;
210 node->hj_MatchedOuter = false;
213 * Find the corresponding bucket for this tuple in the main
214 * hash table or skew hash table.
216 node->hj_CurHashValue = hashvalue;
217 ExecHashGetBucketAndBatch(hashtable, hashvalue,
218 &node->hj_CurBucketNo, &batchno);
219 node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
221 node->hj_CurTuple = NULL;
224 * The tuple might not belong to the current batch (where
225 * "current batch" includes the skew buckets if any).
227 if (batchno != hashtable->curbatch &&
228 node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
231 * Need to postpone this outer tuple to a later batch.
232 * Save it in the corresponding outer-batch file.
234 Assert(batchno > hashtable->curbatch);
235 ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
237 &hashtable->outerBatchFile[batchno]);
238 /* Loop around, staying in HJ_NEED_NEW_OUTER state */
242 /* OK, let's scan the bucket for matches */
243 node->hj_JoinState = HJ_SCAN_BUCKET;
250 * We check for interrupts here because this corresponds to
251 * where we'd fetch a row from a child plan node in other join
254 CHECK_FOR_INTERRUPTS();
257 * Scan the selected hash bucket for matches to current outer
259 if (!ExecScanHashBucket(node, econtext))
261 /* out of matches; check for possible outer-join fill */
262 node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
267 * We've got a match, but still need to test non-hashed quals.
268 * ExecScanHashBucket already set up all the state needed to
271 * If we pass the qual, then save state for next call and have
272 * ExecProject form the projection, store it in the tuple
273 * table, and return the slot.
275 * Only the joinquals determine tuple match status, but all
276 * quals must pass to actually return the tuple.
278 if (joinqual == NIL || ExecQual(joinqual, econtext, false))
280 node->hj_MatchedOuter = true;
281 HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
283 /* In an antijoin, we never return a matched tuple */
284 if (node->js.jointype == JOIN_ANTI)
286 node->hj_JoinState = HJ_NEED_NEW_OUTER;
291 * In a semijoin, we'll consider returning the first
292 * match, but after that we're done with this outer tuple.
294 if (node->js.jointype == JOIN_SEMI)
295 node->hj_JoinState = HJ_NEED_NEW_OUTER;
297 if (otherqual == NIL ||
298 ExecQual(otherqual, econtext, false))
299 return ExecProject(node->js.ps.ps_ProjInfo);
301 InstrCountFiltered2(node, 1);
304 InstrCountFiltered1(node, 1);
307 case HJ_FILL_OUTER_TUPLE:
310 * The current outer tuple has run out of matches, so check
311 * whether to emit a dummy outer-join tuple. Whether we emit
312 * one or not, the next state is NEED_NEW_OUTER.
314 node->hj_JoinState = HJ_NEED_NEW_OUTER;
316 if (!node->hj_MatchedOuter &&
320 * Generate a fake join tuple with nulls for the inner
321 * tuple, and return it if it passes the non-join quals.
323 econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
325 if (otherqual == NIL ||
326 ExecQual(otherqual, econtext, false))
327 return ExecProject(node->js.ps.ps_ProjInfo);
329 InstrCountFiltered2(node, 1);
333 case HJ_FILL_INNER_TUPLES:
336 * We have finished a batch, but we are doing right/full join,
337 * so any unmatched inner tuples in the hashtable have to be
338 * emitted before we continue to the next batch.
340 if (!ExecScanHashTableForUnmatched(node, econtext))
342 /* no more unmatched tuples */
343 node->hj_JoinState = HJ_NEED_NEW_BATCH;
348 * Generate a fake join tuple with nulls for the outer tuple,
349 * and return it if it passes the non-join quals.
351 econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
353 if (otherqual == NIL ||
354 ExecQual(otherqual, econtext, false))
355 return ExecProject(node->js.ps.ps_ProjInfo);
357 InstrCountFiltered2(node, 1);
360 case HJ_NEED_NEW_BATCH:
363 * Try to advance to next batch. Done if there are no more.
365 if (!ExecHashJoinNewBatch(node))
366 return NULL; /* end of join */
367 node->hj_JoinState = HJ_NEED_NEW_OUTER;
371 elog(ERROR, "unrecognized hashjoin state: %d",
372 (int) node->hj_JoinState);
377 /* ----------------------------------------------------------------
380 * Init routine for HashJoin node.
381 * ----------------------------------------------------------------
384 ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
386 HashJoinState *hjstate;
394 /* check for unsupported flags */
395 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
398 * create state structure
400 hjstate = makeNode(HashJoinState);
401 hjstate->js.ps.plan = (Plan *) node;
402 hjstate->js.ps.state = estate;
405 * Miscellaneous initialization
407 * create expression context for node
409 ExecAssignExprContext(estate, &hjstate->js.ps);
412 * initialize child expressions
414 hjstate->js.ps.targetlist = (List *)
415 ExecInitExpr((Expr *) node->join.plan.targetlist,
416 (PlanState *) hjstate);
417 hjstate->js.ps.qual = (List *)
418 ExecInitExpr((Expr *) node->join.plan.qual,
419 (PlanState *) hjstate);
420 hjstate->js.jointype = node->join.jointype;
421 hjstate->js.joinqual = (List *)
422 ExecInitExpr((Expr *) node->join.joinqual,
423 (PlanState *) hjstate);
424 hjstate->hashclauses = (List *)
425 ExecInitExpr((Expr *) node->hashclauses,
426 (PlanState *) hjstate);
429 * initialize child nodes
431 * Note: we could suppress the REWIND flag for the inner input, which
432 * would amount to betting that the hash will be a single batch. Not
433 * clear if this would be a win or not.
435 outerNode = outerPlan(node);
436 hashNode = (Hash *) innerPlan(node);
438 outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
439 innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
442 * tuple table initialization
444 ExecInitResultTupleSlot(estate, &hjstate->js.ps);
445 hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
447 /* set up null tuples for outer joins, if needed */
448 switch (node->join.jointype)
455 hjstate->hj_NullInnerTupleSlot =
456 ExecInitNullTupleSlot(estate,
457 ExecGetResultType(innerPlanState(hjstate)));
460 hjstate->hj_NullOuterTupleSlot =
461 ExecInitNullTupleSlot(estate,
462 ExecGetResultType(outerPlanState(hjstate)));
465 hjstate->hj_NullOuterTupleSlot =
466 ExecInitNullTupleSlot(estate,
467 ExecGetResultType(outerPlanState(hjstate)));
468 hjstate->hj_NullInnerTupleSlot =
469 ExecInitNullTupleSlot(estate,
470 ExecGetResultType(innerPlanState(hjstate)));
473 elog(ERROR, "unrecognized join type: %d",
474 (int) node->join.jointype);
478 * now for some voodoo. our temporary tuple slot is actually the result
479 * tuple slot of the Hash node (which is our inner plan). we can do this
480 * because Hash nodes don't return tuples via ExecProcNode() -- instead
481 * the hash join node uses ExecScanHashBucket() to get at the contents of
482 * the hash table. -cim 6/9/91
485 HashState *hashstate = (HashState *) innerPlanState(hjstate);
486 TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
488 hjstate->hj_HashTupleSlot = slot;
492 * initialize tuple type and projection info
494 ExecAssignResultTypeFromTL(&hjstate->js.ps);
495 ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
497 ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
498 ExecGetResultType(outerPlanState(hjstate)));
501 * initialize hash-specific info
503 hjstate->hj_HashTable = NULL;
504 hjstate->hj_FirstOuterTupleSlot = NULL;
506 hjstate->hj_CurHashValue = 0;
507 hjstate->hj_CurBucketNo = 0;
508 hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
509 hjstate->hj_CurTuple = NULL;
512 * Deconstruct the hash clauses into outer and inner argument values, so
513 * that we can evaluate those subexpressions separately. Also make a list
514 * of the hash operator OIDs, in preparation for looking up the hash
520 foreach(l, hjstate->hashclauses)
522 FuncExprState *fstate = castNode(FuncExprState, lfirst(l));
523 OpExpr *hclause = castNode(OpExpr, fstate->xprstate.expr);
525 lclauses = lappend(lclauses, linitial(fstate->args));
526 rclauses = lappend(rclauses, lsecond(fstate->args));
527 hoperators = lappend_oid(hoperators, hclause->opno);
529 hjstate->hj_OuterHashKeys = lclauses;
530 hjstate->hj_InnerHashKeys = rclauses;
531 hjstate->hj_HashOperators = hoperators;
532 /* child Hash node needs to evaluate inner hash keys, too */
533 ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
535 hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
536 hjstate->hj_MatchedOuter = false;
537 hjstate->hj_OuterNotEmpty = false;
542 /* ----------------------------------------------------------------
545 * clean up routine for HashJoin node
546 * ----------------------------------------------------------------
549 ExecEndHashJoin(HashJoinState *node)
554 if (node->hj_HashTable)
556 ExecHashTableDestroy(node->hj_HashTable);
557 node->hj_HashTable = NULL;
561 * Free the exprcontext
563 ExecFreeExprContext(&node->js.ps);
566 * clean out the tuple table
568 ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
569 ExecClearTuple(node->hj_OuterTupleSlot);
570 ExecClearTuple(node->hj_HashTupleSlot);
575 ExecEndNode(outerPlanState(node));
576 ExecEndNode(innerPlanState(node));
580 * ExecHashJoinOuterGetTuple
582 * get the next outer tuple for hashjoin: either by
583 * executing the outer plan node in the first pass, or from
584 * the temp files for the hashjoin batches.
586 * Returns a null slot if no more outer tuples (within the current batch).
588 * On success, the tuple's hash value is stored at *hashvalue --- this is
589 * either originally computed, or re-read from the temp file.
591 static TupleTableSlot *
592 ExecHashJoinOuterGetTuple(PlanState *outerNode,
593 HashJoinState *hjstate,
596 HashJoinTable hashtable = hjstate->hj_HashTable;
597 int curbatch = hashtable->curbatch;
598 TupleTableSlot *slot;
600 if (curbatch == 0) /* if it is the first pass */
603 * Check to see if first outer tuple was already fetched by
604 * ExecHashJoin() and not used yet.
606 slot = hjstate->hj_FirstOuterTupleSlot;
607 if (!TupIsNull(slot))
608 hjstate->hj_FirstOuterTupleSlot = NULL;
610 slot = ExecProcNode(outerNode);
612 while (!TupIsNull(slot))
615 * We have to compute the tuple's hash value.
617 ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
619 econtext->ecxt_outertuple = slot;
620 if (ExecHashGetHashValue(hashtable, econtext,
621 hjstate->hj_OuterHashKeys,
622 true, /* outer tuple */
623 HJ_FILL_OUTER(hjstate),
626 /* remember outer relation is not empty for possible rescan */
627 hjstate->hj_OuterNotEmpty = true;
633 * That tuple couldn't match because of a NULL, so discard it and
634 * continue with the next one.
636 slot = ExecProcNode(outerNode);
639 else if (curbatch < hashtable->nbatch)
641 BufFile *file = hashtable->outerBatchFile[curbatch];
644 * In outer-join cases, we could get here even though the batch file
650 slot = ExecHashJoinGetSavedTuple(hjstate,
653 hjstate->hj_OuterTupleSlot);
654 if (!TupIsNull(slot))
658 /* End of this batch */
663 * ExecHashJoinNewBatch
664 * switch to a new hashjoin batch
666 * Returns true if successful, false if there are no more batches.
669 ExecHashJoinNewBatch(HashJoinState *hjstate)
671 HashJoinTable hashtable = hjstate->hj_HashTable;
675 TupleTableSlot *slot;
678 nbatch = hashtable->nbatch;
679 curbatch = hashtable->curbatch;
684 * We no longer need the previous outer batch file; close it right
685 * away to free disk space.
687 if (hashtable->outerBatchFile[curbatch])
688 BufFileClose(hashtable->outerBatchFile[curbatch]);
689 hashtable->outerBatchFile[curbatch] = NULL;
691 else /* we just finished the first batch */
694 * Reset some of the skew optimization state variables, since we no
695 * longer need to consider skew tuples after the first batch. The
696 * memory context reset we are about to do will release the skew
699 hashtable->skewEnabled = false;
700 hashtable->skewBucket = NULL;
701 hashtable->skewBucketNums = NULL;
702 hashtable->nSkewBuckets = 0;
703 hashtable->spaceUsedSkew = 0;
707 * We can always skip over any batches that are completely empty on both
708 * sides. We can sometimes skip over batches that are empty on only one
709 * side, but there are exceptions:
711 * 1. In a left/full outer join, we have to process outer batches even if
712 * the inner batch is empty. Similarly, in a right/full outer join, we
713 * have to process inner batches even if the outer batch is empty.
715 * 2. If we have increased nbatch since the initial estimate, we have to
716 * scan inner batches since they might contain tuples that need to be
717 * reassigned to later inner batches.
719 * 3. Similarly, if we have increased nbatch since starting the outer
720 * scan, we have to rescan outer batches in case they contain tuples that
721 * need to be reassigned.
724 while (curbatch < nbatch &&
725 (hashtable->outerBatchFile[curbatch] == NULL ||
726 hashtable->innerBatchFile[curbatch] == NULL))
728 if (hashtable->outerBatchFile[curbatch] &&
729 HJ_FILL_OUTER(hjstate))
730 break; /* must process due to rule 1 */
731 if (hashtable->innerBatchFile[curbatch] &&
732 HJ_FILL_INNER(hjstate))
733 break; /* must process due to rule 1 */
734 if (hashtable->innerBatchFile[curbatch] &&
735 nbatch != hashtable->nbatch_original)
736 break; /* must process due to rule 2 */
737 if (hashtable->outerBatchFile[curbatch] &&
738 nbatch != hashtable->nbatch_outstart)
739 break; /* must process due to rule 3 */
740 /* We can ignore this batch. */
741 /* Release associated temp files right away. */
742 if (hashtable->innerBatchFile[curbatch])
743 BufFileClose(hashtable->innerBatchFile[curbatch]);
744 hashtable->innerBatchFile[curbatch] = NULL;
745 if (hashtable->outerBatchFile[curbatch])
746 BufFileClose(hashtable->outerBatchFile[curbatch]);
747 hashtable->outerBatchFile[curbatch] = NULL;
751 if (curbatch >= nbatch)
752 return false; /* no more batches */
754 hashtable->curbatch = curbatch;
757 * Reload the hash table with the new inner batch (which could be empty)
759 ExecHashTableReset(hashtable);
761 innerFile = hashtable->innerBatchFile[curbatch];
763 if (innerFile != NULL)
765 if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
767 (errcode_for_file_access(),
768 errmsg("could not rewind hash-join temporary file: %m")));
770 while ((slot = ExecHashJoinGetSavedTuple(hjstate,
773 hjstate->hj_HashTupleSlot)))
776 * NOTE: some tuples may be sent to future batches. Also, it is
777 * possible for hashtable->nbatch to be increased here!
779 ExecHashTableInsert(hashtable, slot, hashvalue);
783 * after we build the hash table, the inner batch file is no longer
786 BufFileClose(innerFile);
787 hashtable->innerBatchFile[curbatch] = NULL;
791 * Rewind outer batch file (if present), so that we can start reading it.
793 if (hashtable->outerBatchFile[curbatch] != NULL)
795 if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
797 (errcode_for_file_access(),
798 errmsg("could not rewind hash-join temporary file: %m")));
805 * ExecHashJoinSaveTuple
806 * save a tuple to a batch file.
808 * The data recorded in the file for each tuple is its hash value,
809 * then the tuple in MinimalTuple format.
811 * Note: it is important always to call this in the regular executor
812 * context, not in a shorter-lived context; else the temp file buffers
813 * will get messed up.
816 ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
819 BufFile *file = *fileptr;
824 /* First write to this batch file, so open it. */
825 file = BufFileCreateTemp(false);
829 written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
830 if (written != sizeof(uint32))
832 (errcode_for_file_access(),
833 errmsg("could not write to hash-join temporary file: %m")));
835 written = BufFileWrite(file, (void *) tuple, tuple->t_len);
836 if (written != tuple->t_len)
838 (errcode_for_file_access(),
839 errmsg("could not write to hash-join temporary file: %m")));
843 * ExecHashJoinGetSavedTuple
844 * read the next tuple from a batch file. Return NULL if no more.
846 * On success, *hashvalue is set to the tuple's hash value, and the tuple
847 * itself is stored in the given slot.
849 static TupleTableSlot *
850 ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
853 TupleTableSlot *tupleSlot)
860 * We check for interrupts here because this is typically taken as an
861 * alternative code path to an ExecProcNode() call, which would include
864 CHECK_FOR_INTERRUPTS();
867 * Since both the hash value and the MinimalTuple length word are uint32,
868 * we can read them both in one BufFileRead() call without any type
871 nread = BufFileRead(file, (void *) header, sizeof(header));
872 if (nread == 0) /* end of file */
874 ExecClearTuple(tupleSlot);
877 if (nread != sizeof(header))
879 (errcode_for_file_access(),
880 errmsg("could not read from hash-join temporary file: %m")));
881 *hashvalue = header[0];
882 tuple = (MinimalTuple) palloc(header[1]);
883 tuple->t_len = header[1];
884 nread = BufFileRead(file,
885 (void *) ((char *) tuple + sizeof(uint32)),
886 header[1] - sizeof(uint32));
887 if (nread != header[1] - sizeof(uint32))
889 (errcode_for_file_access(),
890 errmsg("could not read from hash-join temporary file: %m")));
891 return ExecStoreMinimalTuple(tuple, tupleSlot, true);
896 ExecReScanHashJoin(HashJoinState *node)
899 * In a multi-batch join, we currently have to do rescans the hard way,
900 * primarily because batch temp files may have already been released. But
901 * if it's a single-batch join, and there is no parameter change for the
902 * inner subnode, then we can just re-use the existing hash table without
905 if (node->hj_HashTable != NULL)
907 if (node->hj_HashTable->nbatch == 1 &&
908 node->js.ps.righttree->chgParam == NULL)
911 * Okay to reuse the hash table; needn't rescan inner, either.
913 * However, if it's a right/full join, we'd better reset the
914 * inner-tuple match flags contained in the table.
916 if (HJ_FILL_INNER(node))
917 ExecHashTableResetMatchFlags(node->hj_HashTable);
920 * Also, we need to reset our state about the emptiness of the
921 * outer relation, so that the new scan of the outer will update
922 * it correctly if it turns out to be empty this time. (There's no
923 * harm in clearing it now because ExecHashJoin won't need the
924 * info. In the other cases, where the hash table doesn't exist
925 * or we are destroying it, we leave this state alone because
926 * ExecHashJoin will need it the first time through.)
928 node->hj_OuterNotEmpty = false;
930 /* ExecHashJoin can skip the BUILD_HASHTABLE step */
931 node->hj_JoinState = HJ_NEED_NEW_OUTER;
935 /* must destroy and rebuild hash table */
936 ExecHashTableDestroy(node->hj_HashTable);
937 node->hj_HashTable = NULL;
938 node->hj_JoinState = HJ_BUILD_HASHTABLE;
941 * if chgParam of subnode is not null then plan will be re-scanned
942 * by first ExecProcNode.
944 if (node->js.ps.righttree->chgParam == NULL)
945 ExecReScan(node->js.ps.righttree);
949 /* Always reset intra-tuple state */
950 node->hj_CurHashValue = 0;
951 node->hj_CurBucketNo = 0;
952 node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
953 node->hj_CurTuple = NULL;
955 node->hj_MatchedOuter = false;
956 node->hj_FirstOuterTupleSlot = NULL;
959 * if chgParam of subnode is not null then plan will be re-scanned by
960 * first ExecProcNode.
962 if (node->js.ps.lefttree->chgParam == NULL)
963 ExecReScan(node->js.ps.lefttree);