]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeHashjoin.c
Remove useless ps_OuterTupleSlot field from PlanState. I suppose this was
[postgresql] / src / backend / executor / nodeHashjoin.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeHashjoin.c
4  *        Routines to handle hash join nodes
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.96 2008/10/23 14:34:34 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #include "executor/executor.h"
19 #include "executor/hashjoin.h"
20 #include "executor/nodeHash.h"
21 #include "executor/nodeHashjoin.h"
22 #include "utils/memutils.h"
23
24
/*
 * HASHJOIN_IS_OUTER
 *		True for JOIN_LEFT and JOIN_ANTI join types, false otherwise.
 *
 * The test is simply whether a null-filled inner slot was set up for the
 * node; ExecInitHashJoin creates that slot only for those two join types.
 */
#define HASHJOIN_IS_OUTER(hjs)	((hjs)->hj_NullInnerTupleSlot != NULL)
27
28 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
29                                                   HashJoinState *hjstate,
30                                                   uint32 *hashvalue);
31 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
32                                                   BufFile *file,
33                                                   uint32 *hashvalue,
34                                                   TupleTableSlot *tupleSlot);
35 static int      ExecHashJoinNewBatch(HashJoinState *hjstate);
36
37
38 /* ----------------------------------------------------------------
39  *              ExecHashJoin
40  *
41  *              This function implements the Hybrid Hashjoin algorithm.
42  *
43  *              Note: the relation we build hash table on is the "inner"
44  *                        the other one is "outer".
45  * ----------------------------------------------------------------
46  */
47 TupleTableSlot *                                /* return: a tuple or NULL */
48 ExecHashJoin(HashJoinState *node)
49 {
50         EState     *estate;
51         PlanState  *outerNode;
52         HashState  *hashNode;
53         List       *joinqual;
54         List       *otherqual;
55         TupleTableSlot *inntuple;
56         ExprContext *econtext;
57         ExprDoneCond isDone;
58         HashJoinTable hashtable;
59         HashJoinTuple curtuple;
60         TupleTableSlot *outerTupleSlot;
61         uint32          hashvalue;
62         int                     batchno;
63
64         /*
65          * get information from HashJoin node
66          */
67         estate = node->js.ps.state;
68         joinqual = node->js.joinqual;
69         otherqual = node->js.ps.qual;
70         hashNode = (HashState *) innerPlanState(node);
71         outerNode = outerPlanState(node);
72
73         /*
74          * get information from HashJoin state
75          */
76         hashtable = node->hj_HashTable;
77         econtext = node->js.ps.ps_ExprContext;
78
79         /*
80          * Check to see if we're still projecting out tuples from a previous join
81          * tuple (because there is a function-returning-set in the projection
82          * expressions).  If so, try to project another one.
83          */
84         if (node->js.ps.ps_TupFromTlist)
85         {
86                 TupleTableSlot *result;
87
88                 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
89                 if (isDone == ExprMultipleResult)
90                         return result;
91                 /* Done with that source tuple... */
92                 node->js.ps.ps_TupFromTlist = false;
93         }
94
95         /*
96          * Reset per-tuple memory context to free any expression evaluation
97          * storage allocated in the previous tuple cycle.  Note this can't happen
98          * until we're done projecting out tuples from a join tuple.
99          */
100         ResetExprContext(econtext);
101
102         /*
103          * if this is the first call, build the hash table for inner relation
104          */
105         if (hashtable == NULL)
106         {
107                 /*
108                  * If the outer relation is completely empty, we can quit without
109                  * building the hash table.  However, for an inner join it is only a
110                  * win to check this when the outer relation's startup cost is less
111                  * than the projected cost of building the hash table.  Otherwise it's
112                  * best to build the hash table first and see if the inner relation is
113                  * empty.  (When it's an outer join, we should always make this check,
114                  * since we aren't going to be able to skip the join on the strength
115                  * of an empty inner relation anyway.)
116                  *
117                  * If we are rescanning the join, we make use of information gained on
118                  * the previous scan: don't bother to try the prefetch if the previous
119                  * scan found the outer relation nonempty.      This is not 100% reliable
120                  * since with new parameters the outer relation might yield different
121                  * results, but it's a good heuristic.
122                  *
123                  * The only way to make the check is to try to fetch a tuple from the
124                  * outer plan node.  If we succeed, we have to stash it away for later
125                  * consumption by ExecHashJoinOuterGetTuple.
126                  */
127                 if (HASHJOIN_IS_OUTER(node) ||
128                         (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
129                          !node->hj_OuterNotEmpty))
130                 {
131                         node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
132                         if (TupIsNull(node->hj_FirstOuterTupleSlot))
133                         {
134                                 node->hj_OuterNotEmpty = false;
135                                 return NULL;
136                         }
137                         else
138                                 node->hj_OuterNotEmpty = true;
139                 }
140                 else
141                         node->hj_FirstOuterTupleSlot = NULL;
142
143                 /*
144                  * create the hash table
145                  */
146                 hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
147                                                                                 node->hj_HashOperators);
148                 node->hj_HashTable = hashtable;
149
150                 /*
151                  * execute the Hash node, to build the hash table
152                  */
153                 hashNode->hashtable = hashtable;
154                 (void) MultiExecProcNode((PlanState *) hashNode);
155
156                 /*
157                  * If the inner relation is completely empty, and we're not doing an
158                  * outer join, we can quit without scanning the outer relation.
159                  */
160                 if (hashtable->totalTuples == 0 && !HASHJOIN_IS_OUTER(node))
161                         return NULL;
162
163                 /*
164                  * need to remember whether nbatch has increased since we began
165                  * scanning the outer relation
166                  */
167                 hashtable->nbatch_outstart = hashtable->nbatch;
168
169                 /*
170                  * Reset OuterNotEmpty for scan.  (It's OK if we fetched a tuple
171                  * above, because ExecHashJoinOuterGetTuple will immediately set it
172                  * again.)
173                  */
174                 node->hj_OuterNotEmpty = false;
175         }
176
177         /*
178          * run the hash join process
179          */
180         for (;;)
181         {
182                 /*
183                  * If we don't have an outer tuple, get the next one
184                  */
185                 if (node->hj_NeedNewOuter)
186                 {
187                         outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
188                                                                                                            node,
189                                                                                                            &hashvalue);
190                         if (TupIsNull(outerTupleSlot))
191                         {
192                                 /* end of join */
193                                 return NULL;
194                         }
195
196                         econtext->ecxt_outertuple = outerTupleSlot;
197                         node->hj_NeedNewOuter = false;
198                         node->hj_MatchedOuter = false;
199
200                         /*
201                          * now we have an outer tuple, find the corresponding bucket for
202                          * this tuple from the hash table
203                          */
204                         node->hj_CurHashValue = hashvalue;
205                         ExecHashGetBucketAndBatch(hashtable, hashvalue,
206                                                                           &node->hj_CurBucketNo, &batchno);
207                         node->hj_CurTuple = NULL;
208
209                         /*
210                          * Now we've got an outer tuple and the corresponding hash bucket,
211                          * but this tuple may not belong to the current batch.
212                          */
213                         if (batchno != hashtable->curbatch)
214                         {
215                                 /*
216                                  * Need to postpone this outer tuple to a later batch. Save it
217                                  * in the corresponding outer-batch file.
218                                  */
219                                 Assert(batchno > hashtable->curbatch);
220                                 ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
221                                                                           hashvalue,
222                                                                           &hashtable->outerBatchFile[batchno]);
223                                 node->hj_NeedNewOuter = true;
224                                 continue;               /* loop around for a new outer tuple */
225                         }
226                 }
227
228                 /*
229                  * OK, scan the selected hash bucket for matches
230                  */
231                 for (;;)
232                 {
233                         curtuple = ExecScanHashBucket(node, econtext);
234                         if (curtuple == NULL)
235                                 break;                  /* out of matches */
236
237                         /*
238                          * we've got a match, but still need to test non-hashed quals
239                          */
240                         inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
241                                                                                          node->hj_HashTupleSlot,
242                                                                                          false);        /* don't pfree */
243                         econtext->ecxt_innertuple = inntuple;
244
245                         /* reset temp memory each time to avoid leaks from qual expr */
246                         ResetExprContext(econtext);
247
248                         /*
249                          * if we pass the qual, then save state for next call and have
250                          * ExecProject form the projection, store it in the tuple table,
251                          * and return the slot.
252                          *
253                          * Only the joinquals determine MatchedOuter status, but all quals
254                          * must pass to actually return the tuple.
255                          */
256                         if (joinqual == NIL || ExecQual(joinqual, econtext, false))
257                         {
258                                 node->hj_MatchedOuter = true;
259
260                                 /* In an antijoin, we never return a matched tuple */
261                                 if (node->js.jointype == JOIN_ANTI)
262                                 {
263                                         node->hj_NeedNewOuter = true;
264                                         break;          /* out of loop over hash bucket */
265                                 }
266
267                                 /*
268                                  * In a semijoin, we'll consider returning the first match,
269                                  * but after that we're done with this outer tuple.
270                                  */
271                                 if (node->js.jointype == JOIN_SEMI)
272                                         node->hj_NeedNewOuter = true;
273
274                                 if (otherqual == NIL || ExecQual(otherqual, econtext, false))
275                                 {
276                                         TupleTableSlot *result;
277
278                                         result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
279
280                                         if (isDone != ExprEndResult)
281                                         {
282                                                 node->js.ps.ps_TupFromTlist =
283                                                         (isDone == ExprMultipleResult);
284                                                 return result;
285                                         }
286                                 }
287
288                                 /*
289                                  * If semijoin and we didn't return the tuple, we're still
290                                  * done with this outer tuple.
291                                  */
292                                 if (node->js.jointype == JOIN_SEMI)
293                                         break;          /* out of loop over hash bucket */
294                         }
295                 }
296
297                 /*
298                  * Now the current outer tuple has run out of matches, so check
299                  * whether to emit a dummy outer-join tuple. If not, loop around to
300                  * get a new outer tuple.
301                  */
302                 node->hj_NeedNewOuter = true;
303
304                 if (!node->hj_MatchedOuter &&
305                         HASHJOIN_IS_OUTER(node))
306                 {
307                         /*
308                          * We are doing an outer join and there were no join matches for
309                          * this outer tuple.  Generate a fake join tuple with nulls for
310                          * the inner tuple, and return it if it passes the non-join quals.
311                          */
312                         econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
313
314                         if (otherqual == NIL || ExecQual(otherqual, econtext, false))
315                         {
316                                 /*
317                                  * qualification was satisfied so we project and return the
318                                  * slot containing the result tuple using ExecProject().
319                                  */
320                                 TupleTableSlot *result;
321
322                                 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
323
324                                 if (isDone != ExprEndResult)
325                                 {
326                                         node->js.ps.ps_TupFromTlist =
327                                                 (isDone == ExprMultipleResult);
328                                         return result;
329                                 }
330                         }
331                 }
332         }
333 }
334
335 /* ----------------------------------------------------------------
336  *              ExecInitHashJoin
337  *
338  *              Init routine for HashJoin node.
339  * ----------------------------------------------------------------
340  */
341 HashJoinState *
342 ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
343 {
344         HashJoinState *hjstate;
345         Plan       *outerNode;
346         Hash       *hashNode;
347         List       *lclauses;
348         List       *rclauses;
349         List       *hoperators;
350         ListCell   *l;
351
352         /* check for unsupported flags */
353         Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
354
355         /*
356          * create state structure
357          */
358         hjstate = makeNode(HashJoinState);
359         hjstate->js.ps.plan = (Plan *) node;
360         hjstate->js.ps.state = estate;
361
362         /*
363          * Miscellaneous initialization
364          *
365          * create expression context for node
366          */
367         ExecAssignExprContext(estate, &hjstate->js.ps);
368
369         /*
370          * initialize child expressions
371          */
372         hjstate->js.ps.targetlist = (List *)
373                 ExecInitExpr((Expr *) node->join.plan.targetlist,
374                                          (PlanState *) hjstate);
375         hjstate->js.ps.qual = (List *)
376                 ExecInitExpr((Expr *) node->join.plan.qual,
377                                          (PlanState *) hjstate);
378         hjstate->js.jointype = node->join.jointype;
379         hjstate->js.joinqual = (List *)
380                 ExecInitExpr((Expr *) node->join.joinqual,
381                                          (PlanState *) hjstate);
382         hjstate->hashclauses = (List *)
383                 ExecInitExpr((Expr *) node->hashclauses,
384                                          (PlanState *) hjstate);
385
386         /*
387          * initialize child nodes
388          *
389          * Note: we could suppress the REWIND flag for the inner input, which
390          * would amount to betting that the hash will be a single batch.  Not
391          * clear if this would be a win or not.
392          */
393         outerNode = outerPlan(node);
394         hashNode = (Hash *) innerPlan(node);
395
396         outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
397         innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
398
399 #define HASHJOIN_NSLOTS 3
400
401         /*
402          * tuple table initialization
403          */
404         ExecInitResultTupleSlot(estate, &hjstate->js.ps);
405         hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
406
407         /* note: HASHJOIN_IS_OUTER macro depends on this initialization */
408         switch (node->join.jointype)
409         {
410                 case JOIN_INNER:
411                 case JOIN_SEMI:
412                         break;
413                 case JOIN_LEFT:
414                 case JOIN_ANTI:
415                         hjstate->hj_NullInnerTupleSlot =
416                                 ExecInitNullTupleSlot(estate,
417                                                                  ExecGetResultType(innerPlanState(hjstate)));
418                         break;
419                 default:
420                         elog(ERROR, "unrecognized join type: %d",
421                                  (int) node->join.jointype);
422         }
423
424         /*
425          * now for some voodoo.  our temporary tuple slot is actually the result
426          * tuple slot of the Hash node (which is our inner plan).  we do this
427          * because Hash nodes don't return tuples via ExecProcNode() -- instead
428          * the hash join node uses ExecScanHashBucket() to get at the contents of
429          * the hash table.      -cim 6/9/91
430          */
431         {
432                 HashState  *hashstate = (HashState *) innerPlanState(hjstate);
433                 TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
434
435                 hjstate->hj_HashTupleSlot = slot;
436         }
437
438         /*
439          * initialize tuple type and projection info
440          */
441         ExecAssignResultTypeFromTL(&hjstate->js.ps);
442         ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
443
444         ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
445                                                   ExecGetResultType(outerPlanState(hjstate)));
446
447         /*
448          * initialize hash-specific info
449          */
450         hjstate->hj_HashTable = NULL;
451         hjstate->hj_FirstOuterTupleSlot = NULL;
452
453         hjstate->hj_CurHashValue = 0;
454         hjstate->hj_CurBucketNo = 0;
455         hjstate->hj_CurTuple = NULL;
456
457         /*
458          * Deconstruct the hash clauses into outer and inner argument values, so
459          * that we can evaluate those subexpressions separately.  Also make a list
460          * of the hash operator OIDs, in preparation for looking up the hash
461          * functions to use.
462          */
463         lclauses = NIL;
464         rclauses = NIL;
465         hoperators = NIL;
466         foreach(l, hjstate->hashclauses)
467         {
468                 FuncExprState *fstate = (FuncExprState *) lfirst(l);
469                 OpExpr     *hclause;
470
471                 Assert(IsA(fstate, FuncExprState));
472                 hclause = (OpExpr *) fstate->xprstate.expr;
473                 Assert(IsA(hclause, OpExpr));
474                 lclauses = lappend(lclauses, linitial(fstate->args));
475                 rclauses = lappend(rclauses, lsecond(fstate->args));
476                 hoperators = lappend_oid(hoperators, hclause->opno);
477         }
478         hjstate->hj_OuterHashKeys = lclauses;
479         hjstate->hj_InnerHashKeys = rclauses;
480         hjstate->hj_HashOperators = hoperators;
481         /* child Hash node needs to evaluate inner hash keys, too */
482         ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
483
484         hjstate->js.ps.ps_TupFromTlist = false;
485         hjstate->hj_NeedNewOuter = true;
486         hjstate->hj_MatchedOuter = false;
487         hjstate->hj_OuterNotEmpty = false;
488
489         return hjstate;
490 }
491
492 int
493 ExecCountSlotsHashJoin(HashJoin *node)
494 {
495         return ExecCountSlotsNode(outerPlan(node)) +
496                 ExecCountSlotsNode(innerPlan(node)) +
497                 HASHJOIN_NSLOTS;
498 }
499
500 /* ----------------------------------------------------------------
501  *              ExecEndHashJoin
502  *
503  *              clean up routine for HashJoin node
504  * ----------------------------------------------------------------
505  */
506 void
507 ExecEndHashJoin(HashJoinState *node)
508 {
509         /*
510          * Free hash table
511          */
512         if (node->hj_HashTable)
513         {
514                 ExecHashTableDestroy(node->hj_HashTable);
515                 node->hj_HashTable = NULL;
516         }
517
518         /*
519          * Free the exprcontext
520          */
521         ExecFreeExprContext(&node->js.ps);
522
523         /*
524          * clean out the tuple table
525          */
526         ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
527         ExecClearTuple(node->hj_OuterTupleSlot);
528         ExecClearTuple(node->hj_HashTupleSlot);
529
530         /*
531          * clean up subtrees
532          */
533         ExecEndNode(outerPlanState(node));
534         ExecEndNode(innerPlanState(node));
535 }
536
537 /*
538  * ExecHashJoinOuterGetTuple
539  *
540  *              get the next outer tuple for hashjoin: either by
541  *              executing a plan node in the first pass, or from
542  *              the temp files for the hashjoin batches.
543  *
544  * Returns a null slot if no more outer tuples.  On success, the tuple's
545  * hash value is stored at *hashvalue --- this is either originally computed,
546  * or re-read from the temp file.
547  */
548 static TupleTableSlot *
549 ExecHashJoinOuterGetTuple(PlanState *outerNode,
550                                                   HashJoinState *hjstate,
551                                                   uint32 *hashvalue)
552 {
553         HashJoinTable hashtable = hjstate->hj_HashTable;
554         int                     curbatch = hashtable->curbatch;
555         TupleTableSlot *slot;
556
557         if (curbatch == 0)                      /* if it is the first pass */
558         {
559                 /*
560                  * Check to see if first outer tuple was already fetched by
561                  * ExecHashJoin() and not used yet.
562                  */
563                 slot = hjstate->hj_FirstOuterTupleSlot;
564                 if (!TupIsNull(slot))
565                         hjstate->hj_FirstOuterTupleSlot = NULL;
566                 else
567                         slot = ExecProcNode(outerNode);
568
569                 while (!TupIsNull(slot))
570                 {
571                         /*
572                          * We have to compute the tuple's hash value.
573                          */
574                         ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
575
576                         econtext->ecxt_outertuple = slot;
577                         if (ExecHashGetHashValue(hashtable, econtext,
578                                                                          hjstate->hj_OuterHashKeys,
579                                                                          true,          /* outer tuple */
580                                                                          HASHJOIN_IS_OUTER(hjstate),
581                                                                          hashvalue))
582                         {
583                                 /* remember outer relation is not empty for possible rescan */
584                                 hjstate->hj_OuterNotEmpty = true;
585
586                                 return slot;
587                         }
588
589                         /*
590                          * That tuple couldn't match because of a NULL, so discard it and
591                          * continue with the next one.
592                          */
593                         slot = ExecProcNode(outerNode);
594                 }
595
596                 /*
597                  * We have just reached the end of the first pass. Try to switch to a
598                  * saved batch.
599                  */
600                 curbatch = ExecHashJoinNewBatch(hjstate);
601         }
602
603         /*
604          * Try to read from a temp file. Loop allows us to advance to new batches
605          * as needed.  NOTE: nbatch could increase inside ExecHashJoinNewBatch, so
606          * don't try to optimize this loop.
607          */
608         while (curbatch < hashtable->nbatch)
609         {
610                 slot = ExecHashJoinGetSavedTuple(hjstate,
611                                                                                  hashtable->outerBatchFile[curbatch],
612                                                                                  hashvalue,
613                                                                                  hjstate->hj_OuterTupleSlot);
614                 if (!TupIsNull(slot))
615                         return slot;
616                 curbatch = ExecHashJoinNewBatch(hjstate);
617         }
618
619         /* Out of batches... */
620         return NULL;
621 }
622
623 /*
624  * ExecHashJoinNewBatch
625  *              switch to a new hashjoin batch
626  *
627  * Returns the number of the new batch (1..nbatch-1), or nbatch if no more.
628  * We will never return a batch number that has an empty outer batch file.
629  */
630 static int
631 ExecHashJoinNewBatch(HashJoinState *hjstate)
632 {
633         HashJoinTable hashtable = hjstate->hj_HashTable;
634         int                     nbatch;
635         int                     curbatch;
636         BufFile    *innerFile;
637         TupleTableSlot *slot;
638         uint32          hashvalue;
639
640 start_over:
641         nbatch = hashtable->nbatch;
642         curbatch = hashtable->curbatch;
643
644         if (curbatch > 0)
645         {
646                 /*
647                  * We no longer need the previous outer batch file; close it right
648                  * away to free disk space.
649                  */
650                 if (hashtable->outerBatchFile[curbatch])
651                         BufFileClose(hashtable->outerBatchFile[curbatch]);
652                 hashtable->outerBatchFile[curbatch] = NULL;
653         }
654
655         /*
656          * We can always skip over any batches that are completely empty on both
657          * sides.  We can sometimes skip over batches that are empty on only one
658          * side, but there are exceptions:
659          *
660          * 1. In an outer join, we have to process outer batches even if the inner
661          * batch is empty.
662          *
663          * 2. If we have increased nbatch since the initial estimate, we have to
664          * scan inner batches since they might contain tuples that need to be
665          * reassigned to later inner batches.
666          *
667          * 3. Similarly, if we have increased nbatch since starting the outer
668          * scan, we have to rescan outer batches in case they contain tuples that
669          * need to be reassigned.
670          */
671         curbatch++;
672         while (curbatch < nbatch &&
673                    (hashtable->outerBatchFile[curbatch] == NULL ||
674                         hashtable->innerBatchFile[curbatch] == NULL))
675         {
676                 if (hashtable->outerBatchFile[curbatch] &&
677                         HASHJOIN_IS_OUTER(hjstate))
678                         break;                          /* must process due to rule 1 */
679                 if (hashtable->innerBatchFile[curbatch] &&
680                         nbatch != hashtable->nbatch_original)
681                         break;                          /* must process due to rule 2 */
682                 if (hashtable->outerBatchFile[curbatch] &&
683                         nbatch != hashtable->nbatch_outstart)
684                         break;                          /* must process due to rule 3 */
685                 /* We can ignore this batch. */
686                 /* Release associated temp files right away. */
687                 if (hashtable->innerBatchFile[curbatch])
688                         BufFileClose(hashtable->innerBatchFile[curbatch]);
689                 hashtable->innerBatchFile[curbatch] = NULL;
690                 if (hashtable->outerBatchFile[curbatch])
691                         BufFileClose(hashtable->outerBatchFile[curbatch]);
692                 hashtable->outerBatchFile[curbatch] = NULL;
693                 curbatch++;
694         }
695
696         if (curbatch >= nbatch)
697                 return curbatch;                /* no more batches */
698
699         hashtable->curbatch = curbatch;
700
701         /*
702          * Reload the hash table with the new inner batch (which could be empty)
703          */
704         ExecHashTableReset(hashtable);
705
706         innerFile = hashtable->innerBatchFile[curbatch];
707
708         if (innerFile != NULL)
709         {
710                 if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
711                         ereport(ERROR,
712                                         (errcode_for_file_access(),
713                                    errmsg("could not rewind hash-join temporary file: %m")));
714
715                 while ((slot = ExecHashJoinGetSavedTuple(hjstate,
716                                                                                                  innerFile,
717                                                                                                  &hashvalue,
718                                                                                                  hjstate->hj_HashTupleSlot)))
719                 {
720                         /*
721                          * NOTE: some tuples may be sent to future batches.  Also, it is
722                          * possible for hashtable->nbatch to be increased here!
723                          */
724                         ExecHashTableInsert(hashtable, slot, hashvalue);
725                 }
726
727                 /*
728                  * after we build the hash table, the inner batch file is no longer
729                  * needed
730                  */
731                 BufFileClose(innerFile);
732                 hashtable->innerBatchFile[curbatch] = NULL;
733         }
734
735         /*
736          * If there's no outer batch file, advance to next batch.
737          */
738         if (hashtable->outerBatchFile[curbatch] == NULL)
739                 goto start_over;
740
741         /*
742          * Rewind outer batch file, so that we can start reading it.
743          */
744         if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
745                 ereport(ERROR,
746                                 (errcode_for_file_access(),
747                                  errmsg("could not rewind hash-join temporary file: %m")));
748
749         return curbatch;
750 }
751
752 /*
753  * ExecHashJoinSaveTuple
754  *              save a tuple to a batch file.
755  *
756  * The data recorded in the file for each tuple is its hash value,
757  * then the tuple in MinimalTuple format.
758  *
759  * Note: it is important always to call this in the regular executor
760  * context, not in a shorter-lived context; else the temp file buffers
761  * will get messed up.
762  */
763 void
764 ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
765                                           BufFile **fileptr)
766 {
767         BufFile    *file = *fileptr;
768         size_t          written;
769
770         if (file == NULL)
771         {
772                 /* First write to this batch file, so open it. */
773                 file = BufFileCreateTemp(false);
774                 *fileptr = file;
775         }
776
777         written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
778         if (written != sizeof(uint32))
779                 ereport(ERROR,
780                                 (errcode_for_file_access(),
781                                  errmsg("could not write to hash-join temporary file: %m")));
782
783         written = BufFileWrite(file, (void *) tuple, tuple->t_len);
784         if (written != tuple->t_len)
785                 ereport(ERROR,
786                                 (errcode_for_file_access(),
787                                  errmsg("could not write to hash-join temporary file: %m")));
788 }
789
790 /*
791  * ExecHashJoinGetSavedTuple
792  *              read the next tuple from a batch file.  Return NULL if no more.
793  *
794  * On success, *hashvalue is set to the tuple's hash value, and the tuple
795  * itself is stored in the given slot.
796  */
797 static TupleTableSlot *
798 ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
799                                                   BufFile *file,
800                                                   uint32 *hashvalue,
801                                                   TupleTableSlot *tupleSlot)
802 {
803         uint32          header[2];
804         size_t          nread;
805         MinimalTuple tuple;
806
807         /*
808          * Since both the hash value and the MinimalTuple length word are uint32,
809          * we can read them both in one BufFileRead() call without any type
810          * cheating.
811          */
812         nread = BufFileRead(file, (void *) header, sizeof(header));
813         if (nread == 0)                         /* end of file */
814         {
815                 ExecClearTuple(tupleSlot);
816                 return NULL;
817         }
818         if (nread != sizeof(header))
819                 ereport(ERROR,
820                                 (errcode_for_file_access(),
821                                  errmsg("could not read from hash-join temporary file: %m")));
822         *hashvalue = header[0];
823         tuple = (MinimalTuple) palloc(header[1]);
824         tuple->t_len = header[1];
825         nread = BufFileRead(file,
826                                                 (void *) ((char *) tuple + sizeof(uint32)),
827                                                 header[1] - sizeof(uint32));
828         if (nread != header[1] - sizeof(uint32))
829                 ereport(ERROR,
830                                 (errcode_for_file_access(),
831                                  errmsg("could not read from hash-join temporary file: %m")));
832         return ExecStoreMinimalTuple(tuple, tupleSlot, true);
833 }
834
835
836 void
837 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
838 {
839         /*
840          * In a multi-batch join, we currently have to do rescans the hard way,
841          * primarily because batch temp files may have already been released. But
842          * if it's a single-batch join, and there is no parameter change for the
843          * inner subnode, then we can just re-use the existing hash table without
844          * rebuilding it.
845          */
846         if (node->hj_HashTable != NULL)
847         {
848                 if (node->hj_HashTable->nbatch == 1 &&
849                         ((PlanState *) node)->righttree->chgParam == NULL)
850                 {
851                         /*
852                          * okay to reuse the hash table; needn't rescan inner, either.
853                          *
854                          * What we do need to do is reset our state about the emptiness of
855                          * the outer relation, so that the new scan of the outer will
856                          * update it correctly if it turns out to be empty this time.
857                          * (There's no harm in clearing it now because ExecHashJoin won't
858                          * need the info.  In the other cases, where the hash table
859                          * doesn't exist or we are destroying it, we leave this state
860                          * alone because ExecHashJoin will need it the first time
861                          * through.)
862                          */
863                         node->hj_OuterNotEmpty = false;
864                 }
865                 else
866                 {
867                         /* must destroy and rebuild hash table */
868                         ExecHashTableDestroy(node->hj_HashTable);
869                         node->hj_HashTable = NULL;
870
871                         /*
872                          * if chgParam of subnode is not null then plan will be re-scanned
873                          * by first ExecProcNode.
874                          */
875                         if (((PlanState *) node)->righttree->chgParam == NULL)
876                                 ExecReScan(((PlanState *) node)->righttree, exprCtxt);
877                 }
878         }
879
880         /* Always reset intra-tuple state */
881         node->hj_CurHashValue = 0;
882         node->hj_CurBucketNo = 0;
883         node->hj_CurTuple = NULL;
884
885         node->js.ps.ps_TupFromTlist = false;
886         node->hj_NeedNewOuter = true;
887         node->hj_MatchedOuter = false;
888         node->hj_FirstOuterTupleSlot = NULL;
889
890         /*
891          * if chgParam of subnode is not null then plan will be re-scanned by
892          * first ExecProcNode.
893          */
894         if (((PlanState *) node)->lefttree->chgParam == NULL)
895                 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
896 }