]> granicus.if.org Git - postgresql/blob - src/backend/executor/nodeHashjoin.c
Clean up most -Wunused-but-set-variable warnings from gcc 4.6
[postgresql] / src / backend / executor / nodeHashjoin.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeHashjoin.c
4  *        Routines to handle hash join nodes
5  *
6  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        src/backend/executor/nodeHashjoin.c
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #include "executor/executor.h"
19 #include "executor/hashjoin.h"
20 #include "executor/nodeHash.h"
21 #include "executor/nodeHashjoin.h"
22 #include "utils/memutils.h"
23
24
25 /*
26  * States of the ExecHashJoin state machine
27  */
28 #define HJ_BUILD_HASHTABLE              1
29 #define HJ_NEED_NEW_OUTER               2
30 #define HJ_SCAN_BUCKET                  3
31 #define HJ_FILL_OUTER_TUPLE             4
32 #define HJ_FILL_INNER_TUPLES    5
33 #define HJ_NEED_NEW_BATCH               6
34
35 /* Returns true if doing null-fill on outer relation */
36 #define HJ_FILL_OUTER(hjstate)  ((hjstate)->hj_NullInnerTupleSlot != NULL)
37 /* Returns true if doing null-fill on inner relation */
38 #define HJ_FILL_INNER(hjstate)  ((hjstate)->hj_NullOuterTupleSlot != NULL)
39
40 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
41                                                   HashJoinState *hjstate,
42                                                   uint32 *hashvalue);
43 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
44                                                   BufFile *file,
45                                                   uint32 *hashvalue,
46                                                   TupleTableSlot *tupleSlot);
47 static bool ExecHashJoinNewBatch(HashJoinState *hjstate);
48
49
50 /* ----------------------------------------------------------------
51  *              ExecHashJoin
52  *
53  *              This function implements the Hybrid Hashjoin algorithm.
54  *
55  *              Note: the relation we build hash table on is the "inner"
56  *                        the other one is "outer".
57  * ----------------------------------------------------------------
58  */
59 TupleTableSlot *                                /* return: a tuple or NULL */
60 ExecHashJoin(HashJoinState *node)
61 {
62         PlanState  *outerNode;
63         HashState  *hashNode;
64         List       *joinqual;
65         List       *otherqual;
66         ExprContext *econtext;
67         ExprDoneCond isDone;
68         HashJoinTable hashtable;
69         TupleTableSlot *outerTupleSlot;
70         uint32          hashvalue;
71         int                     batchno;
72
73         /*
74          * get information from HashJoin node
75          */
76         joinqual = node->js.joinqual;
77         otherqual = node->js.ps.qual;
78         hashNode = (HashState *) innerPlanState(node);
79         outerNode = outerPlanState(node);
80         hashtable = node->hj_HashTable;
81         econtext = node->js.ps.ps_ExprContext;
82
83         /*
84          * Check to see if we're still projecting out tuples from a previous join
85          * tuple (because there is a function-returning-set in the projection
86          * expressions).  If so, try to project another one.
87          */
88         if (node->js.ps.ps_TupFromTlist)
89         {
90                 TupleTableSlot *result;
91
92                 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
93                 if (isDone == ExprMultipleResult)
94                         return result;
95                 /* Done with that source tuple... */
96                 node->js.ps.ps_TupFromTlist = false;
97         }
98
99         /*
100          * Reset per-tuple memory context to free any expression evaluation
101          * storage allocated in the previous tuple cycle.  Note this can't happen
102          * until we're done projecting out tuples from a join tuple.
103          */
104         ResetExprContext(econtext);
105
106         /*
107          * run the hash join state machine
108          */
109         for (;;)
110         {
111                 switch (node->hj_JoinState)
112                 {
113                         case HJ_BUILD_HASHTABLE:
114
115                                 /*
116                                  * First time through: build hash table for inner relation.
117                                  */
118                                 Assert(hashtable == NULL);
119
120                                 /*
121                                  * If the outer relation is completely empty, and it's not
122                                  * right/full join, we can quit without building the hash
123                                  * table.  However, for an inner join it is only a win to
124                                  * check this when the outer relation's startup cost is less
125                                  * than the projected cost of building the hash table.
126                                  * Otherwise it's best to build the hash table first and see
127                                  * if the inner relation is empty.      (When it's a left join, we
128                                  * should always make this check, since we aren't going to be
129                                  * able to skip the join on the strength of an empty inner
130                                  * relation anyway.)
131                                  *
132                                  * If we are rescanning the join, we make use of information
133                                  * gained on the previous scan: don't bother to try the
134                                  * prefetch if the previous scan found the outer relation
135                                  * nonempty. This is not 100% reliable since with new
136                                  * parameters the outer relation might yield different
137                                  * results, but it's a good heuristic.
138                                  *
139                                  * The only way to make the check is to try to fetch a tuple
140                                  * from the outer plan node.  If we succeed, we have to stash
141                                  * it away for later consumption by ExecHashJoinOuterGetTuple.
142                                  */
143                                 if (HJ_FILL_INNER(node))
144                                 {
145                                         /* no chance to not build the hash table */
146                                         node->hj_FirstOuterTupleSlot = NULL;
147                                 }
148                                 else if (HJ_FILL_OUTER(node) ||
149                                                  (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost &&
150                                                   !node->hj_OuterNotEmpty))
151                                 {
152                                         node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
153                                         if (TupIsNull(node->hj_FirstOuterTupleSlot))
154                                         {
155                                                 node->hj_OuterNotEmpty = false;
156                                                 return NULL;
157                                         }
158                                         else
159                                                 node->hj_OuterNotEmpty = true;
160                                 }
161                                 else
162                                         node->hj_FirstOuterTupleSlot = NULL;
163
164                                 /*
165                                  * create the hash table
166                                  */
167                                 hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
168                                                                                                 node->hj_HashOperators,
169                                                                                                 HJ_FILL_INNER(node));
170                                 node->hj_HashTable = hashtable;
171
172                                 /*
173                                  * execute the Hash node, to build the hash table
174                                  */
175                                 hashNode->hashtable = hashtable;
176                                 (void) MultiExecProcNode((PlanState *) hashNode);
177
178                                 /*
179                                  * If the inner relation is completely empty, and we're not
180                                  * doing a left outer join, we can quit without scanning the
181                                  * outer relation.
182                                  */
183                                 if (hashtable->totalTuples == 0 && !HJ_FILL_OUTER(node))
184                                         return NULL;
185
186                                 /*
187                                  * need to remember whether nbatch has increased since we
188                                  * began scanning the outer relation
189                                  */
190                                 hashtable->nbatch_outstart = hashtable->nbatch;
191
192                                 /*
193                                  * Reset OuterNotEmpty for scan.  (It's OK if we fetched a
194                                  * tuple above, because ExecHashJoinOuterGetTuple will
195                                  * immediately set it again.)
196                                  */
197                                 node->hj_OuterNotEmpty = false;
198
199                                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
200
201                                 /* FALL THRU */
202
203                         case HJ_NEED_NEW_OUTER:
204
205                                 /*
206                                  * We don't have an outer tuple, try to get the next one
207                                  */
208                                 outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
209                                                                                                                    node,
210                                                                                                                    &hashvalue);
211                                 if (TupIsNull(outerTupleSlot))
212                                 {
213                                         /* end of batch, or maybe whole join */
214                                         if (HJ_FILL_INNER(node))
215                                         {
216                                                 /* set up to scan for unmatched inner tuples */
217                                                 ExecPrepHashTableForUnmatched(node);
218                                                 node->hj_JoinState = HJ_FILL_INNER_TUPLES;
219                                         }
220                                         else
221                                                 node->hj_JoinState = HJ_NEED_NEW_BATCH;
222                                         continue;
223                                 }
224
225                                 econtext->ecxt_outertuple = outerTupleSlot;
226                                 node->hj_MatchedOuter = false;
227
228                                 /*
229                                  * Find the corresponding bucket for this tuple in the main
230                                  * hash table or skew hash table.
231                                  */
232                                 node->hj_CurHashValue = hashvalue;
233                                 ExecHashGetBucketAndBatch(hashtable, hashvalue,
234                                                                                   &node->hj_CurBucketNo, &batchno);
235                                 node->hj_CurSkewBucketNo = ExecHashGetSkewBucket(hashtable,
236                                                                                                                                  hashvalue);
237                                 node->hj_CurTuple = NULL;
238
239                                 /*
240                                  * The tuple might not belong to the current batch (where
241                                  * "current batch" includes the skew buckets if any).
242                                  */
243                                 if (batchno != hashtable->curbatch &&
244                                         node->hj_CurSkewBucketNo == INVALID_SKEW_BUCKET_NO)
245                                 {
246                                         /*
247                                          * Need to postpone this outer tuple to a later batch.
248                                          * Save it in the corresponding outer-batch file.
249                                          */
250                                         Assert(batchno > hashtable->curbatch);
251                                         ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
252                                                                                   hashvalue,
253                                                                                 &hashtable->outerBatchFile[batchno]);
254                                         /* Loop around, staying in HJ_NEED_NEW_OUTER state */
255                                         continue;
256                                 }
257
258                                 /* OK, let's scan the bucket for matches */
259                                 node->hj_JoinState = HJ_SCAN_BUCKET;
260
261                                 /* FALL THRU */
262
263                         case HJ_SCAN_BUCKET:
264
265                                 /*
266                                  * Scan the selected hash bucket for matches to current outer
267                                  */
268                                 if (!ExecScanHashBucket(node, econtext))
269                                 {
270                                         /* out of matches; check for possible outer-join fill */
271                                         node->hj_JoinState = HJ_FILL_OUTER_TUPLE;
272                                         continue;
273                                 }
274
275                                 /*
276                                  * We've got a match, but still need to test non-hashed quals.
277                                  * ExecScanHashBucket already set up all the state needed to
278                                  * call ExecQual.
279                                  *
280                                  * If we pass the qual, then save state for next call and have
281                                  * ExecProject form the projection, store it in the tuple
282                                  * table, and return the slot.
283                                  *
284                                  * Only the joinquals determine tuple match status, but all
285                                  * quals must pass to actually return the tuple.
286                                  */
287                                 if (joinqual == NIL || ExecQual(joinqual, econtext, false))
288                                 {
289                                         node->hj_MatchedOuter = true;
290                                         HeapTupleHeaderSetMatch(HJTUPLE_MINTUPLE(node->hj_CurTuple));
291
292                                         /* In an antijoin, we never return a matched tuple */
293                                         if (node->js.jointype == JOIN_ANTI)
294                                         {
295                                                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
296                                                 continue;
297                                         }
298
299                                         /*
300                                          * In a semijoin, we'll consider returning the first
301                                          * match, but after that we're done with this outer tuple.
302                                          */
303                                         if (node->js.jointype == JOIN_SEMI)
304                                                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
305
306                                         if (otherqual == NIL ||
307                                                 ExecQual(otherqual, econtext, false))
308                                         {
309                                                 TupleTableSlot *result;
310
311                                                 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
312
313                                                 if (isDone != ExprEndResult)
314                                                 {
315                                                         node->js.ps.ps_TupFromTlist =
316                                                                 (isDone == ExprMultipleResult);
317                                                         return result;
318                                                 }
319                                         }
320                                 }
321                                 break;
322
323                         case HJ_FILL_OUTER_TUPLE:
324
325                                 /*
326                                  * The current outer tuple has run out of matches, so check
327                                  * whether to emit a dummy outer-join tuple.  Whether we emit
328                                  * one or not, the next state is NEED_NEW_OUTER.
329                                  */
330                                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
331
332                                 if (!node->hj_MatchedOuter &&
333                                         HJ_FILL_OUTER(node))
334                                 {
335                                         /*
336                                          * Generate a fake join tuple with nulls for the inner
337                                          * tuple, and return it if it passes the non-join quals.
338                                          */
339                                         econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
340
341                                         if (otherqual == NIL ||
342                                                 ExecQual(otherqual, econtext, false))
343                                         {
344                                                 TupleTableSlot *result;
345
346                                                 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
347
348                                                 if (isDone != ExprEndResult)
349                                                 {
350                                                         node->js.ps.ps_TupFromTlist =
351                                                                 (isDone == ExprMultipleResult);
352                                                         return result;
353                                                 }
354                                         }
355                                 }
356                                 break;
357
358                         case HJ_FILL_INNER_TUPLES:
359
360                                 /*
361                                  * We have finished a batch, but we are doing right/full join,
362                                  * so any unmatched inner tuples in the hashtable have to be
363                                  * emitted before we continue to the next batch.
364                                  */
365                                 if (!ExecScanHashTableForUnmatched(node, econtext))
366                                 {
367                                         /* no more unmatched tuples */
368                                         node->hj_JoinState = HJ_NEED_NEW_BATCH;
369                                         continue;
370                                 }
371
372                                 /*
373                                  * Generate a fake join tuple with nulls for the outer tuple,
374                                  * and return it if it passes the non-join quals.
375                                  */
376                                 econtext->ecxt_outertuple = node->hj_NullOuterTupleSlot;
377
378                                 if (otherqual == NIL ||
379                                         ExecQual(otherqual, econtext, false))
380                                 {
381                                         TupleTableSlot *result;
382
383                                         result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
384
385                                         if (isDone != ExprEndResult)
386                                         {
387                                                 node->js.ps.ps_TupFromTlist =
388                                                         (isDone == ExprMultipleResult);
389                                                 return result;
390                                         }
391                                 }
392                                 break;
393
394                         case HJ_NEED_NEW_BATCH:
395
396                                 /*
397                                  * Try to advance to next batch.  Done if there are no more.
398                                  */
399                                 if (!ExecHashJoinNewBatch(node))
400                                         return NULL;    /* end of join */
401                                 node->hj_JoinState = HJ_NEED_NEW_OUTER;
402                                 break;
403
404                         default:
405                                 elog(ERROR, "unrecognized hashjoin state: %d",
406                                          (int) node->hj_JoinState);
407                 }
408         }
409 }
410
411 /* ----------------------------------------------------------------
412  *              ExecInitHashJoin
413  *
414  *              Init routine for HashJoin node.
415  * ----------------------------------------------------------------
416  */
417 HashJoinState *
418 ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
419 {
420         HashJoinState *hjstate;
421         Plan       *outerNode;
422         Hash       *hashNode;
423         List       *lclauses;
424         List       *rclauses;
425         List       *hoperators;
426         ListCell   *l;
427
428         /* check for unsupported flags */
429         Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
430
431         /*
432          * create state structure
433          */
434         hjstate = makeNode(HashJoinState);
435         hjstate->js.ps.plan = (Plan *) node;
436         hjstate->js.ps.state = estate;
437
438         /*
439          * Miscellaneous initialization
440          *
441          * create expression context for node
442          */
443         ExecAssignExprContext(estate, &hjstate->js.ps);
444
445         /*
446          * initialize child expressions
447          */
448         hjstate->js.ps.targetlist = (List *)
449                 ExecInitExpr((Expr *) node->join.plan.targetlist,
450                                          (PlanState *) hjstate);
451         hjstate->js.ps.qual = (List *)
452                 ExecInitExpr((Expr *) node->join.plan.qual,
453                                          (PlanState *) hjstate);
454         hjstate->js.jointype = node->join.jointype;
455         hjstate->js.joinqual = (List *)
456                 ExecInitExpr((Expr *) node->join.joinqual,
457                                          (PlanState *) hjstate);
458         hjstate->hashclauses = (List *)
459                 ExecInitExpr((Expr *) node->hashclauses,
460                                          (PlanState *) hjstate);
461
462         /*
463          * initialize child nodes
464          *
465          * Note: we could suppress the REWIND flag for the inner input, which
466          * would amount to betting that the hash will be a single batch.  Not
467          * clear if this would be a win or not.
468          */
469         outerNode = outerPlan(node);
470         hashNode = (Hash *) innerPlan(node);
471
472         outerPlanState(hjstate) = ExecInitNode(outerNode, estate, eflags);
473         innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate, eflags);
474
475         /*
476          * tuple table initialization
477          */
478         ExecInitResultTupleSlot(estate, &hjstate->js.ps);
479         hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
480
481         /* set up null tuples for outer joins, if needed */
482         switch (node->join.jointype)
483         {
484                 case JOIN_INNER:
485                 case JOIN_SEMI:
486                         break;
487                 case JOIN_LEFT:
488                 case JOIN_ANTI:
489                         hjstate->hj_NullInnerTupleSlot =
490                                 ExecInitNullTupleSlot(estate,
491                                                                  ExecGetResultType(innerPlanState(hjstate)));
492                         break;
493                 case JOIN_RIGHT:
494                         hjstate->hj_NullOuterTupleSlot =
495                                 ExecInitNullTupleSlot(estate,
496                                                                  ExecGetResultType(outerPlanState(hjstate)));
497                         break;
498                 case JOIN_FULL:
499                         hjstate->hj_NullOuterTupleSlot =
500                                 ExecInitNullTupleSlot(estate,
501                                                                  ExecGetResultType(outerPlanState(hjstate)));
502                         hjstate->hj_NullInnerTupleSlot =
503                                 ExecInitNullTupleSlot(estate,
504                                                                  ExecGetResultType(innerPlanState(hjstate)));
505                         break;
506                 default:
507                         elog(ERROR, "unrecognized join type: %d",
508                                  (int) node->join.jointype);
509         }
510
511         /*
512          * now for some voodoo.  our temporary tuple slot is actually the result
513          * tuple slot of the Hash node (which is our inner plan).  we can do this
514          * because Hash nodes don't return tuples via ExecProcNode() -- instead
515          * the hash join node uses ExecScanHashBucket() to get at the contents of
516          * the hash table.      -cim 6/9/91
517          */
518         {
519                 HashState  *hashstate = (HashState *) innerPlanState(hjstate);
520                 TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
521
522                 hjstate->hj_HashTupleSlot = slot;
523         }
524
525         /*
526          * initialize tuple type and projection info
527          */
528         ExecAssignResultTypeFromTL(&hjstate->js.ps);
529         ExecAssignProjectionInfo(&hjstate->js.ps, NULL);
530
531         ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
532                                                   ExecGetResultType(outerPlanState(hjstate)));
533
534         /*
535          * initialize hash-specific info
536          */
537         hjstate->hj_HashTable = NULL;
538         hjstate->hj_FirstOuterTupleSlot = NULL;
539
540         hjstate->hj_CurHashValue = 0;
541         hjstate->hj_CurBucketNo = 0;
542         hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
543         hjstate->hj_CurTuple = NULL;
544
545         /*
546          * Deconstruct the hash clauses into outer and inner argument values, so
547          * that we can evaluate those subexpressions separately.  Also make a list
548          * of the hash operator OIDs, in preparation for looking up the hash
549          * functions to use.
550          */
551         lclauses = NIL;
552         rclauses = NIL;
553         hoperators = NIL;
554         foreach(l, hjstate->hashclauses)
555         {
556                 FuncExprState *fstate = (FuncExprState *) lfirst(l);
557                 OpExpr     *hclause;
558
559                 Assert(IsA(fstate, FuncExprState));
560                 hclause = (OpExpr *) fstate->xprstate.expr;
561                 Assert(IsA(hclause, OpExpr));
562                 lclauses = lappend(lclauses, linitial(fstate->args));
563                 rclauses = lappend(rclauses, lsecond(fstate->args));
564                 hoperators = lappend_oid(hoperators, hclause->opno);
565         }
566         hjstate->hj_OuterHashKeys = lclauses;
567         hjstate->hj_InnerHashKeys = rclauses;
568         hjstate->hj_HashOperators = hoperators;
569         /* child Hash node needs to evaluate inner hash keys, too */
570         ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
571
572         hjstate->js.ps.ps_TupFromTlist = false;
573         hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
574         hjstate->hj_MatchedOuter = false;
575         hjstate->hj_OuterNotEmpty = false;
576
577         return hjstate;
578 }
579
580 /* ----------------------------------------------------------------
581  *              ExecEndHashJoin
582  *
583  *              clean up routine for HashJoin node
584  * ----------------------------------------------------------------
585  */
586 void
587 ExecEndHashJoin(HashJoinState *node)
588 {
589         /*
590          * Free hash table
591          */
592         if (node->hj_HashTable)
593         {
594                 ExecHashTableDestroy(node->hj_HashTable);
595                 node->hj_HashTable = NULL;
596         }
597
598         /*
599          * Free the exprcontext
600          */
601         ExecFreeExprContext(&node->js.ps);
602
603         /*
604          * clean out the tuple table
605          */
606         ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
607         ExecClearTuple(node->hj_OuterTupleSlot);
608         ExecClearTuple(node->hj_HashTupleSlot);
609
610         /*
611          * clean up subtrees
612          */
613         ExecEndNode(outerPlanState(node));
614         ExecEndNode(innerPlanState(node));
615 }
616
617 /*
618  * ExecHashJoinOuterGetTuple
619  *
620  *              get the next outer tuple for hashjoin: either by
621  *              executing the outer plan node in the first pass, or from
622  *              the temp files for the hashjoin batches.
623  *
624  * Returns a null slot if no more outer tuples (within the current batch).
625  *
626  * On success, the tuple's hash value is stored at *hashvalue --- this is
627  * either originally computed, or re-read from the temp file.
628  */
629 static TupleTableSlot *
630 ExecHashJoinOuterGetTuple(PlanState *outerNode,
631                                                   HashJoinState *hjstate,
632                                                   uint32 *hashvalue)
633 {
634         HashJoinTable hashtable = hjstate->hj_HashTable;
635         int                     curbatch = hashtable->curbatch;
636         TupleTableSlot *slot;
637
638         if (curbatch == 0)                      /* if it is the first pass */
639         {
640                 /*
641                  * Check to see if first outer tuple was already fetched by
642                  * ExecHashJoin() and not used yet.
643                  */
644                 slot = hjstate->hj_FirstOuterTupleSlot;
645                 if (!TupIsNull(slot))
646                         hjstate->hj_FirstOuterTupleSlot = NULL;
647                 else
648                         slot = ExecProcNode(outerNode);
649
650                 while (!TupIsNull(slot))
651                 {
652                         /*
653                          * We have to compute the tuple's hash value.
654                          */
655                         ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
656
657                         econtext->ecxt_outertuple = slot;
658                         if (ExecHashGetHashValue(hashtable, econtext,
659                                                                          hjstate->hj_OuterHashKeys,
660                                                                          true,          /* outer tuple */
661                                                                          HJ_FILL_OUTER(hjstate),
662                                                                          hashvalue))
663                         {
664                                 /* remember outer relation is not empty for possible rescan */
665                                 hjstate->hj_OuterNotEmpty = true;
666
667                                 return slot;
668                         }
669
670                         /*
671                          * That tuple couldn't match because of a NULL, so discard it and
672                          * continue with the next one.
673                          */
674                         slot = ExecProcNode(outerNode);
675                 }
676         }
677         else if (curbatch < hashtable->nbatch)
678         {
679                 BufFile    *file = hashtable->outerBatchFile[curbatch];
680
681                 /*
682                  * In outer-join cases, we could get here even though the batch file
683                  * is empty.
684                  */
685                 if (file == NULL)
686                         return NULL;
687
688                 slot = ExecHashJoinGetSavedTuple(hjstate,
689                                                                                  file,
690                                                                                  hashvalue,
691                                                                                  hjstate->hj_OuterTupleSlot);
692                 if (!TupIsNull(slot))
693                         return slot;
694         }
695
696         /* End of this batch */
697         return NULL;
698 }
699
700 /*
701  * ExecHashJoinNewBatch
702  *              switch to a new hashjoin batch
703  *
704  * Returns true if successful, false if there are no more batches.
705  */
706 static bool
707 ExecHashJoinNewBatch(HashJoinState *hjstate)
708 {
709         HashJoinTable hashtable = hjstate->hj_HashTable;
710         int                     nbatch;
711         int                     curbatch;
712         BufFile    *innerFile;
713         TupleTableSlot *slot;
714         uint32          hashvalue;
715
716         nbatch = hashtable->nbatch;
717         curbatch = hashtable->curbatch;
718
719         if (curbatch > 0)
720         {
721                 /*
722                  * We no longer need the previous outer batch file; close it right
723                  * away to free disk space.
724                  */
725                 if (hashtable->outerBatchFile[curbatch])
726                         BufFileClose(hashtable->outerBatchFile[curbatch]);
727                 hashtable->outerBatchFile[curbatch] = NULL;
728         }
729         else    /* we just finished the first batch */
730         {
731                 /*
732                  * Reset some of the skew optimization state variables, since we no
733                  * longer need to consider skew tuples after the first batch. The
734                  * memory context reset we are about to do will release the skew
735                  * hashtable itself.
736                  */
737                 hashtable->skewEnabled = false;
738                 hashtable->skewBucket = NULL;
739                 hashtable->skewBucketNums = NULL;
740                 hashtable->nSkewBuckets = 0;
741                 hashtable->spaceUsedSkew = 0;
742         }
743
744         /*
745          * We can always skip over any batches that are completely empty on both
746          * sides.  We can sometimes skip over batches that are empty on only one
747          * side, but there are exceptions:
748          *
749          * 1. In a left/full outer join, we have to process outer batches even if
750          * the inner batch is empty.  Similarly, in a right/full outer join, we
751          * have to process inner batches even if the outer batch is empty.
752          *
753          * 2. If we have increased nbatch since the initial estimate, we have to
754          * scan inner batches since they might contain tuples that need to be
755          * reassigned to later inner batches.
756          *
757          * 3. Similarly, if we have increased nbatch since starting the outer
758          * scan, we have to rescan outer batches in case they contain tuples that
759          * need to be reassigned.
760          */
761         curbatch++;
762         while (curbatch < nbatch &&
763                    (hashtable->outerBatchFile[curbatch] == NULL ||
764                         hashtable->innerBatchFile[curbatch] == NULL))
765         {
766                 if (hashtable->outerBatchFile[curbatch] &&
767                         HJ_FILL_OUTER(hjstate))
768                         break;                          /* must process due to rule 1 */
769                 if (hashtable->innerBatchFile[curbatch] &&
770                         HJ_FILL_INNER(hjstate))
771                         break;                          /* must process due to rule 1 */
772                 if (hashtable->innerBatchFile[curbatch] &&
773                         nbatch != hashtable->nbatch_original)
774                         break;                          /* must process due to rule 2 */
775                 if (hashtable->outerBatchFile[curbatch] &&
776                         nbatch != hashtable->nbatch_outstart)
777                         break;                          /* must process due to rule 3 */
778                 /* We can ignore this batch. */
779                 /* Release associated temp files right away. */
780                 if (hashtable->innerBatchFile[curbatch])
781                         BufFileClose(hashtable->innerBatchFile[curbatch]);
782                 hashtable->innerBatchFile[curbatch] = NULL;
783                 if (hashtable->outerBatchFile[curbatch])
784                         BufFileClose(hashtable->outerBatchFile[curbatch]);
785                 hashtable->outerBatchFile[curbatch] = NULL;
786                 curbatch++;
787         }
788
789         if (curbatch >= nbatch)
790                 return false;                   /* no more batches */
791
792         hashtable->curbatch = curbatch;
793
794         /*
795          * Reload the hash table with the new inner batch (which could be empty)
796          */
797         ExecHashTableReset(hashtable);
798
799         innerFile = hashtable->innerBatchFile[curbatch];
800
801         if (innerFile != NULL)
802         {
803                 if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
804                         ereport(ERROR,
805                                         (errcode_for_file_access(),
806                                    errmsg("could not rewind hash-join temporary file: %m")));
807
808                 while ((slot = ExecHashJoinGetSavedTuple(hjstate,
809                                                                                                  innerFile,
810                                                                                                  &hashvalue,
811                                                                                                  hjstate->hj_HashTupleSlot)))
812                 {
813                         /*
814                          * NOTE: some tuples may be sent to future batches.  Also, it is
815                          * possible for hashtable->nbatch to be increased here!
816                          */
817                         ExecHashTableInsert(hashtable, slot, hashvalue);
818                 }
819
820                 /*
821                  * after we build the hash table, the inner batch file is no longer
822                  * needed
823                  */
824                 BufFileClose(innerFile);
825                 hashtable->innerBatchFile[curbatch] = NULL;
826         }
827
828         /*
829          * Rewind outer batch file (if present), so that we can start reading it.
830          */
831         if (hashtable->outerBatchFile[curbatch] != NULL)
832         {
833                 if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
834                         ereport(ERROR,
835                                         (errcode_for_file_access(),
836                                    errmsg("could not rewind hash-join temporary file: %m")));
837         }
838
839         return true;
840 }
841
842 /*
843  * ExecHashJoinSaveTuple
844  *              save a tuple to a batch file.
845  *
846  * The data recorded in the file for each tuple is its hash value,
847  * then the tuple in MinimalTuple format.
848  *
849  * Note: it is important always to call this in the regular executor
850  * context, not in a shorter-lived context; else the temp file buffers
851  * will get messed up.
852  */
853 void
854 ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
855                                           BufFile **fileptr)
856 {
857         BufFile    *file = *fileptr;
858         size_t          written;
859
860         if (file == NULL)
861         {
862                 /* First write to this batch file, so open it. */
863                 file = BufFileCreateTemp(false);
864                 *fileptr = file;
865         }
866
867         written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
868         if (written != sizeof(uint32))
869                 ereport(ERROR,
870                                 (errcode_for_file_access(),
871                                  errmsg("could not write to hash-join temporary file: %m")));
872
873         written = BufFileWrite(file, (void *) tuple, tuple->t_len);
874         if (written != tuple->t_len)
875                 ereport(ERROR,
876                                 (errcode_for_file_access(),
877                                  errmsg("could not write to hash-join temporary file: %m")));
878 }
879
880 /*
881  * ExecHashJoinGetSavedTuple
882  *              read the next tuple from a batch file.  Return NULL if no more.
883  *
884  * On success, *hashvalue is set to the tuple's hash value, and the tuple
885  * itself is stored in the given slot.
886  */
887 static TupleTableSlot *
888 ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
889                                                   BufFile *file,
890                                                   uint32 *hashvalue,
891                                                   TupleTableSlot *tupleSlot)
892 {
893         uint32          header[2];
894         size_t          nread;
895         MinimalTuple tuple;
896
897         /*
898          * Since both the hash value and the MinimalTuple length word are uint32,
899          * we can read them both in one BufFileRead() call without any type
900          * cheating.
901          */
902         nread = BufFileRead(file, (void *) header, sizeof(header));
903         if (nread == 0)                         /* end of file */
904         {
905                 ExecClearTuple(tupleSlot);
906                 return NULL;
907         }
908         if (nread != sizeof(header))
909                 ereport(ERROR,
910                                 (errcode_for_file_access(),
911                                  errmsg("could not read from hash-join temporary file: %m")));
912         *hashvalue = header[0];
913         tuple = (MinimalTuple) palloc(header[1]);
914         tuple->t_len = header[1];
915         nread = BufFileRead(file,
916                                                 (void *) ((char *) tuple + sizeof(uint32)),
917                                                 header[1] - sizeof(uint32));
918         if (nread != header[1] - sizeof(uint32))
919                 ereport(ERROR,
920                                 (errcode_for_file_access(),
921                                  errmsg("could not read from hash-join temporary file: %m")));
922         return ExecStoreMinimalTuple(tuple, tupleSlot, true);
923 }
924
925
926 void
927 ExecReScanHashJoin(HashJoinState *node)
928 {
929         /*
930          * In a multi-batch join, we currently have to do rescans the hard way,
931          * primarily because batch temp files may have already been released. But
932          * if it's a single-batch join, and there is no parameter change for the
933          * inner subnode, then we can just re-use the existing hash table without
934          * rebuilding it.
935          */
936         if (node->hj_HashTable != NULL)
937         {
938                 if (node->hj_HashTable->nbatch == 1 &&
939                         node->js.ps.righttree->chgParam == NULL)
940                 {
941                         /*
942                          * Okay to reuse the hash table; needn't rescan inner, either.
943                          *
944                          * However, if it's a right/full join, we'd better reset the
945                          * inner-tuple match flags contained in the table.
946                          */
947                         if (HJ_FILL_INNER(node))
948                                 ExecHashTableResetMatchFlags(node->hj_HashTable);
949
950                         /*
951                          * Also, we need to reset our state about the emptiness of the
952                          * outer relation, so that the new scan of the outer will update
953                          * it correctly if it turns out to be empty this time. (There's no
954                          * harm in clearing it now because ExecHashJoin won't need the
955                          * info.  In the other cases, where the hash table doesn't exist
956                          * or we are destroying it, we leave this state alone because
957                          * ExecHashJoin will need it the first time through.)
958                          */
959                         node->hj_OuterNotEmpty = false;
960
961                         /* ExecHashJoin can skip the BUILD_HASHTABLE step */
962                         node->hj_JoinState = HJ_NEED_NEW_OUTER;
963                 }
964                 else
965                 {
966                         /* must destroy and rebuild hash table */
967                         ExecHashTableDestroy(node->hj_HashTable);
968                         node->hj_HashTable = NULL;
969                         node->hj_JoinState = HJ_BUILD_HASHTABLE;
970
971                         /*
972                          * if chgParam of subnode is not null then plan will be re-scanned
973                          * by first ExecProcNode.
974                          */
975                         if (node->js.ps.righttree->chgParam == NULL)
976                                 ExecReScan(node->js.ps.righttree);
977                 }
978         }
979
980         /* Always reset intra-tuple state */
981         node->hj_CurHashValue = 0;
982         node->hj_CurBucketNo = 0;
983         node->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
984         node->hj_CurTuple = NULL;
985
986         node->js.ps.ps_TupFromTlist = false;
987         node->hj_MatchedOuter = false;
988         node->hj_FirstOuterTupleSlot = NULL;
989
990         /*
991          * if chgParam of subnode is not null then plan will be re-scanned by
992          * first ExecProcNode.
993          */
994         if (node->js.ps.lefttree->chgParam == NULL)
995                 ExecReScan(node->js.ps.lefttree);
996 }