1 /*-------------------------------------------------------------------------
4 * Routines to handle hash join nodes
6 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.45 2002/12/15 16:17:46 tgl Exp $
13 *-------------------------------------------------------------------------
18 #include "executor/executor.h"
19 #include "executor/nodeHash.h"
20 #include "executor/nodeHashjoin.h"
21 #include "optimizer/clauses.h"
22 #include "utils/memutils.h"
25 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *node,
26 HashJoinState *hjstate);
27 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
29 TupleTableSlot *tupleSlot);
30 static int ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable);
31 static int ExecHashJoinNewBatch(HashJoinState *hjstate);
34 /* ----------------------------------------------------------------
37 * This function implements the Hybrid Hashjoin algorithm.
38 * recursive partitioning remains to be added.
39 * Note: the relation we build hash table on is the inner
40 * the other one is outer.
41 * ----------------------------------------------------------------
43 TupleTableSlot * /* return: a tuple or NULL */
44 ExecHashJoin(HashJoinState *node)
54 TupleTableSlot *inntuple;
55 ExprContext *econtext;
57 HashJoinTable hashtable;
59 TupleTableSlot *outerTupleSlot;
60 TupleTableSlot *innerTupleSlot;
65 * get information from HashJoin node
67 hjclauses = node->hashclauses;
68 estate = node->js.ps.state;
69 joinqual = node->js.joinqual;
70 otherqual = node->js.ps.qual;
71 hashNode = (HashState *) innerPlanState(node);
72 outerNode = outerPlanState(node);
73 hashPhaseDone = node->hj_hashdone;
74 dir = estate->es_direction;
77 * get information from HashJoin state
79 hashtable = node->hj_HashTable;
80 outerkeys = node->hj_OuterHashKeys;
81 econtext = node->js.ps.ps_ExprContext;
84 * Check to see if we're still projecting out tuples from a previous
85 * join tuple (because there is a function-returning-set in the
86 * projection expressions). If so, try to project another one.
88 if (node->js.ps.ps_TupFromTlist)
90 TupleTableSlot *result;
92 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
93 if (isDone == ExprMultipleResult)
95 /* Done with that source tuple... */
96 node->js.ps.ps_TupFromTlist = false;
100 * Reset per-tuple memory context to free any expression evaluation
101 * storage allocated in the previous tuple cycle. Note this can't
102 * happen until we're done projecting out tuples from a join tuple.
104 ResetExprContext(econtext);
107 * if this is the first call, build the hash table for inner relation
110 { /* if the hash phase not completed */
111 if (hashtable == NULL)
112 { /* if the hash table has not been created */
115 * create the hash table
117 hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
118 node->hj_HashTable = hashtable;
121 * execute the Hash node, to build the hash table
123 hashNode->hashtable = hashtable;
124 innerTupleSlot = ExecProcNode((PlanState *) hashNode);
126 node->hj_hashdone = true;
129 * Open temp files for outer batches, if needed. Note that file
130 * buffers are palloc'd in regular executor context.
132 for (i = 0; i < hashtable->nbatch; i++)
133 hashtable->outerBatchFile[i] = BufFileCreateTemp();
135 else if (hashtable == NULL)
139 * Now get an outer tuple and probe into the hash table for matches
141 outerTupleSlot = node->js.ps.ps_OuterTupleSlot;
146 * If we don't have an outer tuple, get the next one
148 if (node->hj_NeedNewOuter)
150 outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
152 if (TupIsNull(outerTupleSlot))
155 * when the last batch runs out, clean up and exit
157 ExecHashTableDestroy(hashtable);
158 node->hj_HashTable = NULL;
162 node->js.ps.ps_OuterTupleSlot = outerTupleSlot;
163 econtext->ecxt_outertuple = outerTupleSlot;
164 node->hj_NeedNewOuter = false;
165 node->hj_MatchedOuter = false;
168 * now we have an outer tuple, find the corresponding bucket
169 * for this tuple from the hash table
171 node->hj_CurBucketNo = ExecHashGetBucket(hashtable, econtext,
173 node->hj_CurTuple = NULL;
176 * Now we've got an outer tuple and the corresponding hash
177 * bucket, but this tuple may not belong to the current batch.
178 * This need only be checked in the first pass.
180 if (hashtable->curbatch == 0)
182 int batch = ExecHashJoinGetBatch(node->hj_CurBucketNo,
188 * Need to postpone this outer tuple to a later batch.
189 * Save it in the corresponding outer-batch file.
191 int batchno = batch - 1;
193 hashtable->outerBatchSize[batchno]++;
194 ExecHashJoinSaveTuple(outerTupleSlot->val,
195 hashtable->outerBatchFile[batchno]);
196 node->hj_NeedNewOuter = true;
197 continue; /* loop around for a new outer tuple */
203 * OK, scan the selected hash bucket for matches
207 curtuple = ExecScanHashBucket(node,
210 if (curtuple == NULL)
211 break; /* out of matches */
214 * we've got a match, but still need to test non-hashed quals
216 inntuple = ExecStoreTuple(curtuple,
217 node->hj_HashTupleSlot,
219 false); /* don't pfree this tuple */
220 econtext->ecxt_innertuple = inntuple;
222 /* reset temp memory each time to avoid leaks from qual expr */
223 ResetExprContext(econtext);
226 * if we pass the qual, then save state for next call and have
227 * ExecProject form the projection, store it in the tuple
228 * table, and return the slot.
230 * Only the joinquals determine MatchedOuter status, but all
231 * quals must pass to actually return the tuple.
233 if (ExecQual(joinqual, econtext, false))
235 node->hj_MatchedOuter = true;
237 if (otherqual == NIL || ExecQual(otherqual, econtext, false))
239 TupleTableSlot *result;
241 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
243 if (isDone != ExprEndResult)
245 node->js.ps.ps_TupFromTlist =
246 (isDone == ExprMultipleResult);
254 * Now the current outer tuple has run out of matches, so check
255 * whether to emit a dummy outer-join tuple. If not, loop around
256 * to get a new outer tuple.
258 node->hj_NeedNewOuter = true;
260 if (!node->hj_MatchedOuter &&
261 node->js.jointype == JOIN_LEFT)
264 * We are doing an outer join and there were no join matches
265 * for this outer tuple. Generate a fake join tuple with
266 * nulls for the inner tuple, and return it if it passes the
269 econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
271 if (ExecQual(otherqual, econtext, false))
274 * qualification was satisfied so we project and return
275 * the slot containing the result tuple using
278 TupleTableSlot *result;
280 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
282 if (isDone != ExprEndResult)
284 node->js.ps.ps_TupFromTlist =
285 (isDone == ExprMultipleResult);
293 /* ----------------------------------------------------------------
296 * Init routine for HashJoin node.
297 * ----------------------------------------------------------------
300 ExecInitHashJoin(HashJoin *node, EState *estate)
302 HashJoinState *hjstate;
309 * create state structure
311 hjstate = makeNode(HashJoinState);
312 hjstate->js.ps.plan = (Plan *) node;
313 hjstate->js.ps.state = estate;
316 * Miscellaneous initialization
318 * create expression context for node
320 ExecAssignExprContext(estate, &hjstate->js.ps);
323 * initialize child expressions
325 hjstate->js.ps.targetlist = (List *)
326 ExecInitExpr((Expr *) node->join.plan.targetlist,
327 (PlanState *) hjstate);
328 hjstate->js.ps.qual = (List *)
329 ExecInitExpr((Expr *) node->join.plan.qual,
330 (PlanState *) hjstate);
331 hjstate->js.jointype = node->join.jointype;
332 hjstate->js.joinqual = (List *)
333 ExecInitExpr((Expr *) node->join.joinqual,
334 (PlanState *) hjstate);
335 hjstate->hashclauses = (List *)
336 ExecInitExpr((Expr *) node->hashclauses,
337 (PlanState *) hjstate);
340 * initialize child nodes
342 outerNode = outerPlan(node);
343 hashNode = (Hash *) innerPlan(node);
345 outerPlanState(hjstate) = ExecInitNode(outerNode, estate);
346 innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate);
348 #define HASHJOIN_NSLOTS 3
351 * tuple table initialization
353 ExecInitResultTupleSlot(estate, &hjstate->js.ps);
354 hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
356 switch (node->join.jointype)
361 hjstate->hj_NullInnerTupleSlot =
362 ExecInitNullTupleSlot(estate,
363 ExecGetTupType(innerPlanState(hjstate)));
366 elog(ERROR, "ExecInitHashJoin: unsupported join type %d",
367 (int) node->join.jointype);
371 * now for some voodoo. our temporary tuple slot is actually the
372 * result tuple slot of the Hash node (which is our inner plan). we
373 * do this because Hash nodes don't return tuples via ExecProcNode()
374 * -- instead the hash join node uses ExecScanHashBucket() to get at
375 * the contents of the hash table. -cim 6/9/91
378 HashState *hashstate = (HashState *) innerPlanState(hjstate);
379 TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
381 hjstate->hj_HashTupleSlot = slot;
385 * initialize tuple type and projection info
387 ExecAssignResultTypeFromTL(&hjstate->js.ps);
388 ExecAssignProjectionInfo(&hjstate->js.ps);
390 ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
391 ExecGetTupType(outerPlanState(hjstate)),
395 * initialize hash-specific info
398 hjstate->hj_hashdone = false;
400 hjstate->hj_HashTable = (HashJoinTable) NULL;
401 hjstate->hj_CurBucketNo = 0;
402 hjstate->hj_CurTuple = (HashJoinTuple) NULL;
405 * The planner already made a list of the inner hashkeys for us,
406 * but we also need a list of the outer hashkeys. Each list of
407 * exprs must then be prepared for execution.
409 hjstate->hj_InnerHashKeys = (List *)
410 ExecInitExpr((Expr *) hashNode->hashkeys,
411 innerPlanState(hjstate));
412 ((HashState *) innerPlanState(hjstate))->hashkeys =
413 hjstate->hj_InnerHashKeys;
416 foreach(hcl, node->hashclauses)
418 hclauses = lappend(hclauses, get_leftop(lfirst(hcl)));
420 hjstate->hj_OuterHashKeys = (List *)
421 ExecInitExpr((Expr *) hclauses,
422 (PlanState *) hjstate);
424 hjstate->js.ps.ps_OuterTupleSlot = NULL;
425 hjstate->js.ps.ps_TupFromTlist = false;
426 hjstate->hj_NeedNewOuter = true;
427 hjstate->hj_MatchedOuter = false;
433 ExecCountSlotsHashJoin(HashJoin *node)
435 return ExecCountSlotsNode(outerPlan(node)) +
436 ExecCountSlotsNode(innerPlan(node)) +
440 /* ----------------------------------------------------------------
443 * clean up routine for HashJoin node
444 * ----------------------------------------------------------------
447 ExecEndHashJoin(HashJoinState *node)
450 * free hash table in case we end plan before all tuples are retrieved
452 if (node->hj_HashTable)
454 ExecHashTableDestroy(node->hj_HashTable);
455 node->hj_HashTable = NULL;
459 * Free the exprcontext
461 ExecFreeExprContext(&node->js.ps);
464 * clean out the tuple table
466 ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
467 ExecClearTuple(node->hj_OuterTupleSlot);
468 ExecClearTuple(node->hj_HashTupleSlot);
473 ExecEndNode(outerPlanState(node));
474 ExecEndNode(innerPlanState(node));
477 /* ----------------------------------------------------------------
478 * ExecHashJoinOuterGetTuple
480 * get the next outer tuple for hashjoin: either by
481 * executing a plan node as in the first pass, or from
482 * the tmp files for the hashjoin batches.
483 * ----------------------------------------------------------------
486 static TupleTableSlot *
487 ExecHashJoinOuterGetTuple(PlanState *node, HashJoinState *hjstate)
489 HashJoinTable hashtable = hjstate->hj_HashTable;
490 int curbatch = hashtable->curbatch;
491 TupleTableSlot *slot;
494 { /* if it is the first pass */
495 slot = ExecProcNode(node);
496 if (!TupIsNull(slot))
500 * We have just reached the end of the first pass. Try to switch
503 curbatch = ExecHashJoinNewBatch(hjstate);
507 * Try to read from a temp file. Loop allows us to advance to new
510 while (curbatch <= hashtable->nbatch)
512 slot = ExecHashJoinGetSavedTuple(hjstate,
513 hashtable->outerBatchFile[curbatch - 1],
514 hjstate->hj_OuterTupleSlot);
515 if (!TupIsNull(slot))
517 curbatch = ExecHashJoinNewBatch(hjstate);
520 /* Out of batches... */
524 /* ----------------------------------------------------------------
525 * ExecHashJoinGetSavedTuple
527 * read the next tuple from a tmp file
528 * ----------------------------------------------------------------
531 static TupleTableSlot *
532 ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
534 TupleTableSlot *tupleSlot)
540 nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
542 return NULL; /* end of file */
543 if (nread != sizeof(HeapTupleData))
544 elog(ERROR, "Read from hashjoin temp file failed");
545 heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
546 memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
547 heapTuple->t_datamcxt = CurrentMemoryContext;
548 heapTuple->t_data = (HeapTupleHeader)
549 ((char *) heapTuple + HEAPTUPLESIZE);
550 nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
551 if (nread != (size_t) htup.t_len)
552 elog(ERROR, "Read from hashjoin temp file failed");
553 return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
556 /* ----------------------------------------------------------------
557 * ExecHashJoinNewBatch
559 * switch to a new hashjoin batch
560 * ----------------------------------------------------------------
563 ExecHashJoinNewBatch(HashJoinState *hjstate)
565 HashJoinTable hashtable = hjstate->hj_HashTable;
566 int nbatch = hashtable->nbatch;
567 int newbatch = hashtable->curbatch + 1;
568 long *innerBatchSize = hashtable->innerBatchSize;
569 long *outerBatchSize = hashtable->outerBatchSize;
571 TupleTableSlot *slot;
572 ExprContext *econtext;
578 * We no longer need the previous outer batch file; close it right
579 * away to free disk space.
581 BufFileClose(hashtable->outerBatchFile[newbatch - 2]);
582 hashtable->outerBatchFile[newbatch - 2] = NULL;
586 * We can skip over any batches that are empty on either side. Release
587 * associated temp files right away.
589 while (newbatch <= nbatch &&
590 (innerBatchSize[newbatch - 1] == 0L ||
591 outerBatchSize[newbatch - 1] == 0L))
593 BufFileClose(hashtable->innerBatchFile[newbatch - 1]);
594 hashtable->innerBatchFile[newbatch - 1] = NULL;
595 BufFileClose(hashtable->outerBatchFile[newbatch - 1]);
596 hashtable->outerBatchFile[newbatch - 1] = NULL;
600 if (newbatch > nbatch)
601 return newbatch; /* no more batches */
604 * Rewind inner and outer batch files for this batch, so that we can
605 * start reading them.
607 if (BufFileSeek(hashtable->outerBatchFile[newbatch - 1], 0, 0L, SEEK_SET))
608 elog(ERROR, "Failed to rewind hash temp file");
610 innerFile = hashtable->innerBatchFile[newbatch - 1];
612 if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
613 elog(ERROR, "Failed to rewind hash temp file");
616 * Reload the hash table with the new inner batch
618 ExecHashTableReset(hashtable, innerBatchSize[newbatch - 1]);
620 econtext = hjstate->js.ps.ps_ExprContext;
621 innerhashkeys = hjstate->hj_InnerHashKeys;
623 while ((slot = ExecHashJoinGetSavedTuple(hjstate,
625 hjstate->hj_HashTupleSlot))
628 econtext->ecxt_innertuple = slot;
629 ExecHashTableInsert(hashtable, econtext, innerhashkeys);
633 * after we build the hash table, the inner batch file is no longer
636 BufFileClose(innerFile);
637 hashtable->innerBatchFile[newbatch - 1] = NULL;
639 hashtable->curbatch = newbatch;
643 /* ----------------------------------------------------------------
644 * ExecHashJoinGetBatch
646 * determine the batch number for a bucketno
647 * +----------------+-------+-------+ ... +-------+
648 * 0 nbuckets totalbuckets
650 * ----------------------------------------------------------------
653 ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable)
657 if (bucketno < hashtable->nbuckets || hashtable->nbatch == 0)
660 b = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
661 (hashtable->totalbuckets - hashtable->nbuckets);
665 /* ----------------------------------------------------------------
666 * ExecHashJoinSaveTuple
668 * save a tuple to a tmp file.
670 * The data recorded in the file for each tuple is an image of its
671 * HeapTupleData (with meaningless t_data pointer) followed by the
672 * HeapTupleHeader and tuple data.
673 * ----------------------------------------------------------------
677 ExecHashJoinSaveTuple(HeapTuple heapTuple,
682 written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
683 if (written != sizeof(HeapTupleData))
684 elog(ERROR, "Write to hashjoin temp file failed");
685 written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
686 if (written != (size_t) heapTuple->t_len)
687 elog(ERROR, "Write to hashjoin temp file failed");
691 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
693 if (!node->hj_hashdone)
696 node->hj_hashdone = false;
699 * Unfortunately, currently we have to destroy hashtable in all
702 if (node->hj_HashTable)
704 ExecHashTableDestroy(node->hj_HashTable);
705 node->hj_HashTable = NULL;
708 node->hj_CurBucketNo = 0;
709 node->hj_CurTuple = (HashJoinTuple) NULL;
711 node->js.ps.ps_OuterTupleSlot = (TupleTableSlot *) NULL;
712 node->js.ps.ps_TupFromTlist = false;
713 node->hj_NeedNewOuter = true;
714 node->hj_MatchedOuter = false;
717 * if chgParam of subnodes is not null then plans will be re-scanned
718 * by first ExecProcNode.
720 if (((PlanState *) node)->lefttree->chgParam == NULL)
721 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
722 if (((PlanState *) node)->righttree->chgParam == NULL)
723 ExecReScan(((PlanState *) node)->righttree, exprCtxt);