1 /*-------------------------------------------------------------------------
4 * Routines to handle hash join nodes
6 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.47 2003/01/20 18:54:45 tgl Exp $
13 *-------------------------------------------------------------------------
18 #include "executor/executor.h"
19 #include "executor/nodeHash.h"
20 #include "executor/nodeHashjoin.h"
21 #include "optimizer/clauses.h"
22 #include "utils/memutils.h"
25 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *node,
26 HashJoinState *hjstate);
27 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
29 TupleTableSlot *tupleSlot);
30 static int ExecHashJoinNewBatch(HashJoinState *hjstate);
33 /* ----------------------------------------------------------------
36 * This function implements the Hybrid Hashjoin algorithm.
37 * recursive partitioning remains to be added.
38 * Note: the relation we build hash table on is the inner
39 * the other one is outer.
40 * ----------------------------------------------------------------
42 TupleTableSlot * /* return: a tuple or NULL */
43 ExecHashJoin(HashJoinState *node)
53 TupleTableSlot *inntuple;
54 ExprContext *econtext;
56 HashJoinTable hashtable;
58 TupleTableSlot *outerTupleSlot;
59 TupleTableSlot *innerTupleSlot;
64 * get information from HashJoin node
66 hjclauses = node->hashclauses;
67 estate = node->js.ps.state;
68 joinqual = node->js.joinqual;
69 otherqual = node->js.ps.qual;
70 hashNode = (HashState *) innerPlanState(node);
71 outerNode = outerPlanState(node);
72 hashPhaseDone = node->hj_hashdone;
73 dir = estate->es_direction;
76 * get information from HashJoin state
78 hashtable = node->hj_HashTable;
79 outerkeys = node->hj_OuterHashKeys;
80 econtext = node->js.ps.ps_ExprContext;
83 * Check to see if we're still projecting out tuples from a previous
84 * join tuple (because there is a function-returning-set in the
85 * projection expressions). If so, try to project another one.
87 if (node->js.ps.ps_TupFromTlist)
89 TupleTableSlot *result;
91 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
92 if (isDone == ExprMultipleResult)
94 /* Done with that source tuple... */
95 node->js.ps.ps_TupFromTlist = false;
99 * If we're doing an IN join, we want to return at most one row per
100 * outer tuple; so we can stop scanning the inner scan if we matched on
103 if (node->js.jointype == JOIN_IN &&
104 node->hj_MatchedOuter)
105 node->hj_NeedNewOuter = true;
108 * Reset per-tuple memory context to free any expression evaluation
109 * storage allocated in the previous tuple cycle. Note this can't
110 * happen until we're done projecting out tuples from a join tuple.
112 ResetExprContext(econtext);
115 * if this is the first call, build the hash table for inner relation
118 { /* if the hash phase not completed */
119 if (hashtable == NULL)
120 { /* if the hash table has not been created */
123 * create the hash table
125 hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
126 node->hj_HashTable = hashtable;
129 * execute the Hash node, to build the hash table
131 hashNode->hashtable = hashtable;
132 innerTupleSlot = ExecProcNode((PlanState *) hashNode);
134 node->hj_hashdone = true;
137 * Open temp files for outer batches, if needed. Note that file
138 * buffers are palloc'd in regular executor context.
140 for (i = 0; i < hashtable->nbatch; i++)
141 hashtable->outerBatchFile[i] = BufFileCreateTemp();
143 else if (hashtable == NULL)
147 * Now get an outer tuple and probe into the hash table for matches
149 outerTupleSlot = node->js.ps.ps_OuterTupleSlot;
154 * If we don't have an outer tuple, get the next one
156 if (node->hj_NeedNewOuter)
158 outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
160 if (TupIsNull(outerTupleSlot))
163 * when the last batch runs out, clean up and exit
165 ExecHashTableDestroy(hashtable);
166 node->hj_HashTable = NULL;
170 node->js.ps.ps_OuterTupleSlot = outerTupleSlot;
171 econtext->ecxt_outertuple = outerTupleSlot;
172 node->hj_NeedNewOuter = false;
173 node->hj_MatchedOuter = false;
176 * now we have an outer tuple, find the corresponding bucket
177 * for this tuple from the hash table
179 node->hj_CurBucketNo = ExecHashGetBucket(hashtable, econtext,
181 node->hj_CurTuple = NULL;
184 * Now we've got an outer tuple and the corresponding hash
185 * bucket, but this tuple may not belong to the current batch.
186 * This need only be checked in the first pass.
188 if (hashtable->curbatch == 0)
190 int batchno = ExecHashGetBatch(node->hj_CurBucketNo,
196 * Need to postpone this outer tuple to a later batch.
197 * Save it in the corresponding outer-batch file.
199 hashtable->outerBatchSize[batchno]++;
200 ExecHashJoinSaveTuple(outerTupleSlot->val,
201 hashtable->outerBatchFile[batchno]);
202 node->hj_NeedNewOuter = true;
203 continue; /* loop around for a new outer tuple */
209 * OK, scan the selected hash bucket for matches
213 curtuple = ExecScanHashBucket(node,
216 if (curtuple == NULL)
217 break; /* out of matches */
220 * we've got a match, but still need to test non-hashed quals
222 inntuple = ExecStoreTuple(curtuple,
223 node->hj_HashTupleSlot,
225 false); /* don't pfree this tuple */
226 econtext->ecxt_innertuple = inntuple;
228 /* reset temp memory each time to avoid leaks from qual expr */
229 ResetExprContext(econtext);
232 * if we pass the qual, then save state for next call and have
233 * ExecProject form the projection, store it in the tuple
234 * table, and return the slot.
236 * Only the joinquals determine MatchedOuter status, but all
237 * quals must pass to actually return the tuple.
239 if (ExecQual(joinqual, econtext, false))
241 node->hj_MatchedOuter = true;
243 if (otherqual == NIL || ExecQual(otherqual, econtext, false))
245 TupleTableSlot *result;
247 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
249 if (isDone != ExprEndResult)
251 node->js.ps.ps_TupFromTlist =
252 (isDone == ExprMultipleResult);
260 * Now the current outer tuple has run out of matches, so check
261 * whether to emit a dummy outer-join tuple. If not, loop around
262 * to get a new outer tuple.
264 node->hj_NeedNewOuter = true;
266 if (!node->hj_MatchedOuter &&
267 node->js.jointype == JOIN_LEFT)
270 * We are doing an outer join and there were no join matches
271 * for this outer tuple. Generate a fake join tuple with
272 * nulls for the inner tuple, and return it if it passes the
275 econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
277 if (ExecQual(otherqual, econtext, false))
280 * qualification was satisfied so we project and return
281 * the slot containing the result tuple using
284 TupleTableSlot *result;
286 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
288 if (isDone != ExprEndResult)
290 node->js.ps.ps_TupFromTlist =
291 (isDone == ExprMultipleResult);
299 /* ----------------------------------------------------------------
302 * Init routine for HashJoin node.
303 * ----------------------------------------------------------------
306 ExecInitHashJoin(HashJoin *node, EState *estate)
308 HashJoinState *hjstate;
315 * create state structure
317 hjstate = makeNode(HashJoinState);
318 hjstate->js.ps.plan = (Plan *) node;
319 hjstate->js.ps.state = estate;
322 * Miscellaneous initialization
324 * create expression context for node
326 ExecAssignExprContext(estate, &hjstate->js.ps);
329 * initialize child expressions
331 hjstate->js.ps.targetlist = (List *)
332 ExecInitExpr((Expr *) node->join.plan.targetlist,
333 (PlanState *) hjstate);
334 hjstate->js.ps.qual = (List *)
335 ExecInitExpr((Expr *) node->join.plan.qual,
336 (PlanState *) hjstate);
337 hjstate->js.jointype = node->join.jointype;
338 hjstate->js.joinqual = (List *)
339 ExecInitExpr((Expr *) node->join.joinqual,
340 (PlanState *) hjstate);
341 hjstate->hashclauses = (List *)
342 ExecInitExpr((Expr *) node->hashclauses,
343 (PlanState *) hjstate);
346 * initialize child nodes
348 outerNode = outerPlan(node);
349 hashNode = (Hash *) innerPlan(node);
351 outerPlanState(hjstate) = ExecInitNode(outerNode, estate);
352 innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate);
354 #define HASHJOIN_NSLOTS 3
357 * tuple table initialization
359 ExecInitResultTupleSlot(estate, &hjstate->js.ps);
360 hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
362 switch (node->join.jointype)
368 hjstate->hj_NullInnerTupleSlot =
369 ExecInitNullTupleSlot(estate,
370 ExecGetTupType(innerPlanState(hjstate)));
373 elog(ERROR, "ExecInitHashJoin: unsupported join type %d",
374 (int) node->join.jointype);
378 * now for some voodoo. our temporary tuple slot is actually the
379 * result tuple slot of the Hash node (which is our inner plan). we
380 * do this because Hash nodes don't return tuples via ExecProcNode()
381 * -- instead the hash join node uses ExecScanHashBucket() to get at
382 * the contents of the hash table. -cim 6/9/91
385 HashState *hashstate = (HashState *) innerPlanState(hjstate);
386 TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
388 hjstate->hj_HashTupleSlot = slot;
392 * initialize tuple type and projection info
394 ExecAssignResultTypeFromTL(&hjstate->js.ps);
395 ExecAssignProjectionInfo(&hjstate->js.ps);
397 ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
398 ExecGetTupType(outerPlanState(hjstate)),
402 * initialize hash-specific info
405 hjstate->hj_hashdone = false;
407 hjstate->hj_HashTable = (HashJoinTable) NULL;
408 hjstate->hj_CurBucketNo = 0;
409 hjstate->hj_CurTuple = (HashJoinTuple) NULL;
412 * The planner already made a list of the inner hashkeys for us,
413 * but we also need a list of the outer hashkeys. Each list of
414 * exprs must then be prepared for execution.
416 hjstate->hj_InnerHashKeys = (List *)
417 ExecInitExpr((Expr *) hashNode->hashkeys,
418 innerPlanState(hjstate));
419 ((HashState *) innerPlanState(hjstate))->hashkeys =
420 hjstate->hj_InnerHashKeys;
423 foreach(hcl, node->hashclauses)
425 hclauses = lappend(hclauses, get_leftop(lfirst(hcl)));
427 hjstate->hj_OuterHashKeys = (List *)
428 ExecInitExpr((Expr *) hclauses,
429 (PlanState *) hjstate);
431 hjstate->js.ps.ps_OuterTupleSlot = NULL;
432 hjstate->js.ps.ps_TupFromTlist = false;
433 hjstate->hj_NeedNewOuter = true;
434 hjstate->hj_MatchedOuter = false;
440 ExecCountSlotsHashJoin(HashJoin *node)
442 return ExecCountSlotsNode(outerPlan(node)) +
443 ExecCountSlotsNode(innerPlan(node)) +
447 /* ----------------------------------------------------------------
450 * clean up routine for HashJoin node
451 * ----------------------------------------------------------------
454 ExecEndHashJoin(HashJoinState *node)
457 * free hash table in case we end plan before all tuples are retrieved
459 if (node->hj_HashTable)
461 ExecHashTableDestroy(node->hj_HashTable);
462 node->hj_HashTable = NULL;
466 * Free the exprcontext
468 ExecFreeExprContext(&node->js.ps);
471 * clean out the tuple table
473 ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
474 ExecClearTuple(node->hj_OuterTupleSlot);
475 ExecClearTuple(node->hj_HashTupleSlot);
480 ExecEndNode(outerPlanState(node));
481 ExecEndNode(innerPlanState(node));
484 /* ----------------------------------------------------------------
485 * ExecHashJoinOuterGetTuple
487 * get the next outer tuple for hashjoin: either by
488 * executing a plan node as in the first pass, or from
489 * the tmp files for the hashjoin batches.
490 * ----------------------------------------------------------------
493 static TupleTableSlot *
494 ExecHashJoinOuterGetTuple(PlanState *node, HashJoinState *hjstate)
496 HashJoinTable hashtable = hjstate->hj_HashTable;
497 int curbatch = hashtable->curbatch;
498 TupleTableSlot *slot;
501 { /* if it is the first pass */
502 slot = ExecProcNode(node);
503 if (!TupIsNull(slot))
507 * We have just reached the end of the first pass. Try to switch
510 curbatch = ExecHashJoinNewBatch(hjstate);
514 * Try to read from a temp file. Loop allows us to advance to new
517 while (curbatch <= hashtable->nbatch)
519 slot = ExecHashJoinGetSavedTuple(hjstate,
520 hashtable->outerBatchFile[curbatch - 1],
521 hjstate->hj_OuterTupleSlot);
522 if (!TupIsNull(slot))
524 curbatch = ExecHashJoinNewBatch(hjstate);
527 /* Out of batches... */
531 /* ----------------------------------------------------------------
532 * ExecHashJoinGetSavedTuple
534 * read the next tuple from a tmp file
535 * ----------------------------------------------------------------
538 static TupleTableSlot *
539 ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
541 TupleTableSlot *tupleSlot)
547 nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
549 return NULL; /* end of file */
550 if (nread != sizeof(HeapTupleData))
551 elog(ERROR, "Read from hashjoin temp file failed");
552 heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
553 memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
554 heapTuple->t_datamcxt = CurrentMemoryContext;
555 heapTuple->t_data = (HeapTupleHeader)
556 ((char *) heapTuple + HEAPTUPLESIZE);
557 nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
558 if (nread != (size_t) htup.t_len)
559 elog(ERROR, "Read from hashjoin temp file failed");
560 return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
563 /* ----------------------------------------------------------------
564 * ExecHashJoinNewBatch
566 * switch to a new hashjoin batch
567 * ----------------------------------------------------------------
570 ExecHashJoinNewBatch(HashJoinState *hjstate)
572 HashJoinTable hashtable = hjstate->hj_HashTable;
573 int nbatch = hashtable->nbatch;
574 int newbatch = hashtable->curbatch + 1;
575 long *innerBatchSize = hashtable->innerBatchSize;
576 long *outerBatchSize = hashtable->outerBatchSize;
578 TupleTableSlot *slot;
579 ExprContext *econtext;
585 * We no longer need the previous outer batch file; close it right
586 * away to free disk space.
588 BufFileClose(hashtable->outerBatchFile[newbatch - 2]);
589 hashtable->outerBatchFile[newbatch - 2] = NULL;
593 * We can skip over any batches that are empty on either side. Release
594 * associated temp files right away.
596 while (newbatch <= nbatch &&
597 (innerBatchSize[newbatch - 1] == 0L ||
598 outerBatchSize[newbatch - 1] == 0L))
600 BufFileClose(hashtable->innerBatchFile[newbatch - 1]);
601 hashtable->innerBatchFile[newbatch - 1] = NULL;
602 BufFileClose(hashtable->outerBatchFile[newbatch - 1]);
603 hashtable->outerBatchFile[newbatch - 1] = NULL;
607 if (newbatch > nbatch)
608 return newbatch; /* no more batches */
611 * Rewind inner and outer batch files for this batch, so that we can
612 * start reading them.
614 if (BufFileSeek(hashtable->outerBatchFile[newbatch - 1], 0, 0L, SEEK_SET))
615 elog(ERROR, "Failed to rewind hash temp file");
617 innerFile = hashtable->innerBatchFile[newbatch - 1];
619 if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
620 elog(ERROR, "Failed to rewind hash temp file");
623 * Reload the hash table with the new inner batch
625 ExecHashTableReset(hashtable, innerBatchSize[newbatch - 1]);
627 econtext = hjstate->js.ps.ps_ExprContext;
628 innerhashkeys = hjstate->hj_InnerHashKeys;
630 while ((slot = ExecHashJoinGetSavedTuple(hjstate,
632 hjstate->hj_HashTupleSlot))
635 econtext->ecxt_innertuple = slot;
636 ExecHashTableInsert(hashtable, econtext, innerhashkeys);
640 * after we build the hash table, the inner batch file is no longer
643 BufFileClose(innerFile);
644 hashtable->innerBatchFile[newbatch - 1] = NULL;
646 hashtable->curbatch = newbatch;
650 /* ----------------------------------------------------------------
651 * ExecHashJoinSaveTuple
653 * save a tuple to a tmp file.
655 * The data recorded in the file for each tuple is an image of its
656 * HeapTupleData (with meaningless t_data pointer) followed by the
657 * HeapTupleHeader and tuple data.
658 * ----------------------------------------------------------------
662 ExecHashJoinSaveTuple(HeapTuple heapTuple,
667 written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
668 if (written != sizeof(HeapTupleData))
669 elog(ERROR, "Write to hashjoin temp file failed");
670 written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
671 if (written != (size_t) heapTuple->t_len)
672 elog(ERROR, "Write to hashjoin temp file failed");
676 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
678 if (!node->hj_hashdone)
681 node->hj_hashdone = false;
684 * Unfortunately, currently we have to destroy hashtable in all
687 if (node->hj_HashTable)
689 ExecHashTableDestroy(node->hj_HashTable);
690 node->hj_HashTable = NULL;
693 node->hj_CurBucketNo = 0;
694 node->hj_CurTuple = (HashJoinTuple) NULL;
696 node->js.ps.ps_OuterTupleSlot = (TupleTableSlot *) NULL;
697 node->js.ps.ps_TupFromTlist = false;
698 node->hj_NeedNewOuter = true;
699 node->hj_MatchedOuter = false;
702 * if chgParam of subnodes is not null then plans will be re-scanned
703 * by first ExecProcNode.
705 if (((PlanState *) node)->lefttree->chgParam == NULL)
706 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
707 if (((PlanState *) node)->righttree->chgParam == NULL)
708 ExecReScan(((PlanState *) node)->righttree, exprCtxt);