and batch files. Should reduce memory and I/O demands for such joins.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.95 2006/06/27 02:51:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/execTuples.c,v 1.96 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
return ExecMaterializeSlot(slot);
}
+/* --------------------------------
+ * ExecFetchSlotMinimalTuple
+ * Fetch the slot's minimal physical tuple.
+ *
+ * If the slot contains a virtual tuple, we convert it to minimal
+ * physical form. The slot retains ownership of the physical tuple.
+ * Likewise, if it contains a regular tuple we convert to minimal form.
+ *
+ * As above, the result must be treated as read-only.
+ *
+ * Side effect: when the slot does not already hold a minimal tuple,
+ * a minimal copy is built in the slot's own memory context (tts_mcxt)
+ * and stored back into the slot, replacing its previous contents.
+ * The returned pointer is always the slot's tts_mintuple.
+ *
+ * The slot must be non-NULL and non-empty; this is only checked by
+ * assertions.
+ * --------------------------------
+ */
+MinimalTuple
+ExecFetchSlotMinimalTuple(TupleTableSlot *slot)
+{
+ MinimalTuple newTuple;
+ MemoryContext oldContext;
+
+ /*
+ * sanity checks: the caller must hand us a real, non-empty slot
+ */
+ Assert(slot != NULL);
+ Assert(!slot->tts_isempty);
+
+ /*
+ * If we have a minimal physical tuple then just return it.
+ * (No copy is made and the slot is left untouched in this case.)
+ */
+ if (slot->tts_mintuple)
+ return slot->tts_mintuple;
+
+ /*
+ * Otherwise, build a minimal tuple, and then store it as the new slot
+ * value. (Note: tts_nvalid will be reset to zero here. There are cases
+ * in which this could be optimized but it's probably not worth worrying
+ * about.)
+ *
+ * We may be called in a context that is shorter-lived than the tuple
+ * slot, but we have to ensure that the materialized tuple will survive
+ * anyway. Hence the copy is made while temporarily switched into
+ * slot->tts_mcxt, and the caller's context is restored right after.
+ *
+ * The copy is then stored into the slot with "true" as the last
+ * argument of ExecStoreMinimalTuple (presumably the should-free flag,
+ * so the slot is responsible for releasing it -- confirm against
+ * ExecStoreMinimalTuple). We return the slot's stored pointer rather
+ * than newTuple so the result is whatever the slot now owns.
+ */
+ oldContext = MemoryContextSwitchTo(slot->tts_mcxt);
+ newTuple = ExecCopySlotMinimalTuple(slot);
+ MemoryContextSwitchTo(oldContext);
+
+ ExecStoreMinimalTuple(newTuple, slot, true);
+
+ Assert(slot->tts_mintuple);
+ return slot->tts_mintuple;
+}
+
/* --------------------------------
* ExecMaterializeSlot
* Force a slot into the "materialized" state.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.103 2006/05/30 14:01:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.104 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* We have to compute the hash value */
econtext->ecxt_innertuple = slot;
hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
- ExecHashTableInsert(hashtable, ExecFetchSlotTuple(slot), hashvalue);
+ ExecHashTableInsert(hashtable, slot, hashvalue);
}
/* must provide our own instrumentation support */
* does not allow for any palloc overhead. The manipulations of spaceUsed
* don't count palloc overhead either.
*/
- tupsize = MAXALIGN(sizeof(HashJoinTupleData)) +
- MAXALIGN(sizeof(HeapTupleHeaderData)) +
+ tupsize = HJTUPLE_OVERHEAD +
+ MAXALIGN(sizeof(MinimalTupleData)) +
MAXALIGN(tupwidth);
inner_rel_bytes = ntuples * tupsize;
{
/* dump it out */
Assert(batchno > curbatch);
- ExecHashJoinSaveTuple(&tuple->htup, tuple->hashvalue,
+ ExecHashJoinSaveTuple(HJTUPLE_MINTUPLE(tuple),
+ tuple->hashvalue,
&hashtable->innerBatchFile[batchno]);
/* and remove from hash table */
if (prevtuple)
hashtable->buckets[i] = nexttuple;
/* prevtuple doesn't change */
hashtable->spaceUsed -=
- MAXALIGN(sizeof(HashJoinTupleData)) + tuple->htup.t_len;
+ HJTUPLE_OVERHEAD + HJTUPLE_MINTUPLE(tuple)->t_len;
pfree(tuple);
nfreed++;
}
* ExecHashTableInsert
* insert a tuple into the hash table depending on the hash value
* it may just go to a temp file for later batches
+ *
+ * Note: the passed TupleTableSlot may contain a regular, minimal, or virtual
+ * tuple; the minimal case in particular is certain to happen while reloading
+ * tuples from batch files. We could save some cycles in the regular-tuple
+ * case by not forcing the slot contents into minimal form; not clear if it's
+ * worth the messiness required.
*/
void
ExecHashTableInsert(HashJoinTable hashtable,
- HeapTuple tuple,
+ TupleTableSlot *slot,
uint32 hashvalue)
{
+ MinimalTuple tuple = ExecFetchSlotMinimalTuple(slot);
int bucketno;
int batchno;
HashJoinTuple hashTuple;
int hashTupleSize;
- hashTupleSize = MAXALIGN(sizeof(HashJoinTupleData)) + tuple->t_len;
+ hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
hashTupleSize);
hashTuple->hashvalue = hashvalue;
- memcpy((char *) &hashTuple->htup,
- (char *) tuple,
- sizeof(hashTuple->htup));
- hashTuple->htup.t_data = (HeapTupleHeader)
- (((char *) hashTuple) + MAXALIGN(sizeof(HashJoinTupleData)));
- memcpy((char *) hashTuple->htup.t_data,
- (char *) tuple->t_data,
- tuple->t_len);
+ memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
hashTuple->next = hashtable->buckets[bucketno];
hashtable->buckets[bucketno] = hashTuple;
hashtable->spaceUsed += hashTupleSize;
* put the tuple into a temp file for later batches
*/
Assert(batchno > hashtable->curbatch);
- ExecHashJoinSaveTuple(tuple, hashvalue,
+ ExecHashJoinSaveTuple(tuple,
+ hashvalue,
&hashtable->innerBatchFile[batchno]);
}
}
*
* The current outer tuple must be stored in econtext->ecxt_outertuple.
*/
-HeapTuple
+HashJoinTuple
ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext)
{
{
if (hashTuple->hashvalue == hashvalue)
{
- HeapTuple heapTuple = &hashTuple->htup;
TupleTableSlot *inntuple;
/* insert hashtable's tuple into exec slot so ExecQual sees it */
- inntuple = ExecStoreTuple(heapTuple,
- hjstate->hj_HashTupleSlot,
- InvalidBuffer,
- false); /* do not pfree */
+ inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
+ hjstate->hj_HashTupleSlot,
+ false); /* do not pfree */
econtext->ecxt_innertuple = inntuple;
/* reset temp memory each time to avoid leaks from qual expr */
if (ExecQual(hjclauses, econtext, false))
{
hjstate->hj_CurTuple = hashTuple;
- return heapTuple;
+ return hashTuple;
}
}
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.82 2006/06/16 18:42:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.83 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
ExprContext *econtext;
ExprDoneCond isDone;
HashJoinTable hashtable;
- HeapTuple curtuple;
+ HashJoinTuple curtuple;
TupleTableSlot *outerTupleSlot;
uint32 hashvalue;
int batchno;
* in the corresponding outer-batch file.
*/
Assert(batchno > hashtable->curbatch);
- ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
+ ExecHashJoinSaveTuple(ExecFetchSlotMinimalTuple(outerTupleSlot),
hashvalue,
&hashtable->outerBatchFile[batchno]);
node->hj_NeedNewOuter = true;
/*
* we've got a match, but still need to test non-hashed quals
*/
- inntuple = ExecStoreTuple(curtuple,
- node->hj_HashTupleSlot,
- InvalidBuffer,
- false); /* don't pfree this tuple */
+ inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(curtuple),
+ node->hj_HashTupleSlot,
+ false); /* don't pfree */
econtext->ecxt_innertuple = inntuple;
/* reset temp memory each time to avoid leaks from qual expr */
* NOTE: some tuples may be sent to future batches. Also, it is
* possible for hashtable->nbatch to be increased here!
*/
- ExecHashTableInsert(hashtable,
- ExecFetchSlotTuple(slot),
- hashvalue);
+ ExecHashTableInsert(hashtable, slot, hashvalue);
}
/*
* save a tuple to a batch file.
*
* The data recorded in the file for each tuple is its hash value,
- * then an image of its HeapTupleData (with meaningless t_data pointer)
- * followed by the HeapTupleHeader and tuple data.
+ * then the tuple in MinimalTuple format.
*
* Note: it is important always to call this in the regular executor
* context, not in a shorter-lived context; else the temp file buffers
* will get messed up.
*/
void
-ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
+ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
BufFile **fileptr)
{
BufFile *file = *fileptr;
(errcode_for_file_access(),
errmsg("could not write to hash-join temporary file: %m")));
- written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
- if (written != sizeof(HeapTupleData))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not write to hash-join temporary file: %m")));
-
- written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
- if (written != (size_t) heapTuple->t_len)
+ written = BufFileWrite(file, (void *) tuple, tuple->t_len);
+ if (written != tuple->t_len)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to hash-join temporary file: %m")));
uint32 *hashvalue,
TupleTableSlot *tupleSlot)
{
- HeapTupleData htup;
+ uint32 header[2];
size_t nread;
- HeapTuple heapTuple;
+ MinimalTuple tuple;
- nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32));
- if (nread == 0)
- return NULL; /* end of file */
- if (nread != sizeof(uint32))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not read from hash-join temporary file: %m")));
- nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
- if (nread != sizeof(HeapTupleData))
+ /*
+ * Since both the hash value and the MinimalTuple length word are
+ * uint32, we can read them both in one BufFileRead() call without
+ * any type cheating.
+ */
+ nread = BufFileRead(file, (void *) header, sizeof(header));
+ if (nread == 0) /* end of file */
+ {
+ ExecClearTuple(tupleSlot);
+ return NULL;
+ }
+ if (nread != sizeof(header))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from hash-join temporary file: %m")));
- heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
- memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
- heapTuple->t_data = (HeapTupleHeader)
- ((char *) heapTuple + HEAPTUPLESIZE);
- nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
- if (nread != (size_t) htup.t_len)
+ *hashvalue = header[0];
+ tuple = (MinimalTuple) palloc(header[1]);
+ tuple->t_len = header[1];
+ nread = BufFileRead(file,
+ (void *) ((char *) tuple + sizeof(uint32)),
+ header[1] - sizeof(uint32));
+ if (nread != header[1] - sizeof(uint32))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not read from hash-join temporary file: %m")));
- return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
+ return ExecStoreMinimalTuple(tuple, tupleSlot, true);
}
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.38 2006/03/05 15:58:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.39 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
{
struct HashJoinTupleData *next; /* link to next tuple in same bucket */
uint32 hashvalue; /* tuple's hash code */
- HeapTupleData htup; /* tuple header */
+ /* Tuple data, in MinimalTuple format, follows on a MAXALIGN boundary */
} HashJoinTupleData;
+#define HJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData))
+#define HJTUPLE_MINTUPLE(hjtup) \
+ ((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
+
+
typedef struct HashJoinTableData
{
int nbuckets; /* # buckets in the in-memory hash table */
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.40 2006/03/05 15:58:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/nodeHash.h,v 1.41 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern HashJoinTable ExecHashTableCreate(Hash *node, List *hashOperators);
extern void ExecHashTableDestroy(HashJoinTable hashtable);
extern void ExecHashTableInsert(HashJoinTable hashtable,
- HeapTuple tuple,
+ TupleTableSlot *slot,
uint32 hashvalue);
extern uint32 ExecHashGetHashValue(HashJoinTable hashtable,
ExprContext *econtext,
uint32 hashvalue,
int *bucketno,
int *batchno);
-extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate,
+extern HashJoinTuple ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext);
extern void ExecHashTableReset(HashJoinTable hashtable);
extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.32 2006/03/05 15:58:56 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/executor/nodeHashjoin.h,v 1.33 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void ExecEndHashJoin(HashJoinState *node);
extern void ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt);
-extern void ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
+extern void ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue,
BufFile **fileptr);
#endif /* NODEHASHJOIN_H */
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.32 2006/06/27 02:51:40 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/executor/tuptable.h,v 1.33 2006/06/27 21:31:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern HeapTuple ExecCopySlotTuple(TupleTableSlot *slot);
extern MinimalTuple ExecCopySlotMinimalTuple(TupleTableSlot *slot);
extern HeapTuple ExecFetchSlotTuple(TupleTableSlot *slot);
+extern MinimalTuple ExecFetchSlotMinimalTuple(TupleTableSlot *slot);
extern HeapTuple ExecMaterializeSlot(TupleTableSlot *slot);
extern TupleTableSlot *ExecCopySlot(TupleTableSlot *dstslot,
TupleTableSlot *srcslot);