*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
outerNode = outerPlanState(node);
hashtable = node->hashtable;
- if (hashtable == NULL)
- elog(ERROR, "ExecHash: hash table is NULL.");
-
nbatch = hashtable->nbatch;
if (nbatch > 0)
* allocate and initialize the file arrays in hashCxt
*/
hashtable->innerBatchFile = (BufFile **)
- palloc(nbatch * sizeof(BufFile *));
+ palloc0(nbatch * sizeof(BufFile *));
hashtable->outerBatchFile = (BufFile **)
- palloc(nbatch * sizeof(BufFile *));
+ palloc0(nbatch * sizeof(BufFile *));
hashtable->innerBatchSize = (long *)
- palloc(nbatch * sizeof(long));
+ palloc0(nbatch * sizeof(long));
hashtable->outerBatchSize = (long *)
- palloc(nbatch * sizeof(long));
- for (i = 0; i < nbatch; i++)
- {
- hashtable->innerBatchFile[i] = NULL;
- hashtable->outerBatchFile[i] = NULL;
- hashtable->innerBatchSize[i] = 0;
- hashtable->outerBatchSize[i] = 0;
- }
+ palloc0(nbatch * sizeof(long));
/* The files will not be opened until later... */
}
MemoryContextSwitchTo(hashtable->batchCxt);
hashtable->buckets = (HashJoinTuple *)
- palloc(nbuckets * sizeof(HashJoinTuple));
-
- if (hashtable->buckets == NULL)
- elog(ERROR, "Insufficient memory for hash table.");
-
- for (i = 0; i < nbuckets; i++)
- hashtable->buckets[i] = NULL;
+ palloc0(nbuckets * sizeof(HashJoinTuple));
MemoryContextSwitchTo(oldcxt);
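The palloc0() substitutions above fold the removed zeroing loops, and the dead
NULL checks after palloc, into the allocation itself: palloc reports failure
via elog(ERROR) rather than returning NULL, so those checks could never fire.
A minimal sketch of the assumed palloc0 semantics, for illustration only (the
real definition lives with the memory-context code, not in this file):

    #include "postgres.h"       /* Size, palloc, elog */
    #include <string.h>         /* memset */

    /* illustrative stand-in for palloc0(); hypothetical helper name */
    static void *
    palloc0_sketch(Size size)
    {
        void   *ptr = palloc(size);  /* elog(ERROR)s on failure, never NULL */

        memset(ptr, 0, size);        /* zero-fill, as the removed loops did */
        return ptr;
    }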
* totalbuckets/nbuckets; in fact, it is the number of groups we
* will use for the part of the data that doesn't fall into the
* first nbuckets hash buckets. We try to set it to make all the
- * batches the same size. But we have to keep nbatch small
- * enough to avoid integer overflow in ExecHashJoinGetBatch().
+ * batches the same size.
*/
dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
hash_table_bytes);
- if (dtmp < INT_MAX / totalbuckets)
+ if (dtmp < INT_MAX)
nbatch = (int) dtmp;
else
- nbatch = INT_MAX / totalbuckets;
+ nbatch = INT_MAX;
if (nbatch <= 0)
nbatch = 1;
}
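Worked example of the sizing logic above, with hypothetical numbers: with
SortMem = 1024 (so hash_table_bytes = 1 MB) and inner_rel_bytes = 10 MB,
dtmp = ceil((10 MB - 1 MB) / 1 MB) = 9.0 and nbatch = 9, i.e. one memory-load
of tuples is processed in place and the remainder is spread over nine roughly
equal temp-file batches.  The old clamp to INT_MAX / totalbuckets is no longer
needed because ExecHashGetBatch() computes batch numbers with a modulo, which
cannot overflow, instead of the multiply-then-divide that the removed
ExecHashJoinGetBatch() used.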
List *hashkeys)
{
int bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
+ int batchno = ExecHashGetBatch(bucketno, hashtable);
TupleTableSlot *slot = econtext->ecxt_innertuple;
HeapTuple heapTuple = slot->val;
/*
* decide whether to put the tuple in the hash table or a tmp file
*/
- if (bucketno < hashtable->nbuckets)
+ if (batchno < 0)
{
/*
* put the tuple in hash table
hashTupleSize = MAXALIGN(sizeof(*hashTuple)) + heapTuple->t_len;
hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
hashTupleSize);
- if (hashTuple == NULL)
- elog(ERROR, "Insufficient memory for hash table.");
memcpy((char *) &hashTuple->htup,
(char *) heapTuple,
sizeof(hashTuple->htup));
else
{
/*
- * put the tuple into a tmp file for other batches
+ * put the tuple into a tmp file for later batches
*/
- int batchno = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
- (hashtable->totalbuckets - hashtable->nbuckets);
-
hashtable->innerBatchSize[batchno]++;
ExecHashJoinSaveTuple(heapTuple,
hashtable->innerBatchFile[batchno]);
return bucketno;
}
+/* ----------------------------------------------------------------
+ * ExecHashGetBatch
+ *
+ * determine the batch number for a bucketno
+ *
+ * Returns -1 if the bucket belongs to the initial (or current) batch,
+ * else 0..nbatch-1, the index of the external batch file for the bucket.
+ * ----------------------------------------------------------------
+ */
+int
+ExecHashGetBatch(int bucketno, HashJoinTable hashtable)
+{
+ if (bucketno < hashtable->nbuckets)
+ return -1;
+
+ return (bucketno - hashtable->nbuckets) % hashtable->nbatch;
+}
+
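The modulo form is what makes the overflow clamp in ExecChooseHashTableSize()
unnecessary: the old formula computed nbatch * (bucketno - nbuckets) as an
intermediate product, which can exceed INT_MAX, while a remainder is always in
[0, nbatch).  It also changes the shape of the mapping: overflow buckets are
now striped round-robin across the batches rather than assigned in contiguous
ranges.  A hedged illustration with made-up sizes:

    int     nbuckets = 1024;                /* hypothetical in-memory buckets */
    int     nbatch = 70000;                 /* hypothetical batch count */
    int     bucketno = nbuckets + 40000;    /* some overflow bucket */

    /* old: 70000 * 40000 = 2,800,000,000 overflows a signed 32-bit int */
    /* new: (bucketno - nbuckets) % nbatch = 40000, always a valid batch */
    int     batchno = (bucketno - nbuckets) % nbatch;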
/* ----------------------------------------------------------------
* ExecScanHashBucket
*
{
MemoryContext oldcxt;
int nbuckets = hashtable->nbuckets;
- int i;
/*
* Release all the hash buckets and tuples acquired in the prior pass,
/* Reallocate and reinitialize the hash bucket headers. */
hashtable->buckets = (HashJoinTuple *)
- palloc(nbuckets * sizeof(HashJoinTuple));
-
- if (hashtable->buckets == NULL)
- elog(ERROR, "Insufficient memory for hash table.");
-
- for (i = 0; i < nbuckets; i++)
- hashtable->buckets[i] = NULL;
+ palloc0(nbuckets * sizeof(HashJoinTuple));
MemoryContextSwitchTo(oldcxt);
}
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.45 2002/12/15 16:17:46 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
BufFile *file,
TupleTableSlot *tupleSlot);
-static int ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable);
static int ExecHashJoinNewBatch(HashJoinState *hjstate);
*/
if (hashtable->curbatch == 0)
{
- int batch = ExecHashJoinGetBatch(node->hj_CurBucketNo,
- hashtable);
+ int batchno = ExecHashGetBatch(node->hj_CurBucketNo,
+ hashtable);
- if (batch > 0)
+ if (batchno >= 0)
{
/*
* Need to postpone this outer tuple to a later batch.
* Save it in the corresponding outer-batch file.
*/
- int batchno = batch - 1;
-
hashtable->outerBatchSize[batchno]++;
ExecHashJoinSaveTuple(outerTupleSlot->val,
hashtable->outerBatchFile[batchno]);
return newbatch;
}
-/* ----------------------------------------------------------------
- * ExecHashJoinGetBatch
- *
- * determine the batch number for a bucketno
- *       +----------------+-------+-------+ ... +-------+
- *       0            nbuckets              totalbuckets
- * batch       0           1          2    ...
- * ----------------------------------------------------------------
- */
-static int
-ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable)
-{
- int b;
-
- if (bucketno < hashtable->nbuckets || hashtable->nbatch == 0)
- return 0;
-
- b = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
- (hashtable->totalbuckets - hashtable->nbuckets);
- return b + 1;
-}
-
/* ----------------------------------------------------------------
* ExecHashJoinSaveTuple
*
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.97 2002/12/26 23:38:42 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.98 2002/12/30 15:21:21 tgl Exp $
*
*-------------------------------------------------------------------------
*/
bool enable_hashjoin = true;
-static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
+ int nbuckets);
static bool cost_qual_eval_walker(Node *node, Cost *total);
static Selectivity approx_selectivity(Query *root, List *quals);
static void set_rel_width(Query *root, RelOptInfo *rel);
outer_path->parent->width);
double innerbytes = relation_byte_size(inner_path->parent->rows,
inner_path->parent->width);
- long hashtablebytes = SortMem * 1024L;
+ int virtualbuckets;
+ int physicalbuckets;
+ int numbatches;
Selectivity innerbucketsize;
List *hcl;
startup_cost += cpu_operator_cost * inner_path->parent->rows;
run_cost += cpu_operator_cost * outer_path->parent->rows;
+ /* Get hash table size that executor would use for inner relation */
+ ExecChooseHashTableSize(inner_path->parent->rows,
+ inner_path->parent->width,
+ &virtualbuckets,
+ &physicalbuckets,
+ &numbatches);
+
/*
* Determine bucketsize fraction for inner relation. We use the
* smallest bucketsize estimated for any individual hashclause;
if (thisbucketsize < 0)
{
/* not cached yet */
- thisbucketsize = estimate_hash_bucketsize(root, right);
+ thisbucketsize = estimate_hash_bucketsize(root, right,
+ virtualbuckets);
restrictinfo->right_bucketsize = thisbucketsize;
}
}
if (thisbucketsize < 0)
{
/* not cached yet */
- thisbucketsize = estimate_hash_bucketsize(root, left);
+ thisbucketsize = estimate_hash_bucketsize(root, left,
+ virtualbuckets);
restrictinfo->left_bucketsize = thisbucketsize;
}
}
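Worked example with made-up estimates: if a join has two hashclauses and their
cached bucketsize fractions come back as 0.10 and 0.02, the loop above leaves
innerbucketsize = 0.02, since hashing on all the clauses together should
disperse the inner relation at least as well as the best single clause does.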
* should be nice and sequential...). Writing the inner rel counts as
* startup cost, all the rest as run cost.
*/
- if (innerbytes > hashtablebytes)
+ if (numbatches)
{
double outerpages = page_size(outer_path->parent->rows,
outer_path->parent->width);
* smart enough to figure out how the restrict clauses might change the
* distribution, so this will have to do for now.
*
- * We can get the number of buckets the executor will use for the given
+ * We are passed the number of buckets the executor will use for the given
* input relation. If the data were perfectly distributed, with the same
* number of tuples going into each available bucket, then the bucketsize
* fraction would be 1/nbuckets. But this happy state of affairs will occur
* inner rel is well-dispersed (or the alternatives seem much worse).
*/
static Selectivity
-estimate_hash_bucketsize(Query *root, Var *var)
+estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
{
Oid relid;
RelOptInfo *rel;
- int virtualbuckets;
- int physicalbuckets;
- int numbatches;
HeapTuple tuple;
Form_pg_statistic stats;
double estfract,
if (rel->tuples <= 0.0 || rel->rows <= 0.0)
return 0.1; /* ensure we can divide below */
- /* Get hash table size that executor would use for this relation */
- ExecChooseHashTableSize(rel->rows, rel->width,
- &virtualbuckets,
- &physicalbuckets,
- &numbatches);
-
tuple = SearchSysCache(STATRELATT,
ObjectIdGetDatum(relid),
Int16GetDatum(var->varattno),
case ObjectIdAttributeNumber:
case SelfItemPointerAttributeNumber:
/* these are unique, so buckets should be well-distributed */
- return 1.0 / (double) virtualbuckets;
+ return 1.0 / (double) nbuckets;
case TableOidAttributeNumber:
/* hashing this is a terrible idea... */
return 1.0;
* the number of buckets is less than the expected number of distinct
* values; otherwise it is 1/ndistinct.
*/
- if (ndistinct > (double) virtualbuckets)
- estfract = 1.0 / (double) virtualbuckets;
+ if (ndistinct > (double) nbuckets)
+ estfract = 1.0 / (double) nbuckets;
else
estfract = 1.0 / ndistinct;
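Worked example with hypothetical stats: given nbuckets = 1024 virtual buckets,
a column with ndistinct = 50 yields estfract = 1/50 = 0.02 (the distinct
values, not the buckets, are the limiting factor), while a column with
ndistinct = 1,000,000 yields estfract = 1/1024, each bucket necessarily
holding many distinct values.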
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: nodeHash.h,v 1.27 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern int ExecHashGetBucket(HashJoinTable hashtable,
ExprContext *econtext,
List *hashkeys);
+extern int ExecHashGetBatch(int bucketno, HashJoinTable hashtable);
extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
ExprContext *econtext);
extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);