author     Tom Lane <tgl@sss.pgh.pa.us>    Mon, 30 Dec 2002 15:21:23 +0000 (15:21 +0000)
committer  Tom Lane <tgl@sss.pgh.pa.us>    Mon, 30 Dec 2002 15:21:23 +0000 (15:21 +0000)

Better solution to integer overflow problem in hash batch-number
computation: reduce the bucket number mod nbatch.  This changes the
association between original bucket numbers and batches, but that
doesn't matter.  Minor other cleanups in hashjoin code to help
centralize decisions.

src/backend/executor/nodeHash.c
src/backend/executor/nodeHashjoin.c
src/backend/optimizer/path/costsize.c
src/include/executor/nodeHash.h
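
For illustration, here is a minimal standalone sketch (not part of the
commit) contrasting the removed division-based mapping in
ExecHashJoinGetBatch with the new mod-based ExecHashGetBatch that this
commit centralizes in nodeHash.c; the driver values and the names
oldGetBatch/newGetBatch are hypothetical, chosen only to trip the old
formula's int multiply:

/* overflow_demo.c -- sketch only, not PostgreSQL source */
#include <stdio.h>

/*
 * Old mapping: spread buckets [nbuckets, totalbuckets) evenly over batches
 * 1..nbatch, with 0 meaning the in-memory batch.  The int multiply forced
 * ExecChooseHashTableSize to cap nbatch at INT_MAX / totalbuckets.
 */
static int
oldGetBatch(int bucketno, int nbuckets, int totalbuckets, int nbatch)
{
	if (bucketno < nbuckets || nbatch == 0)
		return 0;
	return (nbatch * (bucketno - nbuckets)) /	/* can overflow int */
		(totalbuckets - nbuckets) + 1;
}

/*
 * New mapping: reduce the bucket number mod nbatch; -1 means the in-memory
 * batch, else 0..nbatch-1 is the batch file number.  No multiply, so no
 * overflow and no cap on nbatch.  The bucket-to-batch association changes,
 * but any fixed mapping works equally well.
 */
static int
newGetBatch(int bucketno, int nbuckets, int nbatch)
{
	if (bucketno < nbuckets)
		return -1;
	return (bucketno - nbuckets) % nbatch;
}

int
main(void)
{
	int			nbuckets = 1024;		/* hypothetical sizes */
	int			totalbuckets = 1 << 20;
	int			nbatch = 1 << 16;		/* well past the old cap */
	int			bucketno = totalbuckets - 1;

	/* old: 65536 * 1047551 is about 2^36, signed int overflow (UB) */
	printf("old: %d\n", oldGetBatch(bucketno, nbuckets, totalbuckets, nbatch));
	/* new: well-defined result in 0..nbatch-1 (64511 here) */
	printf("new: %d\n", newGetBatch(bucketno, nbuckets, nbatch));
	return 0;
}

Dropping the multiply is what lets ExecChooseHashTableSize (first nodeHash.c
hunk below) relax the nbatch cap from INT_MAX / totalbuckets to INT_MAX.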

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 9e0e4f0b8cbe0fd1abdc5080308a662f6e7fcd12..bea89630993d372a4bc553a07e001e2aafcebb28 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.73 2002/12/30 15:21:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,9 +59,6 @@ ExecHash(HashState *node)
        outerNode = outerPlanState(node);
 
        hashtable = node->hashtable;
-       if (hashtable == NULL)
-               elog(ERROR, "ExecHash: hash table is NULL.");
-
        nbatch = hashtable->nbatch;
 
        if (nbatch > 0)
@@ -284,20 +281,13 @@ ExecHashTableCreate(Hash *node)
                 * allocate and initialize the file arrays in hashCxt
                 */
                hashtable->innerBatchFile = (BufFile **)
-                       palloc(nbatch * sizeof(BufFile *));
+                       palloc0(nbatch * sizeof(BufFile *));
                hashtable->outerBatchFile = (BufFile **)
-                       palloc(nbatch * sizeof(BufFile *));
+                       palloc0(nbatch * sizeof(BufFile *));
                hashtable->innerBatchSize = (long *)
-                       palloc(nbatch * sizeof(long));
+                       palloc0(nbatch * sizeof(long));
                hashtable->outerBatchSize = (long *)
-                       palloc(nbatch * sizeof(long));
-               for (i = 0; i < nbatch; i++)
-               {
-                       hashtable->innerBatchFile[i] = NULL;
-                       hashtable->outerBatchFile[i] = NULL;
-                       hashtable->innerBatchSize[i] = 0;
-                       hashtable->outerBatchSize[i] = 0;
-               }
+                       palloc0(nbatch * sizeof(long));
                /* The files will not be opened until later... */
        }
 
@@ -308,13 +298,7 @@ ExecHashTableCreate(Hash *node)
        MemoryContextSwitchTo(hashtable->batchCxt);
 
        hashtable->buckets = (HashJoinTuple *)
-               palloc(nbuckets * sizeof(HashJoinTuple));
-
-       if (hashtable->buckets == NULL)
-               elog(ERROR, "Insufficient memory for hash table.");
-
-       for (i = 0; i < nbuckets; i++)
-               hashtable->buckets[i] = NULL;
+               palloc0(nbuckets * sizeof(HashJoinTuple));
 
        MemoryContextSwitchTo(oldcxt);
 
@@ -414,15 +398,14 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
                 * totalbuckets/nbuckets; in fact, it is the number of groups we
                 * will use for the part of the data that doesn't fall into the
                 * first nbuckets hash buckets.  We try to set it to make all the
-                * batches the same size.  But we have to keep nbatch small
-                * enough to avoid integer overflow in ExecHashJoinGetBatch().
+                * batches the same size.
                 */
                dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
                                        hash_table_bytes);
-               if (dtmp < INT_MAX / totalbuckets)
+               if (dtmp < INT_MAX)
                        nbatch = (int) dtmp;
                else
-                       nbatch = INT_MAX / totalbuckets;
+                       nbatch = INT_MAX;
                if (nbatch <= 0)
                        nbatch = 1;
        }
@@ -481,13 +464,14 @@ ExecHashTableInsert(HashJoinTable hashtable,
                                        List *hashkeys)
 {
        int                     bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
+       int                     batchno = ExecHashGetBatch(bucketno, hashtable);
        TupleTableSlot *slot = econtext->ecxt_innertuple;
        HeapTuple       heapTuple = slot->val;
 
        /*
         * decide whether to put the tuple in the hash table or a tmp file
         */
-       if (bucketno < hashtable->nbuckets)
+       if (batchno < 0)
        {
                /*
                 * put the tuple in hash table
@@ -498,8 +482,6 @@ ExecHashTableInsert(HashJoinTable hashtable,
                hashTupleSize = MAXALIGN(sizeof(*hashTuple)) + heapTuple->t_len;
                hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
                                                                                                           hashTupleSize);
-               if (hashTuple == NULL)
-                       elog(ERROR, "Insufficient memory for hash table.");
                memcpy((char *) &hashTuple->htup,
                           (char *) heapTuple,
                           sizeof(hashTuple->htup));
@@ -515,11 +497,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
        else
        {
                /*
-                * put the tuple into a tmp file for other batches
+                * put the tuple into a tmp file for later batches
                 */
-               int                     batchno = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-               (hashtable->totalbuckets - hashtable->nbuckets);
-
                hashtable->innerBatchSize[batchno]++;
                ExecHashJoinSaveTuple(heapTuple,
                                                          hashtable->innerBatchFile[batchno]);
@@ -592,6 +571,24 @@ ExecHashGetBucket(HashJoinTable hashtable,
        return bucketno;
 }
 
+/* ----------------------------------------------------------------
+ *             ExecHashGetBatch
+ *
+ *             determine the batch number for a bucketno
+ *
+ * Returns -1 if bucket belongs to initial (or current) batch,
+ * else 0..nbatch-1 corresponding to external batch file number for bucket.
+ * ----------------------------------------------------------------
+ */
+int
+ExecHashGetBatch(int bucketno, HashJoinTable hashtable)
+{
+       if (bucketno < hashtable->nbuckets)
+               return -1;
+
+       return (bucketno - hashtable->nbuckets) % hashtable->nbatch;
+}
+
 /* ----------------------------------------------------------------
  *             ExecScanHashBucket
  *
@@ -727,7 +724,6 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 {
        MemoryContext oldcxt;
        int                     nbuckets = hashtable->nbuckets;
-       int                     i;
 
        /*
         * Release all the hash buckets and tuples acquired in the prior pass,
@@ -750,13 +746,7 @@ ExecHashTableReset(HashJoinTable hashtable, long ntuples)
 
        /* Reallocate and reinitialize the hash bucket headers. */
        hashtable->buckets = (HashJoinTuple *)
-               palloc(nbuckets * sizeof(HashJoinTuple));
-
-       if (hashtable->buckets == NULL)
-               elog(ERROR, "Insufficient memory for hash table.");
-
-       for (i = 0; i < nbuckets; i++)
-               hashtable->buckets[i] = NULL;
+               palloc0(nbuckets * sizeof(HashJoinTuple));
 
        MemoryContextSwitchTo(oldcxt);
 }
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 8f899b577d7f1cb5c8001c4257f1d561ba04cd8d..48cf30c21f40c23a1f396f61a192544209218229 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.45 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.46 2002/12/30 15:21:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,7 +27,6 @@ static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *node,
 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
                                                  BufFile *file,
                                                  TupleTableSlot *tupleSlot);
-static int     ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable);
 static int     ExecHashJoinNewBatch(HashJoinState *hjstate);
 
 
@@ -179,17 +178,15 @@ ExecHashJoin(HashJoinState *node)
                         */
                        if (hashtable->curbatch == 0)
                        {
-                               int                     batch = ExecHashJoinGetBatch(node->hj_CurBucketNo,
-                                                                                                                hashtable);
+                               int                     batchno = ExecHashGetBatch(node->hj_CurBucketNo,
+                                                                                                          hashtable);
 
-                               if (batch > 0)
+                               if (batchno >= 0)
                                {
                                        /*
                                         * Need to postpone this outer tuple to a later batch.
                                         * Save it in the corresponding outer-batch file.
                                         */
-                                       int                     batchno = batch - 1;
-
                                        hashtable->outerBatchSize[batchno]++;
                                        ExecHashJoinSaveTuple(outerTupleSlot->val,
                                                                         hashtable->outerBatchFile[batchno]);
@@ -640,28 +637,6 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
        return newbatch;
 }
 
-/* ----------------------------------------------------------------
- *             ExecHashJoinGetBatch
- *
- *             determine the batch number for a bucketno
- *             +----------------+-------+-------+ ... +-------+
- *             0                         nbuckets                                               totalbuckets
- * batch                0                       1               2         ...
- * ----------------------------------------------------------------
- */
-static int
-ExecHashJoinGetBatch(int bucketno, HashJoinTable hashtable)
-{
-       int                     b;
-
-       if (bucketno < hashtable->nbuckets || hashtable->nbatch == 0)
-               return 0;
-
-       b = (hashtable->nbatch * (bucketno - hashtable->nbuckets)) /
-               (hashtable->totalbuckets - hashtable->nbuckets);
-       return b + 1;
-}
-
 /* ----------------------------------------------------------------
  *             ExecHashJoinSaveTuple
  *
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a19dd92c826bd2cff177cdea72a63079fa998ecc..29b23948dfe45a6c29e8c4413728611c616ecce9 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.97 2002/12/26 23:38:42 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.98 2002/12/30 15:21:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,7 +85,8 @@ bool          enable_mergejoin = true;
 bool           enable_hashjoin = true;
 
 
-static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
+                                                                                       int nbuckets);
 static bool cost_qual_eval_walker(Node *node, Cost *total);
 static Selectivity approx_selectivity(Query *root, List *quals);
 static void set_rel_width(Query *root, RelOptInfo *rel);
@@ -882,7 +883,9 @@ cost_hashjoin(Path *path, Query *root,
                                                                                          outer_path->parent->width);
        double          innerbytes = relation_byte_size(inner_path->parent->rows,
                                                                                          inner_path->parent->width);
-       long            hashtablebytes = SortMem * 1024L;
+       int                     virtualbuckets;
+       int                     physicalbuckets;
+       int                     numbatches;
        Selectivity innerbucketsize;
        List       *hcl;
 
@@ -898,6 +901,13 @@ cost_hashjoin(Path *path, Query *root,
        startup_cost += cpu_operator_cost * inner_path->parent->rows;
        run_cost += cpu_operator_cost * outer_path->parent->rows;
 
+       /* Get hash table size that executor would use for inner relation */
+       ExecChooseHashTableSize(inner_path->parent->rows,
+                                                       inner_path->parent->width,
+                                                       &virtualbuckets,
+                                                       &physicalbuckets,
+                                                       &numbatches);
+
        /*
         * Determine bucketsize fraction for inner relation.  We use the
         * smallest bucketsize estimated for any individual hashclause;
@@ -931,7 +941,8 @@ cost_hashjoin(Path *path, Query *root,
                        if (thisbucketsize < 0)
                        {
                                /* not cached yet */
-                               thisbucketsize = estimate_hash_bucketsize(root, right);
+                               thisbucketsize = estimate_hash_bucketsize(root, right,
+                                                                                                                 virtualbuckets);
                                restrictinfo->right_bucketsize = thisbucketsize;
                        }
                }
@@ -943,7 +954,8 @@ cost_hashjoin(Path *path, Query *root,
                        if (thisbucketsize < 0)
                        {
                                /* not cached yet */
-                               thisbucketsize = estimate_hash_bucketsize(root, left);
+                               thisbucketsize = estimate_hash_bucketsize(root, left,
+                                                                                                                 virtualbuckets);
                                restrictinfo->left_bucketsize = thisbucketsize;
                        }
                }
@@ -982,7 +994,7 @@ cost_hashjoin(Path *path, Query *root,
         * should be nice and sequential...).  Writing the inner rel counts as
         * startup cost, all the rest as run cost.
         */
-       if (innerbytes > hashtablebytes)
+       if (numbatches)
        {
                double          outerpages = page_size(outer_path->parent->rows,
                                                                                   outer_path->parent->width);
@@ -1019,7 +1031,7 @@ cost_hashjoin(Path *path, Query *root,
  * smart enough to figure out how the restrict clauses might change the
  * distribution, so this will have to do for now.
  *
- * We can get the number of buckets the executor will use for the given
+ * We are passed the number of buckets the executor will use for the given
  * input relation.     If the data were perfectly distributed, with the same
  * number of tuples going into each available bucket, then the bucketsize
  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
@@ -1039,13 +1051,10 @@ cost_hashjoin(Path *path, Query *root,
  * inner rel is well-dispersed (or the alternatives seem much worse).
  */
 static Selectivity
-estimate_hash_bucketsize(Query *root, Var *var)
+estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
 {
        Oid                     relid;
        RelOptInfo *rel;
-       int                     virtualbuckets;
-       int                     physicalbuckets;
-       int                     numbatches;
        HeapTuple       tuple;
        Form_pg_statistic stats;
        double          estfract,
@@ -1071,12 +1080,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
        if (rel->tuples <= 0.0 || rel->rows <= 0.0)
                return 0.1;                             /* ensure we can divide below */
 
-       /* Get hash table size that executor would use for this relation */
-       ExecChooseHashTableSize(rel->rows, rel->width,
-                                                       &virtualbuckets,
-                                                       &physicalbuckets,
-                                                       &numbatches);
-
        tuple = SearchSysCache(STATRELATT,
                                                   ObjectIdGetDatum(relid),
                                                   Int16GetDatum(var->varattno),
@@ -1093,7 +1096,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
                        case ObjectIdAttributeNumber:
                        case SelfItemPointerAttributeNumber:
                                /* these are unique, so buckets should be well-distributed */
-                               return 1.0 / (double) virtualbuckets;
+                               return 1.0 / (double) nbuckets;
                        case TableOidAttributeNumber:
                                /* hashing this is a terrible idea... */
                                return 1.0;
@@ -1134,8 +1137,8 @@ estimate_hash_bucketsize(Query *root, Var *var)
         * the number of buckets is less than the expected number of distinct
         * values; otherwise it is 1/ndistinct.
         */
-       if (ndistinct > (double) virtualbuckets)
-               estfract = 1.0 / (double) virtualbuckets;
+       if (ndistinct > (double) nbuckets)
+               estfract = 1.0 / (double) nbuckets;
        else
                estfract = 1.0 / ndistinct;
 
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index c30073ec8cfe49e91858c336594201d50602f070..02e56355263b66272a814d93bc2877a976d7f2b2 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodeHash.h,v 1.27 2002/12/05 15:50:37 tgl Exp $
+ * $Id: nodeHash.h,v 1.28 2002/12/30 15:21:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -30,6 +30,7 @@ extern void ExecHashTableInsert(HashJoinTable hashtable,
 extern int ExecHashGetBucket(HashJoinTable hashtable,
                                  ExprContext *econtext,
                                  List *hashkeys);
+extern int ExecHashGetBatch(int bucketno, HashJoinTable hashtable);
 extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
                                   ExprContext *econtext);
 extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);