Make planner compute the number of hash buckets the same way that nodeHash.c will compute it (by sharing code).

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 11 Jun 2001 00:17:08 +0000 (00:17 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 11 Jun 2001 00:17:08 +0000 (00:17 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 11 Jun 2001 00:17:08 +0000 (00:17 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 11 Jun 2001 00:17:08 +0000 (00:17 +0000)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index b8fed0304f508d319f8ef40162f0f390a7d452cd..b8cee44fbdf4d4630845b1ceb6d3b50ea87f0e89 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
- *     $Id: nodeHash.c,v 1.57 2001/05/27 20:42:18 tgl Exp $
+ *     $Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,14 +16,12 @@
   *             ExecHash                - generate an in-memory hash table of the relation
   *             ExecInitHash    - initialize node and subnodes
   *             ExecEndHash             - shutdown node and subnodes
- *
   */
+#include "postgres.h"
  
  #include <sys/types.h>
  #include <math.h>
  
-#include "postgres.h"
-
  #include "executor/execdebug.h"
  #include "executor/nodeHash.h"
  #include "executor/nodeHashjoin.h"
@@ -209,111 +207,27 @@ ExecEndHash(Hash *node)
   *             create a hashtable in shared memory for hashjoin.
   * ----------------------------------------------------------------
   */
-#define FUDGE_FAC                              2.0
-
  HashJoinTable
  ExecHashTableCreate(Hash *node)
  {
-       Plan       *outerNode;
-       double          ntuples;
-       int                     tupsize;
-       double          inner_rel_bytes;
-       double          hash_table_bytes;
-       int                     nbatch;
         HashJoinTable hashtable;
-       int                     nbuckets;
+       Plan       *outerNode;
         int                     totalbuckets;
-       int                     bucketsize;
+       int                     nbuckets;
+       int                     nbatch;
         int                     i;
         MemoryContext oldcxt;
  
         /*
          * Get information about the size of the relation to be hashed (it's
          * the "outer" subtree of this node, but the inner relation of the
-        * hashjoin).
-        *
-        * Caution: this is only the planner's estimates, and so can't be trusted
-        * too far.  Apply a healthy fudge factor.
+        * hashjoin).  Compute the appropriate size of the hash table.
          */
         outerNode = outerPlan(node);
-       ntuples = outerNode->plan_rows;
-       if (ntuples <= 0.0)                     /* force a plausible size if no info */
-               ntuples = 1000.0;
-
-       /*
-        * estimate tupsize based on footprint of tuple in hashtable... but
-        * what about palloc overhead?
-        */
-       tupsize = MAXALIGN(outerNode->plan_width) +
-               MAXALIGN(sizeof(HashJoinTupleData));
-       inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
-
-       /*
-        * Target hashtable size is SortMem kilobytes, but not less than
-        * sqrt(estimated inner rel size), so as to avoid horrible
-        * performance.
-        */
-       hash_table_bytes = sqrt(inner_rel_bytes);
-       if (hash_table_bytes < (SortMem * 1024L))
-               hash_table_bytes = SortMem * 1024L;
-
-       /*
-        * Count the number of hash buckets we want for the whole relation,
-        * for an average bucket load of NTUP_PER_BUCKET (per virtual
-        * bucket!).
-        */
-       totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
-
-       /*
-        * Count the number of buckets we think will actually fit in the
-        * target memory size, at a loading of NTUP_PER_BUCKET (physical
-        * buckets). NOTE: FUDGE_FAC here determines the fraction of the
-        * hashtable space reserved to allow for nonuniform distribution of
-        * hash values. Perhaps this should be a different number from the
-        * other uses of FUDGE_FAC, but since we have no real good way to pick
-        * either one...
-        */
-       bucketsize = NTUP_PER_BUCKET * tupsize;
-       nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
-       if (nbuckets <= 0)
-               nbuckets = 1;
  
-       if (totalbuckets <= nbuckets)
-       {
+       ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
+                                                       &totalbuckets, &nbuckets, &nbatch);
  
-               /*
-                * We have enough space, so no batching.  In theory we could even
-                * reduce nbuckets, but since that could lead to poor behavior if
-                * estimated ntuples is much less than reality, it seems better to
-                * make more buckets instead of fewer.
-                */
-               totalbuckets = nbuckets;
-               nbatch = 0;
-       }
-       else
-       {
-
-               /*
-                * Need to batch; compute how many batches we want to use. Note
-                * that nbatch doesn't have to have anything to do with the ratio
-                * totalbuckets/nbuckets; in fact, it is the number of groups we
-                * will use for the part of the data that doesn't fall into the
-                * first nbuckets hash buckets.
-                */
-               nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-                                                       hash_table_bytes);
-               if (nbatch <= 0)
-                       nbatch = 1;
-       }
-
-       /*
-        * Now, totalbuckets is the number of (virtual) hashbuckets for the
-        * whole relation, and nbuckets is the number of physical hashbuckets
-        * we will use in the first pass.  Data falling into the first
-        * nbuckets virtual hashbuckets gets handled in the first pass;
-        * everything else gets divided into nbatch batches to be processed in
-        * additional passes.
-        */
  #ifdef HJDEBUG
         printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
                    nbatch, totalbuckets, nbuckets);
@@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node)
         return hashtable;
  }
  
+
+/*
+ * Compute appropriate size for hashtable given the estimated size of the
+ * relation to be hashed (number of rows and average row width).
+ *
+ * Caution: the input is only the planner's estimates, and so can't be
+ * trusted too far.  Apply a healthy fudge factor.
+ *
+ * This is exported so that the planner's costsize.c can use it.
+ */
+
+/* Target bucket loading (tuples per bucket) */
+#define NTUP_PER_BUCKET                        10
+/* Fudge factor to allow for inaccuracy of input estimates */
+#define FUDGE_FAC                              2.0
+
+void
+ExecChooseHashTableSize(double ntuples, int tupwidth,
+                                               int *virtualbuckets,
+                                               int *physicalbuckets,
+                                               int *numbatches)
+{
+       int                     tupsize;
+       double          inner_rel_bytes;
+       double          hash_table_bytes;
+       int                     nbatch;
+       int                     nbuckets;
+       int                     totalbuckets;
+       int                     bucketsize;
+
+       /* Force a plausible relation size if no info */
+       if (ntuples <= 0.0)
+               ntuples = 1000.0;
+
+       /*
+        * Estimate tupsize based on footprint of tuple in hashtable... but
+        * what about palloc overhead?
+        */
+       tupsize = MAXALIGN(tupwidth) + MAXALIGN(sizeof(HashJoinTupleData));
+       inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
+
+       /*
+        * Target hashtable size is SortMem kilobytes, but not less than
+        * sqrt(estimated inner rel size), so as to avoid horrible
+        * performance.
+        */
+       hash_table_bytes = sqrt(inner_rel_bytes);
+       if (hash_table_bytes < (SortMem * 1024L))
+               hash_table_bytes = SortMem * 1024L;
+
+       /*
+        * Count the number of hash buckets we want for the whole relation,
+        * for an average bucket load of NTUP_PER_BUCKET (per virtual
+        * bucket!).
+        */
+       totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+
+       /*
+        * Count the number of buckets we think will actually fit in the
+        * target memory size, at a loading of NTUP_PER_BUCKET (physical
+        * buckets). NOTE: FUDGE_FAC here determines the fraction of the
+        * hashtable space reserved to allow for nonuniform distribution of
+        * hash values. Perhaps this should be a different number from the
+        * other uses of FUDGE_FAC, but since we have no real good way to pick
+        * either one...
+        */
+       bucketsize = NTUP_PER_BUCKET * tupsize;
+       nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
+       if (nbuckets <= 0)
+               nbuckets = 1;
+
+       if (totalbuckets <= nbuckets)
+       {
+               /*
+                * We have enough space, so no batching.  In theory we could even
+                * reduce nbuckets, but since that could lead to poor behavior if
+                * estimated ntuples is much less than reality, it seems better to
+                * make more buckets instead of fewer.
+                */
+               totalbuckets = nbuckets;
+               nbatch = 0;
+       }
+       else
+       {
+               /*
+                * Need to batch; compute how many batches we want to use. Note
+                * that nbatch doesn't have to have anything to do with the ratio
+                * totalbuckets/nbuckets; in fact, it is the number of groups we
+                * will use for the part of the data that doesn't fall into the
+                * first nbuckets hash buckets.
+                */
+               nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
+                                                       hash_table_bytes);
+               if (nbatch <= 0)
+                       nbatch = 1;
+       }
+
+       /*
+        * Now, totalbuckets is the number of (virtual) hashbuckets for the
+        * whole relation, and nbuckets is the number of physical hashbuckets
+        * we will use in the first pass.  Data falling into the first
+        * nbuckets virtual hashbuckets gets handled in the first pass;
+        * everything else gets divided into nbatch batches to be processed in
+        * additional passes.
+        */
+       *virtualbuckets = totalbuckets;
+       *physicalbuckets = nbuckets;
+       *numbatches = nbatch;
+}
+
+
  /* ----------------------------------------------------------------
   *             ExecHashTableDestroy
   *
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 06793f1d8b4133d419125e2a307df07f8aa623d3..2099adc664cf332358b94bcb3c4f6ce58da00409 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.77 2001/06/11 00:17:08 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -791,19 +791,19 @@ cost_hashjoin(Path *path, Query *root,
   * smart enough to figure out how the restrict clauses might change the
   * distribution, so this will have to do for now.
   *
- * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
- * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
- * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
- * be reached only if the data values are uniformly distributed among the
- * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
- * data values, and (b) a not-too-skewed data distribution.  Otherwise the
- * buckets will be nonuniformly occupied.  If the other relation in the join
- * has a similar distribution, the most-loaded buckets are exactly those
- * that will be probed most often.  Therefore, the "average" bucket size for
- * costing purposes should really be taken as something close to the "worst
- * case" bucket size.  We try to estimate this by first scaling up if there
- * are too few distinct data values, and then scaling up again by the
- * ratio of the most common value's frequency to the average frequency.
+ * We can get the number of buckets the executor will use for the given
+ * input relation.  If the data were perfectly distributed, with the same
+ * number of tuples going into each available bucket, then the bucketsize
+ * fraction would be 1/nbuckets.  But this happy state of affairs will occur
+ * only if (a) there are at least nbuckets distinct data values, and (b)
+ * we have a not-too-skewed data distribution.  Otherwise the buckets will
+ * be nonuniformly occupied.  If the other relation in the join has a key
+ * distribution similar to this one's, then the most-loaded buckets are
+ * exactly those that will be probed most often.  Therefore, the "average"
+ * bucket size for costing purposes should really be taken as something close
+ * to the "worst case" bucket size.  We try to estimate this by adjusting the
+ * fraction if there are too few distinct data values, and then scaling up
+ * by the ratio of the most common value's frequency to the average frequency.
   *
   * If no statistics are available, use a default estimate of 0.1.  This will
   * discourage use of a hash rather strongly if the inner relation is large,
@@ -815,11 +815,13 @@ estimate_hash_bucketsize(Query *root, Var *var)
  {
         Oid                     relid;
         RelOptInfo *rel;
+       int                     virtualbuckets;
+       int                     physicalbuckets;
+       int                     numbatches;
         HeapTuple       tuple;
         Form_pg_statistic stats;
         double          estfract,
                                 ndistinct,
-                               needdistinct,
                                 mcvfreq,
                                 avgfreq;
         float4     *numbers;
@@ -841,6 +843,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
         if (rel->tuples <= 0.0 || rel->rows <= 0.0)
                 return 0.1;                             /* ensure we can divide below */
  
+       /* Get hash table size that executor would use for this relation */
+       ExecChooseHashTableSize(rel->rows, rel->width,
+                                                       &virtualbuckets,
+                                                       &physicalbuckets,
+                                                       &numbatches);
+
         tuple = SearchSysCache(STATRELATT,
                                                    ObjectIdGetDatum(relid),
                                                    Int16GetDatum(var->varattno),
@@ -857,7 +865,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
                         case ObjectIdAttributeNumber:
                         case SelfItemPointerAttributeNumber:
                                 /* these are unique, so buckets should be well-distributed */
-                               return (double) NTUP_PER_BUCKET / rel->rows;
+                               return 1.0 / (double) virtualbuckets;
                         case TableOidAttributeNumber:
                                 /* hashing this is a terrible idea... */
                                 return 1.0;
@@ -873,6 +881,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
         if (ndistinct < 0.0)
                 ndistinct = -ndistinct * rel->tuples;
  
+       if (ndistinct <= 0.0)           /* ensure we can divide */
+       {
+               ReleaseSysCache(tuple);
+               return 0.1;
+       }
+
         /* Also compute avg freq of all distinct data values in raw relation */
         avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
  
@@ -887,20 +901,14 @@ estimate_hash_bucketsize(Query *root, Var *var)
         ndistinct *= rel->rows / rel->tuples;
  
         /*
-        * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
-        * ie the number of rows after applying restriction clauses, because
-        * that's what the fraction will eventually be multiplied by in
-        * cost_heapjoin.
+        * Initial estimate of bucketsize fraction is 1/nbuckets as long as
+        * the number of buckets is less than the expected number of distinct
+        * values; otherwise it is 1/ndistinct.
          */
-       estfract = (double) NTUP_PER_BUCKET / rel->rows;
-
-       /*
-        * Adjust estimated bucketsize if too few distinct values (after
-        * restriction clauses) to fill all the buckets.
-        */
-       needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
-       if (ndistinct < needdistinct)
-               estfract *= needdistinct / ndistinct;
+       if (ndistinct > (double) virtualbuckets)
+               estfract = 1.0 / (double) virtualbuckets;
+       else
+               estfract = 1.0 / ndistinct;
  
         /*
          * Look up the frequency of the most common value, if available.
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h

index e00bdfbc355fb6d15b1b4432ee3f5aadea6b71d4..512edec6d18c2a2ae2fcbc505f7079b39ae14e09 100644 (file)
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeHash.h,v 1.19 2001/03/22 04:00:44 momjian Exp $
+ * $Id: nodeHash.h,v 1.20 2001/06/11 00:17:07 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,9 +16,6 @@
  
  #include "nodes/plannodes.h"
  
-/* NTUP_PER_BUCKET is exported because planner wants to see it */
-#define NTUP_PER_BUCKET                        10
-
  extern TupleTableSlot *ExecHash(Hash *node);
  extern bool ExecInitHash(Hash *node, EState *estate, Plan *parent);
  extern int     ExecCountSlotsHash(Hash *node);
@@ -35,5 +32,9 @@ extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
                                    ExprContext *econtext);
  extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);
  extern void ExecReScanHash(Hash *node, ExprContext *exprCtxt, Plan *parent);
+extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
+                                                                       int *virtualbuckets,
+                                                                       int *physicalbuckets,
+                                                                       int *numbatches);
  
  #endif  /* NODEHASH_H */
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 11 Jun 2001 00:17:08 +0000 (00:17 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 11 Jun 2001 00:17:08 +0000 (00:17 +0000)
src/backend/executor/nodeHash.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/include/executor/nodeHash.h		patch \| blob \| history