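Correct oversight in hashjoin cost estimation: nodeHash sizes its hash table for an average load of NTUP_PER_BUCKET tuples per bucket, but cost_hashjoin did not include that factor when estimating the number of tuple comparisons, so it underestimated hashjoin costs.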

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 18 Apr 2000 05:43:02 +0000 (05:43 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 18 Apr 2000 05:43:02 +0000 (05:43 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 18 Apr 2000 05:43:02 +0000 (05:43 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 18 Apr 2000 05:43:02 +0000 (05:43 +0000)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c

index bee4a10f35d4ce24ec39e239aef956ac8e094709..1d841576fea184cf17d98d2d1d9f8301721224f6 100644 (file)
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   *
- *     $Id: nodeHash.c,v 1.44 2000/01/26 05:56:22 momjian Exp $
+ *     $Id: nodeHash.c,v 1.45 2000/04/18 05:43:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -221,7 +221,6 @@ ExecEndHash(Hash *node)
   *             create a hashtable in shared memory for hashjoin.
   * ----------------------------------------------------------------
   */
-#define NTUP_PER_BUCKET                        10
  #define FUDGE_FAC                              2.0
  
  HashJoinTable
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 6ecfb2a47138d3e8e1a21af3054b087f57cef82b..df3c6d5c429ccaf5e402d102286bc4c6f61c2655 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.57 2000/04/12 17:15:19 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.58 2000/04/18 05:43:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -51,6 +51,7 @@
  
  #include <math.h>
  
+#include "executor/nodeHash.h"
  #include "miscadmin.h"
  #include "nodes/plannodes.h"
  #include "optimizer/clauses.h"
@@ -604,12 +605,17 @@ cost_hashjoin(Path *path,
         run_cost += cpu_operator_cost * outer_path->parent->rows;
  
         /*
-        * the number of tuple comparisons needed is the number of outer
-        * tuples times the typical hash bucket size, which we estimate
-        * conservatively as the inner disbursion times the inner tuple count.
+        * The number of tuple comparisons needed is the number of outer
+        * tuples times the typical hash bucket size.  nodeHash.c tries for
+        * average bucket loading of NTUP_PER_BUCKET, but that goal will
+        * be reached only if data values are uniformly distributed among
+        * the buckets.  To be conservative, we scale up the target bucket
+        * size by the number of inner rows times inner disbursion, giving
+        * an estimate of the typical number of duplicates of each value.
+        * We then charge one cpu_operator_cost per tuple comparison.
          */
         run_cost += cpu_operator_cost * outer_path->parent->rows *
-               ceil(inner_path->parent->rows * innerdisbursion);
+               NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdisbursion);
  
         /*
          * Estimate the number of tuples that get through the hashing filter
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h

index 0460368d8b10a7f088a544dcc41bcc9d5de5eabc..b61ced7cdc1900c09f11391d2df3eafcb03ea271 100644 (file)
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: nodeHash.h,v 1.15 2000/01/26 05:58:05 momjian Exp $
+ * $Id: nodeHash.h,v 1.16 2000/04/18 05:43:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,6 +16,9 @@
  
  #include "nodes/plannodes.h"
  
+/* NTUP_PER_BUCKET is exported because planner wants to see it */
+#define NTUP_PER_BUCKET                        10
+
  extern TupleTableSlot *ExecHash(Hash *node);
  extern bool ExecInitHash(Hash *node, EState *estate, Plan *parent);
  extern int     ExecCountSlotsHash(Hash *node);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 18 Apr 2000 05:43:02 +0000 (05:43 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 18 Apr 2000 05:43:02 +0000 (05:43 +0000)
src/backend/executor/nodeHash.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/include/executor/nodeHash.h		patch \| blob \| history