Remove cost_hashjoin's very ancient hack to discourage (once, entirely forbid)

author Tom Lane <tgl@sss.pgh.pa.us>

Mon, 8 Jan 2007 16:09:22 +0000 (16:09 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Mon, 8 Jan 2007 16:09:22 +0000 (16:09 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 8 Jan 2007 16:09:22 +0000 (16:09 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 8 Jan 2007 16:09:22 +0000 (16:09 +0000)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index 1d048767165d4a8fe964afc2c773ce4ce72da55d..69d1a890e4be12c30a4f7f02d81d1b9bccd7652e 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.172 2007/01/05 22:19:31 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.173 2007/01/08 16:09:22 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1498,10 +1498,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
         double          hashjointuples;
         double          outer_path_rows = PATH_ROWS(outer_path);
         double          inner_path_rows = PATH_ROWS(inner_path);
-       double          outerbytes = relation_byte_size(outer_path_rows,
-                                                                                               outer_path->parent->width);
-       double          innerbytes = relation_byte_size(inner_path_rows,
-                                                                                               inner_path->parent->width);
         int                     num_hashclauses = list_length(hashclauses);
         int                     numbuckets;
         int                     numbatches;
@@ -1538,13 +1534,16 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
  
         /*
          * Cost of computing hash function: must do it once per input tuple. We
-        * charge one cpu_operator_cost for each column's hash function.
+        * charge one cpu_operator_cost for each column's hash function.  Also,
+        * tack on one cpu_tuple_cost per inner row, to model the costs of
+        * inserting the row into the hashtable.
          *
          * XXX when a hashclause is more complex than a single operator, we really
          * should charge the extra eval costs of the left or right side, as
          * appropriate, here.  This seems more work than it's worth at the moment.
          */
-       startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
+       startup_cost += (cpu_operator_cost * num_hashclauses + cpu_tuple_cost)
+               * inner_path_rows;
         run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
  
         /* Get hash table size that executor would use for inner relation */
@@ -1624,8 +1623,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
         /*
          * If inner relation is too big then we will need to "batch" the join,
          * which implies writing and reading most of the tuples to disk an extra
-        * time.  Charge one cost unit per page of I/O (correct since it should be
-        * nice and sequential...).  Writing the inner rel counts as startup cost,
+        * time.  Charge seq_page_cost per page, since the I/O should be nice and
+        * sequential.  Writing the inner rel counts as startup cost,
          * all the rest as run cost.
          */
         if (numbatches > 1)
@@ -1635,8 +1634,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
                 double          innerpages = page_size(inner_path_rows,
                                                                                    inner_path->parent->width);
  
-               startup_cost += innerpages;
-               run_cost += innerpages + 2 * outerpages;
+               startup_cost += seq_page_cost * innerpages;
+               run_cost += seq_page_cost * (innerpages + 2 * outerpages);
         }
  
         /* CPU costs */
@@ -1654,14 +1653,15 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
          * The number of tuple comparisons needed is the number of outer tuples
          * times the typical number of tuples in a hash bucket, which is the inner
          * relation size times its bucketsize fraction.  At each one, we need to
-        * evaluate the hashjoin quals.  (Note: charging the full qual eval cost
-        * at each tuple is pessimistic, since we don't evaluate the quals unless
-        * the hash values match exactly.)
+        * evaluate the hashjoin quals.  But actually, charging the full qual eval
+        * cost at each tuple is pessimistic, since we don't evaluate the quals
+        * unless the hash values match exactly.  For lack of a better idea, halve
+        * the cost estimate to allow for that.
          */
         startup_cost += hash_qual_cost.startup;
         run_cost += hash_qual_cost.per_tuple *
                 outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
-               joininfactor;
+               joininfactor * 0.5;
  
         /*
          * For each tuple that gets through the hashjoin proper, we charge
@@ -1673,22 +1673,6 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
         cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
         run_cost += cpu_per_tuple * hashjointuples * joininfactor;
  
-       /*
-        * Bias against putting larger relation on inside.      We don't want an
-        * absolute prohibition, though, since larger relation might have better
-        * bucketsize --- and we can't trust the size estimates unreservedly,
-        * anyway.      Instead, inflate the run cost by the square root of the size
-        * ratio.  (Why square root?  No real good reason, but it seems
-        * reasonable...)
-        *
-        * Note: before 7.4 we implemented this by inflating startup cost; but if
-        * there's a disable_cost component in the input paths' startup cost, that
-        * unfairly penalizes the hash.  Probably it'd be better to keep track of
-        * disable penalty separately from cost.
-        */
-       if (innerbytes > outerbytes && outerbytes > 0)
-               run_cost *= sqrt(innerbytes / outerbytes);
-
         path->jpath.path.startup_cost = startup_cost;
         path->jpath.path.total_cost = startup_cost + run_cost;
  }
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 8 Jan 2007 16:09:22 +0000 (16:09 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 8 Jan 2007 16:09:22 +0000 (16:09 +0000)