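If we expect a hash join to be performed in multiple batches, suppress excess columns in the outer relation's tuples too (as is already done for the inner, hashed tuples).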

author Tom Lane <tgl@sss.pgh.pa.us>

Thu, 26 Mar 2009 17:15:35 +0000 (17:15 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Thu, 26 Mar 2009 17:15:35 +0000 (17:15 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 26 Mar 2009 17:15:35 +0000 (17:15 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 26 Mar 2009 17:15:35 +0000 (17:15 +0000)
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 212fc0673c27082b05afa17a8946ba730a4aaa31..3fdd9bab66902619c1d882c2dfacfce06ca6e98c 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.355 2009/03/21 00:04:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.356 2009/03/26 17:15:34 tgl Exp $
   *
   * NOTES
   *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1448,6 +1448,7 @@ _outHashPath(StringInfo str, HashPath *node)
         _outJoinPathInfo(str, (JoinPath *) node);
  
         WRITE_NODE_FIELD(path_hashclauses);
+       WRITE_INT_FIELD(num_batches);
  }
  
  static void
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index b07a2599bbeb3ece9c12d39020d8feecaae3e6d8..2b3cf61e751e1f7c39e9b472aac927aa5dd6b4a2 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -54,7 +54,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.205 2009/03/21 00:04:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.206 2009/03/26 17:15:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1880,6 +1880,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
                                                         &numbatches,
                                                         &num_skew_mcvs);
         virtualbuckets = (double) numbuckets *(double) numbatches;
+       /* mark the path with estimated # of batches */
+       path->num_batches = numbatches;
  
         /*
          * Determine bucketsize fraction for inner relation.  We use the smallest
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index be4d79f1bf2ff421260c9417270604ac0700226e..cff0424c6c7826616917bdebff2c59cf9cd7fa85 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.256 2009/03/21 00:04:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.257 2009/03/26 17:15:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1910,6 +1910,10 @@ create_hashjoin_plan(PlannerInfo *root,
         /* We don't want any excess columns in the hashed tuples */
         disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath);
  
+       /* If we expect batching, suppress excess columns in outer tuples too */
+       if (best_path->num_batches > 1)
+               disuse_physical_tlist(outer_plan, best_path->jpath.outerjoinpath);
+
         /*
          * If there is a single join clause and we can identify the outer
          * variable as a simple column reference, supply its identity for
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index 28b2828c2dd34077f5ad5a889c997c4bb9b570df..5ba413bb1ada6bc76a70296db40abda4b8c722ec 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.150 2009/02/27 00:06:27 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.151 2009/03/26 17:15:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1480,9 +1480,20 @@ create_hashjoin_path(PlannerInfo *root,
         pathnode->jpath.outerjoinpath = outer_path;
         pathnode->jpath.innerjoinpath = inner_path;
         pathnode->jpath.joinrestrictinfo = restrict_clauses;
-       /* A hashjoin never has pathkeys, since its ordering is unpredictable */
+       /*
+        * A hashjoin never has pathkeys, since its output ordering is
+        * unpredictable due to possible batching.  XXX If the inner relation is
+        * small enough, we could instruct the executor that it must not batch,
+        * and then we could assume that the output inherits the outer relation's
+        * ordering, which might save a sort step.  However there is considerable
+        * downside if our estimate of the inner relation size is badly off.
+        * For the moment we don't risk it.  (Note also that if we wanted to take
+        * this seriously, joinpath.c would have to consider many more paths for
+        * the outer rel than it does now.)
+        */
         pathnode->jpath.path.pathkeys = NIL;
         pathnode->path_hashclauses = hashclauses;
+       /* cost_hashjoin will fill in pathnode->num_batches */
  
         cost_hashjoin(pathnode, root, sjinfo);
  
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 4f1bc4067d21f9ea5aedd47a676b562033d3f35e..cf567b997461cca77a8ab82a09c86b7a9634a7d3 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.170 2009/03/05 23:06:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.171 2009/03/26 17:15:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -845,6 +845,7 @@ typedef struct HashPath
  {
         JoinPath        jpath;
         List       *path_hashclauses;           /* join clauses used for hashing */
+       int                     num_batches;                    /* number of batches expected */
  } HashPath;
  
  /*
author	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 26 Mar 2009 17:15:35 +0000 (17:15 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Thu, 26 Mar 2009 17:15:35 +0000 (17:15 +0000)
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history