*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.355 2009/03/21 00:04:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.356 2009/03/26 17:15:34 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
_outJoinPathInfo(str, (JoinPath *) node);
WRITE_NODE_FIELD(path_hashclauses);
+ WRITE_INT_FIELD(num_batches);
}
static void
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.205 2009/03/21 00:04:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.206 2009/03/26 17:15:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
&numbatches,
&num_skew_mcvs);
virtualbuckets = (double) numbuckets *(double) numbatches;
+ /* mark the path with estimated # of batches */
+ path->num_batches = numbatches;
/*
* Determine bucketsize fraction for inner relation. We use the smallest
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.256 2009/03/21 00:04:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.257 2009/03/26 17:15:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* We don't want any excess columns in the hashed tuples */
disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath);
+ /* If we expect batching, suppress excess columns in outer tuples too */
+ if (best_path->num_batches > 1)
+ disuse_physical_tlist(outer_plan, best_path->jpath.outerjoinpath);
+
/*
* If there is a single join clause and we can identify the outer
* variable as a simple column reference, supply its identity for
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.150 2009/02/27 00:06:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.151 2009/03/26 17:15:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
- /* A hashjoin never has pathkeys, since its ordering is unpredictable */
+ /*
+ * A hashjoin never has pathkeys, since its output ordering is
+ * unpredictable due to possible batching. XXX If the inner relation is
+ * small enough, we could instruct the executor that it must not batch,
+ * and then we could assume that the output inherits the outer relation's
+ * ordering, which might save a sort step. However, there is considerable
+ * downside if our estimate of the inner relation size is badly off.
+ * For the moment we don't risk it. (Note also that if we wanted to take
+ * this seriously, joinpath.c would have to consider many more paths for
+ * the outer rel than it does now.)
+ */
pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses;
+ /* cost_hashjoin will fill in pathnode->num_batches */
cost_hashjoin(pathnode, root, sjinfo);
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.170 2009/03/05 23:06:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.171 2009/03/26 17:15:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
{
JoinPath jpath;
List *path_hashclauses; /* join clauses used for hashing */
+ int num_batches; /* number of batches expected */
} HashPath;
/*