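Further work on making use of new statistics in planner. Adjust APIs of the costsize.c routines (and their create_*_path callers) to pass the Query root, so that the cost functions can look up clause selectivities and hash bucket-size statistics themselves instead of having callers supply them.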

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 5 Jun 2001 05:26:05 +0000 (05:26 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 5 Jun 2001 05:26:05 +0000 (05:26 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 5 Jun 2001 05:26:05 +0000 (05:26 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 5 Jun 2001 05:26:05 +0000 (05:26 +0000)
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index a5a968515e68a0dda875910caa25d0e6a22a2e38..07907b63683e5d9064cf55e2d0e5fb7fb8581cac 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.142 2001/05/20 20:28:17 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.143 2001/06/05 05:26:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1361,9 +1361,10 @@ _copyRestrictInfo(RestrictInfo *from)
          * copy remainder of node
          */
         Node_Copy(from, newnode, clause);
-       newnode->eval_cost = from->eval_cost;
         newnode->ispusheddown = from->ispusheddown;
         Node_Copy(from, newnode, subclauseindices);
+       newnode->eval_cost = from->eval_cost;
+       newnode->this_selec = from->this_selec;
         newnode->mergejoinoperator = from->mergejoinoperator;
         newnode->left_sortop = from->left_sortop;
         newnode->right_sortop = from->right_sortop;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index a89a8f7f335b9cdd77cf355afc6a0a85035ba4cb..656c1e9ea670ab6fc69f780e9ef3a70feb23c4d5 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.90 2001/05/20 20:28:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.91 2001/06/05 05:26:03 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -514,14 +514,14 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
  {
         if (!equal(a->clause, b->clause))
                 return false;
-
-       /*
-        * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
-        * since they may not be set yet, and should be derivable from the
-        * clause anyway
-        */
         if (a->ispusheddown != b->ispusheddown)
                 return false;
+       /*
+        * We ignore eval_cost, this_selec, left/right_pathkey, and
+        * left/right_bucketsize, since they may not be set yet, and should be
+        * derivable from the clause anyway.  Probably it's not really necessary
+        * to compare any of these remaining fields ...
+        */
         if (!equal(a->subclauseindices, b->subclauseindices))
                 return false;
         if (a->mergejoinoperator != b->mergejoinoperator)
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index ad832d7ca9e82012235ad488d55d6478126a35b6..a83f0b64dbfa58d192cd1f0d757b90861a6ce90f 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.109 2001/05/20 20:28:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.110 2001/06/05 05:26:04 tgl Exp $
   *
   * NOTES
   *       Most of the read functions for plan nodes are tested. (In fact, they
@@ -1792,6 +1792,8 @@ _readRestrictInfo(void)
  
         /* eval_cost is not part of saved representation; compute on first use */
         local_node->eval_cost = -1;
+       /* ditto for this_selec */
+       local_node->this_selec = -1;
         /* ditto for cached pathkeys and bucketsize */
         local_node->left_pathkey = NIL;
         local_node->right_pathkey = NIL;
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c

index afb3259e7367154251891fdb906a28e557e5bdd3..bdc1c033296103e4fc23328b62ae8981ef29c852 100644 (file)
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.74 2001/05/20 20:28:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -223,7 +223,7 @@ set_plain_rel_pathlist(Query *root, RelOptInfo *rel, RangeTblEntry *rte)
          */
  
         /* Consider sequential scan */
-       add_path(rel, create_seqscan_path(rel));
+       add_path(rel, create_seqscan_path(root, rel));
  
         /* Consider TID scans */
         create_tidscan_paths(root, rel);
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c

index 78407fb833a9a130bec20b0a097013012bb652e9..cafc01fc33f8ffcedb88290214fe918d31fd1be4 100644 (file)
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.44 2001/05/20 20:28:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.45 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -48,9 +48,6 @@ typedef struct RangeQueryClause
  
  static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
                                                    bool varonleft, bool isLTsel, Selectivity s2);
-static Selectivity clause_selectivity(Query *root,
-                                  Node *clause,
-                                  int varRelid);
  
  
  /****************************************************************************
@@ -364,7 +361,7 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
   * When varRelid is 0, all variables are treated as variables. This
   * is appropriate for ordinary join clauses and restriction clauses.
   */
-static Selectivity
+Selectivity
  clause_selectivity(Query *root,
                                    Node *clause,
                                    int varRelid)
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index b4379e4b39bc5585cfacefdb89a8f91525acb054..65c211deaeeb3326d3df5cd9bc98681f6c1e5be1 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.74 2001/05/20 20:28:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.75 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -83,7 +83,9 @@ bool          enable_mergejoin = true;
  bool           enable_hashjoin = true;
  
  
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
  static bool cost_qual_eval_walker(Node *node, Cost *total);
+static Selectivity approx_selectivity(Query *root, List *quals);
  static void set_rel_width(Query *root, RelOptInfo *rel);
  static double relation_byte_size(double tuples, int width);
  static double page_size(double tuples, int width);
@@ -99,7 +101,8 @@ static double page_size(double tuples, int width);
   * parameters, even though much of it could be extracted from the Path.
   */
  void
-cost_seqscan(Path *path, RelOptInfo *baserel)
+cost_seqscan(Path *path, Query *root,
+                        RelOptInfo *baserel)
  {
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
@@ -356,10 +359,11 @@ cost_index(Path *path, Query *root,
  
  /*
   * cost_tidscan
- *       Determines and returns the cost of scanning a relation using tid-s.
+ *       Determines and returns the cost of scanning a relation using TIDs.
   */
  void
-cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
+cost_tidscan(Path *path, Query *root,
+                        RelOptInfo *baserel, List *tideval)
  {
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
@@ -417,7 +421,8 @@ cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
   * but if it ever does, it should react gracefully to lack of key data.
   */
  void
-cost_sort(Path *path, List *pathkeys, double tuples, int width)
+cost_sort(Path *path, Query *root,
+                 List *pathkeys, double tuples, int width)
  {
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
@@ -479,7 +484,7 @@ cost_sort(Path *path, List *pathkeys, double tuples, int width)
   * 'restrictlist' are the RestrictInfo nodes to be applied at the join
   */
  void
-cost_nestloop(Path *path,
+cost_nestloop(Path *path, Query *root,
                           Path *outer_path,
                           Path *inner_path,
                           List *restrictlist)
@@ -510,7 +515,8 @@ cost_nestloop(Path *path,
         run_cost += outer_path->parent->rows *
                 (inner_path->total_cost - inner_path->startup_cost);
         if (outer_path->parent->rows > 1)
-               run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost;
+               run_cost += (outer_path->parent->rows - 1) *
+                       inner_path->startup_cost * 0.5;
  
         /*
          * Number of tuples processed (not number emitted!).  If inner path is
@@ -540,15 +546,18 @@ cost_nestloop(Path *path,
   * 'outer_path' is the path for the outer relation
   * 'inner_path' is the path for the inner relation
   * 'restrictlist' are the RestrictInfo nodes to be applied at the join
+ * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
+ *             (this should be a subset of the restrictlist)
   * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
   *                             to sort the outer and inner relations, or NIL if no explicit
   *                             sort is needed because the source path is already ordered
   */
  void
-cost_mergejoin(Path *path,
+cost_mergejoin(Path *path, Query *root,
                            Path *outer_path,
                            Path *inner_path,
                            List *restrictlist,
+                          List *mergeclauses,
                            List *outersortkeys,
                            List *innersortkeys)
  {
@@ -573,6 +582,7 @@ cost_mergejoin(Path *path,
         {
                 startup_cost += outer_path->total_cost;
                 cost_sort(&sort_path,
+                                 root,
                                   outersortkeys,
                                   outer_path->parent->rows,
                                   outer_path->parent->width);
@@ -589,6 +599,7 @@ cost_mergejoin(Path *path,
         {
                 startup_cost += inner_path->total_cost;
                 cost_sort(&sort_path,
+                                 root,
                                   innersortkeys,
                                   inner_path->parent->rows,
                                   inner_path->parent->width);
@@ -602,12 +613,24 @@ cost_mergejoin(Path *path,
         }
  
         /*
-        * Estimate the number of tuples to be processed in the mergejoin
-        * itself as one per tuple in the two source relations.  This could be
-        * a drastic underestimate if there are many equal-keyed tuples in
-        * either relation, but we have no good way of estimating that...
+        * The number of tuple comparisons needed depends drastically on the
+        * number of equal keys in the two source relations, which we have no
+        * good way of estimating.  Somewhat arbitrarily, we charge one
+        * tuple comparison (one cpu_operator_cost) for each tuple in the
+        * two source relations.  This is probably a lower bound.
          */
-       ntuples = outer_path->parent->rows + inner_path->parent->rows;
+       run_cost += cpu_operator_cost *
+               (outer_path->parent->rows + inner_path->parent->rows);
+
+       /*
+        * For each tuple that gets through the mergejoin proper, we charge
+        * cpu_tuple_cost plus the cost of evaluating additional restriction
+        * clauses that are to be applied at the join.  It's OK to use an
+        * approximate selectivity here, since in most cases this is a minor
+        * component of the cost.
+        */
+       ntuples = approx_selectivity(root, mergeclauses) *
+               outer_path->parent->rows * inner_path->parent->rows;
  
         /* CPU costs */
         cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
@@ -625,15 +648,15 @@ cost_mergejoin(Path *path,
   * 'outer_path' is the path for the outer relation
   * 'inner_path' is the path for the inner relation
   * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerbucketsize' is an estimate of the bucketsize statistic
- *                             for the inner hash key.
+ * 'hashclauses' is a list of the hash join clause (always a 1-element list)
+ *             (this should be a subset of the restrictlist)
   */
  void
-cost_hashjoin(Path *path,
+cost_hashjoin(Path *path, Query *root,
                           Path *outer_path,
                           Path *inner_path,
                           List *restrictlist,
-                         Selectivity innerbucketsize)
+                         List *hashclauses)
  {
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
@@ -644,6 +667,10 @@ cost_hashjoin(Path *path,
         double          innerbytes = relation_byte_size(inner_path->parent->rows,
                                                                                           inner_path->parent->width);
         long            hashtablebytes = SortMem * 1024L;
+       RestrictInfo *restrictinfo;
+       Var                *left,
+                          *right;
+       Selectivity innerbucketsize;
  
         if (!enable_hashjoin)
                 startup_cost += disable_cost;
@@ -657,6 +684,46 @@ cost_hashjoin(Path *path,
         startup_cost += cpu_operator_cost * inner_path->parent->rows;
         run_cost += cpu_operator_cost * outer_path->parent->rows;
  
+       /*
+        * Determine bucketsize fraction for inner relation.  First we have
+        * to figure out which side of the hashjoin clause is the inner side.
+        */
+       Assert(length(hashclauses) == 1);
+       Assert(IsA(lfirst(hashclauses), RestrictInfo));
+       restrictinfo = (RestrictInfo *) lfirst(hashclauses);
+       /* these must be OK, since check_hashjoinable accepted the clause */
+       left = get_leftop(restrictinfo->clause);
+       right = get_rightop(restrictinfo->clause);
+
+       /*
+        * Since we tend to visit the same clauses over and over when
+        * planning a large query, we cache the bucketsize estimate in
+        * the RestrictInfo node to avoid repeated lookups of statistics.
+        */
+       if (intMember(right->varno, inner_path->parent->relids))
+       {
+               /* righthand side is inner */
+               innerbucketsize = restrictinfo->right_bucketsize;
+               if (innerbucketsize < 0)
+               {
+                       /* not cached yet */
+                       innerbucketsize = estimate_hash_bucketsize(root, right);
+                       restrictinfo->right_bucketsize = innerbucketsize;
+               }
+       }
+       else
+       {
+               Assert(intMember(left->varno, inner_path->parent->relids));
+               /* lefthand side is inner */
+               innerbucketsize = restrictinfo->left_bucketsize;
+               if (innerbucketsize < 0)
+               {
+                       /* not cached yet */
+                       innerbucketsize = estimate_hash_bucketsize(root, left);
+                       restrictinfo->left_bucketsize = innerbucketsize;
+               }
+       }
+
         /*
          * The number of tuple comparisons needed is the number of outer
          * tuples times the typical number of tuples in a hash bucket,
@@ -667,14 +734,14 @@ cost_hashjoin(Path *path,
                 ceil(inner_path->parent->rows * innerbucketsize);
  
         /*
-        * Estimate the number of tuples that get through the hashing filter
-        * as one per tuple in the two source relations.  This could be a
-        * drastic underestimate if there are many equal-keyed tuples in
-        * either relation, but we have no simple way of estimating that;
-        * and since this is only a second-order parameter, it's probably
-        * not worth expending a lot of effort on the estimate.
+        * For each tuple that gets through the hashjoin proper, we charge
+        * cpu_tuple_cost plus the cost of evaluating additional restriction
+        * clauses that are to be applied at the join.  It's OK to use an
+        * approximate selectivity here, since in most cases this is a minor
+        * component of the cost.
          */
-       ntuples = outer_path->parent->rows + inner_path->parent->rows;
+       ntuples = approx_selectivity(root, hashclauses) *
+               outer_path->parent->rows * inner_path->parent->rows;
  
         /* CPU costs */
         cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
@@ -718,10 +785,6 @@ cost_hashjoin(Path *path,
   * divided by total tuples in relation) if the specified Var is used
   * as a hash key.
   *
- * This statistic is used by cost_hashjoin.  We split out the calculation
- * because it's useful to cache the result for re-use across multiple path
- * cost calculations.
- *
   * XXX This is really pretty bogus since we're effectively assuming that the
   * distribution of hash keys will be the same after applying restriction
   * clauses as it was in the underlying relation.  However, we are not nearly
@@ -747,7 +810,7 @@ cost_hashjoin(Path *path,
   * which is what we want.  We do not want to hash unless we know that the
   * inner rel is well-dispersed (or the alternatives seem much worse).
   */
-Selectivity
+static Selectivity
  estimate_hash_bucketsize(Query *root, Var *var)
  {
         Oid                     relid;
@@ -1000,6 +1063,65 @@ cost_qual_eval_walker(Node *node, Cost *total)
  }
  
  
+/*
+ * approx_selectivity
+ *             Quick-and-dirty estimation of clause selectivities.
+ *             The input can be either an implicitly-ANDed list of boolean
+ *             expressions, or a list of RestrictInfo nodes (typically the latter).
+ *
+ * The "quick" part comes from caching the selectivity estimates so we can
+ * avoid recomputing them later.  (Since the same clauses are typically
+ * examined over and over in different possible join trees, this makes a
+ * big difference.)
+ *
+ * The "dirty" part comes from the fact that the selectivities of multiple
+ * clauses are estimated independently and multiplied together.  Currently,
+ * clauselist_selectivity can seldom do any better than that anyhow, but
+ * someday it might be smarter.
+ *
+ * Since we are only using the results to estimate how many potential
+ * output tuples are generated and passed through qpqual checking, it
+ * seems OK to live with the approximation.
+ */
+static Selectivity
+approx_selectivity(Query *root, List *quals)
+{
+       Selectivity     total = 1.0;	/* running product; stays 1.0 if quals is empty */
+       List       *l;
+
+       foreach(l, quals)
+       {
+               Node       *qual = (Node *) lfirst(l);
+               Selectivity     selec;
+
+               /*
+                * RestrictInfo nodes contain a this_selec field reserved for this
+                * routine's use, so that it's not necessary to evaluate the qual
+                * clause's selectivity more than once.  If the clause's selectivity
+                * hasn't been computed yet, the field will contain -1.
+                */
+               if (qual && IsA(qual, RestrictInfo))
+               {
+                       RestrictInfo *restrictinfo = (RestrictInfo *) qual;
+
+                       if (restrictinfo->this_selec < 0)	/* not cached yet: compute and store */
+                               restrictinfo->this_selec =
+                                       clause_selectivity(root,
+                                                                          (Node *) restrictinfo->clause,
+                                                                          0);	/* varRelid 0: treat all Vars as variables */
+                       selec = restrictinfo->this_selec;
+               }
+               else
+               {
+                       /* Bare expression (or NULL entry): no node to cache in, so always recompute */
+                       selec = clause_selectivity(root, qual, 0);
+               }
+               total *= selec;	/* clauses are treated as independent, so just multiply */
+       }
+       return total;
+}
+
+
  /*
   * set_baserel_size_estimates
   *             Set the size estimates for the given base relation.
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index cd7cabd41deb7bf52b323b437d847eede311b8cc..5a0734224f2ecc5a0a4898c25d4a33c354409444 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.65 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -236,7 +236,8 @@ sort_inner_and_outer(Query *root,
                  * paths later, and only if they don't need a sort.
                  */
                 add_path(joinrel, (Path *)
-                                create_mergejoin_path(joinrel,
+                                create_mergejoin_path(root,
+                                                                          joinrel,
                                                                            jointype,
                                                                            outerrel->cheapest_total_path,
                                                                            innerrel->cheapest_total_path,
@@ -357,7 +358,8 @@ match_unsorted_outer(Query *root,
                          * innerjoin indexpath.
                          */
                         add_path(joinrel, (Path *)
-                                        create_nestloop_path(joinrel,
+                                        create_nestloop_path(root,
+                                                                                 joinrel,
                                                                                   jointype,
                                                                                   outerpath,
                                                                                   innerrel->cheapest_total_path,
@@ -366,7 +368,8 @@ match_unsorted_outer(Query *root,
                         if (innerrel->cheapest_startup_path !=
                                 innerrel->cheapest_total_path)
                                 add_path(joinrel, (Path *)
-                                                create_nestloop_path(joinrel,
+                                                create_nestloop_path(root,
+                                                                                         joinrel,
                                                                                           jointype,
                                                                                           outerpath,
                                                                                  innerrel->cheapest_startup_path,
@@ -374,7 +377,8 @@ match_unsorted_outer(Query *root,
                                                                                           merge_pathkeys));
                         if (bestinnerjoin != NULL)
                                 add_path(joinrel, (Path *)
-                                                create_nestloop_path(joinrel,
+                                                create_nestloop_path(root,
+                                                                                         joinrel,
                                                                                           jointype,
                                                                                           outerpath,
                                                                                           bestinnerjoin,
@@ -405,7 +409,8 @@ match_unsorted_outer(Query *root,
                  * innerrel->cheapest_total_path is already correctly sorted.)
                  */
                 add_path(joinrel, (Path *)
-                                create_mergejoin_path(joinrel,
+                                create_mergejoin_path(root,
+                                                                          joinrel,
                                                                            jointype,
                                                                            outerpath,
                                                                            innerrel->cheapest_total_path,
@@ -464,7 +469,8 @@ match_unsorted_outer(Query *root,
                                 else
                                         newclauses = mergeclauses;
                                 add_path(joinrel, (Path *)
-                                                create_mergejoin_path(joinrel,
+                                                create_mergejoin_path(root,
+                                                                                          joinrel,
                                                                                            jointype,
                                                                                            outerpath,
                                                                                            innerpath,
@@ -507,7 +513,8 @@ match_unsorted_outer(Query *root,
                                                         newclauses = mergeclauses;
                                         }
                                         add_path(joinrel, (Path *)
-                                                        create_mergejoin_path(joinrel,
+                                                        create_mergejoin_path(root,
+                                                                                                  joinrel,
                                                                                                    jointype,
                                                                                                    outerpath,
                                                                                                    innerpath,
@@ -605,7 +612,8 @@ match_unsorted_inner(Query *root,
                  */
                 merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
                 add_path(joinrel, (Path *)
-                                create_mergejoin_path(joinrel,
+                                create_mergejoin_path(root,
+                                                                          joinrel,
                                                                            jointype,
                                                                            outerrel->cheapest_total_path,
                                                                            innerpath,
@@ -633,7 +641,8 @@ match_unsorted_inner(Query *root,
                 merge_pathkeys = build_join_pathkeys(root, joinrel,
                                                                                          totalouterpath->pathkeys);
                 add_path(joinrel, (Path *)
-                                create_mergejoin_path(joinrel,
+                                create_mergejoin_path(root,
+                                                                          joinrel,
                                                                            jointype,
                                                                            totalouterpath,
                                                                            innerpath,
@@ -651,7 +660,8 @@ match_unsorted_inner(Query *root,
                         merge_pathkeys = build_join_pathkeys(root, joinrel,
                                                                                          startupouterpath->pathkeys);
                         add_path(joinrel, (Path *)
-                                        create_mergejoin_path(joinrel,
+                                        create_mergejoin_path(root,
+                                                                                  joinrel,
                                                                                    jointype,
                                                                                    startupouterpath,
                                                                                    innerpath,
@@ -718,10 +728,8 @@ hash_inner_and_outer(Query *root,
         foreach(i, restrictlist)
         {
                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
-               Expr       *clause;
                 Var                *left,
                                    *right;
-               Selectivity innerbucketsize;
                 List       *hashclauses;
  
                 if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -734,42 +742,22 @@ hash_inner_and_outer(Query *root,
                 if (isouterjoin && restrictinfo->ispusheddown)
                         continue;
  
-               clause = restrictinfo->clause;
                 /* these must be OK, since check_hashjoinable accepted the clause */
-               left = get_leftop(clause);
-               right = get_rightop(clause);
+               left = get_leftop(restrictinfo->clause);
+               right = get_rightop(restrictinfo->clause);
  
                 /*
-                * Check if clause is usable with these sub-rels, find inner side,
-                * estimate bucketsize of inner var for costing purposes.
-                *
-                * Since we tend to visit the same clauses over and over when
-                * planning a large query, we cache the bucketsize estimates in
-                * the RestrictInfo node to avoid repeated lookups of statistics.
+                * Check if clause is usable with these input rels.
                  */
                 if (intMember(left->varno, outerrelids) &&
                         intMember(right->varno, innerrelids))
                 {
                         /* righthand side is inner */
-                       innerbucketsize = restrictinfo->right_bucketsize;
-                       if (innerbucketsize < 0)
-                       {
-                               /* not cached yet */
-                               innerbucketsize = estimate_hash_bucketsize(root, right);
-                               restrictinfo->right_bucketsize = innerbucketsize;
-                       }
                 }
                 else if (intMember(left->varno, innerrelids) &&
                                  intMember(right->varno, outerrelids))
                 {
                         /* lefthand side is inner */
-                       innerbucketsize = restrictinfo->left_bucketsize;
-                       if (innerbucketsize < 0)
-                       {
-                               /* not cached yet */
-                               innerbucketsize = estimate_hash_bucketsize(root, left);
-                               restrictinfo->left_bucketsize = innerbucketsize;
-                       }
                 }
                 else
                         continue;                       /* no good for these input relations */
@@ -783,22 +771,22 @@ hash_inner_and_outer(Query *root,
                  * any but the cheapest-total-cost inner path, however.
                  */
                 add_path(joinrel, (Path *)
-                                create_hashjoin_path(joinrel,
+                                create_hashjoin_path(root,
+                                                                         joinrel,
                                                                           jointype,
                                                                           outerrel->cheapest_total_path,
                                                                           innerrel->cheapest_total_path,
                                                                           restrictlist,
-                                                                         hashclauses,
-                                                                         innerbucketsize));
+                                                                         hashclauses));
                 if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
                         add_path(joinrel, (Path *)
-                                        create_hashjoin_path(joinrel,
+                                        create_hashjoin_path(root,
+                                                                                 joinrel,
                                                                                   jointype,
                                                                                   outerrel->cheapest_startup_path,
                                                                                   innerrel->cheapest_total_path,
                                                                                   restrictlist,
-                                                                                 hashclauses,
-                                                                                 innerbucketsize));
+                                                                                 hashclauses));
         }
  }
  
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c

index 7106fa75d48cf7b38c953eb9e5a76137ace6ee0e..198a7f02690206032e6fe845bb66b1264402f11c 100644 (file)
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.8 2001/01/24 19:42:58 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.9 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -25,7 +25,7 @@
  #include "parser/parse_coerce.h"
  #include "utils/lsyscache.h"
  
-static void create_tidscan_joinpaths(RelOptInfo *rel);
+static void create_tidscan_joinpaths(Query *root, RelOptInfo *rel);
  static List *TidqualFromRestrictinfo(List *relids, List *restrictinfo);
  static bool isEvaluable(int varno, Node *node);
  static Node *TidequalClause(int varno, Expr *node);
@@ -243,7 +243,7 @@ TidqualFromRestrictinfo(List *relids, List *restrictinfo)
   * XXX does this actually work?
   */
  static void
-create_tidscan_joinpaths(RelOptInfo *rel)
+create_tidscan_joinpaths(Query *root, RelOptInfo *rel)
  {
         List       *rlst = NIL,
                            *lst;
@@ -266,7 +266,7 @@ create_tidscan_joinpaths(RelOptInfo *rel)
                         pathnode->tideval = tideval;
                         pathnode->unjoined_relids = joininfo->unjoined_relids;
  
-                       cost_tidscan(&pathnode->path, rel, tideval);
+                       cost_tidscan(&pathnode->path, root, rel, tideval);
  
                         rlst = lappend(rlst, pathnode);
                 }
@@ -286,6 +286,6 @@ create_tidscan_paths(Query *root, RelOptInfo *rel)
                                                                                                   rel->baserestrictinfo);
  
         if (tideval)
-               add_path(rel, (Path *) create_tidscan_path(rel, tideval));
-       create_tidscan_joinpaths(rel);
+               add_path(rel, (Path *) create_tidscan_path(root, rel, tideval));
+       create_tidscan_joinpaths(root, rel);
  }
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 81e7fec0427626377d3d6591c433abc45a55dbff..78e22c7b9e3c8ef8161fc0155644fa689a583b79 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.106 2001/05/20 20:28:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.107 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -47,7 +47,8 @@ static NestLoop *create_nestloop_plan(NestPath *best_path, List *tlist,
                                          List *joinclauses, List *otherclauses,
                                          Plan *outer_plan, List *outer_tlist,
                                          Plan *inner_plan, List *inner_tlist);
-static MergeJoin *create_mergejoin_plan(MergePath *best_path, List *tlist,
+static MergeJoin *create_mergejoin_plan(Query *root,
+                                         MergePath *best_path, List *tlist,
                                           List *joinclauses, List *otherclauses,
                                           Plan *outer_plan, List *outer_tlist,
                                           Plan *inner_plan, List *inner_tlist);
@@ -244,7 +245,8 @@ create_join_plan(Query *root, JoinPath *best_path)
         switch (best_path->path.pathtype)
         {
                 case T_MergeJoin:
-                       plan = (Join *) create_mergejoin_plan((MergePath *) best_path,
+                       plan = (Join *) create_mergejoin_plan(root,
+                                                                                                 (MergePath *) best_path,
                                                                                                   join_tlist,
                                                                                                   joinclauses,
                                                                                                   otherclauses,
@@ -673,7 +675,8 @@ create_nestloop_plan(NestPath *best_path,
  }
  
  static MergeJoin *
-create_mergejoin_plan(MergePath *best_path,
+create_mergejoin_plan(Query *root,
+                                         MergePath *best_path,
                                           List *tlist,
                                           List *joinclauses,
                                           List *otherclauses,
@@ -720,13 +723,15 @@ create_mergejoin_plan(MergePath *best_path,
          */
         if (best_path->outersortkeys)
                 outer_plan = (Plan *)
-                       make_sort_from_pathkeys(outer_tlist,
+                       make_sort_from_pathkeys(root,
+                                                                       outer_tlist,
                                                                         outer_plan,
                                                                         best_path->outersortkeys);
  
         if (best_path->innersortkeys)
                 inner_plan = (Plan *)
-                       make_sort_from_pathkeys(inner_tlist,
+                       make_sort_from_pathkeys(root,
+                                                                       inner_tlist,
                                                                         inner_plan,
                                                                         best_path->innersortkeys);
  
@@ -1367,14 +1372,15 @@ make_mergejoin(List *tlist,
   * each key number from 1 to keycount), or the executor will get confused!
   */
  Sort *
-make_sort(List *tlist, Plan *lefttree, int keycount)
+make_sort(Query *root, List *tlist, Plan *lefttree, int keycount)
  {
         Sort       *node = makeNode(Sort);
         Plan       *plan = &node->plan;
         Path            sort_path;              /* dummy for result of cost_sort */
  
         copy_plan_costsize(plan, lefttree); /* only care about copying size */
-       cost_sort(&sort_path, NIL, lefttree->plan_rows, lefttree->plan_width);
+       cost_sort(&sort_path, root, NIL,
+                         lefttree->plan_rows, lefttree->plan_width);
         plan->startup_cost = sort_path.startup_cost + lefttree->total_cost;
         plan->total_cost = sort_path.total_cost + lefttree->total_cost;
         plan->state = (EState *) NULL;
@@ -1399,7 +1405,8 @@ make_sort(List *tlist, Plan *lefttree, int keycount)
   * of resdom nodes in the sort plan's target list.
   */
  Sort *
-make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
+make_sort_from_pathkeys(Query *root, List *tlist,
+                                               Plan *lefttree, List *pathkeys)
  {
         List       *sort_tlist;
         List       *i;
@@ -1455,10 +1462,10 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
  
         Assert(numsortkeys > 0);
  
-       return make_sort(sort_tlist, lefttree, numsortkeys);
+       return make_sort(root, sort_tlist, lefttree, numsortkeys);
  }
  
-Material   *
+Material *
  make_material(List *tlist, Plan *lefttree)
  {
         Material   *node = makeNode(Material);
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c

index 3b3c761bca670f7548ab67f1e2887bf30be4a986..cf2f798954b782c934b848c566b2cfec0112c847 100644 (file)
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.62 2001/05/20 20:28:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.63 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -357,8 +357,9 @@ distribute_qual_to_rels(Query *root, Node *clause,
         bool            can_be_equijoin;
  
         restrictinfo->clause = (Expr *) clause;
-       restrictinfo->eval_cost = -1;           /* not computed until needed */
         restrictinfo->subclauseindices = NIL;
+       restrictinfo->eval_cost = -1;           /* not computed until needed */
+       restrictinfo->this_selec = -1;          /* not computed until needed */
         restrictinfo->mergejoinoperator = InvalidOid;
         restrictinfo->left_sortop = InvalidOid;
         restrictinfo->right_sortop = InvalidOid;
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c

index 2f52e694d1310a31eb417abb295da6c441980f3e..b5a77ea623206fa1dcf7198f9476e52681b1b4c2 100644 (file)
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -14,7 +14,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.65 2001/05/20 20:28:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.66 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -317,7 +317,7 @@ subplanner(Query *root,
         {
                 Path            sort_path;      /* dummy for result of cost_sort */
  
-               cost_sort(&sort_path, root->query_pathkeys,
+               cost_sort(&sort_path, root, root->query_pathkeys,
                                   final_rel->rows, final_rel->width);
                 sort_path.startup_cost += cheapestpath->total_cost;
                 sort_path.total_cost += cheapestpath->total_cost;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index fbed3d6d092e5e53cadb6e94430bbc268e3ebdbe..a2fa8832058490dbc2a5c9a8432628f1a3384aa3 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.107 2001/05/20 20:28:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.108 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -48,7 +48,8 @@ static Plan *inheritance_planner(Query *parse, List *inheritlist);
  static Plan *grouping_planner(Query *parse, double tuple_fraction);
  static List *make_subplanTargetList(Query *parse, List *tlist,
                                            AttrNumber **groupColIdx);
-static Plan *make_groupplan(List *group_tlist, bool tuplePerGroup,
+static Plan *make_groupplan(Query *parse,
+                          List *group_tlist, bool tuplePerGroup,
                            List *groupClause, AttrNumber *grpColIdx,
                            bool is_presorted, Plan *subplan);
  static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
@@ -1153,7 +1154,8 @@ grouping_planner(Query *parse, double tuple_fraction)
                         current_pathkeys = group_pathkeys;
                 }
  
-               result_plan = make_groupplan(group_tlist,
+               result_plan = make_groupplan(parse,
+                                                                        group_tlist,
                                                                          tuplePerGroup,
                                                                          parse->groupClause,
                                                                          groupColIdx,
@@ -1186,7 +1188,7 @@ grouping_planner(Query *parse, double tuple_fraction)
         if (parse->sortClause)
         {
                 if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
-                       result_plan = make_sortplan(tlist, result_plan,
+                       result_plan = make_sortplan(parse, tlist, result_plan,
                                                                                 parse->sortClause);
         }
  
@@ -1329,7 +1331,8 @@ make_subplanTargetList(Query *parse,
   *             first add an explicit Sort node.
   */
  static Plan *
-make_groupplan(List *group_tlist,
+make_groupplan(Query *parse,
+                          List *group_tlist,
                            bool tuplePerGroup,
                            List *groupClause,
                            AttrNumber *grpColIdx,
@@ -1374,7 +1377,7 @@ make_groupplan(List *group_tlist,
  
                 Assert(keyno > 0);
  
-               subplan = (Plan *) make_sort(sort_tlist, subplan, keyno);
+               subplan = (Plan *) make_sort(parse, sort_tlist, subplan, keyno);
         }
  
         return (Plan *) make_group(group_tlist, tuplePerGroup, numCols,
@@ -1386,7 +1389,7 @@ make_groupplan(List *group_tlist,
   *       Add a Sort node to implement an explicit ORDER BY clause.
   */
  Plan *
-make_sortplan(List *tlist, Plan *plannode, List *sortcls)
+make_sortplan(Query *parse, List *tlist, Plan *plannode, List *sortcls)
  {
         List       *sort_tlist;
         List       *i;
@@ -1419,7 +1422,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
  
         Assert(keyno > 0);
  
-       return (Plan *) make_sort(sort_tlist, plannode, keyno);
+       return (Plan *) make_sort(parse, sort_tlist, plannode, keyno);
  }
  
  /*
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 42cc47fa4ac3dc80a4a1b47059884fe03d4b77c2..764fe1836c64d052edac896c4db3258ca09ebcd5 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.64 2001/05/20 20:28:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.65 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -242,7 +242,7 @@ generate_union_plan(SetOperationStmt *op, Query *parse,
  
                 tlist = new_unsorted_tlist(plan->targetlist);
                 sortList = addAllTargetsToSortList(NIL, tlist);
-               plan = make_sortplan(tlist, plan, sortList);
+               plan = make_sortplan(parse, tlist, plan, sortList);
                 plan = (Plan *) make_unique(tlist, plan, copyObject(sortList));
         }
         return plan;
@@ -290,7 +290,7 @@ generate_nonunion_plan(SetOperationStmt *op, Query *parse,
          */
         tlist = new_unsorted_tlist(plan->targetlist);
         sortList = addAllTargetsToSortList(NIL, tlist);
-       plan = make_sortplan(tlist, plan, sortList);
+       plan = make_sortplan(parse, tlist, plan, sortList);
         switch (op->op)
         {
                 case SETOP_INTERSECT:
@@ -688,7 +688,8 @@ adjust_inherited_attrs_mutator(Node *node,
                         adjust_inherited_attrs_mutator((Node *) oldinfo->clause, context);
  
                 newinfo->subclauseindices = NIL;
-               newinfo->eval_cost = -1;                /* reset this too */
+               newinfo->eval_cost = -1;                /* reset these too */
+               newinfo->this_selec = -1;
                 newinfo->left_pathkey = NIL;    /* and these */
                 newinfo->right_pathkey = NIL;
                 newinfo->left_bucketsize = -1;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index 801b328d81726a0f3a2315c4d9b299d104ccf9a6..7f1f3b402a40701bb9f22149f2df07f2cd644e09 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.73 2001/05/20 20:28:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.74 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -297,7 +297,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
   *       pathnode.
   */
  Path *
-create_seqscan_path(RelOptInfo *rel)
+create_seqscan_path(Query *root, RelOptInfo *rel)
  {
         Path       *pathnode = makeNode(Path);
  
@@ -305,7 +305,7 @@ create_seqscan_path(RelOptInfo *rel)
         pathnode->parent = rel;
         pathnode->pathkeys = NIL;       /* seqscan has unordered result */
  
-       cost_seqscan(pathnode, rel);
+       cost_seqscan(pathnode, root, rel);
  
         return pathnode;
  }
@@ -371,10 +371,9 @@ create_index_path(Query *root,
   * create_tidscan_path
   *       Creates a path corresponding to a tid_direct scan, returning the
   *       pathnode.
- *
   */
-TidPath    *
-create_tidscan_path(RelOptInfo *rel, List *tideval)
+TidPath *
+create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval)
  {
         TidPath    *pathnode = makeNode(TidPath);
  
@@ -385,7 +384,7 @@ create_tidscan_path(RelOptInfo *rel, List *tideval)
                                                                                                  * necessary? */
         pathnode->unjoined_relids = NIL;
  
-       cost_tidscan(&pathnode->path, rel, tideval);
+       cost_tidscan(&pathnode->path, root, rel, tideval);
  
         /*
          * divide selectivity for each clause to get an equal selectivity as
@@ -461,10 +460,10 @@ create_subqueryscan_path(RelOptInfo *rel)
   * 'pathkeys' are the path keys of the new join path
   *
   * Returns the resulting path node.
- *
   */
-NestPath   *
-create_nestloop_path(RelOptInfo *joinrel,
+NestPath *
+create_nestloop_path(Query *root,
+                                        RelOptInfo *joinrel,
                                          JoinType jointype,
                                          Path *outer_path,
                                          Path *inner_path,
@@ -481,7 +480,8 @@ create_nestloop_path(RelOptInfo *joinrel,
         pathnode->joinrestrictinfo = restrict_clauses;
         pathnode->path.pathkeys = pathkeys;
  
-       cost_nestloop(&pathnode->path, outer_path, inner_path, restrict_clauses);
+       cost_nestloop(&pathnode->path, root, outer_path, inner_path,
+                                 restrict_clauses);
  
         return pathnode;
  }
@@ -501,10 +501,10 @@ create_nestloop_path(RelOptInfo *joinrel,
   *             (this should be a subset of the restrict_clauses list)
   * 'outersortkeys' are the sort varkeys for the outer relation
   * 'innersortkeys' are the sort varkeys for the inner relation
- *
   */
-MergePath  *
-create_mergejoin_path(RelOptInfo *joinrel,
+MergePath *
+create_mergejoin_path(Query *root,
+                                         RelOptInfo *joinrel,
                                           JoinType jointype,
                                           Path *outer_path,
                                           Path *inner_path,
@@ -539,9 +539,11 @@ create_mergejoin_path(RelOptInfo *joinrel,
         pathnode->innersortkeys = innersortkeys;
  
         cost_mergejoin(&pathnode->jpath.path,
+                                  root,
                                    outer_path,
                                    inner_path,
                                    restrict_clauses,
+                                  mergeclauses,
                                    outersortkeys,
                                    innersortkeys);
  
@@ -559,17 +561,15 @@ create_mergejoin_path(RelOptInfo *joinrel,
   * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
   * 'hashclauses' is a list of the hash join clause (always a 1-element list)
   *             (this should be a subset of the restrict_clauses list)
- * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
- *
   */
-HashPath   *
-create_hashjoin_path(RelOptInfo *joinrel,
+HashPath *
+create_hashjoin_path(Query *root,
+                                        RelOptInfo *joinrel,
                                          JoinType jointype,
                                          Path *outer_path,
                                          Path *inner_path,
                                          List *restrict_clauses,
-                                        List *hashclauses,
-                                        Selectivity innerbucketsize)
+                                        List *hashclauses)
  {
         HashPath   *pathnode = makeNode(HashPath);
  
@@ -584,10 +584,11 @@ create_hashjoin_path(RelOptInfo *joinrel,
         pathnode->path_hashclauses = hashclauses;
  
         cost_hashjoin(&pathnode->jpath.path,
+                                 root,
                                   outer_path,
                                   inner_path,
                                   restrict_clauses,
-                                 innerbucketsize);
+                                 hashclauses);
  
         return pathnode;
  }
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c

index 1c9b3c60b9d70b7fa28fe51e309fb1e90b2be0f6..a56ac81042ae4e94d8ec96574f8ad69faa97a3bc 100644 (file)
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.91 2001/05/27 17:37:48 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.92 2001/06/05 05:26:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1132,10 +1132,12 @@ eqjoinsel(PG_FUNCTION_ARGS)
                                 totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
                                         (nd1 - nmatches);
                         /*
-                        * For robustness, we average the two estimates.  (Can a case
-                        * be made for taking the min or max instead?)
+                        * Use the smaller of the two estimates.  This can be justified
+                        * in essentially the same terms as given below for the no-stats
+                        * case: to a first approximation, we are estimating from the
+                        * point of view of the relation with smaller nd.
                          */
-                       selec = (totalsel1 + totalsel2) * 0.5;
+                       selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
                 }
                 else
                 {
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index 33927edc18d6e47b5fc38cc55e501fcf4b29efc6..4b47a84db5fcd099132bcdfe5fea062547cbf7c2 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: relation.h,v 1.56 2001/05/20 20:28:20 tgl Exp $
+ * $Id: relation.h,v 1.57 2001/06/05 05:26:05 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -492,7 +492,7 @@ typedef struct HashPath
   * path is responsible for identifying the restrict clauses it can use
   * and ignoring the rest.  Clauses not implemented by an indexscan,
   * mergejoin, or hashjoin will be placed in the plan qual or joinqual field
- * of the final Plan node, where they will be enforced by general-purpose
+ * of the finished Plan node, where they will be enforced by general-purpose
   * qual-expression-evaluation code.  (But we are still entitled to count
   * their selectivity when estimating the result tuple count, if we
   * can guess what it is...)
@@ -504,14 +504,16 @@ typedef struct RestrictInfo
  
         Expr       *clause;                     /* the represented clause of WHERE or JOIN */
  
-       Cost            eval_cost;              /* eval cost of clause; -1 if not yet set */
-
         bool            ispusheddown;   /* TRUE if clause was pushed down in level */
  
         /* only used if clause is an OR clause: */
         List       *subclauseindices;           /* indexes matching subclauses */
         /* subclauseindices is a List of Lists of IndexOptInfos */
  
+       /* cost/selectivity cache (currently only used for join clauses) */
+       Cost            eval_cost;              /* eval cost of clause; -1 if not yet set */
+       Selectivity     this_selec;             /* selectivity; -1 if not yet set */
+
         /* valid if clause is mergejoinable, else InvalidOid: */
         Oid                     mergejoinoperator;              /* copy of clause operator */
         Oid                     left_sortop;    /* leftside sortop needed for mergejoin */
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index cbf6df063a3cc4ae782cab805acaaf80b9d2025f..83b039c99de1cd2a3f179010482609205ba0cfac 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
+ * $Id: cost.h,v 1.40 2001/06/05 05:26:05 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -52,20 +52,27 @@ extern bool enable_nestloop;
  extern bool enable_mergejoin;
  extern bool enable_hashjoin;
  
-extern void cost_seqscan(Path *path, RelOptInfo *baserel);
+extern void cost_seqscan(Path *path, Query *root,
+                                                RelOptInfo *baserel);
  extern void cost_index(Path *path, Query *root,
-                  RelOptInfo *baserel, IndexOptInfo *index,
-                  List *indexQuals, bool is_injoin);
-extern void cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval);
-extern void cost_sort(Path *path, List *pathkeys, double tuples, int width);
-extern void cost_nestloop(Path *path, Path *outer_path, Path *inner_path,
-                         List *restrictlist);
-extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
-                          List *restrictlist,
-                          List *outersortkeys, List *innersortkeys);
-extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-                         List *restrictlist, Selectivity innerbucketsize);
-extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
+                                          RelOptInfo *baserel, IndexOptInfo *index,
+                                          List *indexQuals, bool is_injoin);
+extern void cost_tidscan(Path *path, Query *root,
+                                                RelOptInfo *baserel, List *tideval);
+extern void cost_sort(Path *path, Query *root,
+                                         List *pathkeys, double tuples, int width);
+extern void cost_nestloop(Path *path, Query *root,
+                                                 Path *outer_path, Path *inner_path,
+                                                 List *restrictlist);
+extern void cost_mergejoin(Path *path, Query *root,
+                                                  Path *outer_path, Path *inner_path,
+                                                  List *restrictlist,
+                                                  List *mergeclauses,
+                                                  List *outersortkeys, List *innersortkeys);
+extern void cost_hashjoin(Path *path, Query *root,
+                                                 Path *outer_path, Path *inner_path,
+                                                 List *restrictlist,
+                                                 List *hashclauses);
  extern Cost cost_qual_eval(List *quals);
  extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
  extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
@@ -84,5 +91,8 @@ extern Selectivity restrictlist_selectivity(Query *root,
  extern Selectivity clauselist_selectivity(Query *root,
                                            List *clauses,
                                            int varRelid);
+extern Selectivity clause_selectivity(Query *root,
+                                                                         Node *clause,
+                                                                         int varRelid);
  
  #endif  /* COST_H */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h

index 85ba6936f4888652bdfd5b4613b92979515f51a0..8979c0d947cb20ae5f429f65df6d6120239e59af 100644 (file)
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: pathnode.h,v 1.37 2001/05/20 20:28:20 tgl Exp $
+ * $Id: pathnode.h,v 1.38 2001/06/05 05:26:05 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -26,40 +26,43 @@ extern int compare_fractional_path_costs(Path *path1, Path *path2,
  extern void set_cheapest(RelOptInfo *parent_rel);
  extern void add_path(RelOptInfo *parent_rel, Path *new_path);
  
-extern Path *create_seqscan_path(RelOptInfo *rel);
+extern Path *create_seqscan_path(Query *root, RelOptInfo *rel);
  extern IndexPath *create_index_path(Query *root, RelOptInfo *rel,
                                   IndexOptInfo *index,
                                   List *restriction_clauses,
                                   List *pathkeys,
                                   ScanDirection indexscandir);
-extern TidPath *create_tidscan_path(RelOptInfo *rel, List *tideval);
+extern TidPath *create_tidscan_path(Query *root, RelOptInfo *rel,
+                                                                       List *tideval);
  extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths);
  extern Path *create_subqueryscan_path(RelOptInfo *rel);
  
-extern NestPath *create_nestloop_path(RelOptInfo *joinrel,
-                                        JoinType jointype,
-                                        Path *outer_path,
-                                        Path *inner_path,
-                                        List *restrict_clauses,
-                                        List *pathkeys);
+extern NestPath *create_nestloop_path(Query *root,
+                                                                         RelOptInfo *joinrel,
+                                                                         JoinType jointype,
+                                                                         Path *outer_path,
+                                                                         Path *inner_path,
+                                                                         List *restrict_clauses,
+                                                                         List *pathkeys);
  
-extern MergePath *create_mergejoin_path(RelOptInfo *joinrel,
-                                         JoinType jointype,
-                                         Path *outer_path,
-                                         Path *inner_path,
-                                         List *restrict_clauses,
-                                         List *pathkeys,
-                                         List *mergeclauses,
-                                         List *outersortkeys,
-                                         List *innersortkeys);
+extern MergePath *create_mergejoin_path(Query *root,
+                                                                               RelOptInfo *joinrel,
+                                                                               JoinType jointype,
+                                                                               Path *outer_path,
+                                                                               Path *inner_path,
+                                                                               List *restrict_clauses,
+                                                                               List *pathkeys,
+                                                                               List *mergeclauses,
+                                                                               List *outersortkeys,
+                                                                               List *innersortkeys);
  
-extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
-                                        JoinType jointype,
-                                        Path *outer_path,
-                                        Path *inner_path,
-                                        List *restrict_clauses,
-                                        List *hashclauses,
-                                        Selectivity innerbucketsize);
+extern HashPath *create_hashjoin_path(Query *root,
+                                                                         RelOptInfo *joinrel,
+                                                                         JoinType jointype,
+                                                                         Path *outer_path,
+                                                                         Path *inner_path,
+                                                                         List *restrict_clauses,
+                                                                         List *hashclauses);
  
  /*
   * prototypes for relnode.c
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h

index 75b05fa27814405fab5fdf8375c0d800dc83bfbd..974dd9a4572083414e0036c8afe1f83981aa6dd7 100644 (file)
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: planmain.h,v 1.50 2001/03/22 04:00:55 momjian Exp $
+ * $Id: planmain.h,v 1.51 2001/06/05 05:26:05 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -29,9 +29,10 @@ extern Plan *create_plan(Query *root, Path *best_path);
  extern SubqueryScan *make_subqueryscan(List *qptlist, List *qpqual,
                                   Index scanrelid, Plan *subplan);
  extern Append *make_append(List *appendplans, bool isTarget, List *tlist);
-extern Sort *make_sort(List *tlist, Plan *lefttree, int keycount);
-extern Sort *make_sort_from_pathkeys(List *tlist, Plan *lefttree,
-                                               List *pathkeys);
+extern Sort *make_sort(Query *root, List *tlist,
+                                          Plan *lefttree, int keycount);
+extern Sort *make_sort_from_pathkeys(Query *root, List *tlist,
+                                                                        Plan *lefttree, List *pathkeys);
  extern Agg *make_agg(List *tlist, List *qual, Plan *lefttree);
  extern Group *make_group(List *tlist, bool tuplePerGroup, int ngrp,
                    AttrNumber *grpColIdx, Plan *lefttree);
diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h

index 8d79ec16cd94e8f1fe2958203a65e29c9c107a58..4a118482423f8ee4aff224638275894b2d1cd08f 100644 (file)
--- a/src/include/optimizer/planner.h
+++ b/src/include/optimizer/planner.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: planner.h,v 1.19 2001/01/24 19:43:26 momjian Exp $
+ * $Id: planner.h,v 1.20 2001/06/05 05:26:05 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,6 +21,7 @@
  extern Plan *planner(Query *parse);
  extern Plan *subquery_planner(Query *parse, double tuple_fraction);
  
-extern Plan *make_sortplan(List *tlist, Plan *plannode, List *sortcls);
+extern Plan *make_sortplan(Query *parse, List *tlist,
+                                                  Plan *plannode, List *sortcls);
  
  #endif  /* PLANNER_H */
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 5 Jun 2001 05:26:05 +0000 (05:26 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 5 Jun 2001 05:26:05 +0000 (05:26 +0000)
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/readfuncs.c		patch \| blob \| history
src/backend/optimizer/path/allpaths.c		patch \| blob \| history
src/backend/optimizer/path/clausesel.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/path/joinpath.c		patch \| blob \| history
src/backend/optimizer/path/tidpath.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/plan/initsplan.c		patch \| blob \| history
src/backend/optimizer/plan/planmain.c		patch \| blob \| history
src/backend/optimizer/plan/planner.c		patch \| blob \| history
src/backend/optimizer/prep/prepunion.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/backend/utils/adt/selfuncs.c		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history
src/include/optimizer/pathnode.h		patch \| blob \| history
src/include/optimizer/planmain.h		patch \| blob \| history
src/include/optimizer/planner.h		patch \| blob \| history