Repair logic flaw in cost estimator: cost_nestloop() was estimating CPU

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 22 Mar 2000 22:08:35 +0000 (22:08 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 22 Mar 2000 22:08:35 +0000 (22:08 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 22 Mar 2000 22:08:35 +0000 (22:08 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 22 Mar 2000 22:08:35 +0000 (22:08 +0000)
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 6afa4d8549f7c5e2278813129289cd201366c1d9..836687240d5cf0cb39058affaf3fa50e2de81ab3 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.109 2000/03/14 23:06:27 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.110 2000/03/22 22:08:32 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1135,6 +1135,7 @@ _copyIndexPath(IndexPath *from)
         Node_Copy(from, newnode, indexqual);
         newnode->indexscandir = from->indexscandir;
         newnode->joinrelids = listCopy(from->joinrelids);
+       newnode->rows = from->rows;
  
         return newnode;
  }
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index 3101706f8af37830ec9d782b165097b22b51319e..139822ffae914a6fb88e5dc16bf768e21354f801 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.64 2000/03/01 18:47:43 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.65 2000/03/22 22:08:32 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -375,6 +375,9 @@ _equalIndexPath(IndexPath *a, IndexPath *b)
                 return false;
         if (!equali(a->joinrelids, b->joinrelids))
                 return false;
+       /* Skip 'rows' because of possibility of floating-point roundoff error.
+        * It should be derivable from the other fields anyway.
+        */
         return true;
  }
  
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 75a70db2e231c1ef6ebcca66685dde20936aa060..6d09cb48524d6df29f4a4d4d0e13304cfc317e89 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- *     $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.111 2000/03/17 02:36:12 tgl Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v 1.112 2000/03/22 22:08:32 tgl Exp $
   *
   * NOTES
   *       Every (plan) node in POSTGRES has an associated "out" routine which
@@ -1019,6 +1019,9 @@ _outIndexPath(StringInfo str, IndexPath *node)
         appendStringInfo(str, " :indexscandir %d :joinrelids ",
                                          (int) node->indexscandir);
         _outIntList(str, node->joinrelids);
+
+       appendStringInfo(str, " :rows %.2f ",
+                                        node->rows);
  }
  
  /*
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index eba45276c6f82fe5ec2adc9214b1a42ef1450143..3a3059ef4e4328390125b28a89473b4038a5e8fa 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.86 2000/03/17 02:36:12 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.87 2000/03/22 22:08:32 tgl Exp $
   *
   * NOTES
   *       Most of the read functions for plan nodes are tested. (In fact, they
@@ -1510,6 +1510,10 @@ _readIndexPath()
         token = lsptok(NULL, &length);          /* get :joinrelids */
         local_node->joinrelids = toIntList(nodeRead(true));
  
+       token = lsptok(NULL, &length);          /* get :rows */
+       token = lsptok(NULL, &length);          /* now read it */
+       local_node->rows = atof(token);
+
         return local_node;
  }
  
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c

index e70d2a7abee59cf86cbb6f418ea8cf5f23e58c7e..4270032d278a65c32e760fb2eb7ec7f45eb9349f 100644 (file)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.53 2000/03/14 02:23:14 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.54 2000/03/22 22:08:33 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -202,7 +202,7 @@ cost_nonsequential_access(double relpages)
   * 'index' is the index to be used
   * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
   * 'is_injoin' is T if we are considering using the index scan as the inside
- *             of a nestloop join.
+ *             of a nestloop join (hence, some of the indexQuals are join clauses)
   *
   * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
   * Any additional quals evaluated as qpquals may reduce the number of returned
@@ -281,12 +281,15 @@ cost_index(Path *path, Query *root,
         /* CPU costs */
         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
         /*
-        * Assume that the indexquals will be removed from the list of
-        * restriction clauses that we actually have to evaluate as qpquals.
-        * This is not completely right, but it's close.
-        * For a lossy index, however, we will have to recheck all the quals.
+        * Normally the indexquals will be removed from the list of restriction
+        * clauses that we have to evaluate as qpquals, so we should subtract
+        * their costs from baserestrictcost.  For a lossy index, however, we
+        * will have to recheck all the quals and so mustn't subtract anything.
+        * Also, if we are doing a join then some of the indexquals are join
+        * clauses and shouldn't be subtracted.  Rather than work out exactly
+        * how much to subtract, we don't subtract anything in that case either.
          */
-       if (! index->lossy)
+       if (! index->lossy && ! is_injoin)
                 cpu_per_tuple -= cost_qual_eval(indexQuals);
  
         run_cost += cpu_per_tuple * tuples_fetched;
@@ -418,15 +421,12 @@ cost_sort(Path *path, List *pathkeys, double tuples, int width)
   * 'outer_path' is the path for the outer relation
   * 'inner_path' is the path for the inner relation
   * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'is_indexjoin' is true if we are using an indexscan for the inner relation
- *             (not currently needed here; the indexscan adjusts its cost...)
   */
  void
  cost_nestloop(Path *path,
                           Path *outer_path,
                           Path *inner_path,
-                         List *restrictlist,
-                         bool is_indexjoin)
+                         List *restrictlist)
  {
         Cost            startup_cost = 0;
         Cost            run_cost = 0;
@@ -447,8 +447,15 @@ cost_nestloop(Path *path,
         run_cost += outer_path->parent->rows *
                 (inner_path->total_cost - inner_path->startup_cost);
  
-       /* number of tuples processed (not number emitted!) */
-       ntuples = outer_path->parent->rows * inner_path->parent->rows;
+       /* Number of tuples processed (not number emitted!).  If inner path is
+        * an indexscan, be sure to use its estimated output row count, which
+        * may be lower than the restriction-clause-only row count of its parent.
+        */
+       if (IsA(inner_path, IndexPath))
+               ntuples = ((IndexPath *) inner_path)->rows;
+       else
+               ntuples = inner_path->parent->rows;
+       ntuples *= outer_path->parent->rows;
  
         /* CPU costs */
         cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c

index edb16ce0d6d26620d9cfc9cba5393ad31f51b341..8c63d9e1c38326cc8adeccb2bb68c905de17bedc 100644 (file)
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.80 2000/02/15 20:49:16 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.81 2000/03/22 22:08:33 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1454,6 +1454,26 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
                 /* joinrelids saves the rels needed on the outer side of the join */
                 pathnode->joinrelids = lfirst(outerrelids_list);
  
+               /*
+                * We must compute the estimated number of output rows for the
+                * indexscan.  This is less than rel->rows because of the additional
+                * selectivity of the join clauses.  Since clausegroup may contain
+                * both restriction and join clauses, we have to do a set union to
+                * get the full set of clauses that must be considered to compute
+                * the correct selectivity.  (We can't just nconc the two lists;
+                * then we might have some restriction clauses appearing twice,
+                * which'd mislead restrictlist_selectivity into double-counting
+                * their selectivity.)
+                */
+               pathnode->rows = rel->tuples *
+                       restrictlist_selectivity(root,
+                                                                        LispUnion(rel->baserestrictinfo,
+                                                                                          clausegroup),
+                                                                        lfirsti(rel->relids));
+               /* Like costsize.c, force estimate to be at least one row */
+               if (pathnode->rows < 1.0)
+                       pathnode->rows = 1.0;
+
                 cost_index(&pathnode->path, root, rel, index, indexquals, true);
  
                 path_list = lappend(path_list, pathnode);
diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c

index 6226100cfc791304a022691b5dc36c7737a82846..e2ae3f0577a8fcd29efcedf9ba15f7eac81b6dfd 100644 (file)
--- a/src/backend/optimizer/path/orindxpath.c
+++ b/src/backend/optimizer/path/orindxpath.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.37 2000/02/15 20:49:17 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.38 2000/03/22 22:08:33 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -99,7 +99,9 @@ create_or_index_paths(Query *root,
                                 /* We don't actually care what order the index scans in ... */
                                 pathnode->indexscandir = NoMovementScanDirection;
  
+                               /* This isn't a nestloop innerjoin, so: */
                                 pathnode->joinrelids = NIL;     /* no join clauses here */
+                               pathnode->rows = rel->rows;
  
                                 best_or_subclause_indices(root,
                                                                                   rel,
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c

index 8d4c836fa55c7b13a3ac330fca81fca153dd912a..8e20ae302e1971bb567cb0c07071af8c0cc7cd82 100644 (file)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.86 2000/02/18 23:47:21 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.87 2000/03/22 22:08:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -314,7 +314,6 @@ create_indexscan_node(Query *root,
         List       *ixid;
         IndexScan  *scan_node;
         bool            lossy = false;
-       double          plan_rows;
  
         /* there should be exactly one base rel involved... */
         Assert(length(best_path->path.parent->relids) == 1);
@@ -350,20 +349,7 @@ create_indexscan_node(Query *root,
          * given by scan_clauses, there will normally be some duplications
          * between the lists.  We get rid of the duplicates, then add back
          * if lossy.
-        *
-        * If this indexscan is a nestloop-join inner indexscan (as indicated
-        * by having nonempty joinrelids), then it uses indexqual conditions
-        * that are not part of the relation's restriction clauses.  The rows
-        * estimate stored in the relation's RelOptInfo will be an overestimate
-        * because it did not take these extra conditions into account.  So,
-        * in this case we recompute the selectivity of the whole scan ---
-        * considering both indexqual and qpqual --- rather than using the
-        * RelOptInfo's rows value.  Since clausesel.c assumes it's working on
-        * minimized (no duplicates) expressions, we have to do that while we
-        * have the duplicate-free qpqual available.
          */
-       plan_rows = best_path->path.parent->rows; /* OK unless nestloop inner */
-
         if (length(indxqual) > 1)
         {
                 /*
@@ -383,15 +369,6 @@ create_indexscan_node(Query *root,
                 qpqual = set_difference(scan_clauses,
                                                                 lcons(indxqual_expr, NIL));
  
-               if (best_path->joinrelids)
-               {
-                       /* recompute output row estimate using all available quals */
-                       plan_rows = best_path->path.parent->tuples *
-                               clauselist_selectivity(root,
-                                                                          lcons(indxqual_expr, qpqual),
-                                                                          baserelid);
-               }
-
                 if (lossy)
                         qpqual = lappend(qpqual, copyObject(indxqual_expr));
         }
@@ -404,15 +381,6 @@ create_indexscan_node(Query *root,
  
                 qpqual = set_difference(scan_clauses, indxqual_list);
  
-               if (best_path->joinrelids)
-               {
-                       /* recompute output row estimate using all available quals */
-                       plan_rows = best_path->path.parent->tuples *
-                               clauselist_selectivity(root,
-                                                                          nconc(listCopy(indxqual_list), qpqual),
-                                                                          baserelid);
-               }
-
                 if (lossy)
                         qpqual = nconc(qpqual, (List *) copyObject(indxqual_list));
         }
@@ -433,10 +401,8 @@ create_indexscan_node(Query *root,
                                                            best_path->indexscandir);
  
         copy_path_costsize(&scan_node->scan.plan, &best_path->path);
-       /* set up rows estimate (just to make EXPLAIN output reasonable) */
-       if (plan_rows < 1.0)
-               plan_rows = 1.0;
-       scan_node->scan.plan.plan_rows = plan_rows;
+       /* use the indexscan-specific rows estimate, not the parent rel's */
+       scan_node->scan.plan.plan_rows = best_path->rows;
  
         return scan_node;
  }
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c

index 6f6ef231289265a67f737aae71050d0b324deca6..400c813125cb569703019b68ed340979c5651c60 100644 (file)
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.61 2000/02/18 23:47:30 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.62 2000/03/22 22:08:35 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -337,8 +337,16 @@ create_index_path(Query *root,
          */
         pathnode->indexid = lconsi(index->indexoid, NIL);
         pathnode->indexqual = lcons(indexquals, NIL);
+
         pathnode->indexscandir = indexscandir;
+
+       /*
+        * This routine is only used to generate "standalone" indexpaths,
+        * not nestloop inner indexpaths.  So joinrelids is always NIL
+        * and the number of rows is the same as the parent rel's estimate.
+        */
         pathnode->joinrelids = NIL;     /* no join clauses here */
+       pathnode->rows = rel->rows;
  
         cost_index(&pathnode->path, root, rel, index, indexquals, false);
  
@@ -400,8 +408,7 @@ create_nestloop_path(RelOptInfo *joinrel,
         pathnode->joinrestrictinfo = restrict_clauses;
         pathnode->path.pathkeys = pathkeys;
  
-       cost_nestloop(&pathnode->path, outer_path, inner_path,
-                                 restrict_clauses, IsA(inner_path, IndexPath));
+       cost_nestloop(&pathnode->path, outer_path, inner_path, restrict_clauses);
  
         return pathnode;
  }
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h

index ac6dc764ce0e7a020df5cc247578e3200c5bd90e..f01877a9e892eaae2418a19bc29e893ddc98ada7 100644 (file)
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: relation.h,v 1.45 2000/02/18 23:47:17 tgl Exp $
+ * $Id: relation.h,v 1.46 2000/03/22 22:08:29 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -252,6 +252,12 @@ typedef struct Path
   * as the inner path of a nestloop join.  These paths have indexquals
   * that refer to values of other rels, so those other rels must be
   * included in the outer joinrel in order to make a usable join.
+ *
+ * 'rows' is the estimated result tuple count for the indexscan.  This
+ * is the same as path.parent->rows for a simple indexscan, but it is
+ * different for a nestloop inner path, because the additional indexquals
+ * coming from join clauses make the scan more selective than the parent
+ * rel's restrict clauses alone would do.
   *----------
   */
  typedef struct IndexPath
@@ -260,7 +266,8 @@ typedef struct IndexPath
         List       *indexid;
         List       *indexqual;
         ScanDirection indexscandir;
-       Relids          joinrelids;                     /* other rels mentioned in indexqual */
+       Relids          joinrelids;             /* other rels mentioned in indexqual */
+       double          rows;                   /* estimated number of result tuples */
  } IndexPath;
  
  typedef struct TidPath
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h

index 960a2ea9e9aafd72d9a79689ff6581cf0e6c276b..e8673675f8981afaf50b4421e544bd97f70010d3 100644 (file)
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $Id: cost.h,v 1.30 2000/02/15 20:49:25 tgl Exp $
+ * $Id: cost.h,v 1.31 2000/03/22 22:08:30 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -58,7 +58,7 @@ extern void cost_index(Path *path, Query *root,
  extern void cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval);
  extern void cost_sort(Path *path, List *pathkeys, double tuples, int width);
  extern void cost_nestloop(Path *path, Path *outer_path, Path *inner_path,
-                                                 List *restrictlist, bool is_indexjoin);
+                                                 List *restrictlist);
  extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
                                                    List *restrictlist,
                                                    List *outersortkeys, List *innersortkeys);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 22 Mar 2000 22:08:35 +0000 (22:08 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 22 Mar 2000 22:08:35 +0000 (22:08 +0000)
src/backend/nodes/copyfuncs.c		patch \| blob \| history
src/backend/nodes/equalfuncs.c		patch \| blob \| history
src/backend/nodes/outfuncs.c		patch \| blob \| history
src/backend/nodes/readfuncs.c		patch \| blob \| history
src/backend/optimizer/path/costsize.c		patch \| blob \| history
src/backend/optimizer/path/indxpath.c		patch \| blob \| history
src/backend/optimizer/path/orindxpath.c		patch \| blob \| history
src/backend/optimizer/plan/createplan.c		patch \| blob \| history
src/backend/optimizer/util/pathnode.c		patch \| blob \| history
src/include/nodes/relation.h		patch \| blob \| history
src/include/optimizer/cost.h		patch \| blob \| history