granicus.if.org Git - postgresql/commitdiff
Install some slightly realistic cost estimation for bitmap index scans.
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Apr 2005 02:28:02 +0000 (02:28 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 21 Apr 2005 02:28:02 +0000 (02:28 +0000)
src/backend/nodes/outfuncs.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/indxpath.c
src/backend/optimizer/path/orindxpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/util/pathnode.c
src/include/nodes/relation.h
src/include/optimizer/cost.h

index c241b113674fe78aa780867f07dfc710682f5388..1ea59314ea2710861e15b30455b7820071609b2a 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.247 2005/04/19 22:35:14 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.248 2005/04/21 02:28:01 tgl Exp $
  *
  * NOTES
  *       Every node type that can appear in stored rules' parsetrees *must*
@@ -1024,6 +1024,8 @@ _outIndexPath(StringInfo str, IndexPath *node)
        WRITE_NODE_FIELD(indexquals);
        WRITE_BOOL_FIELD(isjoininner);
        WRITE_ENUM_FIELD(indexscandir, ScanDirection);
+       WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
+       WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
        WRITE_FLOAT_FIELD(rows, "%.0f");
 }
 
index 06ebe18fe789b0a42499a887c9f27158f2137467..a33ba0f796f47624c936babf8d4d8edeb90ca08e 100644 (file)
@@ -49,7 +49,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.142 2005/04/19 22:35:15 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.143 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -103,6 +103,7 @@ bool                enable_hashjoin = true;
 
 
 static bool cost_qual_eval_walker(Node *node, QualCost *total);
+static Selectivity cost_bitmap_qual(Node *bitmapqual, Cost *totalCost);
 static Selectivity approx_selectivity(Query *root, List *quals,
                                   JoinType jointype);
 static Selectivity join_in_selectivity(JoinPath *path, Query *root);
@@ -126,7 +127,7 @@ clamp_row_est(double nrows)
        if (nrows < 1.0)
                nrows = 1.0;
        else
-               nrows = ceil(nrows);
+               nrows = rint(nrows);
 
        return nrows;
 }
@@ -232,6 +233,10 @@ cost_nonsequential_access(double relpages)
  * 'is_injoin' is T if we are considering using the index scan as the inside
  *             of a nestloop join (hence, some of the indexQuals are join clauses)
  *
+ * cost_index() takes an IndexPath not just a Path, because it sets a few
+ * additional fields of the IndexPath besides startup_cost and total_cost.
+ * These fields are needed if the IndexPath is used in a BitmapIndexScan.
+ *
  * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
  * Any additional quals evaluated as qpquals may reduce the number of returned
  * tuples, but they won't reduce the number of tuples we have to fetch from
@@ -241,7 +246,7 @@ cost_nonsequential_access(double relpages)
  * it was a list of bare clause expressions.
  */
 void
-cost_index(Path *path, Query *root,
+cost_index(IndexPath *path, Query *root,
                   IndexOptInfo *index,
                   List *indexQuals,
                   bool is_injoin)
@@ -286,6 +291,14 @@ cost_index(Path *path, Query *root,
                                         PointerGetDatum(&indexSelectivity),
                                         PointerGetDatum(&indexCorrelation));
 
+       /*
+        * Save amcostestimate's results for possible use by cost_bitmap_scan.
+        * We don't bother to save indexStartupCost or indexCorrelation, because
+        * a bitmap scan doesn't care about either.
+        */
+       path->indextotalcost = indexTotalCost;
+       path->indexselectivity = indexSelectivity;
+
        /* all costs for touching index itself included here */
        startup_cost += indexStartupCost;
        run_cost += indexTotalCost - indexStartupCost;
@@ -396,8 +409,8 @@ cost_index(Path *path, Query *root,
 
        run_cost += cpu_per_tuple * tuples_fetched;
 
-       path->startup_cost = startup_cost;
-       path->total_cost = startup_cost + run_cost;
+       path->path.startup_cost = startup_cost;
+       path->path.total_cost = startup_cost + run_cost;
 }
 
 /*
@@ -417,19 +430,151 @@ cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
 {
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
+       Cost            indexTotalCost;
+       Selectivity indexSelectivity;
+       Cost            cpu_per_tuple;
+       Cost            cost_per_page;
+       double          tuples_fetched;
+       double          pages_fetched;
+       double          T;
 
        /* Should only be applied to base relations */
        Assert(IsA(baserel, RelOptInfo));
        Assert(baserel->relid > 0);
        Assert(baserel->rtekind == RTE_RELATION);
 
-       /* XXX lots to do here */
-       run_cost += 10;
+       if (!enable_indexscan)          /* XXX use a separate enable flag? */
+               startup_cost += disable_cost;
+
+       /*
+        * Estimate total cost of obtaining the bitmap, as well as its total
+        * selectivity.
+        */
+       indexTotalCost = 0;
+       indexSelectivity = cost_bitmap_qual(bitmapqual, &indexTotalCost);
+
+       startup_cost += indexTotalCost;
+
+       /*
+        * The number of heap pages that need to be fetched is the same as the
+        * Mackert and Lohman formula for the case T <= b (ie, no re-reads
+        * needed).
+        */
+       tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+
+       T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+       pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+       if (pages_fetched > T)
+               pages_fetched = T;
+
+       /*
+        * For small numbers of pages we should charge random_page_cost apiece,
+        * while if nearly all the table's pages are being read, it's more
+        * appropriate to charge 1.0 apiece.  The effect is nonlinear, too.
+        * For lack of a better idea, interpolate like this to determine the
+        * cost per page.
+        */
+       cost_per_page = random_page_cost -
+               (random_page_cost - 1.0) * sqrt(pages_fetched / T);
+
+       run_cost += pages_fetched * cost_per_page;
+
+       /*
+        * Estimate CPU costs per tuple.
+        *
+        * Often the indexquals don't need to be rechecked at each tuple ...
+        * but not always, especially not if there are enough tuples involved
+        * that the bitmaps become lossy.  For the moment, just assume they
+        * will be rechecked always.
+        */
+       startup_cost += baserel->baserestrictcost.startup;
+       cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+
+       run_cost += cpu_per_tuple * tuples_fetched;
 
        path->startup_cost = startup_cost;
        path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * cost_bitmap_qual
+ *             Recursively examine the AND/OR/IndexPath tree for a bitmap scan
+ *
+ * Total execution costs are added to *totalCost (so caller must be sure
+ * to initialize that to zero).  Estimated total selectivity of the bitmap
+ * is returned as the function result.  Other node types get elog(ERROR).
+ */
+static Selectivity
+cost_bitmap_qual(Node *bitmapqual, Cost *totalCost)
+{
+       Selectivity     result;         /* selectivity of this whole subtree */
+       Selectivity     subresult;      /* selectivity of one AND/OR input */
+       ListCell   *l;
+
+       if (and_clause(bitmapqual))
+       {
+               /*
+                * We estimate AND selectivity on the assumption that the inputs
+                * are independent.  This is probably often wrong, but we don't
+                * have the info to do better.
+                *
+                * The runtime cost of the BitmapAnd itself is estimated at 100x
+                * cpu_operator_cost for each tbm_intersect needed.  Probably too
+                * small, definitely too simplistic?
+                *
+                * This must agree with make_bitmap_and in createplan.c.
+                */
+               result = 1.0;
+               foreach(l, ((BoolExpr *) bitmapqual)->args)
+               {
+                       subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);    /* input's cost lands in *totalCost */
+                       result *= subresult;
+                       if (l != list_head(((BoolExpr *) bitmapqual)->args))    /* n inputs => n-1 intersect charges */
+                               *totalCost += 100.0 * cpu_operator_cost;
+               }
+       }
+       else if (or_clause(bitmapqual))
+       {
+               /*
+                * We estimate OR selectivity on the assumption that the inputs
+                * are non-overlapping, since that's often the case in "x IN (list)"
+                * type situations.  Of course, we clamp to 1.0 at the end.
+                *
+                * The runtime cost of the BitmapOr itself is estimated at 100x
+                * cpu_operator_cost for each tbm_union needed.  Probably too
+                * small, definitely too simplistic?  We are aware that the tbm_unions
+                * are optimized out when the inputs are BitmapIndexScans.
+                *
+                * This must agree with make_bitmap_or in createplan.c.
+                */
+               result = 0.0;
+               foreach(l, ((BoolExpr *) bitmapqual)->args)
+               {
+                       subresult = cost_bitmap_qual((Node *) lfirst(l), totalCost);    /* input's cost lands in *totalCost */
+                       result += subresult;
+                       if (l != list_head(((BoolExpr *) bitmapqual)->args) &&
+                               !IsA((Node *) lfirst(l), IndexPath))    /* no charge for first input or IndexPath inputs */
+                               *totalCost += 100.0 * cpu_operator_cost;
+               }
+               result = Min(result, 1.0);      /* summed selectivities can exceed 1.0 */
+       }
+       else if (IsA(bitmapqual, IndexPath))
+       {
+               IndexPath *ipath = (IndexPath *) bitmapqual;
+
+               /* this must agree with create_bitmap_subplan in createplan.c */
+               *totalCost += ipath->indextotalcost;    /* value saved earlier by cost_index() */
+               result = ipath->indexselectivity;       /* likewise saved by cost_index() */
+       }
+       else
+       {
+               elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
+               result = 0.0;                           /* keep compiler quiet */
+       }
+
+       return result;
+}
+
 /*
  * cost_tidscan
  *       Determines and returns the cost of scanning a relation using TIDs.
index 937e2aed80eb9e000b8ea891694493ffca6035bd..e387a7bd768364d0d2b038b5240618c83231ca2a 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.174 2005/04/20 21:48:04 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.175 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1710,7 +1710,7 @@ make_innerjoin_index_path(Query *root,
        /* Like costsize.c, force estimate to be at least one row */
        pathnode->rows = clamp_row_est(pathnode->rows);
 
-       cost_index(&pathnode->path, root, index, indexquals, true);
+       cost_index(pathnode, root, index, indexquals, true);
 
        return (Path *) pathnode;
 }
index 0843bb6ea88a43c4edbe41efb32117dd37f2a442..c30c26562c5aadf80ccd230b61c282d7b6231eb1 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.67 2005/03/27 06:29:36 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.68 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -353,7 +353,7 @@ best_or_subclause_index(Query *root,
                IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
                List       *indexclauses;
                List       *indexquals;
-               Path            subclause_path;
+               IndexPath       subclause_path;
 
                /*
                 * Ignore partial indexes that do not match the query.  If predOK
@@ -402,13 +402,13 @@ best_or_subclause_index(Query *root,
 
                cost_index(&subclause_path, root, index, indexquals, false);
 
-               if (!found || subclause_path.total_cost < *retTotalCost)
+               if (!found || subclause_path.path.total_cost < *retTotalCost)
                {
                        *retIndexInfo = index;
                        *retIndexClauses = flatten_clausegroups_list(indexclauses);
                        *retIndexQuals = indexquals;
-                       *retStartupCost = subclause_path.startup_cost;
-                       *retTotalCost = subclause_path.total_cost;
+                       *retStartupCost = subclause_path.path.startup_cost;
+                       *retTotalCost = subclause_path.path.total_cost;
                        found = true;
                }
        }
index d15f0c6dcae292bc760fe8bb53cc4cccd4e059e5..0abb900beaaee6b7ae53c7d3b1142c66b1460329 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.180 2005/04/19 22:35:16 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.181 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -976,10 +976,12 @@ create_bitmap_subplan(Query *root, Node *bitmapqual)
                                                                          linitial(iscan->indxqualorig),
                                                                          linitial(iscan->indxstrategy),
                                                                          linitial(iscan->indxsubtype));
-               /* XXX this cost is wrong: */
-               copy_path_costsize(&bscan->scan.plan, &ipath->path);
-               /* use the indexscan-specific rows estimate, not the parent rel's */
-               bscan->scan.plan.plan_rows = ipath->rows;
+               /* this must agree with cost_bitmap_qual in costsize.c */
+               bscan->scan.plan.startup_cost = 0.0;
+               bscan->scan.plan.total_cost = ipath->indextotalcost;
+               bscan->scan.plan.plan_rows =
+                       clamp_row_est(ipath->indexselectivity * ipath->path.parent->tuples);
+               bscan->scan.plan.plan_width = 0; /* meaningless */
                plan = (Plan *) bscan;
        }
        else
@@ -2068,8 +2070,9 @@ make_bitmap_and(List *bitmapplans)
        ListCell   *subnode;
 
        /*
-        * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
+        * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
         * (a pretty arbitrary amount, agreed) for each tbm_intersect needed.
+        * This must agree with cost_bitmap_qual in costsize.c.
         */
        plan->startup_cost = 0;
        plan->total_cost = 0;
@@ -2085,7 +2088,10 @@ make_bitmap_and(List *bitmapplans)
                        plan->plan_rows = subplan->plan_rows;
                }
                else
+               {
+                       plan->total_cost += cpu_operator_cost * 100.0;
                        plan->plan_rows = Min(plan->plan_rows, subplan->plan_rows);
+               }
                plan->total_cost += subplan->total_cost;
        }
 
@@ -2106,10 +2112,12 @@ make_bitmap_or(List *bitmapplans)
        ListCell   *subnode;
 
        /*
-        * Compute cost as sum of subplan costs, plus 10x cpu_operator_cost
+        * Compute cost as sum of subplan costs, plus 100x cpu_operator_cost
         * (a pretty arbitrary amount, agreed) for each tbm_union needed.
         * We assume that tbm_union can be optimized away for BitmapIndexScan
         * subplans.
+        *
+        * This must agree with cost_bitmap_qual in costsize.c.
         */
        plan->startup_cost = 0;
        plan->total_cost = 0;
@@ -2122,7 +2130,7 @@ make_bitmap_or(List *bitmapplans)
                if (subnode == list_head(bitmapplans))  /* first node? */
                        plan->startup_cost = subplan->startup_cost;
                else if (!IsA(subplan, BitmapIndexScan))
-                       plan->total_cost += cpu_operator_cost * 10;
+                       plan->total_cost += cpu_operator_cost * 100.0;
                plan->total_cost += subplan->total_cost;
                plan->plan_rows += subplan->plan_rows; /* ignore overlap */
        }
index ec0fc8a29ab023e5d25f32e04c2913fe0cfa9a1f..823486e2f3b83390b47e246a7bef733c37341bc9 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.116 2005/04/19 22:35:17 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.117 2005/04/21 02:28:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -466,7 +466,7 @@ create_index_path(Query *root,
         */
        pathnode->rows = index->rel->rows;
 
-       cost_index(&pathnode->path, root, index, indexquals, false);
+       cost_index(pathnode, root, index, indexquals, false);
 
        return pathnode;
 }
index 2e4e1834fe639ca9616f58c1aef87aa7faab2ced..4ae0ae3a2c058c7362e42b31ce1106ebefe0ca23 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.105 2005/04/19 22:35:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.106 2005/04/21 02:28:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -374,6 +374,10 @@ typedef struct Path
  * NoMovementScanDirection for an indexscan, but the planner wants to
  * distinguish ordered from unordered indexes for building pathkeys.)
  *
+ * 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that
+ * we need not recompute them when considering using the same index in a
+ * bitmap index/heap scan (see BitmapHeapPath).
+ *
  * 'rows' is the estimated result tuple count for the indexscan.  This
  * is the same as path.parent->rows for a simple indexscan, but it is
  * different for a nestloop inner scan, because the additional indexquals
@@ -389,6 +393,8 @@ typedef struct IndexPath
        List       *indexquals;
        bool            isjoininner;
        ScanDirection indexscandir;
+       Cost            indextotalcost;
+       Selectivity indexselectivity;
        double          rows;                   /* estimated number of result tuples */
 } IndexPath;
 
@@ -401,9 +407,12 @@ typedef struct IndexPath
  *
  * The individual indexscans are represented by IndexPath nodes, and any
  * logic on top of them is represented by regular AND and OR expressions.
- * Notice that we can use the same IndexPath node both to represent an
- * ordered index scan, and as the child of a BitmapHeapPath that represents
- * scanning the same index in an unordered way.
+ * Notice that we can use the same IndexPath node both to represent a regular
+ * IndexScan plan, and as the child of a BitmapHeapPath that represents
+ * scanning the same index using a BitmapIndexScan.  The startup_cost and
+ * total_cost figures of an IndexPath always represent the costs to use it
+ * as a regular IndexScan.  The costs of a BitmapIndexScan can be computed
+ * using the IndexPath's indextotalcost and indexselectivity.
  *
  * BitmapHeapPaths can be nestloop inner indexscans.  The isjoininner and
  * rows fields serve the same purpose as for plain IndexPaths.
index 8b1445dadf1d7d3fcb2885fcd64cefeffadf0ccf..1f7ea96ee04c4d05707af11d3b50af0ee93e2026 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.64 2005/04/19 22:35:18 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.65 2005/04/21 02:28:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,7 +51,7 @@ extern bool enable_hashjoin;
 
 extern double clamp_row_est(double nrows);
 extern void cost_seqscan(Path *path, Query *root, RelOptInfo *baserel);
-extern void cost_index(Path *path, Query *root, IndexOptInfo *index,
+extern void cost_index(IndexPath *path, Query *root, IndexOptInfo *index,
                   List *indexQuals, bool is_injoin);
 extern void cost_bitmap_scan(Path *path, Query *root, RelOptInfo *baserel,
                                                         Node *bitmapqual, bool is_injoin);