-<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.13 2006/06/05 02:49:58 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.14 2006/06/06 17:59:57 tgl Exp $ -->
<chapter id="indexam">
<title>Index Access Method Interface Definition</title>
amcostestimate (PlannerInfo *root,
IndexOptInfo *index,
List *indexQuals,
+ RelOptInfo *outer_rel,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
amcostestimate (PlannerInfo *root,
IndexOptInfo *index,
List *indexQuals,
+ RelOptInfo *outer_rel,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>outer_rel</term>
+ <listitem>
+ <para>
+ If the index is being considered for use in a join inner indexscan,
+ the planner's information about the outer side of the join. Otherwise
+ NULL. When non-NULL, some of the qual clauses will be join clauses
+ involving this rel rather than simple restriction clauses. Also,
+ the cost estimator should expect that the index scan will be repeated
+ for each row of the outer rel.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
</para>
table.
</para>
+ <para>
+ In the join case, the returned numbers should be averages expected for
+ any one scan of the index.
+ </para>
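  <para>
   Since the cost estimation functions are actually called through the
   <literal>fmgr</literal> interface with eight <type>internal</type>
   arguments, an estimator obtains <literal>outer_rel</literal> as the
   fourth argument.  The skeleton below is only an illustrative sketch
   (the function name is a placeholder); it mirrors the argument handling
   of the built-in estimators in
   <filename>src/backend/utils/adt/selfuncs.c</filename>:
  </para>

  <programlisting>
Datum
my_amcostestimate(PG_FUNCTION_ARGS)
{
    PlannerInfo  *root = (PlannerInfo *) PG_GETARG_POINTER(0);
    IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
    List         *indexQuals = (List *) PG_GETARG_POINTER(2);
    RelOptInfo   *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
    Cost         *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
    Cost         *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
    Selectivity  *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
    double       *indexCorrelation = (double *) PG_GETARG_POINTER(7);

    /* outer_rel is NULL for a plain scan, non-NULL for a join inner scan */

    /* ... compute and store the four output values ... */

    PG_RETURN_VOID();
}
</programlisting>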
+
<procedure>
<title>Cost Estimation</title>
<para>
*indexTotalCost = seq_page_cost * numIndexPages +
(cpu_index_tuple_cost + index_qual_cost.per_tuple) * numIndexTuples;
</programlisting>
+
+ However, the above does not account for amortization of index reads
+ across repeated index scans in the join case.
</para>
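   <para>
    One possible way to account for that amortization, sketched here after
    the approach the built-in <function>genericcostestimate</function> takes
    in this release (variable names as in the listing above; only the
    disk-access portion of the estimate is shown), is to scale the per-scan
    page count by the expected number of scans, apply the Mackert-Lohman
    model, and then pro-rate the result back to a single scan:
   </para>

   <programlisting>
/* outer_rel is non-NULL when the scan will be repeated in a join */
if (outer_rel != NULL)
{
    double      num_scans = outer_rel->rows;    /* at least 1 */
    double      pages_fetched;

    /* total page fetches over all scans, ignoring cache effects */
    pages_fetched = numIndexPages * num_scans;

    /* Mackert-Lohman adjustment for cache effects (see costsize.c) */
    pages_fetched = index_pages_fetched(pages_fetched,
                                        index->pages,
                                        index->rel->pages);

    /* charge this scan a pro-rated share of the total */
    *indexTotalCost = (pages_fetched * random_page_cost) / num_scans;
}
</programlisting>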
</step>
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.157 2006/06/05 20:56:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.158 2006/06/06 17:59:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* better and to avoid possible divide-by-zero when interpolating costs.
* Make it an integer, too.
*/
- if (nrows < 1.0)
+ if (nrows <= 1.0)
nrows = 1.0;
else
nrows = rint(nrows);
*
* 'index' is the index to be used
* 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
- * 'is_injoin' is T if we are considering using the index scan as the inside
- * of a nestloop join (hence, some of the indexQuals are join clauses)
+ * 'outer_rel' is the outer relation when we are considering using the index
+ * scan as the inside of a nestloop join (hence, some of the indexQuals
+ * are join clauses, and we should expect repeated scans of the index);
+ * NULL for a plain index scan
*
* cost_index() takes an IndexPath not just a Path, because it sets a few
* additional fields of the IndexPath besides startup_cost and total_cost.
cost_index(IndexPath *path, PlannerInfo *root,
IndexOptInfo *index,
List *indexQuals,
- bool is_injoin)
+ RelOptInfo *outer_rel)
{
RelOptInfo *baserel = index->rel;
Cost startup_cost = 0;
Cost cpu_per_tuple;
double tuples_fetched;
double pages_fetched;
- double T,
- b;
/* Should only be applied to base relations */
Assert(IsA(baserel, RelOptInfo) &&
* the fraction of main-table tuples we will have to retrieve) and its
* correlation to the main-table tuple order.
*/
- OidFunctionCall7(index->amcostestimate,
+ OidFunctionCall8(index->amcostestimate,
PointerGetDatum(root),
PointerGetDatum(index),
PointerGetDatum(indexQuals),
+ PointerGetDatum(outer_rel),
PointerGetDatum(&indexStartupCost),
PointerGetDatum(&indexTotalCost),
PointerGetDatum(&indexSelectivity),
startup_cost += indexStartupCost;
run_cost += indexTotalCost - indexStartupCost;
+ /* estimate number of main-table tuples fetched */
+ tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
+
/*----------
- * Estimate number of main-table tuples and pages fetched.
+ * Estimate number of main-table pages fetched, and compute I/O cost.
*
* When the index ordering is uncorrelated with the table ordering,
- * we use an approximation proposed by Mackert and Lohman, "Index Scans
- * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
- * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
- * The Mackert and Lohman approximation is that the number of pages
- * fetched is
- * PF =
- * min(2TNs/(2T+Ns), T) when T <= b
- * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
- * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
- * where
- * T = # pages in table
- * N = # tuples in table
- * s = selectivity = fraction of table to be scanned
- * b = # buffer pages available (we include kernel space here)
+ * we use an approximation proposed by Mackert and Lohman (see
+ * index_pages_fetched() for details) to compute the number of pages
+ * fetched, and then charge random_page_cost per page fetched.
*
* When the index ordering is exactly correlated with the table ordering
* (just after a CLUSTER, for example), the number of pages fetched should
- * be just sT. What's more, these will be sequential fetches, not the
- * random fetches that occur in the uncorrelated case. So, depending on
- * the extent of correlation, we should estimate the actual I/O cost
- * somewhere between s * T * seq_page_cost and PF * random_page_cost.
- * We currently interpolate linearly between these two endpoints based on
- * the correlation squared (XXX is that appropriate?).
- *
- * In any case the number of tuples fetched is Ns.
+ * be exactly selectivity * table_size. What's more, all but the first
+ * will be sequential fetches, not the random fetches that occur in the
+ * uncorrelated case. So if the number of pages is more than 1, we
+ * ought to charge
+ * random_page_cost + (pages_fetched - 1) * seq_page_cost
+ * For partially-correlated indexes, we ought to charge somewhere between
+ * these two estimates. We currently interpolate linearly between the
+ * estimates based on the correlation squared (XXX is that appropriate?).
*----------
*/
+ if (outer_rel != NULL && outer_rel->rows > 1)
+ {
+ /*
+ * For repeated indexscans, scale up the number of tuples fetched
+ * in the Mackert and Lohman formula by the number of scans, so
+ * that we estimate the number of pages fetched by all the scans.
+ * Then pro-rate the costs for one scan. In this case we assume
+ * all the fetches are random accesses. XXX it'd be good to
+ * include correlation in this model, but it's not clear how to do
+ * that without double-counting cache effects.
+ */
+ double num_scans = outer_rel->rows;
- tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);
-
- /* This part is the Mackert and Lohman formula */
+ pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
+ baserel->pages,
+ index->pages);
- T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
- b = (effective_cache_size > 1) ? effective_cache_size : 1.0;
-
- if (T <= b)
- {
- pages_fetched =
- (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
- if (pages_fetched >= T)
- pages_fetched = T;
- else
- pages_fetched = ceil(pages_fetched);
+ run_cost += (pages_fetched * random_page_cost) / num_scans;
}
else
{
- double lim;
+ /*
+ * Normal case: apply the Mackert and Lohman formula, and then
+ * interpolate between that and the correlation-derived result.
+ */
+ pages_fetched = index_pages_fetched(tuples_fetched,
+ baserel->pages,
+ index->pages);
- lim = (2.0 * T * b) / (2.0 * T - b);
- if (tuples_fetched <= lim)
- {
- pages_fetched =
- (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
- }
- else
- {
- pages_fetched =
- b + (tuples_fetched - lim) * (T - b) / T;
- }
- pages_fetched = ceil(pages_fetched);
- }
+ /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
+ max_IO_cost = pages_fetched * random_page_cost;
- /*
- * min_IO_cost corresponds to the perfectly correlated case (csquared=1),
- * max_IO_cost to the perfectly uncorrelated case (csquared=0).
- */
- min_IO_cost = ceil(indexSelectivity * T) * seq_page_cost;
- max_IO_cost = pages_fetched * random_page_cost;
+ /* min_IO_cost is for the perfectly correlated case (csquared=1) */
+ pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+ min_IO_cost = random_page_cost;
+ if (pages_fetched > 1)
+ min_IO_cost += (pages_fetched - 1) * seq_page_cost;
- /*
- * Now interpolate based on estimated index order correlation to get total
- * disk I/O cost for main table accesses.
- */
- csquared = indexCorrelation * indexCorrelation;
+ /*
+ * Now interpolate based on estimated index order correlation to get
+ * total disk I/O cost for main table accesses.
+ */
+ csquared = indexCorrelation * indexCorrelation;
- run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
+ run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
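		/*
		 * For example, indexCorrelation = 0.5 gives csquared = 0.25, so the
		 * charge just above works out to
		 * 0.75 * max_IO_cost + 0.25 * min_IO_cost.
		 */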
+ }
/*
* Estimate CPU costs per tuple.
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
- if (!is_injoin)
+ if (outer_rel == NULL)
{
QualCost index_qual_cost;
path->path.total_cost = startup_cost + run_cost;
}
+/*
+ * index_pages_fetched
+ * Estimate the number of pages actually fetched after accounting for
+ * cache effects.
+ *
+ * We use an approximation proposed by Mackert and Lohman, "Index Scans
+ * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
+ * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
+ * The Mackert and Lohman approximation is that the number of pages
+ * fetched is
+ * PF =
+ * min(2TNs/(2T+Ns), T) when T <= b
+ * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
+ * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
+ * where
+ * T = # pages in table
+ * N = # tuples in table
+ * s = selectivity = fraction of table to be scanned
+ * b = # buffer pages available (we include kernel space here)
+ *
+ * We assume that effective_cache_size is the total number of buffer pages
+ * available for both table and index, and pro-rate that space between the
+ * table and index. (Ideally other_pages should include all the other
+ * tables and indexes used by the query too; but we don't have a good way
+ * to get that number here.)
+ *
+ * The product Ns is the number of tuples fetched; we pass in that
+ * product rather than calculating it here.
+ *
+ * Caller is expected to have ensured that tuples_fetched is greater than zero
+ * and rounded to integer (see clamp_row_est). The result will likewise be
+ * greater than zero and integral.
+ */
+double
+index_pages_fetched(double tuples_fetched, BlockNumber pages,
+ BlockNumber other_pages)
+{
+ double pages_fetched;
+ double T,
+ b;
+
+ /* T is # pages in table, but don't allow it to be zero */
+ T = (pages > 1) ? (double) pages : 1.0;
+
+ /* b is pro-rated share of effective_cache_size */
+ b = effective_cache_size * T / (T + (double) other_pages);
+ /* force it positive and integral */
+ if (b <= 1.0)
+ b = 1.0;
+ else
+ b = ceil(b);
+
+ /* This part is the Mackert and Lohman formula */
+ if (T <= b)
+ {
+ pages_fetched =
+ (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+ if (pages_fetched >= T)
+ pages_fetched = T;
+ else
+ pages_fetched = ceil(pages_fetched);
+ }
+ else
+ {
+ double lim;
+
+ lim = (2.0 * T * b) / (2.0 * T - b);
+ if (tuples_fetched <= lim)
+ {
+ pages_fetched =
+ (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+ }
+ else
+ {
+ pages_fetched =
+ b + (tuples_fetched - lim) * (T - b) / T;
+ }
+ pages_fetched = ceil(pages_fetched);
+ }
+ return pages_fetched;
+}
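/*
 * Worked example for index_pages_fetched() above (illustrative numbers,
 * assuming effective_cache_size = 1000 pages): for a 1000-page table and a
 * 100-page index, b = ceil(1000 * 1000 / 1100) = 910.  With tuples_fetched
 * = 500 (below the 2Tb/(2T-b) threshold of about 1670), we get
 * pages_fetched = ceil(2*1000*500 / (2*1000 + 500)) = 400, i.e. the model
 * expects some of the 500 tuple fetches to land on pages already in cache.
 */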
+
/*
* cost_bitmap_heap_scan
* Determines and returns the cost of scanning a relation using a bitmap
*
* 'baserel' is the relation to be scanned
* 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
- * 'is_injoin' is T if we are considering using the scan as the inside
- * of a nestloop join (hence, some of the quals are join clauses)
+ *
+ * Note: we take no explicit notice here of whether this is a join inner path.
+ * If it is, the component IndexPaths should have been costed accordingly.
*/
void
cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
- Path *bitmapqual, bool is_injoin)
+ Path *bitmapqual)
{
Cost startup_cost = 0;
Cost run_cost = 0;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.206 2006/05/18 19:56:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.207 2006/06/06 17:59:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
- bool istoplevel, bool isjoininner,
- Relids outer_relids,
+ bool istoplevel, RelOptInfo *outer_rel,
SaOpControl saop_control);
static List *find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
- bool istoplevel, bool isjoininner,
- Relids outer_relids);
+ bool istoplevel, RelOptInfo *outer_rel);
static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths);
static int bitmap_path_comparator(const void *a, const void *b);
static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths);
*/
indexpaths = find_usable_indexes(root, rel,
rel->baserestrictinfo, NIL,
- true, false, NULL, SAOP_FORBID);
+ true, NULL, SAOP_FORBID);
/*
* We can submit them all to add_path. (This generates access paths for
*/
indexpaths = generate_bitmap_or_paths(root, rel,
rel->baserestrictinfo, NIL,
- false, NULL);
+ NULL);
bitindexpaths = list_concat(bitindexpaths, indexpaths);
/*
*/
indexpaths = find_saop_paths(root, rel,
rel->baserestrictinfo, NIL,
- true, false, NULL);
+ true, NULL);
bitindexpaths = list_concat(bitindexpaths, indexpaths);
/*
* 'outer_clauses' is the list of additional upper-level clauses
* 'istoplevel' is true if clauses are the rel's top-level restriction list
* (outer_clauses must be NIL when this is true)
- * 'isjoininner' is true if forming an inner indexscan (so some of the
- * given clauses are join clauses)
- * 'outer_relids' identifies the outer side of the join (pass NULL
- * if not isjoininner)
+ * 'outer_rel' is the outer side of the join if forming an inner indexscan
+ * (so some of the given clauses are join clauses); NULL if not
* 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used
*
* Note: check_partial_indexes() must have been run previously.
static List *
find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
- bool istoplevel, bool isjoininner,
- Relids outer_relids,
+ bool istoplevel, RelOptInfo *outer_rel,
SaOpControl saop_control)
{
+ Relids outer_relids = outer_rel ? outer_rel->relids : NULL;
List *result = NIL;
List *all_clauses = NIL; /* not computed till needed */
ListCell *ilist;
* relevant unless we are at top level.
*/
index_is_ordered = OidIsValid(index->ordering[0]);
- if (istoplevel && index_is_ordered && !isjoininner)
+ if (index_is_ordered && istoplevel && outer_rel == NULL)
{
index_pathkeys = build_index_pathkeys(root, index,
ForwardScanDirection,
index_is_ordered ?
ForwardScanDirection :
NoMovementScanDirection,
- isjoininner);
+ outer_rel);
result = lappend(result, ipath);
}
* that we failed to match, consider variant ways of achieving the
* ordering. Again, this is only interesting at top level.
*/
- if (istoplevel && index_is_ordered && !isjoininner &&
+ if (index_is_ordered && istoplevel && outer_rel == NULL &&
root->query_pathkeys != NIL &&
pathkeys_useful_for_ordering(root, useful_pathkeys) == 0)
{
restrictclauses,
root->query_pathkeys,
scandir,
- false);
+ outer_rel);
result = lappend(result, ipath);
}
}
static List *
find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
- bool istoplevel, bool isjoininner,
- Relids outer_relids)
+ bool istoplevel, RelOptInfo *outer_rel)
{
bool have_saop = false;
ListCell *l;
return find_usable_indexes(root, rel,
clauses, outer_clauses,
- istoplevel, isjoininner,
- outer_relids,
+ istoplevel, outer_rel,
SAOP_REQUIRE);
}
*
* outer_clauses is a list of additional clauses that can be assumed true
* for the purpose of generating indexquals, but are not to be searched for
- * ORs. (See find_usable_indexes() for motivation.)
+ * ORs. (See find_usable_indexes() for motivation.) outer_rel is the outer
+ * side when we are considering a nestloop inner indexpath.
*/
List *
generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
- bool isjoininner,
- Relids outer_relids)
+ RelOptInfo *outer_rel)
{
List *result = NIL;
List *all_clauses;
andargs,
all_clauses,
false,
- isjoininner,
- outer_relids,
+ outer_rel,
SAOP_ALLOW);
/* Recurse in case there are sub-ORs */
indlist = list_concat(indlist,
generate_bitmap_or_paths(root, rel,
andargs,
all_clauses,
- isjoininner,
- outer_relids));
+ outer_rel));
}
else
{
list_make1(orarg),
all_clauses,
false,
- isjoininner,
- outer_relids,
+ outer_rel,
SAOP_ALLOW);
}
cost_bitmap_and_node(&apath, root);
/* Now we can do cost_bitmap_heap_scan */
- cost_bitmap_heap_scan(&bpath, root, rel, (Path *) &apath, false);
+ cost_bitmap_heap_scan(&bpath, root, rel, (Path *) &apath);
return bpath.total_cost;
}
/*
* best_inner_indexscan
* Finds the best available inner indexscan for a nestloop join
- * with the given rel on the inside and the given outer_relids outside.
+ * with the given rel on the inside and the given outer_rel outside.
* May return NULL if there are no possible inner indexscans.
*
* We ignore ordering considerations (since a nestloop's inner scan's order
*/
Path *
best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, JoinType jointype)
+ RelOptInfo *outer_rel, JoinType jointype)
{
+ Relids outer_relids;
Path *cheapest;
bool isouterjoin;
List *clause_list;
oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
/*
- * Intersect the given outer_relids with index_outer_relids to find the
+ * Intersect the given outer relids with index_outer_relids to find the
* set of outer relids actually relevant for this rel. If there are none,
* again we can fail immediately.
*/
- outer_relids = bms_intersect(rel->index_outer_relids, outer_relids);
+ outer_relids = bms_intersect(rel->index_outer_relids, outer_rel->relids);
if (bms_is_empty(outer_relids))
{
bms_free(outer_relids);
* outerrels. (We include the isouterjoin status in the cache lookup key
* for safety. In practice I suspect this is not necessary because it
* should always be the same for a given innerrel.)
+ *
+ * NOTE: because we cache on outer_relids rather than outer_rel->relids,
+ * we will report the same path and hence path cost for joins with
+ * different sets of irrelevant rels on the outside. Now that cost_index
+ * is sensitive to outer_rel->rows, this is not really right. However
+ * the error is probably not large. Is it worth establishing a separate
+ * cache entry for each distinct outer_rel->relids set to get this right?
*/
foreach(l, rel->index_inner_paths)
{
* that could be plain indexscans, ie, they don't require the join context
* at all. This may seem redundant, but we need to include those scans in
* the input given to choose_bitmap_and() to be sure we find optimal AND
- * combinations of join and non-join scans. The worst case is that we
- * might return a "best inner indexscan" that's really just a plain
- * indexscan, causing some redundant effort in joinpath.c.
+ * combinations of join and non-join scans. Also, even if the "best
+ * inner indexscan" is just a plain indexscan, it will have a different
+ * cost estimate because of cache effects.
*/
clause_list = find_clauses_for_join(root, rel, outer_relids, isouterjoin);
*/
indexpaths = find_usable_indexes(root, rel,
clause_list, NIL,
- false, true,
- outer_relids,
+ false, outer_rel,
SAOP_FORBID);
/*
*/
bitindexpaths = generate_bitmap_or_paths(root, rel,
clause_list, NIL,
- true,
- outer_relids);
+ outer_rel);
/*
* Likewise, generate paths using ScalarArrayOpExpr clauses; these can't
bitindexpaths = list_concat(bitindexpaths,
find_saop_paths(root, rel,
clause_list, NIL,
- false, true,
- outer_relids));
+ false, outer_rel));
/*
* Include the regular index paths in bitindexpaths.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.103 2006/03/05 15:58:28 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.104 2006/06/06 17:59:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, JoinType jointype);
static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, JoinType jointype);
+ RelOptInfo *outer_rel, JoinType jointype);
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
{
if (IsA(inner_cheapest_total, AppendPath))
bestinnerjoin = best_appendrel_indexscan(root, innerrel,
- outerrel->relids,
- jointype);
+ outerrel, jointype);
else if (innerrel->rtekind == RTE_RELATION)
bestinnerjoin = best_inner_indexscan(root, innerrel,
- outerrel->relids,
- jointype);
+ outerrel, jointype);
}
}
/*
* best_appendrel_indexscan
* Finds the best available set of inner indexscans for a nestloop join
- * with the given append relation on the inside and the given outer_relids
+ * with the given append relation on the inside and the given outer_rel
* outside. Returns an AppendPath comprising the best inner scans, or
* NULL if there are no possible inner indexscans.
*/
static Path *
best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, JoinType jointype)
+ RelOptInfo *outer_rel, JoinType jointype)
{
int parentRTindex = rel->relid;
List *append_paths = NIL;
* Get the best innerjoin indexpath (if any) for this child rel.
*/
bestinnerjoin = best_inner_indexscan(root, childrel,
- outer_relids, jointype);
+ outer_rel, jointype);
/*
* If no luck on an indexpath for this rel, we'll still consider
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.78 2006/03/05 15:58:28 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.79 2006/06/06 17:59:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* Use the generate_bitmap_or_paths() machinery to estimate the
* value of each OR clause. We can use regular restriction
* clauses along with the OR clause contents to generate
- * indexquals. We pass outer_relids = NULL so that sub-clauses
+ * indexquals. We pass outer_rel = NULL so that sub-clauses
* that are actually joins will be ignored.
*/
List *orpaths;
orpaths = generate_bitmap_or_paths(root, rel,
list_make1(rinfo),
rel->baserestrictinfo,
- false, NULL);
+ NULL);
/* Locate the cheapest OR path */
foreach(k, orpaths)
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.14 2006/04/28 20:57:49 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.15 2006/06/06 17:59:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
restrictclauses,
NIL,
indexscandir,
- false);
+ NULL);
/*
* Estimate actual cost of fetching just one row.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.127 2006/03/05 15:58:31 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.128 2006/06/06 17:59:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* 'indexscandir' is ForwardScanDirection or BackwardScanDirection
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
- * 'isjoininner' is TRUE if this is a join inner indexscan path.
- * (pathkeys and indexscandir are ignored if so.)
+ * 'outer_rel' is the outer relation if this is a join inner indexscan path.
+ * (pathkeys and indexscandir are ignored if so.) NULL if not.
*
* Returns the new path node.
*/
List *clause_groups,
List *pathkeys,
ScanDirection indexscandir,
- bool isjoininner)
+ RelOptInfo *outer_rel)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
* don't really care what order it's scanned in. (We could expect the
* caller to supply the correct values, but it's easier to force it here.)
*/
- if (isjoininner)
+ if (outer_rel != NULL)
{
pathkeys = NIL;
indexscandir = NoMovementScanDirection;
pathnode->indexclauses = allclauses;
pathnode->indexquals = indexquals;
- pathnode->isjoininner = isjoininner;
+ pathnode->isjoininner = (outer_rel != NULL);
pathnode->indexscandir = indexscandir;
- if (isjoininner)
+ if (outer_rel != NULL)
{
/*
* We must compute the estimated number of output rows for the
pathnode->rows = rel->rows;
}
- cost_index(pathnode, root, index, indexquals, isjoininner);
+ cost_index(pathnode, root, index, indexquals, outer_rel);
return pathnode;
}
* Creates a path node for a bitmap scan.
*
* 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
+ *
+ * If this is a join inner indexscan path, the component IndexPaths should
+ * have been costed accordingly, and TRUE should be passed for isjoininner.
*/
BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo *root,
pathnode->rows = rel->rows;
}
- cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, false);
+ cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual);
return pathnode;
}
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.206 2006/06/05 02:49:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.207 2006/06/06 17:59:57 tgl Exp $
*
*-------------------------------------------------------------------------
*/
static void
genericcostestimate(PlannerInfo *root,
IndexOptInfo *index, List *indexQuals,
+ RelOptInfo *outer_rel,
double numIndexTuples,
Cost *indexStartupCost,
Cost *indexTotalCost,
/*
* Estimate the number of index pages that will be retrieved.
*
- * For all currently-supported index types, the first page of the index is
- * a metadata page, and we should figure on fetching that plus a pro-rated
- * fraction of the remaining pages.
+ * We use the simplistic method of taking a pro-rata fraction of the
+ * total number of index pages. In effect, this counts only leaf pages
+ * and not any overhead such as index metapage or upper tree levels.
+ * In practice this seems a better approximation than charging for
+ * access to the upper levels, perhaps because those tend to stay in
+ * cache under load.
*/
- if (index->pages > 1 && index->tuples > 0)
- {
- numIndexPages = (numIndexTuples / index->tuples) * (index->pages - 1);
- numIndexPages += 1; /* count the metapage too */
- numIndexPages = ceil(numIndexPages);
- }
+ if (index->pages > 1 && index->tuples > 1)
+ numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
else
numIndexPages = 1.0;
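	/*
	 * For example (illustrative numbers): an index with 10000 tuples spread
	 * over 100 pages, with numIndexTuples = 500, gives
	 * numIndexPages = ceil(500 * 100 / 10000) = 5.
	 */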
/*
- * Compute the index access cost.
+ * Now compute the disk access costs.
*
- * Disk cost: our generic assumption is that the index pages will be read
- * sequentially, so they cost seq_page_cost each, not random_page_cost.
+ * The above calculations are all per-index-scan. However, if we are
+ * in a nestloop inner scan, we can expect the scan to be repeated (with
+ * different search keys) for each row of the outer relation. This
+ * creates the potential for cache effects to reduce the number of
+ * disk page fetches needed. We want to estimate the average per-scan
+ * I/O cost in the presence of caching.
+ *
+ * We use the Mackert-Lohman formula (see costsize.c for details) to
+ * estimate the total number of page fetches that occur. While this
+ * wasn't what it was designed for, it seems a reasonable model anyway.
+ * Note that we are counting pages not tuples anymore, so we take
+ * N = T = index size, as if there were one "tuple" per page.
*/
- *indexTotalCost = seq_page_cost * numIndexPages;
+ if (outer_rel != NULL && outer_rel->rows > 1)
+ {
+ double num_scans = outer_rel->rows;
+ double pages_fetched;
+
+ /* total page fetches ignoring cache effects */
+ pages_fetched = numIndexPages * num_scans;
+
+ /* use Mackert and Lohman formula to adjust for cache effects */
+ pages_fetched = index_pages_fetched(pages_fetched,
+ index->pages,
+ index->rel->pages);
+
+ /*
+ * Now compute the total disk access cost, and then report a
+ * pro-rated share for one index scan.
+ */
+ *indexTotalCost = (pages_fetched * random_page_cost) / num_scans;
+ }
+ else
+ {
+ /*
+ * For a single index scan, we just charge random_page_cost per page
+ * touched.
+ */
+ *indexTotalCost = numIndexPages * random_page_cost;
+ }
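	/*
	 * For example (illustrative numbers, assuming effective_cache_size =
	 * 1000 pages): with numIndexPages = 5, num_scans = 1000, a 100-page
	 * index and a 1000-page table, the 5000 raw page fetches shrink to
	 * index_pages_fetched(5000, 100, 1000) = 526, so each scan is charged
	 * about 0.53 random page fetches instead of 5.
	 */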
/*
* CPU cost: any complex expressions in the indexquals will need to be
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
List *indexQuals = (List *) PG_GETARG_POINTER(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
+ Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+ Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+ Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+ double *indexCorrelation = (double *) PG_GETARG_POINTER(7);
Oid relid;
AttrNumber colnum;
HeapTuple tuple;
numIndexTuples = btreeSelectivity * index->rel->tuples;
}
- genericcostestimate(root, index, indexQuals, numIndexTuples,
+ genericcostestimate(root, index, indexQuals, outer_rel, numIndexTuples,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
List *indexQuals = (List *) PG_GETARG_POINTER(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
+ Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+ Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+ Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+ double *indexCorrelation = (double *) PG_GETARG_POINTER(7);
- genericcostestimate(root, index, indexQuals, 0.0,
+ genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
List *indexQuals = (List *) PG_GETARG_POINTER(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
+ Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+ Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+ Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+ double *indexCorrelation = (double *) PG_GETARG_POINTER(7);
- genericcostestimate(root, index, indexQuals, 0.0,
+ genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexOptInfo *index = (IndexOptInfo *) PG_GETARG_POINTER(1);
List *indexQuals = (List *) PG_GETARG_POINTER(2);
- Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
- Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
- Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
- double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
+ RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(3);
+ Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(4);
+ Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(5);
+ Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(6);
+ double *indexCorrelation = (double *) PG_GETARG_POINTER(7);
- genericcostestimate(root, index, indexQuals, 0.0,
+ genericcostestimate(root, index, indexQuals, outer_rel, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.412 2006/05/19 19:08:26 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.413 2006/06/06 17:59:57 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
DESCR("btree(internal)");
DATA(insert OID = 972 ( btvacuumcleanup PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ btvacuumcleanup - _null_ ));
DESCR("btree(internal)");
-DATA(insert OID = 1268 ( btcostestimate PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ btcostestimate - _null_ ));
+DATA(insert OID = 1268 ( btcostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ btcostestimate - _null_ ));
DESCR("btree(internal)");
DATA(insert OID = 339 ( poly_same PGNSP PGUID 12 f f t f i 2 16 "604 604" _null_ _null_ _null_ poly_same - _null_ ));
DESCR("hash(internal)");
DATA(insert OID = 425 ( hashvacuumcleanup PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ hashvacuumcleanup - _null_ ));
DESCR("hash(internal)");
-DATA(insert OID = 438 ( hashcostestimate PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ hashcostestimate - _null_ ));
+DATA(insert OID = 438 ( hashcostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ hashcostestimate - _null_ ));
DESCR("hash(internal)");
DATA(insert OID = 449 ( hashint2 PGNSP PGUID 12 f f t f i 1 23 "21" _null_ _null_ _null_ hashint2 - _null_ ));
DESCR("gist(internal)");
DATA(insert OID = 2561 ( gistvacuumcleanup PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ gistvacuumcleanup - _null_ ));
DESCR("gist(internal)");
-DATA(insert OID = 772 ( gistcostestimate PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ gistcostestimate - _null_ ));
+DATA(insert OID = 772 ( gistcostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ gistcostestimate - _null_ ));
DESCR("gist(internal)");
DATA(insert OID = 784 ( tintervaleq PGNSP PGUID 12 f f t f i 2 16 "704 704" _null_ _null_ _null_ tintervaleq - _null_ ));
DESCR("gin(internal)");
DATA(insert OID = 2740 ( ginvacuumcleanup PGNSP PGUID 12 f f t f v 2 2281 "2281 2281" _null_ _null_ _null_ ginvacuumcleanup - _null_ ));
DESCR("gin(internal)");
-DATA(insert OID = 2741 ( gincostestimate PGNSP PGUID 12 f f t f v 7 2278 "2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ gincostestimate - _null_ ));
+DATA(insert OID = 2741 ( gincostestimate PGNSP PGUID 12 f f t f v 8 2278 "2281 2281 2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ gincostestimate - _null_ ));
DESCR("gin(internal)");
/* GIN array support */
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.124 2006/03/05 15:58:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.125 2006/06/06 17:59:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
*
* 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
* some of the index conditions are join rather than restriction clauses).
+ * Note that the path costs will be calculated differently from a plain
+ * indexscan in this case, and in addition there's a special 'rows' value
+ * different from the parent RelOptInfo's (see below).
*
* 'indexscandir' is one of:
* ForwardScanDirection: forward scan of an ordered index
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.75 2006/06/05 03:03:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.76 2006/06/06 17:59:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern DLLIMPORT double cpu_tuple_cost;
extern DLLIMPORT double cpu_index_tuple_cost;
extern DLLIMPORT double cpu_operator_cost;
-extern double effective_cache_size;
+extern DLLIMPORT double effective_cache_size;
extern Cost disable_cost;
extern bool enable_seqscan;
extern bool enable_indexscan;
extern bool constraint_exclusion;
extern double clamp_row_est(double nrows);
+extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
+ BlockNumber other_pages);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
extern void cost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index,
- List *indexQuals, bool is_injoin);
+ List *indexQuals, RelOptInfo *outer_rel);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
- Path *bitmapqual, bool is_injoin);
+ Path *bitmapqual);
extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.67 2006/03/05 15:58:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/pathnode.h,v 1.68 2006/06/06 17:59:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
List *clause_groups,
List *pathkeys,
ScanDirection indexscandir,
- bool isjoininner);
+ RelOptInfo *outer_rel);
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.92 2006/03/05 15:58:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.93 2006/06/06 17:59:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses,
- bool isjoininner,
- Relids outer_relids);
+ RelOptInfo *outer_rel);
extern Path *best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, JoinType jointype);
+ RelOptInfo *outer_rel, JoinType jointype);
extern List *group_clauses_by_indexkey(IndexOptInfo *index,
List *clauses, List *outer_clauses,
Relids outer_relids,
* for developers. If you edit any of these, be sure to do a *full*
* rebuild (and an initdb if noted).
*
- * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.21 2006/04/03 23:35:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.22 2006/06/06 17:59:58 tgl Exp $
*------------------------------------------------------------------------
*/
/*
* Maximum number of arguments to a function.
*
- * The minimum value is 8 (index creation uses 8-argument functions).
+ * The minimum value is 8 (index cost estimation uses 8-argument functions).
* The maximum possible value is around 600 (limited by index tuple size in
* pg_proc's index; BLCKSZ larger than 8K would allow more). Values larger
* than needed will waste memory and processing time, but do not directly