1 /*-------------------------------------------------------------------------
4 * Routines to compute (and set) relation sizes and path costs
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.39 1999/07/07 09:11:15 momjian Exp $
12 *-------------------------------------------------------------------------
22 #define MAXINT INT_MAX
30 #include "nodes/relation.h"
31 #include "optimizer/cost.h"
32 #include "optimizer/internal.h"
33 #include "optimizer/keys.h"
34 #include "optimizer/tlist.h"
35 #include "utils/lsyscache.h"
39 static int compute_attribute_width(TargetEntry *tlistentry);
40 static double relation_byte_size(int tuples, int width);
41 static double base_log(double x, double b);
42 static int compute_targetlist_width(List *targetlist);
44 int _disable_cost_ = 30000000;
46 bool _enable_seqscan_ = true;
47 bool _enable_indexscan_ = true;
48 bool _enable_sort_ = true;
49 bool _enable_hash_ = true;
50 bool _enable_nestloop_ = true;
51 bool _enable_mergejoin_ = true;
52 bool _enable_hashjoin_ = true;
54 Cost _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
55 Cost _cpu_index_page_wight_ = _CPU_INDEX_PAGE_WEIGHT_;
59 * Determines and returns the cost of scanning a relation sequentially.
60 * If the relation is a temporary to be materialized from a query
61 * embedded within a data field (determined by 'relid' containing an
62 * attribute reference), then a predetermined constant is returned (we
63 * have NO IDEA how big the result of a POSTQUEL procedure is going to
67 * cpu = *CPU-PAGE-WEIGHT* * t
69 * 'relid' is the relid of the relation to be scanned
70 * 'relpages' is the number of pages in the relation to be scanned
71 * (as determined from the system catalogs)
72 * 'reltuples' is the number of tuples in the relation to be scanned
78 cost_seqscan(int relid, int relpages, int reltuples)
82 if (!_enable_seqscan_)
83 temp += _disable_cost_;
89 * cost of sequentially scanning a materialized temporary relation
91 temp += _NONAME_SCAN_COST_;
96 temp += _cpu_page_weight_ * reltuples;
105 * Determines and returns the cost of scanning a relation using an index.
107 * disk = expected-index-pages + expected-data-pages
108 * cpu = *CPU-PAGE-WEIGHT* *
109 * (expected-index-tuples + expected-data-tuples)
111 * 'indexid' is the index OID
112 * 'expected-indexpages' is the number of index pages examined in the scan
113 * 'selec' is the selectivity of the index
114 * 'relpages' is the number of pages in the main relation
115 * 'reltuples' is the number of tuples in the main relation
116 * 'indexpages' is the number of pages in the index relation
117 * 'indextuples' is the number of tuples in the index relation
123 cost_index(Oid indexid,
124 int expected_indexpages,
134 if (!_enable_indexscan_ && !is_injoin)
135 temp += _disable_cost_;
138 * We want to be sure we estimate the cost of an index scan as more
139 * than the cost of a sequential scan (when selec == 1.0), even if we
140 * don't have good stats. So, disbelieve zero index size.
142 if (expected_indexpages <= 0)
143 expected_indexpages = 1;
144 if (indextuples <= 0)
147 /* expected index relation pages */
148 temp += expected_indexpages;
151 * expected base relation pages XXX this isn't really right, since we
152 * will access the table nonsequentially and might have to fetch the
153 * same page more than once. This calculation assumes the buffer
154 * cache will prevent that from happening...
156 temp += ceil(((double) selec) * ((double) relpages));
158 /* per index tuples */
159 temp += _cpu_index_page_wight_ * selec * indextuples;
161 /* per heap tuples */
162 temp += _cpu_page_weight_ * selec * reltuples;
170 * Determines and returns the cost of sorting a relation by considering
171 * the cost of doing an external sort: XXX this is probably too low
173 * cpu = *CPU-PAGE-WEIGHT* * (t lg t)
175 * 'pathkeys' is a list of sort keys
176 * 'tuples' is the number of tuples in the relation
177 * 'width' is the average tuple width in bytes
179 * NOTE: some callers currently pass NULL for pathkeys because they
180 * can't conveniently supply the sort keys. Since this routine doesn't
181 * currently do anything with pathkeys anyway, that doesn't matter...
182 * but if it ever does, it should react gracefully to lack of key data.
187 cost_sort(List *pathkeys, int tuples, int width)
190 int npages = page_size(tuples, width);
194 temp += _disable_cost_;
197 * We want to be sure the cost of a sort is never estimated as zero,
198 * even if passed-in tuple count is zero. Besides, mustn't do
206 log_npages = ceil(base_log((double) npages, 2.0));
207 if (log_npages <= 0.0)
210 temp += npages * log_npages;
213 * could be base_log(tuples, NBuffers), but we are only doing 2-way
216 temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
226 * Determines and returns the cost of writing a relation of 'tuples'
227 * tuples of 'width' bytes out to a result relation.
234 cost_result(int tuples, int width)
238 temp = temp + page_size(tuples, width);
239 temp = temp + _cpu_page_weight_ * tuples;
248 * Determines and returns the cost of joining two relations using the
249 * nested loop algorithm.
251 * 'outercost' is the (disk+cpu) cost of scanning the outer relation
252 * 'innercost' is the (disk+cpu) cost of scanning the inner relation
253 * 'outertuples' is the number of tuples in the outer relation
259 cost_nestloop(Cost outercost,
268 if (!_enable_nestloop_)
269 temp += _disable_cost_;
271 temp += outertuples * innercost;
279 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
280 * outer and inner relations
281 * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
282 * to sort the outer and inner relations (or NIL if no explicit
283 * sort is needed because the source path is already ordered)
284 * 'outertuples' and 'innertuples' are the number of tuples in the outer
285 * and inner relations
286 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
287 * of the tuples of the outer and inner relations
293 cost_mergejoin(Cost outercost,
304 if (!_enable_mergejoin_)
305 temp += _disable_cost_;
309 if (outersortkeys) /* do we need to sort? */
310 temp += cost_sort(outersortkeys, outersize, outerwidth);
311 if (innersortkeys) /* do we need to sort? */
312 temp += cost_sort(innersortkeys, innersize, innerwidth);
313 temp += _cpu_page_weight_ * (outersize + innersize);
321 * cost_hashjoin-- XXX HASH
322 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
323 * outer and inner relations
324 * 'outerkeys' and 'innerkeys' are lists of the keys to be used
325 * to hash the outer and inner relations
326 * 'outersize' and 'innersize' are the number of tuples in the outer
327 * and inner relations
328 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
329 * of the tuples of the outer and inner relations
334 cost_hashjoin(Cost outercost,
344 int outerpages = page_size(outersize, outerwidth);
345 int innerpages = page_size(innersize, innerwidth);
347 if (!_enable_hashjoin_)
348 temp += _disable_cost_;
351 * Bias against putting larger relation on inside.
353 * Code used to use "outerpages < innerpages" but that has poor
354 * resolution when both relations are small.
356 if (relation_byte_size(outersize, outerwidth) <
357 relation_byte_size(innersize, innerwidth))
358 temp += _disable_cost_;
360 /* cost of source data */
361 temp += outercost + innercost;
363 /* cost of computing hash function: must do it once per tuple */
364 temp += _cpu_page_weight_ * (outersize + innersize);
366 /* cost of main-memory hashtable */
367 temp += (innerpages < NBuffers) ? innerpages : NBuffers;
370 * if inner relation is too big then we will need to "batch" the join,
371 * which implies writing and reading most of the tuples to disk an
374 if (innerpages > NBuffers)
375 temp += 2 * (outerpages + innerpages);
384 * Computes the size of each relation in 'rel_list' (after applying
385 * restrictions), by multiplying the selectivity of each restriction
386 * by the original size of the relation.
388 * Sets the 'size' field for each relation entry with this computed size.
393 compute_rel_size(RelOptInfo *rel)
398 temp = rel->tuples * product_selec(rel->restrictinfo);
400 if (temp >= (MAXINT - 1))
403 temp1 = ceil((double) temp);
405 Assert(temp1 <= MAXINT);
411 * Computes the width in bytes of a tuple from 'rel'.
413 * Returns the width of the tuple as a fixnum.
416 compute_rel_width(RelOptInfo *rel)
418 return compute_targetlist_width(get_actual_tlist(rel->targetlist));
422 * compute_targetlist_width
423 * Computes the width in bytes of a tuple made from 'targetlist'.
425 * Returns the width of the tuple as a fixnum.
428 compute_targetlist_width(List *targetlist)
433 foreach(temp_tl, targetlist)
435 tuple_width = tuple_width +
436 compute_attribute_width(lfirst(temp_tl));
442 * compute_attribute_width
443 * Given a target list entry, find the size in bytes of the attribute.
445 * If a field is variable-length, it is assumed to be at least the size
448 * Returns the width of the attribute as a fixnum.
451 compute_attribute_width(TargetEntry *tlistentry)
453 int width = get_typlen(tlistentry->resdom->restype);
456 return _DEFAULT_ATTRIBUTE_WIDTH_;
462 * compute_joinrel_size
463 * Computes the size of the join relation 'joinrel'.
468 compute_joinrel_size(JoinPath *joinpath)
473 /* cartesian product */
474 temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
475 temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
477 temp = temp * product_selec(joinpath->pathinfo);
478 if (temp >= (MAXINT - 1) / 2)
480 /* if we exceed (MAXINT-1)/2, we switch to log scale */
481 /* +1 prevents log(0) */
482 temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
485 temp1 = ceil((double) temp);
493 * Estimate the storage space in bytes for a given number of tuples
494 * of a given width (size in bytes).
495 * To avoid overflow with big relations, result is a double.
499 relation_byte_size(int tuples, int width)
501 return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
506 * Returns an estimate of the number of pages covered by a given
507 * number of tuples of a given width (size in bytes).
510 page_size(int tuples, int width)
514 temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
520 base_log(double x, double b)
522 return log(x) / log(b);