1 /*-------------------------------------------------------------------------
4 * Routines to compute (and set) relation sizes and path costs
6 * Path costs are measured in units of disk accesses: one page fetch
7 * has cost 1. The other primitive unit is the CPU time required to
8 * process one tuple, which we set at "_cpu_page_weight_" of a page
9 * fetch. Obviously, the CPU time per tuple depends on the query
10 * involved, but the relative CPU and disk speeds of a given platform
11 * are so variable that we are lucky if we can get useful numbers
12 * at all. _cpu_page_weight_ is user-settable, in case a particular
13 * user is clueful enough to have a better-than-default estimate
14 * of the ratio for his platform. There is also _cpu_index_page_weight_,
15 * the cost to process a tuple of an index during an index scan.
18 * Copyright (c) 1994, Regents of the University of California
21 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.46 1999/11/23 20:06:54 momjian Exp $
23 *-------------------------------------------------------------------------
33 #define MAXINT INT_MAX
41 #include "miscadmin.h"
42 #include "optimizer/cost.h"
43 #include "optimizer/internal.h"
44 #include "optimizer/tlist.h"
45 #include "utils/lsyscache.h"
/* Forward declarations for helpers local to this file. */
48 static int compute_targetlist_width(List *targetlist);
49 static int compute_attribute_width(TargetEntry *tlistentry);
50 static double relation_byte_size(int tuples, int width);
51 static double base_log(double x, double b);
/*
 * Cost added to a path's estimate when its plan type is disabled below.
 * It is a large finite penalty, not a prohibition: the cost_* routines
 * add it rather than rejecting the path outright.
 */
54 int _disable_cost_ = 30000000;
/* User-settable switches enabling/disabling each scan/join strategy. */
56 bool _enable_seqscan_ = true;
57 bool _enable_indexscan_ = true;
58 bool _enable_sort_ = true;
59 bool _enable_nestloop_ = true;
60 bool _enable_mergejoin_ = true;
61 bool _enable_hashjoin_ = true;
62 bool _enable_tidscan_ = true;
/*
 * Per-tuple CPU weights, expressed in units of one page fetch (see the
 * file header comment).  User-settable.
 */
64 Cost _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
65 Cost _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
69 * Determines and returns the cost of scanning a relation sequentially.
70 * If the relation is a temporary to be materialized from a query
71 * embedded within a data field (determined by 'relid' containing an
72 * attribute reference), then a predetermined constant is returned (we
73 * have NO IDEA how big the result of a POSTQUEL procedure is going to
77 * cpu = *CPU-PAGE-WEIGHT* * t
79 * 'relid' is the relid of the relation to be scanned
80 * 'relpages' is the number of pages in the relation to be scanned
81 * (as determined from the system catalogs)
82 * 'reltuples' is the number of tuples in the relation to be scanned
88 cost_seqscan(int relid, int relpages, int reltuples)
/* Discourage (but do not forbid) seqscans when disabled by the user. */
92 if (!_enable_seqscan_)
93 temp += _disable_cost_;
/*
 * Materialized temporary relation: its size is unknowable in advance,
 * so a fixed constant is charged instead of a page/tuple estimate.
 */
99 * cost of sequentially scanning a materialized temporary relation
101 temp += _NONAME_SCAN_COST_;
/* CPU cost: one _cpu_page_weight_ unit per tuple scanned. */
106 temp += _cpu_page_weight_ * reltuples;
115 * Determines and returns the cost of scanning a relation using an index.
117 * disk = expected-index-pages + expected-data-pages
118 * cpu = *CPU-PAGE-WEIGHT* *
119 * (expected-index-tuples + expected-data-tuples)
121 * 'indexid' is the index OID
122 * 'expected-indexpages' is the number of index pages examined in the scan
123 * 'selec' is the selectivity of the index
124 * 'relpages' is the number of pages in the main relation
125 * 'reltuples' is the number of tuples in the main relation
126 * 'indexpages' is the number of pages in the index relation
127 * 'indextuples' is the number of tuples in the index relation
133 cost_index(Oid indexid,
134 int expected_indexpages,
/*
 * Note the disable switch is deliberately ignored when the index scan
 * implements a join clause (is_injoin).
 */
144 if (!_enable_indexscan_ && !is_injoin)
145 temp += _disable_cost_;
148 * We want to be sure we estimate the cost of an index scan as more
149 * than the cost of a sequential scan (when selec == 1.0), even if we
150 * don't have good stats. So, disbelieve zero index size.
152 if (expected_indexpages <= 0)
153 expected_indexpages = 1;
154 if (indextuples <= 0)
157 /* expected index relation pages */
158 temp += expected_indexpages;
161 * expected base relation pages XXX this isn't really right, since we
162 * will access the table nonsequentially and might have to fetch the
163 * same page more than once. This calculation assumes the buffer
164 * cache will prevent that from happening...
166 temp += ceil(((double) selec) * ((double) relpages));
/* CPU cost charged per index tuple visited, scaled by selectivity. */
168 /* per index tuples */
169 temp += _cpu_index_page_weight_ * selec * indextuples;
/* CPU cost charged per heap tuple fetched, scaled by selectivity. */
171 /* per heap tuples */
172 temp += _cpu_page_weight_ * selec * reltuples;
180 * Determines and returns the cost of scanning a relation using tid-s.
182 * disk = number of tids
183 * cpu = *CPU-PAGE-WEIGHT* * number_of_tids
189 cost_tidscan(List *tideval)
193 if (!_enable_tidscan_)
194 temp += _disable_cost_;
/* Per TID: one page fetch (1.0) plus one per-tuple CPU charge. */
196 temp += (1.0 + _cpu_page_weight_) * length(tideval);
203 * Determines and returns the cost of sorting a relation by considering
204 * the cost of doing an external sort: XXX this is probably too low
206 * cpu = *CPU-PAGE-WEIGHT* * (t lg t)
208 * 'pathkeys' is a list of sort keys
209 * 'tuples' is the number of tuples in the relation
210 * 'width' is the average tuple width in bytes
212 * NOTE: some callers currently pass NULL for pathkeys because they
213 * can't conveniently supply the sort keys. Since this routine doesn't
214 * currently do anything with pathkeys anyway, that doesn't matter...
215 * but if it ever does, it should react gracefully to lack of key data.
220 cost_sort(List *pathkeys, int tuples, int width)
223 int npages = page_size(tuples, width);
227 temp += _disable_cost_;
230 * We want to be sure the cost of a sort is never estimated as zero,
231 * even if passed-in tuple count is zero. Besides, mustn't do
/* Disk cost: npages * ceil(log2(npages)) page I/Os for the merge passes. */
239 log_npages = ceil(base_log((double) npages, 2.0));
240 if (log_npages <= 0.0)
243 temp += npages * log_npages;
246 * could be base_log(tuples, NBuffers), but we are only doing 2-way
/* CPU cost: t * log2(t) comparisons, each charged at _cpu_page_weight_. */
249 temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
259 * Determines and returns the cost of writing a relation of 'tuples'
260 * tuples of 'width' bytes out to a result relation.
267 cost_result(int tuples, int width)
/* Disk: one cost unit per output page written. */
271 temp = temp + page_size(tuples, width);
/* CPU: per-tuple processing charge. */
272 temp = temp + _cpu_page_weight_ * tuples;
281 * Determines and returns the cost of joining two relations using the
282 * nested loop algorithm.
284 * 'outercost' is the (disk+cpu) cost of scanning the outer relation
285 * 'innercost' is the (disk+cpu) cost of scanning the inner relation
286 * 'outertuples' is the number of tuples in the outer relation
292 cost_nestloop(Cost outercost,
301 if (!_enable_nestloop_)
302 temp += _disable_cost_;
/* The inner path is rescanned once for every outer tuple. */
304 temp += outertuples * innercost;
312 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
313 * outer and inner relations
314 * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
315 * to sort the outer and inner relations (or NIL if no explicit
316 * sort is needed because the source path is already ordered)
317 * 'outertuples' and 'innertuples' are the number of tuples in the outer
318 * and inner relations
319 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
320 * of the tuples of the outer and inner relations
326 cost_mergejoin(Cost outercost,
337 if (!_enable_mergejoin_)
338 temp += _disable_cost_;
/* Charge an explicit sort for either input that is not already ordered. */
342 if (outersortkeys) /* do we need to sort? */
343 temp += cost_sort(outersortkeys, outersize, outerwidth);
344 if (innersortkeys) /* do we need to sort? */
345 temp += cost_sort(innersortkeys, innersize, innerwidth);
/* CPU cost of the merge itself: one per-tuple charge for each input tuple. */
346 temp += _cpu_page_weight_ * (outersize + innersize);
356 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
357 * outer and inner relations
358 * 'outersize' and 'innersize' are the number of tuples in the outer
359 * and inner relations
360 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
361 * of the tuples of the outer and inner relations
362 * 'innerdisbursion' is an estimate of the disbursion statistic
363 * for the inner hash key.
368 cost_hashjoin(Cost outercost,
374 Cost innerdisbursion)
/* Byte sizes decide both batching and the inner-vs-outer bias below. */
377 double outerbytes = relation_byte_size(outersize, outerwidth);
378 double innerbytes = relation_byte_size(innersize, innerwidth);
/* SortMem is in kilobytes; this is the in-memory hash table budget. */
379 long hashtablebytes = SortMem * 1024L;
381 if (!_enable_hashjoin_)
382 temp += _disable_cost_;
384 /* cost of source data */
385 temp += outercost + innercost;
387 /* cost of computing hash function: must do it once per tuple */
388 temp += _cpu_page_weight_ * (outersize + innersize);
390 /* the number of tuple comparisons needed is the number of outer
391 * tuples times the typical hash bucket size, which we estimate
392 * conservatively as the inner disbursion times the inner tuple
393 * count. The cost per comparison is set at _cpu_index_page_weight_;
394 * is that reasonable, or do we need another basic parameter?
396 temp += _cpu_index_page_weight_ * outersize *
397 (innersize * innerdisbursion);
400 * if inner relation is too big then we will need to "batch" the join,
401 * which implies writing and reading most of the tuples to disk an
402 * extra time. Charge one cost unit per page of I/O.
/* Factor 2 = one write plus one read of each batched page. */
404 if (innerbytes > hashtablebytes)
405 temp += 2 * (page_size(outersize, outerwidth) +
406 page_size(innersize, innerwidth));
409 * Bias against putting larger relation on inside. We don't want
410 * an absolute prohibition, though, since larger relation might have
411 * better disbursion --- and we can't trust the size estimates
412 * unreservedly, anyway.
414 if (innerbytes > outerbytes)
415 temp *= 1.1; /* is this an OK fudge factor? */
424 * Computes the size of each relation in 'rel_list' (after applying
425 * restrictions), by multiplying the selectivity of each restriction
426 * by the original size of the relation.
428 * Sets the 'size' field for each relation entry with this computed size.
433 compute_rel_size(RelOptInfo *rel)
/* Estimated output = raw tuple count times combined restriction selectivity. */
438 temp = rel->tuples * product_selec(rel->restrictinfo);
/* Clamp near MAXINT so the conversion back to int cannot overflow. */
440 if (temp >= (MAXINT - 1))
443 temp1 = ceil((double) temp);
445 Assert(temp1 <= MAXINT);
451 * Computes the width in bytes of a tuple from 'rel'.
453 * Returns the width of the tuple as a fixnum.
456 compute_rel_width(RelOptInfo *rel)
458 return compute_targetlist_width(rel->targetlist);
462 * compute_targetlist_width
463 * Computes the width in bytes of a tuple made from 'targetlist'.
465 * Returns the width of the tuple as a fixnum.
468 compute_targetlist_width(List *targetlist)
/* Sum the per-attribute widths over every target list entry. */
473 foreach(temp_tl, targetlist)
475 tuple_width += compute_attribute_width(lfirst(temp_tl));
481 * compute_attribute_width
482 * Given a target list entry, find the size in bytes of the attribute.
484 * If a field is variable-length, it is assumed to be at least the size
487 * Returns the width of the attribute as a fixnum.
490 compute_attribute_width(TargetEntry *tlistentry)
492 int width = get_typlen(tlistentry->resdom->restype);
/*
 * NOTE(review): presumably reached when get_typlen reports a
 * variable-length type (the guarding condition is not visible in this
 * view) -- fall back to a default width estimate.
 */
495 return _DEFAULT_ATTRIBUTE_WIDTH_;
501 * compute_joinrel_size
502 * Computes the size of the join relation 'joinrel'.
507 compute_joinrel_size(JoinPath *joinpath)
512 /* cartesian product */
513 temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
514 temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
/* Scale the cartesian product by the join clauses' combined selectivity. */
516 temp = temp * product_selec(joinpath->pathinfo);
/* Damp huge estimates to log scale so the int result cannot overflow. */
517 if (temp >= (MAXINT - 1) / 2)
519 /* if we exceed (MAXINT-1)/2, we switch to log scale */
520 /* +1 prevents log(0) */
521 temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
524 temp1 = ceil((double) temp);
532 * Estimate the storage space in bytes for a given number of tuples
533 * of a given width (size in bytes).
534 * To avoid overflow with big relations, result is a double.
538 relation_byte_size(int tuples, int width)
/* Per-tuple overhead is the heap tuple header (HeapTupleData). */
540 return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
545 * Returns an estimate of the number of pages covered by a given
546 * number of tuples of a given width (size in bytes).
549 page_size(int tuples, int width)
/* Round up: any partial page still costs a whole page of I/O. */
553 temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
/* base_log: logarithm of x in base b, via the change-of-base identity. */
559 base_log(double x, double b)
561 return log(x) / log(b);