1 /*-------------------------------------------------------------------------
4 * Routines to compute (and set) relation sizes and path costs
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.32 1999/02/13 23:16:16 momjian Exp $
12 *-------------------------------------------------------------------------
22 #define MAXINT INT_MAX
30 #include "nodes/relation.h"
31 #include "optimizer/cost.h"
32 #include "optimizer/internal.h"
33 #include "optimizer/keys.h"
34 #include "optimizer/tlist.h"
35 #include "utils/lsyscache.h"
39 static int compute_attribute_width(TargetEntry *tlistentry);
40 static double base_log(double x, double b);
41 static int compute_targetlist_width(List *targetlist);
43 int _disable_cost_ = 30000000;
45 bool _enable_seqscan_ = true;
46 bool _enable_indexscan_ = true;
47 bool _enable_sort_ = true;
48 bool _enable_hash_ = true;
49 bool _enable_nestloop_ = true;
50 bool _enable_mergejoin_ = true;
51 bool _enable_hashjoin_ = true;
53 Cost _cpu_page_wight_ = _CPU_PAGE_WEIGHT_;
54 Cost _cpu_index_page_wight_ = _CPU_INDEX_PAGE_WEIGHT_;
58 * Determines and returns the cost of scanning a relation sequentially.
59 * If the relation is a temporary to be materialized from a query
60 * embedded within a data field (determined by 'relid' containing an
61 * attribute reference), then a predetermined constant is returned (we
62 * have NO IDEA how big the result of a POSTQUEL procedure is going to
66 * cpu = *CPU-PAGE-WEIGHT* * t
68 * 'relid' is the relid of the relation to be scanned
69 * 'relpages' is the number of pages in the relation to be scanned
70 * (as determined from the system catalogs)
71 * 'reltuples' is the number of tuples in the relation to be scanned
77 cost_seqscan(int relid, int relpages, int reltuples)
81 if (!_enable_seqscan_)
82 temp += _disable_cost_;
88 * cost of sequentially scanning a materialized temporary relation
90 temp += _NONAME_SCAN_COST_;
95 temp += _cpu_page_wight_ * reltuples;
104 * Determines and returns the cost of scanning a relation using an index.
106 * disk = expected-index-pages + expected-data-pages
107 * cpu = *CPU-PAGE-WEIGHT* *
108 * (expected-index-tuples + expected-data-tuples)
110 * 'indexid' is the index OID
111 * 'expected-indexpages' is the number of index pages examined in the scan
112 * 'selec' is the selectivity of the index
113 * 'relpages' is the number of pages in the main relation
114 * 'reltuples' is the number of tuples in the main relation
115 * 'indexpages' is the number of pages in the index relation
116 * 'indextuples' is the number of tuples in the index relation
122 cost_index(Oid indexid,
123 int expected_indexpages,
136 if (!_enable_indexscan_ && !is_injoin)
137 temp += _disable_cost_;
139 /* expected index relation pages */
140 temp += expected_indexpages;
142 /* expected base relation pages */
143 temp2 = (reltuples == 0) ? (double) 0 : (double) relpages / reltuples;
144 temp2 = temp2 * (double) selec *indextuples;
146 temp += Min(relpages, (int) ceil(temp2));
148 /* per index tuples */
149 temp = temp + (_cpu_index_page_wight_ * selec * indextuples);
151 /* per heap tuples */
152 temp = temp + (_cpu_page_wight_ * selec * reltuples);
160 * Determines and returns the cost of sorting a relation by considering
161 * 1. the cost of doing an external sort: XXX this is probably too low
163 * cpu = *CPU-PAGE-WEIGHT* * (t lg t)
164 * 2. the cost of reading the sort result into memory (another seqscan)
165 * unless 'noread' is set
167 * 'pathkeys' is a list of sort keys
168 * 'tuples' is the number of tuples in the relation
169 * 'width' is the average tuple width in bytes
170 * 'noread' is a flag indicating that the sort result can remain on disk
171 * (i.e., the sort result is the result relation)
177 cost_sort(List *pathkeys, int tuples, int width, bool noread)
180 int npages = page_size(tuples, width);
181 Cost pages = (Cost) npages;
182 Cost numTuples = tuples;
185 temp += _disable_cost_;
186 if (tuples == 0 || pathkeys == NULL)
191 temp += pages * base_log((double) pages, (double) 2.0);
194 * could be base_log(pages, NBuffers), but we are only doing 2-way
197 temp += _cpu_page_wight_ * numTuples *
198 base_log((double) pages, (double) 2.0);
201 temp = temp + cost_seqscan(_NONAME_RELATION_ID_, npages, tuples);
210 * Determines and returns the cost of writing a relation of 'tuples'
211 * tuples of 'width' bytes out to a result relation.
218 cost_result(int tuples, int width)
222 temp = temp + page_size(tuples, width);
223 temp = temp + _cpu_page_wight_ * tuples;
232 * Determines and returns the cost of joining two relations using the
233 * nested loop algorithm.
235 * 'outercost' is the (disk+cpu) cost of scanning the outer relation
236 * 'innercost' is the (disk+cpu) cost of scanning the inner relation
237 * 'outertuples' is the number of tuples in the outer relation
243 cost_nestloop(Cost outercost,
252 if (!_enable_nestloop_)
253 temp += _disable_cost_;
255 temp += outertuples * innercost;
263 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
264 * outer and inner relations
265 * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
266 * to sort the outer and inner relations
267 * 'outertuples' and 'innertuples' are the number of tuples in the outer
268 * and inner relations
269 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
270 * of the tuples of the outer and inner relations
276 cost_mergejoin(Cost outercost,
287 if (!_enable_mergejoin_)
288 temp += _disable_cost_;
292 temp += cost_sort(outersortkeys, outersize, outerwidth, false);
293 temp += cost_sort(innersortkeys, innersize, innerwidth, false);
294 temp += _cpu_page_wight_ * (outersize + innersize);
301 * cost_hashjoin-- XXX HASH
302 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
303 * outer and inner relations
304 * 'outerkeys' and 'innerkeys' are lists of the keys to be used
305 * to hash the outer and inner relations
306 * 'outersize' and 'innersize' are the number of tuples in the outer
307 * and inner relations
308 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
309 * of the tuples of the outer and inner relations
314 cost_hashjoin(Cost outercost,
324 int outerpages = page_size(outersize, outerwidth);
325 int innerpages = page_size(innersize, innerwidth);
326 int nrun = ceil((double) outerpages / (double) NBuffers);
328 if (outerpages < innerpages)
329 return _disable_cost_;
330 if (!_enable_hashjoin_)
331 temp += _disable_cost_;
334 * temp += outercost + (nrun + 1) * innercost;
336 * the innercost shouldn't be used it. Instead the cost of hashing the
337 * innerpath should be used
339 * ASSUME innercost is 1 for now -- a horrible hack - jolly temp +=
340 * outercost + (nrun + 1);
342 * But we must add innercost to result. - vadim 04/24/97
344 temp += outercost + innercost + (nrun + 1);
346 temp += _cpu_page_wight_ * (outersize + nrun * innersize);
354 * Computes the size of each relation in 'rel_list' (after applying
355 * restrictions), by multiplying the selectivity of each restriction
356 * by the original size of the relation.
358 * Sets the 'size' field for each relation entry with this computed size.
363 compute_rel_size(RelOptInfo *rel)
368 temp = rel->tuples * product_selec(rel->restrictinfo);
370 if (temp >= (MAXINT - 1))
373 temp1 = ceil((double) temp);
375 Assert(temp1 <= MAXINT);
381 * Computes the width in bytes of a tuple from 'rel'.
383 * Returns the width of the tuple as a fixnum.
386 compute_rel_width(RelOptInfo *rel)
388 return compute_targetlist_width(get_actual_tlist(rel->targetlist));
392 * compute_targetlist_width
393 * Computes the width in bytes of a tuple made from 'targetlist'.
395 * Returns the width of the tuple as a fixnum.
398 compute_targetlist_width(List *targetlist)
403 foreach(temp_tl, targetlist)
405 tuple_width = tuple_width +
406 compute_attribute_width(lfirst(temp_tl));
412 * compute_attribute_width
413 * Given a target list entry, find the size in bytes of the attribute.
415 * If a field is variable-length, it is assumed to be at least the size
418 * Returns the width of the attribute as a fixnum.
421 compute_attribute_width(TargetEntry *tlistentry)
423 int width = get_typlen(tlistentry->resdom->restype);
426 return _DEFAULT_ATTRIBUTE_WIDTH_;
432 * compute_joinrel_size
433 * Computes the size of the join relation 'joinrel'.
438 compute_joinrel_size(JoinPath *joinpath)
443 temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
444 temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
446 temp = temp * product_selec(joinpath->pathinfo);
447 if (temp >= (MAXINT - 1))
453 * should be ceil here, we don't want joinrel size's of one, do
456 temp1 = ceil((double) temp);
465 * Returns an estimate of the number of pages covered by a given
466 * number of tuples of a given width (size in bytes).
469 page_size(int tuples, int width)
473 temp = ceil((double) (tuples * (width + sizeof(HeapTupleData)))
480 base_log(double x, double b)
482 return log(x) / log(b);