1 /*-------------------------------------------------------------------------
4 * Routines to compute (and set) relation sizes and path costs
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.34 1999/04/05 02:07:07 tgl Exp $
12 *-------------------------------------------------------------------------
22 #define MAXINT INT_MAX
30 #include "nodes/relation.h"
31 #include "optimizer/cost.h"
32 #include "optimizer/internal.h"
33 #include "optimizer/keys.h"
34 #include "optimizer/tlist.h"
35 #include "utils/lsyscache.h"
39 static int compute_attribute_width(TargetEntry *tlistentry);
40 static double relation_byte_size (int tuples, int width);
41 static double base_log(double x, double b);
42 static int compute_targetlist_width(List *targetlist);
44 int _disable_cost_ = 30000000;
46 bool _enable_seqscan_ = true;
47 bool _enable_indexscan_ = true;
48 bool _enable_sort_ = true;
49 bool _enable_hash_ = true;
50 bool _enable_nestloop_ = true;
51 bool _enable_mergejoin_ = true;
52 bool _enable_hashjoin_ = true;
54 Cost _cpu_page_wight_ = _CPU_PAGE_WEIGHT_;
55 Cost _cpu_index_page_wight_ = _CPU_INDEX_PAGE_WEIGHT_;
59 * Determines and returns the cost of scanning a relation sequentially.
60 * If the relation is a temporary to be materialized from a query
61 * embedded within a data field (determined by 'relid' containing an
62 * attribute reference), then a predetermined constant is returned (we
63 * have NO IDEA how big the result of a POSTQUEL procedure is going to
67 * cpu = *CPU-PAGE-WEIGHT* * t
69 * 'relid' is the relid of the relation to be scanned
70 * 'relpages' is the number of pages in the relation to be scanned
71 * (as determined from the system catalogs)
72 * 'reltuples' is the number of tuples in the relation to be scanned
78 cost_seqscan(int relid, int relpages, int reltuples)
82 if (!_enable_seqscan_)
83 temp += _disable_cost_;
89 * cost of sequentially scanning a materialized temporary relation
91 temp += _NONAME_SCAN_COST_;
96 temp += _cpu_page_wight_ * reltuples;
105 * Determines and returns the cost of scanning a relation using an index.
107 * disk = expected-index-pages + expected-data-pages
108 * cpu = *CPU-PAGE-WEIGHT* *
109 * (expected-index-tuples + expected-data-tuples)
111 * 'indexid' is the index OID
112 * 'expected-indexpages' is the number of index pages examined in the scan
113 * 'selec' is the selectivity of the index
114 * 'relpages' is the number of pages in the main relation
115 * 'reltuples' is the number of tuples in the main relation
116 * 'indexpages' is the number of pages in the index relation
117 * 'indextuples' is the number of tuples in the index relation
123 cost_index(Oid indexid,
124 int expected_indexpages,
137 if (!_enable_indexscan_ && !is_injoin)
138 temp += _disable_cost_;
140 /* expected index relation pages */
141 temp += expected_indexpages;
143 /* expected base relation pages */
144 temp2 = (reltuples == 0) ? (double) 0 : (double) relpages / reltuples;
145 temp2 = temp2 * (double) selec *indextuples;
147 temp += Min(relpages, (int) ceil(temp2));
149 /* per index tuples */
150 temp = temp + (_cpu_index_page_wight_ * selec * indextuples);
152 /* per heap tuples */
153 temp = temp + (_cpu_page_wight_ * selec * reltuples);
161 * Determines and returns the cost of sorting a relation by considering
162 * 1. the cost of doing an external sort: XXX this is probably too low
164 * cpu = *CPU-PAGE-WEIGHT* * (t lg t)
165 * 2. the cost of reading the sort result into memory (another seqscan)
166 * unless 'noread' is set
168 * 'pathkeys' is a list of sort keys
169 * 'tuples' is the number of tuples in the relation
170 * 'width' is the average tuple width in bytes
171 * 'noread' is a flag indicating that the sort result can remain on disk
172 * (i.e., the sort result is the result relation)
178 cost_sort(List *pathkeys, int tuples, int width, bool noread)
181 int npages = page_size(tuples, width);
182 Cost pages = (Cost) npages;
183 Cost numTuples = tuples;
186 temp += _disable_cost_;
187 if (tuples == 0 || pathkeys == NULL)
192 temp += pages * base_log((double) pages, (double) 2.0);
195 * could be base_log(pages, NBuffers), but we are only doing 2-way
198 temp += _cpu_page_wight_ * numTuples *
199 base_log((double) pages, (double) 2.0);
202 temp = temp + cost_seqscan(_NONAME_RELATION_ID_, npages, tuples);
211 * Determines and returns the cost of writing a relation of 'tuples'
212 * tuples of 'width' bytes out to a result relation.
219 cost_result(int tuples, int width)
223 temp = temp + page_size(tuples, width);
224 temp = temp + _cpu_page_wight_ * tuples;
233 * Determines and returns the cost of joining two relations using the
234 * nested loop algorithm.
236 * 'outercost' is the (disk+cpu) cost of scanning the outer relation
237 * 'innercost' is the (disk+cpu) cost of scanning the inner relation
238 * 'outertuples' is the number of tuples in the outer relation
244 cost_nestloop(Cost outercost,
253 if (!_enable_nestloop_)
254 temp += _disable_cost_;
256 temp += outertuples * innercost;
264 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
265 * outer and inner relations
266 * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
267 * to sort the outer and inner relations
268 * 'outertuples' and 'innertuples' are the number of tuples in the outer
269 * and inner relations
270 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
271 * of the tuples of the outer and inner relations
277 cost_mergejoin(Cost outercost,
288 if (!_enable_mergejoin_)
289 temp += _disable_cost_;
293 temp += cost_sort(outersortkeys, outersize, outerwidth, false);
294 temp += cost_sort(innersortkeys, innersize, innerwidth, false);
295 temp += _cpu_page_wight_ * (outersize + innersize);
302 * cost_hashjoin-- XXX HASH
303 * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
304 * outer and inner relations
305 * 'outerkeys' and 'innerkeys' are lists of the keys to be used
306 * to hash the outer and inner relations
307 * 'outersize' and 'innersize' are the number of tuples in the outer
308 * and inner relations
309 * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
310 * of the tuples of the outer and inner relations
315 cost_hashjoin(Cost outercost,
325 int outerpages = page_size(outersize, outerwidth);
326 int innerpages = page_size(innersize, innerwidth);
328 if (!_enable_hashjoin_)
329 temp += _disable_cost_;
331 /* Bias against putting larger relation on inside.
333 * Code used to use "outerpages < innerpages" but that has
334 * poor resolution when both relations are small.
336 if (relation_byte_size(outersize, outerwidth) <
337 relation_byte_size(innersize, innerwidth))
338 temp += _disable_cost_;
340 /* cost of source data */
341 temp += outercost + innercost;
343 /* cost of computing hash function: must do it once per tuple */
344 temp += _cpu_page_wight_ * (outersize + innersize);
346 /* cost of main-memory hashtable */
347 temp += (innerpages < NBuffers) ? innerpages : NBuffers;
349 /* if inner relation is too big then we will need to "batch" the join,
350 * which implies writing and reading most of the tuples to disk an
353 if (innerpages > NBuffers)
354 temp += 2 * (outerpages + innerpages);
363 * Computes the size of each relation in 'rel_list' (after applying
364 * restrictions), by multiplying the selectivity of each restriction
365 * by the original size of the relation.
367 * Sets the 'size' field for each relation entry with this computed size.
372 compute_rel_size(RelOptInfo *rel)
377 temp = rel->tuples * product_selec(rel->restrictinfo);
379 if (temp >= (MAXINT - 1))
382 temp1 = ceil((double) temp);
384 Assert(temp1 <= MAXINT);
390 * Computes the width in bytes of a tuple from 'rel'.
392 * Returns the width of the tuple as a fixnum.
395 compute_rel_width(RelOptInfo *rel)
397 return compute_targetlist_width(get_actual_tlist(rel->targetlist));
401 * compute_targetlist_width
402 * Computes the width in bytes of a tuple made from 'targetlist'.
404 * Returns the width of the tuple as a fixnum.
407 compute_targetlist_width(List *targetlist)
412 foreach(temp_tl, targetlist)
414 tuple_width = tuple_width +
415 compute_attribute_width(lfirst(temp_tl));
421 * compute_attribute_width
422 * Given a target list entry, find the size in bytes of the attribute.
424 * If a field is variable-length, it is assumed to be at least the size
427 * Returns the width of the attribute as a fixnum.
430 compute_attribute_width(TargetEntry *tlistentry)
432 int width = get_typlen(tlistentry->resdom->restype);
435 return _DEFAULT_ATTRIBUTE_WIDTH_;
441 * compute_joinrel_size
442 * Computes the size of the join relation 'joinrel'.
447 compute_joinrel_size(JoinPath *joinpath)
452 /* cartesian product */
453 temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
454 temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
456 temp = temp * product_selec(joinpath->pathinfo);
457 if (temp >= (MAXINT-1)/2)
459 /* if we exceed (MAXINT-1)/2, we switch to log scale */
460 /* +1 prevents log(0) */
461 temp1 = ceil(log(temp + 1 - (MAXINT-1)/2) + (MAXINT-1)/2);
464 temp1 = ceil((double) temp);
472 * Estimate the storage space in bytes for a given number of tuples
473 * of a given width (size in bytes).
474 * To avoid overflow with big relations, result is a double.
478 relation_byte_size (int tuples, int width)
480 return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
485 * Returns an estimate of the number of pages covered by a given
486 * number of tuples of a given width (size in bytes).
489 page_size(int tuples, int width)
493 temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
499 base_log(double x, double b)
501 return log(x) / log(b);