/*-------------------------------------------------------------------------
 *
 * costsize.c
 *    Routines to compute (and set) relation sizes and path costs
 *
 * Path costs are measured in units of disk accesses: one sequential page
 * fetch has cost 1.  All else is scaled relative to a page fetch, using
 * the scaling parameters
 *
 *    random_page_cost      Cost of a non-sequential page fetch
 *    cpu_tuple_cost        Cost of typical CPU time to process a tuple
 *    cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
 *    cpu_operator_cost     Cost of CPU time to process a typical WHERE operator
 *
 * We also use a rough estimate "effective_cache_size" of the number of
 * disk pages in Postgres + OS-level disk cache.  (We can't simply use
 * NBuffers for this purpose because that would ignore the effects of
 * the kernel's disk cache.)
 *
 * Obviously, taking constants for these values is an oversimplification,
 * but it's tough enough to get any useful estimates even at this level of
 * detail.  Note that all of these parameters are user-settable, in case
 * the default values are drastically off for a particular platform.
 *
 * We compute two separate costs for each path:
 *    total_cost: total estimated cost to fetch all tuples
 *    startup_cost: cost that is expended before first tuple is fetched
 * In some scenarios, such as when there is a LIMIT or we are implementing
 * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
 * path's result.  A caller can estimate the cost of fetching a partial
 * result by interpolating between startup_cost and total_cost.  In detail:
 *    actual_cost = startup_cost +
 *        (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
 * Note that a relation's rows count (and, by extension, a Plan's plan_rows)
 * is set without regard to any LIMIT, so that this equation works properly.
 * (Also, these routines guarantee not to set the rows count to zero, so there
 * will be no zero divide.)  The LIMIT is applied as a separate Plan node.
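 *
 * For example (illustrative numbers): a path with startup_cost = 10,
 * total_cost = 110, and 1000 output rows is estimated to cost
 * 10 + (110 - 10) * 100/1000 = 20 to fetch its first 100 tuples.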
 *
 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "catalog/pg_statistic.h"
#include "executor/nodeHash.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"

/*
 * The length of a variable-length field in bytes (stupid estimate...)
 */
#define _DEFAULT_ATTRIBUTE_WIDTH_ 12
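
/*
 * Base-2 and base-6 logarithms: the divisors below are ln(2) and ln(6),
 * so LOG2(x) = log2(x) and LOG6(x) = log6(x).
 */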
#define LOG2(x)  (log(x) / 0.693147180559945)
#define LOG6(x)  (log(x) / 1.79175946922805)


double      effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
double      random_page_cost = DEFAULT_RANDOM_PAGE_COST;
double      cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
double      cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
double      cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;

Cost        disable_cost = 100000000.0;

bool        enable_seqscan = true;
bool        enable_indexscan = true;
bool        enable_tidscan = true;
bool        enable_sort = true;
bool        enable_nestloop = true;
bool        enable_mergejoin = true;
bool        enable_hashjoin = true;

static bool cost_qual_eval_walker(Node *node, Cost *total);
static void set_rel_width(Query *root, RelOptInfo *rel);
static int  compute_attribute_width(TargetEntry *tlistentry);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);

/*
 * cost_seqscan
 *    Determines and returns the cost of scanning a relation sequentially.
 *
 * Note: for historical reasons, this routine and the others in this module
 * use the passed result Path only to store their startup_cost and total_cost
 * results into.  All the input data they need is passed as separate
 * parameters, even though much of it could be extracted from the Path.
 */
void
cost_seqscan(Path *path, RelOptInfo *baserel)
{
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;

    /* Should only be applied to base relations */
    Assert(length(baserel->relids) == 1);
    Assert(!baserel->issubquery);

    if (!enable_seqscan)
        startup_cost += disable_cost;

    /*
     * disk costs
     *
     * The cost of reading a page sequentially is 1.0, by definition.  Note
     * that the Unix kernel will typically do some amount of read-ahead
     * optimization, so that this cost is less than the true cost of
     * reading a page from disk.  We ignore that issue here, but must take
     * it into account when estimating the cost of non-sequential fetches!
     */
    run_cost += baserel->pages; /* sequential fetches with cost 1.0 */

    /* CPU costs */
    cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
    run_cost += cpu_per_tuple * baserel->tuples;

    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}
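
/*
 * For illustration (assumed values, e.g. cpu_tuple_cost = 0.01): a table
 * of 1000 pages and 100,000 tuples with no restriction clauses costs
 * 1000 (sequential page fetches) + 0.01 * 100000 = 2000 in total, with
 * zero startup cost.
 */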

/*
 * cost_nonsequential_access
 *    Estimate the cost of accessing one page at random from a relation
 *    (or sort temp file) of the given size in pages.
 *
 * The simplistic model that the cost is random_page_cost is what we want
 * to use for large relations; but for small ones that is a serious
 * overestimate because of the effects of caching.  This routine tries to
 * account for that.
 *
 * Unfortunately we don't have any good way of estimating the effective cache
 * size we are working with --- we know that Postgres itself has NBuffers
 * internal buffers, but the size of the kernel's disk cache is uncertain,
 * and how much of it we get to use is even less certain.  We punt the problem
 * for now by assuming we are given an effective_cache_size parameter.
 *
 * Given a guesstimated cache size, we estimate the actual I/O cost per page
 * with the entirely ad-hoc equations:
 *    for rel_size <= effective_cache_size:
 *        1 + (random_page_cost/2-1) * (rel_size/effective_cache_size) ** 2
 *    for rel_size >= effective_cache_size:
 *        random_page_cost * (1 - (effective_cache_size/rel_size)/2)
 * These give the right asymptotic behavior (=> 1.0 as rel_size becomes
 * small, => random_page_cost as it becomes large) and meet in the middle
 * with the estimate that the cache is about 50% effective for a relation
 * of the same size as effective_cache_size.  (XXX this is probably all
 * wrong, but I haven't been able to find any theory about how effective
 * a disk cache should be presumed to be.)
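 *
 * For example, assuming random_page_cost = 4.0: a relation half the size
 * of effective_cache_size gets a per-page cost of 1 + (4/2 - 1)*(0.5)^2 =
 * 1.25; one exactly cache-sized gets 2.0 (both equations agree); one twice
 * the cache size gets 4.0 * (1 - 0.25) = 3.0.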
 */
static Cost
cost_nonsequential_access(double relpages)
{
    double      relsize;

    /* don't crash on bad input data */
    if (relpages <= 0.0 || effective_cache_size <= 0.0)
        return random_page_cost;

    /* express the relation's size as a fraction of the cache */
    relsize = relpages / effective_cache_size;

    if (relsize >= 1.0)
        return random_page_cost * (1.0 - 0.5 / relsize);
    else
        return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize;
}

/*
 * cost_index
 *    Determines and returns the cost of scanning a relation using an index.
 *
 * NOTE: an indexscan plan node can actually represent several passes,
 * but here we consider the cost of just one pass.
 *
 * 'root' is the query root
 * 'baserel' is the base relation the index is for
 * 'index' is the index to be used
 * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
 * 'is_injoin' is T if we are considering using the index scan as the inside
 *    of a nestloop join (hence, some of the indexQuals are join clauses)
 *
 * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
 * Any additional quals evaluated as qpquals may reduce the number of returned
 * tuples, but they won't reduce the number of tuples we have to fetch from
 * the table, so they don't reduce the scan cost.
 */
void
cost_index(Path *path, Query *root,
           RelOptInfo *baserel,
           IndexOptInfo *index,
           List *indexQuals,
           bool is_injoin)
{
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;
    Cost        indexStartupCost;
    Cost        indexTotalCost;
    Selectivity indexSelectivity;
    double      tuples_fetched;
    double      pages_fetched;

    /* Should only be applied to base relations */
    Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
    Assert(length(baserel->relids) == 1);
    Assert(!baserel->issubquery);

    if (!enable_indexscan && !is_injoin)
        startup_cost += disable_cost;

    /*
     * Call index-access-method-specific code to estimate the processing
     * cost for scanning the index, as well as the selectivity of the
     * index (ie, the fraction of main-table tuples we will have to
     * retrieve).
     */
    OidFunctionCall7(index->amcostestimate,
                     PointerGetDatum(root),
                     PointerGetDatum(baserel),
                     PointerGetDatum(index),
                     PointerGetDatum(indexQuals),
                     PointerGetDatum(&indexStartupCost),
                     PointerGetDatum(&indexTotalCost),
                     PointerGetDatum(&indexSelectivity));

    /* all costs for touching index itself included here */
    startup_cost += indexStartupCost;
    run_cost += indexTotalCost - indexStartupCost;

    /*
     * Estimate number of main-table tuples and pages fetched.
     *
     * If the number of tuples is much smaller than the number of pages in
     * the relation, each tuple will cost a separate nonsequential fetch.
     * If it is comparable or larger, then probably we will be able to
     * avoid some fetches.  We use a growth rate of log(#tuples/#pages +
     * 1) --- probably totally bogus, but intuitively it gives the right
     * shape of curve at least.
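     *
     * For example (illustrative numbers): on a 1000-page table, fetching
     * 100 tuples costs ceil(1000 * ln(1.1)) = 96 page fetches, essentially
     * one per tuple, while fetching 10000 tuples costs only
     * ceil(1000 * ln(11)) = 2398 fetches, far fewer than one per tuple.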
     *
     * XXX if the relation has recently been "clustered" using this index,
     * then in fact the target tuples will be highly nonuniformly
     * distributed, and we will be seriously overestimating the scan cost!
     * Currently we have no way to know whether the relation has been
     * clustered, nor how much it's been modified since the last
     * clustering, so we ignore this effect.  Would be nice to do better
     * someday.
     */

    tuples_fetched = indexSelectivity * baserel->tuples;
    /* Don't believe estimates less than 1... */
    if (tuples_fetched < 1.0)
        tuples_fetched = 1.0;

    if (baserel->pages > 0)
        pages_fetched = ceil(baserel->pages *
                             log(tuples_fetched / baserel->pages + 1.0));
    else
        pages_fetched = tuples_fetched;

    /*
     * Now estimate one nonsequential access per page fetched, plus
     * appropriate CPU costs per tuple.
     */

    /* disk costs for main table */
    run_cost += pages_fetched * cost_nonsequential_access(baserel->pages);

    /* CPU costs */
    cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;

    /*
     * Normally the indexquals will be removed from the list of
     * restriction clauses that we have to evaluate as qpquals, so we
     * should subtract their costs from baserestrictcost.  For a lossy
     * index, however, we will have to recheck all the quals and so
     * mustn't subtract anything.  Also, if we are doing a join then some
     * of the indexquals are join clauses and shouldn't be subtracted.
     * Rather than work out exactly how much to subtract, we don't
     * subtract anything in that case either.
     */
    if (!index->lossy && !is_injoin)
        cpu_per_tuple -= cost_qual_eval(indexQuals);

    run_cost += cpu_per_tuple * tuples_fetched;

    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}

/*
 * cost_tidscan
 *    Determines and returns the cost of scanning a relation using TIDs.
 */
void
cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
{
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;
    int         ntuples = length(tideval);

    if (!enable_tidscan)
        startup_cost += disable_cost;

    /* disk costs --- assume each tuple on a different page */
    run_cost += random_page_cost * ntuples;

    /* CPU costs */
    cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
    run_cost += cpu_per_tuple * ntuples;

    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}

/*
 * cost_sort
 *    Determines and returns the cost of sorting a relation.
 *
 * The cost of supplying the input data is NOT included; the caller should
 * add that cost to both startup and total costs returned from this routine!
 *
 * If the total volume of data to sort is less than SortMem, we will do
 * an in-memory sort, which requires no I/O and about t*log2(t) tuple
 * comparisons for t tuples.
 *
 * If the total volume exceeds SortMem, we switch to a tape-style merge
 * algorithm.  There will still be about t*log2(t) tuple comparisons in
 * total, but we will also need to write and read each tuple once per
 * merge pass.  We expect about ceil(log6(r)) merge passes where r is the
 * number of initial runs formed (log6 because tuplesort.c uses six-tape
 * merging).  Since the average initial run should be about twice SortMem,
 * we have
 *    disk traffic = 2 * relsize * ceil(log6(relsize / (2*SortMem)))
 *    cpu = comparison_cost * t * log2(t)
 *
 * The disk traffic is assumed to be half sequential and half random
 * accesses (XXX can't we refine that guess?)
 *
 * We charge two operator evals per tuple comparison, which should be in
 * the right ballpark in most cases.
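 *
 * For example, assuming SortMem = 1024 (kB) and 16 MB of input data:
 * r = 16MB / 2MB = 8 initial runs, so we expect ceil(log6(8)) = 2 merge
 * passes, i.e. each tuple is written and read twice beyond the initial
 * run formation.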
 *
 * 'pathkeys' is a list of sort keys
 * 'tuples' is the number of tuples in the relation
 * 'width' is the average tuple width in bytes
 *
 * NOTE: some callers currently pass NIL for pathkeys because they
 * can't conveniently supply the sort keys.  Since this routine doesn't
 * currently do anything with pathkeys anyway, that doesn't matter...
 * but if it ever does, it should react gracefully to lack of key data.
 */
void
cost_sort(Path *path, List *pathkeys, double tuples, int width)
{
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    double      nbytes = relation_byte_size(tuples, width);
    long        sortmembytes = SortMem * 1024L;

    if (!enable_sort)
        startup_cost += disable_cost;

    /*
     * We want to be sure the cost of a sort is never estimated as zero,
     * even if passed-in tuple count is zero.  Besides, mustn't do
     * log(0)...
     */
    if (tuples < 2.0)
        tuples = 2.0;

    /*
     * CPU costs
     *
     * Assume about two operator evals per tuple comparison and N log2 N
     * comparisons
     */
    startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);

    /* disk costs */
    if (nbytes > sortmembytes)
    {
        double      npages = ceil(nbytes / BLCKSZ);
        double      nruns = nbytes / (sortmembytes * 2);
        double      log_runs = ceil(LOG6(nruns));
        double      npageaccesses;

        /* charge at least one merge pass */
        if (log_runs < 1.0)
            log_runs = 1.0;
        npageaccesses = 2.0 * npages * log_runs;
        /* Assume half are sequential (cost 1), half are not */
        startup_cost += npageaccesses *
            (1.0 + cost_nonsequential_access(npages)) * 0.5;
    }

    /*
     * Note: should we bother to assign a nonzero run_cost to reflect the
     * overhead of extracting tuples from the sort result?  Probably not
     * worth worrying about.
     */
    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}

/*
 * cost_nestloop
 *    Determines and returns the cost of joining two relations using the
 *    nested loop algorithm.
 *
 * 'outer_path' is the path for the outer relation
 * 'inner_path' is the path for the inner relation
 * 'restrictlist' are the RestrictInfo nodes to be applied at the join
 */
void
cost_nestloop(Path *path,
              Path *outer_path,
              Path *inner_path,
              List *restrictlist)
{
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;
    double      ntuples;

    if (!enable_nestloop)
        startup_cost += disable_cost;

    /* cost of source data */

    /*
     * NOTE: clearly, we must pay both outer and inner paths' startup_cost
     * before we can start returning tuples, so the join's startup cost
     * is their sum.  What's not so clear is whether the inner path's
     * startup_cost must be paid again on each rescan of the inner path.
     * This is not true if the inner path is materialized, but probably
     * is true otherwise.  Since we don't yet have clean handling of the
     * decision whether to materialize a path, we can't tell here which
     * will happen.  As a compromise, charge 50% of the inner startup cost
     * for each restart.
     */
    startup_cost += outer_path->startup_cost + inner_path->startup_cost;
    run_cost += outer_path->total_cost - outer_path->startup_cost;
    run_cost += outer_path->parent->rows *
        (inner_path->total_cost - inner_path->startup_cost);
    if (outer_path->parent->rows > 1)
        run_cost += (outer_path->parent->rows - 1) *
            inner_path->startup_cost * 0.5;
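
    /*
     * Example with assumed numbers: 100 outer rows and an inner path with
     * startup cost 2.0 and per-scan run cost 8.0 contribute 2.0 to startup
     * cost, plus 100 * 8.0 = 800 and 99 * (0.5 * 2.0) = 99 to run cost.
     */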

    /*
     * Number of tuples processed (not number emitted!).  If inner path is
     * an indexscan, be sure to use its estimated output row count, which
     * may be lower than the restriction-clause-only row count of its
     * parent.
     */
    if (IsA(inner_path, IndexPath))
        ntuples = ((IndexPath *) inner_path)->rows;
    else
        ntuples = inner_path->parent->rows;
    ntuples *= outer_path->parent->rows;

    /* CPU costs */
    cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
    run_cost += cpu_per_tuple * ntuples;

    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}

/*
 * cost_mergejoin
 *    Determines and returns the cost of joining two relations using the
 *    merge join algorithm.
 *
 * 'outer_path' is the path for the outer relation
 * 'inner_path' is the path for the inner relation
 * 'restrictlist' are the RestrictInfo nodes to be applied at the join
 * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
 *    to sort the outer and inner relations, or NIL if no explicit
 *    sort is needed because the source path is already ordered
 */
void
cost_mergejoin(Path *path,
               Path *outer_path,
               Path *inner_path,
               List *restrictlist,
               List *outersortkeys,
               List *innersortkeys)
{
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;
    double      ntuples;
    Path        sort_path;      /* dummy for result of cost_sort */

    if (!enable_mergejoin)
        startup_cost += disable_cost;

    /* cost of source data */

    /*
     * Note we are assuming that each source tuple is fetched just once,
     * which is not right in the presence of equal keys.  If we had a way
     * of estimating the proportion of equal keys, we could apply a
     * correction factor...
     */
    if (outersortkeys)          /* do we need to sort outer? */
    {
        /* the whole input must be read before the sort can emit tuples */
        startup_cost += outer_path->total_cost;
        cost_sort(&sort_path,
                  outersortkeys,
                  outer_path->parent->rows,
                  outer_path->parent->width);
        startup_cost += sort_path.startup_cost;
        run_cost += sort_path.total_cost - sort_path.startup_cost;
    }
    else
    {
        startup_cost += outer_path->startup_cost;
        run_cost += outer_path->total_cost - outer_path->startup_cost;
    }

    if (innersortkeys)          /* do we need to sort inner? */
    {
        startup_cost += inner_path->total_cost;
        cost_sort(&sort_path,
                  innersortkeys,
                  inner_path->parent->rows,
                  inner_path->parent->width);
        startup_cost += sort_path.startup_cost;
        run_cost += sort_path.total_cost - sort_path.startup_cost;
    }
    else
    {
        startup_cost += inner_path->startup_cost;
        run_cost += inner_path->total_cost - inner_path->startup_cost;
    }

    /*
     * Estimate the number of tuples to be processed in the mergejoin
     * itself as one per tuple in the two source relations.  This could be
     * a drastic underestimate if there are many equal-keyed tuples in
     * either relation, but we have no good way of estimating that...
     */
    ntuples = outer_path->parent->rows + inner_path->parent->rows;

    /* CPU costs */
    cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
    run_cost += cpu_per_tuple * ntuples;

    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}

/*
 * cost_hashjoin
 *    Determines and returns the cost of joining two relations using the
 *    hash join algorithm.
 *
 * 'outer_path' is the path for the outer relation
 * 'inner_path' is the path for the inner relation
 * 'restrictlist' are the RestrictInfo nodes to be applied at the join
 * 'innerbucketsize' is an estimate of the bucketsize statistic
 *    for the inner hash key.
 */
void
cost_hashjoin(Path *path,
              Path *outer_path,
              Path *inner_path,
              List *restrictlist,
              Selectivity innerbucketsize)
{
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;
    double      ntuples;
    double      outerbytes = relation_byte_size(outer_path->parent->rows,
                                                outer_path->parent->width);
    double      innerbytes = relation_byte_size(inner_path->parent->rows,
                                                inner_path->parent->width);
    long        hashtablebytes = SortMem * 1024L;

    if (!enable_hashjoin)
        startup_cost += disable_cost;

    /* cost of source data */
    startup_cost += outer_path->startup_cost;
    run_cost += outer_path->total_cost - outer_path->startup_cost;
    startup_cost += inner_path->total_cost;

    /* cost of computing hash function: must do it once per input tuple */
    startup_cost += cpu_operator_cost * inner_path->parent->rows;
    run_cost += cpu_operator_cost * outer_path->parent->rows;

    /*
     * The number of tuple comparisons needed is the number of outer
     * tuples times the typical number of tuples in a hash bucket,
     * which is the inner relation size times its bucketsize fraction.
     * We charge one cpu_operator_cost per tuple comparison.
     */
    run_cost += cpu_operator_cost * outer_path->parent->rows *
        ceil(inner_path->parent->rows * innerbucketsize);

    /*
     * Estimate the number of tuples that get through the hashing filter
     * as one per tuple in the two source relations.  This could be a
     * drastic underestimate if there are many equal-keyed tuples in
     * either relation, but we have no simple way of estimating that;
     * and since this is only a second-order parameter, it's probably
     * not worth expending a lot of effort on the estimate.
     */
    ntuples = outer_path->parent->rows + inner_path->parent->rows;

    /* CPU costs */
    cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
    run_cost += cpu_per_tuple * ntuples;

    /*
     * If inner relation is too big then we will need to "batch" the join,
     * which implies writing and reading most of the tuples to disk an
     * extra time.  Charge one cost unit per page of I/O (correct since it
     * should be nice and sequential...).  Writing the inner rel counts as
     * startup cost, all the rest as run cost.
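     *
     * For example, assuming SortMem = 1024 (a 1 MB hash table) and
     * BLCKSZ = 8192: a 4 MB inner relation must be batched, adding its
     * 512 pages to startup cost and 512 + 2 * outerpages to run cost.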
     */
    if (innerbytes > hashtablebytes)
    {
        double      outerpages = page_size(outer_path->parent->rows,
                                           outer_path->parent->width);
        double      innerpages = page_size(inner_path->parent->rows,
                                           inner_path->parent->width);

        startup_cost += innerpages;
        run_cost += innerpages + 2 * outerpages;
    }

    /*
     * Bias against putting larger relation on inside.  We don't want an
     * absolute prohibition, though, since larger relation might have
     * better bucketsize --- and we can't trust the size estimates
     * unreservedly, anyway.  Instead, inflate the startup cost by the
     * square root of the size ratio.  (Why square root?  No real good
     * reason, but it seems reasonable...)
     */
    if (innerbytes > outerbytes && outerbytes > 0)
        startup_cost *= sqrt(innerbytes / outerbytes);

    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}

/*
 * Estimate hash bucketsize fraction (ie, number of entries in a bucket
 * divided by total tuples in relation) if the specified Var is used
 * as a hash key.
 *
 * This statistic is used by cost_hashjoin.  We split out the calculation
 * because it's useful to cache the result for re-use across multiple path
 * cost calculations.
 *
 * XXX This is really pretty bogus since we're effectively assuming that the
 * distribution of hash keys will be the same after applying restriction
 * clauses as it was in the underlying relation.  However, we are not nearly
 * smart enough to figure out how the restrict clauses might change the
 * distribution, so this will have to do for now.
 *
 * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
 * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
 * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
 * be reached only if the data values are uniformly distributed among the
 * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
 * data values, and (b) a not-too-skewed data distribution.  Otherwise the
 * buckets will be nonuniformly occupied.  If the other relation in the join
 * has a similar distribution, the most-loaded buckets are exactly those
 * that will be probed most often.  Therefore, the "average" bucket size for
 * costing purposes should really be taken as something close to the "worst
 * case" bucket size.  We try to estimate this by first scaling up if there
 * are too few distinct data values, and then scaling up again by the
 * ratio of the most common value's frequency to the average frequency.
 *
 * If no statistics are available, use a default estimate of 0.1.  This will
 * discourage use of a hash rather strongly if the inner relation is large,
 * which is what we want.  We do not want to hash unless we know that the
 * inner rel is well-dispersed (or the alternatives seem much worse).
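 *
 * Worked example with assumed values (NTUP_PER_BUCKET = 10, rel->rows =
 * 100000): the ideal bucketsize fraction is 10/100000 = 0.0001.  If only
 * 5000 distinct values remain where 10000 are needed to fill the buckets,
 * we scale by 2 to 0.0002; if the most common value is then 5 times as
 * frequent as the average value, we scale by 5 to 0.001.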
 */
Selectivity
estimate_hash_bucketsize(Query *root, Var *var)
{
    Oid         relid;
    RelOptInfo *rel;
    HeapTuple   tuple;
    Form_pg_statistic stats;
    double      estfract,
                ndistinct,
                needdistinct,
                mcvfreq,
                avgfreq;
    float4     *numbers;
    int         nnumbers;

    /*
     * Lookup info about var's relation and attribute;
     * if none available, return default estimate.
     */
    if (!IsA(var, Var))
        return 0.1;

    relid = getrelid(var->varno, root->rtable);
    if (relid == InvalidOid)
        return 0.1;

    rel = get_base_rel(root, var->varno);

    if (rel->tuples <= 0.0 || rel->rows <= 0.0)
        return 0.1;             /* ensure we can divide below */

    tuple = SearchSysCache(STATRELATT,
                           ObjectIdGetDatum(relid),
                           Int16GetDatum(var->varattno),
                           0, 0);
    if (!HeapTupleIsValid(tuple))
    {
        /*
         * Perhaps the Var is a system attribute; if so, it will have no
         * entry in pg_statistic, but we may be able to guess something
         * about its distribution anyway.
         */
        switch (var->varattno)
        {
            case ObjectIdAttributeNumber:
            case SelfItemPointerAttributeNumber:
                /* these are unique, so buckets should be well-distributed */
                return (double) NTUP_PER_BUCKET / rel->rows;
            case TableOidAttributeNumber:
                /* hashing this is a terrible idea... */
                return 1.0;
        }
        return 0.1;
    }

    stats = (Form_pg_statistic) GETSTRUCT(tuple);

    /*
     * Obtain number of distinct data values in raw relation.
     */
    ndistinct = stats->stadistinct;
    if (ndistinct < 0.0)        /* negative means a fraction of tuples */
        ndistinct = -ndistinct * rel->tuples;

    /*
     * Adjust ndistinct to account for restriction clauses.  Observe we are
     * assuming that the data distribution is affected uniformly by the
     * restriction clauses!
     *
     * XXX Possibly better way, but much more expensive: multiply by
     * selectivity of rel's restriction clauses that mention the target Var.
     */
    ndistinct *= rel->rows / rel->tuples;

    /*
     * Discourage use of hash join if there seem not to be very many distinct
     * data values.  The threshold here is somewhat arbitrary, as is the
     * fraction used to "discourage" the choice.
     */
    if (ndistinct < 50.0)
    {
        ReleaseSysCache(tuple);
        return 0.5;
    }

    /*
     * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
     * ie the number of rows after applying restriction clauses, because
     * that's what the fraction will eventually be multiplied by in
     * cost_hashjoin.
     */
    estfract = (double) NTUP_PER_BUCKET / rel->rows;

    /*
     * Adjust estimated bucketsize if too few distinct values to fill
     * all the buckets.
     */
    needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
    if (ndistinct < needdistinct)
        estfract *= needdistinct / ndistinct;

    /*
     * Look up the frequency of the most common value, if available.
     */
    mcvfreq = 0.0;

    if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
                         STATISTIC_KIND_MCV, InvalidOid,
                         NULL, NULL, &numbers, &nnumbers))
    {
        /*
         * The first MCV stat is for the most common value.
         */
        if (nnumbers > 0)
            mcvfreq = numbers[0];
        free_attstatsslot(var->vartype, NULL, 0,
                          numbers, nnumbers);
    }

    /*
     * Adjust estimated bucketsize upward to account for skewed distribution.
     */
    avgfreq = (1.0 - stats->stanullfrac) / ndistinct;

    if (avgfreq > 0.0 && mcvfreq > avgfreq)
        estfract *= mcvfreq / avgfreq;

    ReleaseSysCache(tuple);

    return (Selectivity) estfract;
}

/*
 * cost_qual_eval
 *    Estimate the CPU cost of evaluating a WHERE clause (once).
 *    The input can be either an implicitly-ANDed list of boolean
 *    expressions, or a list of RestrictInfo nodes.
 */
Cost
cost_qual_eval(List *quals)
{
    Cost        total = 0;
    List       *l;

    /* We don't charge any cost for the implicit ANDing at top level ... */

    foreach(l, quals)
    {
        Node       *qual = (Node *) lfirst(l);

        /*
         * RestrictInfo nodes contain an eval_cost field reserved for this
         * routine's use, so that it's not necessary to evaluate the qual
         * clause's cost more than once.  If the clause's cost hasn't been
         * computed yet, the field will contain -1.
         */
        if (qual && IsA(qual, RestrictInfo))
        {
            RestrictInfo *restrictinfo = (RestrictInfo *) qual;

            if (restrictinfo->eval_cost < 0)
            {
                restrictinfo->eval_cost = 0;
                cost_qual_eval_walker((Node *) restrictinfo->clause,
                                      &restrictinfo->eval_cost);
            }
            total += restrictinfo->eval_cost;
        }
        else
        {
            /* If it's a bare expression, must always do it the hard way */
            cost_qual_eval_walker(qual, &total);
        }
    }
    return total;
}

static bool
cost_qual_eval_walker(Node *node, Cost *total)
{
    if (node == NULL)
        return false;

    /*
     * Our basic strategy is to charge one cpu_operator_cost for each
     * operator or function node in the given tree.  Vars and Consts are
     * charged zero, and so are boolean operators (AND, OR, NOT).
     * Simplistic, but a lot better than no model at all.
     *
     * Should we try to account for the possibility of short-circuit
     * evaluation of AND/OR?
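     *
     * For example, a qual like "a > 5 AND b = c" is charged
     * 2 * cpu_operator_cost: one unit per operator, and nothing for the
     * Vars, the Const, or the AND node itself.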
     */
    if (IsA(node, Expr))
    {
        Expr       *expr = (Expr *) node;

        switch (expr->opType)
        {
            case OP_EXPR:
            case FUNC_EXPR:
                *total += cpu_operator_cost;
                break;
            case OR_EXPR:
            case AND_EXPR:
            case NOT_EXPR:
                /* boolean operators are charged nothing */
                break;
            case SUBPLAN_EXPR:
                /*
                 * A subplan node in an expression indicates that the
                 * subplan will be executed on each evaluation, so charge
                 * accordingly.  (We assume that sub-selects that can be
                 * executed as InitPlans have already been removed from
                 * the expression.)
                 *
                 * NOTE: this logic should agree with the estimates used by
                 * make_subplan() in plan/subselect.c.
                 */
                {
                    SubPlan    *subplan = (SubPlan *) expr->oper;
                    Plan       *plan = subplan->plan;
                    Cost        subcost;

                    if (subplan->sublink->subLinkType == EXISTS_SUBLINK)
                    {
                        /* we only need to fetch 1 tuple */
                        subcost = plan->startup_cost +
                            (plan->total_cost - plan->startup_cost) / plan->plan_rows;
                    }
                    else if (subplan->sublink->subLinkType == ALL_SUBLINK ||
                             subplan->sublink->subLinkType == ANY_SUBLINK)
                    {
                        /* assume we need 50% of the tuples */
                        subcost = plan->startup_cost +
                            0.50 * (plan->total_cost - plan->startup_cost);
                        /* XXX what if subplan has been materialized? */
                    }
                    else
                    {
                        /* assume we need all tuples */
                        subcost = plan->total_cost;
                    }
                    *total += subcost;
                }
                break;
        }

        /* fall through to examine args of Expr node */
    }
    return expression_tree_walker(node, cost_qual_eval_walker,
                                  (void *) total);
}

/*
 * set_baserel_size_estimates
 *    Set the size estimates for the given base relation.
 *
 * The rel's targetlist and restrictinfo list must have been constructed
 * already.
 *
 * We set the following fields of the rel node:
 *    rows: the estimated number of output tuples (after applying
 *          restriction clauses).
 *    width: the estimated average output tuple width in bytes.
 *    baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
 */
void
set_baserel_size_estimates(Query *root, RelOptInfo *rel)
{
    /* Should only be applied to base relations */
    Assert(length(rel->relids) == 1);

    rel->rows = rel->tuples *
        restrictlist_selectivity(root,
                                 rel->baserestrictinfo,
                                 lfirsti(rel->relids));

    /*
     * Force estimate to be at least one row, to make explain output look
     * better and to avoid possible divide-by-zero when interpolating
     * cost.
     */
    if (rel->rows < 1.0)
        rel->rows = 1.0;

    rel->baserestrictcost = cost_qual_eval(rel->baserestrictinfo);

    set_rel_width(root, rel);
}

/*
 * set_joinrel_size_estimates
 *    Set the size estimates for the given join relation.
 *
 * The rel's targetlist must have been constructed already, and a
 * restriction clause list that matches the given component rels must
 * be provided.
 *
 * Since there is more than one way to make a joinrel for more than two
 * base relations, the results we get here could depend on which component
 * rel pair is provided.  In theory we should get the same answers no matter
 * which pair is provided; in practice, since the selectivity estimation
 * routines don't handle all cases equally well, we might not.  But there's
 * not much to be done about it.  (Would it make sense to repeat the
 * calculations for each pair of input rels that's encountered, and somehow
 * average the results?  Probably way more trouble than it's worth.)
 *
 * We set the same relnode fields as set_baserel_size_estimates() does.
 */
void
set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
                           RelOptInfo *outer_rel,
                           RelOptInfo *inner_rel,
                           JoinType jointype,
                           List *restrictlist)
{
    double      temp;

    /* Start with the Cartesian product */
    temp = outer_rel->rows * inner_rel->rows;

    /*
     * Apply join restrictivity.  Note that we are only considering
     * clauses that become restriction clauses at this join level; we are
     * not double-counting them because they were not considered in
     * estimating the sizes of the component rels.
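     *
     * For example (illustrative numbers): joining a 1000-row rel to a
     * 5000-row rel with an estimated join selectivity of 0.0002 yields
     * 1000 * 5000 * 0.0002 = 1000 output rows.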
     */
    temp *= restrictlist_selectivity(root,
                                     restrictlist,
                                     0);

    /*
     * If we are doing an outer join, take that into account: the output
     * must be at least as large as the non-nullable input.  (Is there any
     * chance of being even smarter?)
     */
    switch (jointype)
    {
        case JOIN_INNER:
            break;
        case JOIN_LEFT:
            if (temp < outer_rel->rows)
                temp = outer_rel->rows;
            break;
        case JOIN_RIGHT:
            if (temp < inner_rel->rows)
                temp = inner_rel->rows;
            break;
        case JOIN_FULL:
            if (temp < outer_rel->rows)
                temp = outer_rel->rows;
            if (temp < inner_rel->rows)
                temp = inner_rel->rows;
            break;
        default:
            elog(ERROR, "set_joinrel_size_estimates: unsupported join type %d",
                 (int) jointype);
    }

    /*
     * Force estimate to be at least one row, to make explain output look
     * better and to avoid possible divide-by-zero when interpolating
     * cost.
     */
    if (temp < 1.0)
        temp = 1.0;

    rel->rows = temp;

    /*
     * We could apply set_rel_width() to compute the output tuple width
     * from scratch, but at present it's always just the sum of the input
     * widths, so why work harder than necessary?  If relnode.c is ever
     * taught to remove unneeded columns from join targetlists, go back to
     * using set_rel_width here.
     */
    rel->width = outer_rel->width + inner_rel->width;
}

/*
 * set_rel_width
 *    Set the estimated output width of the relation.
 */
static void
set_rel_width(Query *root, RelOptInfo *rel)
{
    int         tuple_width = 0;
    List       *tle;

    foreach(tle, rel->targetlist)
        tuple_width += compute_attribute_width((TargetEntry *) lfirst(tle));
    Assert(tuple_width >= 0);
    rel->width = tuple_width;
}

/*
 * compute_attribute_width
 *    Given a target list entry, find the size in bytes of the attribute.
 *
 * If a field is variable-length, we make a default assumption.  Would be
 * better if VACUUM recorded some stats about the average field width...
 * also, we have access to the atttypmod, but fail to use it...
 */
static int
compute_attribute_width(TargetEntry *tlistentry)
{
    int         width = get_typlen(tlistentry->resdom->restype);

    if (width < 0)              /* variable-length type? */
        return _DEFAULT_ATTRIBUTE_WIDTH_;
    else
        return width;
}

/*
 * relation_byte_size
 *    Estimate the storage space in bytes for a given number of tuples
 *    of a given width (size in bytes).
 */
static double
relation_byte_size(double tuples, int width)
{
    return tuples * ((double) (width + sizeof(HeapTupleData)));
}

/*
 * page_size
 *    Returns an estimate of the number of pages covered by a given
 *    number of tuples of a given width (size in bytes).
 */
static double
page_size(double tuples, int width)
{
    return ceil(relation_byte_size(tuples, width) / BLCKSZ);
}