1 /*-------------------------------------------------------------------------
4 * Routines to compute (and set) relation sizes and path costs
6 * Path costs are measured in units of disk accesses: one sequential page
7 * fetch has cost 1. All else is scaled relative to a page fetch, using
8 * the scaling parameters
10 * random_page_cost Cost of a non-sequential page fetch
11 * cpu_tuple_cost Cost of typical CPU time to process a tuple
12 * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple
13 * cpu_operator_cost Cost of CPU time to process a typical WHERE operator
15 * We also use a rough estimate "effective_cache_size" of the number of
16 * disk pages in Postgres + OS-level disk cache. (We can't simply use
17 * NBuffers for this purpose because that would ignore the effects of
18 * the kernel's disk cache.)
20 * Obviously, taking constants for these values is an oversimplification,
21 * but it's tough enough to get any useful estimates even at this level of
22 * detail. Note that all of these parameters are user-settable, in case
23 * the default values are drastically off for a particular platform.
25 * We compute two separate costs for each path:
26 * total_cost: total estimated cost to fetch all tuples
27 * startup_cost: cost that is expended before first tuple is fetched
28 * In some scenarios, such as when there is a LIMIT or we are implementing
29 * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
30 * path's result. A caller can estimate the cost of fetching a partial
31 * result by interpolating between startup_cost and total_cost. In detail:
32 * actual_cost = startup_cost +
33 * (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
34 * Note that a base relation's rows count (and, by extension, plan_rows for
35 * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
36 * that this equation works properly. (Also, these routines guarantee not to
37 * set the rows count to zero, so there will be no zero divide.) The LIMIT is
38 * applied as a top-level plan node.
41 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
42 * Portions Copyright (c) 1994, Regents of the University of California
45 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.102 2003/01/22 20:16:40 tgl Exp $
47 *-------------------------------------------------------------------------
54 #include "catalog/pg_statistic.h"
55 #include "executor/nodeHash.h"
56 #include "miscadmin.h"
57 #include "optimizer/clauses.h"
58 #include "optimizer/cost.h"
59 #include "optimizer/pathnode.h"
60 #include "parser/parsetree.h"
61 #include "utils/selfuncs.h"
62 #include "utils/lsyscache.h"
63 #include "utils/syscache.h"
66 #define LOG2(x) (log(x) / 0.693147180559945)
67 #define LOG6(x) (log(x) / 1.79175946922805)
70 double effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
71 double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
72 double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
73 double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
74 double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
76 Cost disable_cost = 100000000.0;
78 bool enable_seqscan = true;
79 bool enable_indexscan = true;
80 bool enable_tidscan = true;
81 bool enable_sort = true;
82 bool enable_hashagg = true;
83 bool enable_nestloop = true;
84 bool enable_mergejoin = true;
85 bool enable_hashjoin = true;
88 static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
90 static bool cost_qual_eval_walker(Node *node, QualCost *total);
91 static Selectivity approx_selectivity(Query *root, List *quals);
92 static void set_rel_width(Query *root, RelOptInfo *rel);
93 static double relation_byte_size(double tuples, int width);
94 static double page_size(double tuples, int width);
99 * Determines and returns the cost of scanning a relation sequentially.
101 * Note: for historical reasons, this routine and the others in this module
102 * use the passed result Path only to store their startup_cost and total_cost
103 * results into. All the input data they need is passed as separate
104 * parameters, even though much of it could be extracted from the Path.
107 cost_seqscan(Path *path, Query *root,
110 Cost startup_cost = 0;
114 /* Should only be applied to base relations */
115 Assert(length(baserel->relids) == 1);
116 Assert(baserel->rtekind == RTE_RELATION);
119 startup_cost += disable_cost;
124 * The cost of reading a page sequentially is 1.0, by definition. Note
125 * that the Unix kernel will typically do some amount of read-ahead
126 * optimization, so that this cost is less than the true cost of
127 * reading a page from disk. We ignore that issue here, but must take
128 * it into account when estimating the cost of non-sequential
131 run_cost += baserel->pages; /* sequential fetches with cost 1.0 */
134 startup_cost += baserel->baserestrictcost.startup;
135 cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
136 run_cost += cpu_per_tuple * baserel->tuples;
138 path->startup_cost = startup_cost;
139 path->total_cost = startup_cost + run_cost;
143 * cost_nonsequential_access
144 * Estimate the cost of accessing one page at random from a relation
145 * (or sort temp file) of the given size in pages.
147 * The simplistic model that the cost is random_page_cost is what we want
148 * to use for large relations; but for small ones that is a serious
149 * overestimate because of the effects of caching. This routine tries to
152 * Unfortunately we don't have any good way of estimating the effective cache
153 * size we are working with --- we know that Postgres itself has NBuffers
154 * internal buffers, but the size of the kernel's disk cache is uncertain,
155 * and how much of it we get to use is even less certain. We punt the problem
156 * for now by assuming we are given an effective_cache_size parameter.
158 * Given a guesstimated cache size, we estimate the actual I/O cost per page
159 * with the entirely ad-hoc equations:
160 * if relpages >= effective_cache_size:
161 * random_page_cost * (1 - (effective_cache_size/relpages)/2)
162 * if relpages < effective_cache_size:
163 * 1 + (random_page_cost/2-1) * (relpages/effective_cache_size) ** 2
164 * These give the right asymptotic behavior (=> 1.0 as relpages becomes
165 * small, => random_page_cost as it becomes large) and meet in the middle
166 * with the estimate that the cache is about 50% effective for a relation
167 * of the same size as effective_cache_size. (XXX this is probably all
168 * wrong, but I haven't been able to find any theory about how effective
169 * a disk cache should be presumed to be.)
172 cost_nonsequential_access(double relpages)
176 /* don't crash on bad input data */
177 if (relpages <= 0.0 || effective_cache_size <= 0.0)
178 return random_page_cost;
180 relsize = relpages / effective_cache_size;
183 return random_page_cost * (1.0 - 0.5 / relsize);
185 return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize;
190 * Determines and returns the cost of scanning a relation using an index.
192 * NOTE: an indexscan plan node can actually represent several passes,
193 * but here we consider the cost of just one pass.
195 * 'root' is the query root
196 * 'baserel' is the base relation the index is for
197 * 'index' is the index to be used
198 * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
199 * 'is_injoin' is T if we are considering using the index scan as the inside
200 * of a nestloop join (hence, some of the indexQuals are join clauses)
202 * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
203 * Any additional quals evaluated as qpquals may reduce the number of returned
204 * tuples, but they won't reduce the number of tuples we have to fetch from
205 * the table, so they don't reduce the scan cost.
208 cost_index(Path *path, Query *root,
214 Cost startup_cost = 0;
216 Cost indexStartupCost;
218 Selectivity indexSelectivity;
219 double indexCorrelation,
224 double tuples_fetched;
225 double pages_fetched;
229 /* Should only be applied to base relations */
230 Assert(IsA(baserel, RelOptInfo) &&
231 IsA(index, IndexOptInfo));
232 Assert(length(baserel->relids) == 1);
233 Assert(baserel->rtekind == RTE_RELATION);
235 if (!enable_indexscan)
236 startup_cost += disable_cost;
239 * Call index-access-method-specific code to estimate the processing
240 * cost for scanning the index, as well as the selectivity of the
241 * index (ie, the fraction of main-table tuples we will have to
242 * retrieve) and its correlation to the main-table tuple order.
244 OidFunctionCall8(index->amcostestimate,
245 PointerGetDatum(root),
246 PointerGetDatum(baserel),
247 PointerGetDatum(index),
248 PointerGetDatum(indexQuals),
249 PointerGetDatum(&indexStartupCost),
250 PointerGetDatum(&indexTotalCost),
251 PointerGetDatum(&indexSelectivity),
252 PointerGetDatum(&indexCorrelation));
254 /* all costs for touching index itself included here */
255 startup_cost += indexStartupCost;
256 run_cost += indexTotalCost - indexStartupCost;
259 * Estimate number of main-table tuples and pages fetched.
261 * When the index ordering is uncorrelated with the table ordering,
262 * we use an approximation proposed by Mackert and Lohman, "Index Scans
263 * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
264 * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
265 * The Mackert and Lohman approximation is that the number of pages
268 * min(2TNs/(2T+Ns), T) when T <= b
269 * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
270 * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
272 * T = # pages in table
273 * N = # tuples in table
274 * s = selectivity = fraction of table to be scanned
275 * b = # buffer pages available (we include kernel space here)
277 * When the index ordering is exactly correlated with the table ordering
278 * (just after a CLUSTER, for example), the number of pages fetched should
279 * be just sT. What's more, these will be sequential fetches, not the
280 * random fetches that occur in the uncorrelated case. So, depending on
281 * the extent of correlation, we should estimate the actual I/O cost
282 * somewhere between s * T * 1.0 and PF * random_cost. We currently
283 * interpolate linearly between these two endpoints based on the
284 * correlation squared (XXX is that appropriate?).
286 * In any case the number of tuples fetched is Ns.
290 tuples_fetched = indexSelectivity * baserel->tuples;
291 /* Don't believe estimates less than 1... */
292 if (tuples_fetched < 1.0)
293 tuples_fetched = 1.0;
295 /* This part is the Mackert and Lohman formula */
297 T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
298 b = (effective_cache_size > 1) ? effective_cache_size : 1.0;
303 (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
304 if (pages_fetched > T)
311 lim = (2.0 * T * b) / (2.0 * T - b);
312 if (tuples_fetched <= lim)
315 (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
320 b + (tuples_fetched - lim) * (T - b) / T;
325 * min_IO_cost corresponds to the perfectly correlated case
326 * (csquared=1), max_IO_cost to the perfectly uncorrelated case
327 * (csquared=0). Note that we just charge random_page_cost per page
328 * in the uncorrelated case, rather than using
329 * cost_nonsequential_access, since we've already accounted for
330 * caching effects by using the Mackert model.
332 min_IO_cost = ceil(indexSelectivity * T);
333 max_IO_cost = pages_fetched * random_page_cost;
336 * Now interpolate based on estimated index order correlation to get
337 * total disk I/O cost for main table accesses.
339 csquared = indexCorrelation * indexCorrelation;
341 run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
344 * Estimate CPU costs per tuple.
346 * Normally the indexquals will be removed from the list of restriction
347 * clauses that we have to evaluate as qpquals, so we should subtract
348 * their costs from baserestrictcost. But if we are doing a join then
349 * some of the indexquals are join clauses and shouldn't be subtracted.
350 * Rather than work out exactly how much to subtract, we don't subtract
353 * XXX For a lossy index, not all the quals will be removed and so we
354 * really shouldn't subtract their costs; but detecting that seems more
355 * expensive than it's worth.
357 startup_cost += baserel->baserestrictcost.startup;
358 cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
362 QualCost index_qual_cost;
364 cost_qual_eval(&index_qual_cost, indexQuals);
365 cpu_per_tuple -= index_qual_cost.per_tuple;
368 run_cost += cpu_per_tuple * tuples_fetched;
370 path->startup_cost = startup_cost;
371 path->total_cost = startup_cost + run_cost;
376 * Determines and returns the cost of scanning a relation using TIDs.
379 cost_tidscan(Path *path, Query *root,
380 RelOptInfo *baserel, List *tideval)
382 Cost startup_cost = 0;
385 int ntuples = length(tideval);
387 /* Should only be applied to base relations */
388 Assert(length(baserel->relids) == 1);
389 Assert(baserel->rtekind == RTE_RELATION);
392 startup_cost += disable_cost;
394 /* disk costs --- assume each tuple on a different page */
395 run_cost += random_page_cost * ntuples;
398 startup_cost += baserel->baserestrictcost.startup;
399 cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
400 run_cost += cpu_per_tuple * ntuples;
402 path->startup_cost = startup_cost;
403 path->total_cost = startup_cost + run_cost;
408 * Determines and returns the cost of scanning a function RTE.
411 cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
413 Cost startup_cost = 0;
417 /* Should only be applied to base relations that are functions */
418 Assert(length(baserel->relids) == 1);
419 Assert(baserel->rtekind == RTE_FUNCTION);
422 * For now, estimate function's cost at one operator eval per function
423 * call. Someday we should revive the function cost estimate columns
426 cpu_per_tuple = cpu_operator_cost;
428 /* Add scanning CPU costs */
429 startup_cost += baserel->baserestrictcost.startup;
430 cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
431 run_cost += cpu_per_tuple * baserel->tuples;
433 path->startup_cost = startup_cost;
434 path->total_cost = startup_cost + run_cost;
439 * Determines and returns the cost of sorting a relation, including
440 * the cost of reading the input data.
442 * If the total volume of data to sort is less than SortMem, we will do
443 * an in-memory sort, which requires no I/O and about t*log2(t) tuple
444 * comparisons for t tuples.
446 * If the total volume exceeds SortMem, we switch to a tape-style merge
447 * algorithm. There will still be about t*log2(t) tuple comparisons in
448 * total, but we will also need to write and read each tuple once per
449 * merge pass. We expect about ceil(log6(r)) merge passes where r is the
450 * number of initial runs formed (log6 because tuplesort.c uses six-tape
451 * merging). Since the average initial run should be about twice SortMem,
453 * disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem)))
454 * cpu = comparison_cost * t * log2(t)
456 * The disk traffic is assumed to be half sequential and half random
457 * accesses (XXX can't we refine that guess?)
459 * We charge two operator evals per tuple comparison, which should be in
460 * the right ballpark in most cases.
462 * 'pathkeys' is a list of sort keys
463 * 'input_cost' is the total cost for reading the input data
464 * 'tuples' is the number of tuples in the relation
465 * 'width' is the average tuple width in bytes
467 * NOTE: some callers currently pass NIL for pathkeys because they
468 * can't conveniently supply the sort keys. Since this routine doesn't
469 * currently do anything with pathkeys anyway, that doesn't matter...
470 * but if it ever does, it should react gracefully to lack of key data.
471 * (Actually, the thing we'd most likely be interested in is just the number
472 * of sort keys, which all callers *could* supply.)
475 cost_sort(Path *path, Query *root,
476 List *pathkeys, Cost input_cost, double tuples, int width)
478 Cost startup_cost = input_cost;
480 double nbytes = relation_byte_size(tuples, width);
481 long sortmembytes = SortMem * 1024L;
484 startup_cost += disable_cost;
487 * We want to be sure the cost of a sort is never estimated as zero,
488 * even if passed-in tuple count is zero. Besides, mustn't do
497 * Assume about two operator evals per tuple comparison and N log2 N
500 startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
503 if (nbytes > sortmembytes)
505 double npages = ceil(nbytes / BLCKSZ);
506 double nruns = nbytes / (sortmembytes * 2);
507 double log_runs = ceil(LOG6(nruns));
508 double npageaccesses;
512 npageaccesses = 2.0 * npages * log_runs;
513 /* Assume half are sequential (cost 1), half are not */
514 startup_cost += npageaccesses *
515 (1.0 + cost_nonsequential_access(npages)) * 0.5;
519 * Also charge a small amount (arbitrarily set equal to operator cost)
520 * per extracted tuple.
522 run_cost += cpu_operator_cost * tuples;
524 path->startup_cost = startup_cost;
525 path->total_cost = startup_cost + run_cost;
530 * Determines and returns the cost of materializing a relation, including
531 * the cost of reading the input data.
533 * If the total volume of data to materialize exceeds SortMem, we will need
534 * to write it to disk, so the cost is much higher in that case.
537 cost_material(Path *path,
538 Cost input_cost, double tuples, int width)
540 Cost startup_cost = input_cost;
542 double nbytes = relation_byte_size(tuples, width);
543 long sortmembytes = SortMem * 1024L;
546 if (nbytes > sortmembytes)
548 double npages = ceil(nbytes / BLCKSZ);
550 /* We'll write during startup and read during retrieval */
551 startup_cost += npages;
556 * Also charge a small amount per extracted tuple. We use cpu_tuple_cost
557 * so that it doesn't appear worthwhile to materialize a bare seqscan.
559 run_cost += cpu_tuple_cost * tuples;
561 path->startup_cost = startup_cost;
562 path->total_cost = startup_cost + run_cost;
567 * Determines and returns the cost of performing an Agg plan node,
568 * including the cost of its input.
570 * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
571 * are for appropriately-sorted input.
574 cost_agg(Path *path, Query *root,
575 AggStrategy aggstrategy, int numAggs,
576 int numGroupCols, double numGroups,
577 Cost input_startup_cost, Cost input_total_cost,
584 * We charge one cpu_operator_cost per aggregate function per input
585 * tuple, and another one per output tuple (corresponding to transfn
586 * and finalfn calls respectively). If we are grouping, we charge an
587 * additional cpu_operator_cost per grouping column per input tuple
588 * for grouping comparisons.
590 * We will produce a single output tuple if not grouping,
591 * and a tuple per group otherwise.
593 if (aggstrategy == AGG_PLAIN)
595 startup_cost = input_total_cost;
596 startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
597 /* we aren't grouping */
598 total_cost = startup_cost;
600 else if (aggstrategy == AGG_SORTED)
602 /* Here we are able to deliver output on-the-fly */
603 startup_cost = input_startup_cost;
604 total_cost = input_total_cost;
605 total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs;
606 total_cost += cpu_operator_cost * input_tuples * numGroupCols;
610 /* must be AGG_HASHED */
611 startup_cost = input_total_cost;
612 startup_cost += cpu_operator_cost * input_tuples * numAggs;
613 startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
614 total_cost = startup_cost;
615 total_cost += cpu_operator_cost * numGroups * numAggs;
618 path->startup_cost = startup_cost;
619 path->total_cost = total_cost;
624 * Determines and returns the cost of performing a Group plan node,
625 * including the cost of its input.
627 * Note: caller must ensure that input costs are for appropriately-sorted
631 cost_group(Path *path, Query *root,
632 int numGroupCols, double numGroups,
633 Cost input_startup_cost, Cost input_total_cost,
639 startup_cost = input_startup_cost;
640 total_cost = input_total_cost;
643 * Charge one cpu_operator_cost per comparison per input tuple. We
644 * assume all columns get compared at most of the tuples.
646 total_cost += cpu_operator_cost * input_tuples * numGroupCols;
648 path->startup_cost = startup_cost;
649 path->total_cost = total_cost;
654 * Determines and returns the cost of joining two relations using the
655 * nested loop algorithm.
657 * 'outer_path' is the path for the outer relation
658 * 'inner_path' is the path for the inner relation
659 * 'restrictlist' are the RestrictInfo nodes to be applied at the join
662 cost_nestloop(Path *path, Query *root,
667 Cost startup_cost = 0;
670 QualCost restrict_qual_cost;
673 if (!enable_nestloop)
674 startup_cost += disable_cost;
676 /* cost of source data */
679 * NOTE: clearly, we must pay both outer and inner paths' startup_cost
680 * before we can start returning tuples, so the join's startup cost is
681 * their sum. What's not so clear is whether the inner path's
682 * startup_cost must be paid again on each rescan of the inner path.
683 * This is not true if the inner path is materialized or is a hashjoin,
684 * but probably is true otherwise.
686 startup_cost += outer_path->startup_cost + inner_path->startup_cost;
687 run_cost += outer_path->total_cost - outer_path->startup_cost;
688 if (IsA(inner_path, MaterialPath) ||
689 IsA(inner_path, HashPath))
691 /* charge only run cost for each iteration of inner path */
692 run_cost += outer_path->parent->rows *
693 (inner_path->total_cost - inner_path->startup_cost);
698 * charge total cost for each iteration of inner path, except we
699 * already charged the first startup_cost in our own startup
701 run_cost += outer_path->parent->rows * inner_path->total_cost
702 - inner_path->startup_cost;
706 * Number of tuples processed (not number emitted!). If inner path is
707 * an indexscan, be sure to use its estimated output row count, which
708 * may be lower than the restriction-clause-only row count of its
711 if (IsA(inner_path, IndexPath))
712 ntuples = ((IndexPath *) inner_path)->rows;
714 ntuples = inner_path->parent->rows;
715 ntuples *= outer_path->parent->rows;
718 cost_qual_eval(&restrict_qual_cost, restrictlist);
719 startup_cost += restrict_qual_cost.startup;
720 cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
721 run_cost += cpu_per_tuple * ntuples;
723 path->startup_cost = startup_cost;
724 path->total_cost = startup_cost + run_cost;
729 * Determines and returns the cost of joining two relations using the
730 * merge join algorithm.
732 * 'outer_path' is the path for the outer relation
733 * 'inner_path' is the path for the inner relation
734 * 'restrictlist' are the RestrictInfo nodes to be applied at the join
735 * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
736 * (this should be a subset of the restrictlist)
737 * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
738 * to sort the outer and inner relations, or NIL if no explicit
739 * sort is needed because the source path is already ordered
742 cost_mergejoin(Path *path, Query *root,
750 Cost startup_cost = 0;
753 QualCost restrict_qual_cost;
754 RestrictInfo *firstclause;
758 Selectivity outerscansel,
760 Path sort_path; /* dummy for result of cost_sort */
762 if (!enable_mergejoin)
763 startup_cost += disable_cost;
766 * A merge join will stop as soon as it exhausts either input stream.
767 * Estimate fraction of the left and right inputs that will actually
768 * need to be scanned. We use only the first (most significant) merge
769 * clause for this purpose.
771 * Since this calculation is somewhat expensive, and will be the same for
772 * all mergejoin paths associated with the merge clause, we cache the
773 * results in the RestrictInfo node.
775 firstclause = (RestrictInfo *) lfirst(mergeclauses);
776 if (firstclause->left_mergescansel < 0) /* not computed yet? */
777 mergejoinscansel(root, (Node *) firstclause->clause,
778 &firstclause->left_mergescansel,
779 &firstclause->right_mergescansel);
781 if (is_subseti(firstclause->left_relids, outer_path->parent->relids))
783 /* left side of clause is outer */
784 outerscansel = firstclause->left_mergescansel;
785 innerscansel = firstclause->right_mergescansel;
789 /* left side of clause is inner */
790 outerscansel = firstclause->right_mergescansel;
791 innerscansel = firstclause->left_mergescansel;
794 /* convert selectivity to row count; must scan at least one row */
796 outer_rows = ceil(outer_path->parent->rows * outerscansel);
799 inner_rows = ceil(inner_path->parent->rows * innerscansel);
804 * Readjust scan selectivities to account for above rounding. This is
805 * normally an insignificant effect, but when there are only a few rows
806 * in the inputs, failing to do this makes for a large percentage error.
808 outerscansel = outer_rows / outer_path->parent->rows;
809 innerscansel = inner_rows / inner_path->parent->rows;
811 /* cost of source data */
814 * Note we are assuming that each source tuple is fetched just once,
815 * which is not right in the presence of equal keys. If we had a way
816 * of estimating the proportion of equal keys, we could apply a
817 * correction factor...
819 if (outersortkeys) /* do we need to sort outer? */
821 cost_sort(&sort_path,
824 outer_path->total_cost,
825 outer_path->parent->rows,
826 outer_path->parent->width);
827 startup_cost += sort_path.startup_cost;
828 run_cost += (sort_path.total_cost - sort_path.startup_cost)
833 startup_cost += outer_path->startup_cost;
834 run_cost += (outer_path->total_cost - outer_path->startup_cost)
838 if (innersortkeys) /* do we need to sort inner? */
840 cost_sort(&sort_path,
843 inner_path->total_cost,
844 inner_path->parent->rows,
845 inner_path->parent->width);
846 startup_cost += sort_path.startup_cost;
847 run_cost += (sort_path.total_cost - sort_path.startup_cost)
852 startup_cost += inner_path->startup_cost;
853 run_cost += (inner_path->total_cost - inner_path->startup_cost)
858 * The number of tuple comparisons needed depends drastically on the
859 * number of equal keys in the two source relations, which we have no
860 * good way of estimating. (XXX could the MCV statistics help?)
861 * Somewhat arbitrarily, we charge one tuple comparison (one
862 * cpu_operator_cost) for each tuple in the two source relations.
863 * This is probably a lower bound.
865 run_cost += cpu_operator_cost * (outer_rows + inner_rows);
868 * For each tuple that gets through the mergejoin proper, we charge
869 * cpu_tuple_cost plus the cost of evaluating additional restriction
870 * clauses that are to be applied at the join. It's OK to use an
871 * approximate selectivity here, since in most cases this is a minor
872 * component of the cost. NOTE: it's correct to use the unscaled rows
873 * counts here, not the scaled-down counts we obtained above.
875 ntuples = approx_selectivity(root, mergeclauses) *
876 outer_path->parent->rows * inner_path->parent->rows;
879 cost_qual_eval(&restrict_qual_cost, restrictlist);
880 startup_cost += restrict_qual_cost.startup;
881 cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
882 run_cost += cpu_per_tuple * ntuples;
884 path->startup_cost = startup_cost;
885 path->total_cost = startup_cost + run_cost;
890 * Determines and returns the cost of joining two relations using the
891 * hash join algorithm.
893 * 'outer_path' is the path for the outer relation
894 * 'inner_path' is the path for the inner relation
895 * 'restrictlist' are the RestrictInfo nodes to be applied at the join
896 * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
897 * (this should be a subset of the restrictlist)
900 cost_hashjoin(Path *path, Query *root,
906 Cost startup_cost = 0;
909 QualCost restrict_qual_cost;
911 double outerbytes = relation_byte_size(outer_path->parent->rows,
912 outer_path->parent->width);
913 double innerbytes = relation_byte_size(inner_path->parent->rows,
914 inner_path->parent->width);
918 Selectivity innerbucketsize;
921 if (!enable_hashjoin)
922 startup_cost += disable_cost;
924 /* cost of source data */
925 startup_cost += outer_path->startup_cost;
926 run_cost += outer_path->total_cost - outer_path->startup_cost;
927 startup_cost += inner_path->total_cost;
929 /* cost of computing hash function: must do it once per input tuple */
930 startup_cost += cpu_operator_cost * inner_path->parent->rows;
931 run_cost += cpu_operator_cost * outer_path->parent->rows;
933 /* Get hash table size that executor would use for inner relation */
934 ExecChooseHashTableSize(inner_path->parent->rows,
935 inner_path->parent->width,
941 * Determine bucketsize fraction for inner relation. We use the
942 * smallest bucketsize estimated for any individual hashclause;
943 * this is undoubtedly conservative.
945 innerbucketsize = 1.0;
946 foreach(hcl, hashclauses)
948 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
949 Selectivity thisbucketsize;
951 Assert(IsA(restrictinfo, RestrictInfo));
954 * First we have to figure out which side of the hashjoin clause is the
957 * Since we tend to visit the same clauses over and over when planning
958 * a large query, we cache the bucketsize estimate in the RestrictInfo
959 * node to avoid repeated lookups of statistics.
961 if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
963 /* righthand side is inner */
964 thisbucketsize = restrictinfo->right_bucketsize;
965 if (thisbucketsize < 0)
968 thisbucketsize = estimate_hash_bucketsize(root,
969 (Var *) get_rightop(restrictinfo->clause),
971 restrictinfo->right_bucketsize = thisbucketsize;
976 Assert(is_subseti(restrictinfo->left_relids,
977 inner_path->parent->relids));
978 /* lefthand side is inner */
979 thisbucketsize = restrictinfo->left_bucketsize;
980 if (thisbucketsize < 0)
983 thisbucketsize = estimate_hash_bucketsize(root,
984 (Var *) get_leftop(restrictinfo->clause),
986 restrictinfo->left_bucketsize = thisbucketsize;
990 if (innerbucketsize > thisbucketsize)
991 innerbucketsize = thisbucketsize;
995 * The number of tuple comparisons needed is the number of outer
996 * tuples times the typical number of tuples in a hash bucket, which
997 * is the inner relation size times its bucketsize fraction. We charge
998 * one cpu_operator_cost per tuple comparison.
1000 run_cost += cpu_operator_cost * outer_path->parent->rows *
1001 ceil(inner_path->parent->rows * innerbucketsize);
1004 * For each tuple that gets through the hashjoin proper, we charge
1005 * cpu_tuple_cost plus the cost of evaluating additional restriction
1006 * clauses that are to be applied at the join. It's OK to use an
1007 * approximate selectivity here, since in most cases this is a minor
1008 * component of the cost.
1010 ntuples = approx_selectivity(root, hashclauses) *
1011 outer_path->parent->rows * inner_path->parent->rows;
1014 cost_qual_eval(&restrict_qual_cost, restrictlist);
1015 startup_cost += restrict_qual_cost.startup;
1016 cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
1017 run_cost += cpu_per_tuple * ntuples;
1020 * if inner relation is too big then we will need to "batch" the join,
1021 * which implies writing and reading most of the tuples to disk an
1022 * extra time. Charge one cost unit per page of I/O (correct since it
1023 * should be nice and sequential...). Writing the inner rel counts as
1024 * startup cost, all the rest as run cost.
1028 double outerpages = page_size(outer_path->parent->rows,
1029 outer_path->parent->width);
1030 double innerpages = page_size(inner_path->parent->rows,
1031 inner_path->parent->width);
1033 startup_cost += innerpages;
1034 run_cost += innerpages + 2 * outerpages;
1038 * Bias against putting larger relation on inside. We don't want an
1039 * absolute prohibition, though, since larger relation might have
1040 * better bucketsize --- and we can't trust the size estimates
1041 * unreservedly, anyway. Instead, inflate the run cost by the
1042 * square root of the size ratio. (Why square root? No real good
1043 * reason, but it seems reasonable...)
1045 * Note: before 7.4 we implemented this by inflating startup cost;
1046 * but if there's a disable_cost component in the input paths'
1047 * startup cost, that unfairly penalizes the hash. Probably it'd
1048 * be better to keep track of disable penalty separately from cost.
1050 if (innerbytes > outerbytes && outerbytes > 0)
1051 run_cost *= sqrt(innerbytes / outerbytes);
1053 path->startup_cost = startup_cost;
1054 path->total_cost = startup_cost + run_cost;
1058 * Estimate hash bucketsize fraction (ie, number of entries in a bucket
1059 * divided by total tuples in relation) if the specified Var is used
1062 * XXX This is really pretty bogus since we're effectively assuming that the
1063 * distribution of hash keys will be the same after applying restriction
1064 * clauses as it was in the underlying relation. However, we are not nearly
1065 * smart enough to figure out how the restrict clauses might change the
1066 * distribution, so this will have to do for now.
1068 * We are passed the number of buckets the executor will use for the given
1069 * input relation. If the data were perfectly distributed, with the same
1070 * number of tuples going into each available bucket, then the bucketsize
1071 * fraction would be 1/nbuckets. But this happy state of affairs will occur
1072 * only if (a) there are at least nbuckets distinct data values, and (b)
1073 * we have a not-too-skewed data distribution. Otherwise the buckets will
1074 * be nonuniformly occupied. If the other relation in the join has a key
1075 * distribution similar to this one's, then the most-loaded buckets are
1076 * exactly those that will be probed most often. Therefore, the "average"
1077 * bucket size for costing purposes should really be taken as something close
1078 * to the "worst case" bucket size. We try to estimate this by adjusting the
1079 * fraction if there are too few distinct data values, and then scaling up
1080 * by the ratio of the most common value's frequency to the average frequency.
1082 * If no statistics are available, use a default estimate of 0.1. This will
1083 * discourage use of a hash rather strongly if the inner relation is large,
1084 * which is what we want. We do not want to hash unless we know that the
1085 * inner rel is well-dispersed (or the alternatives seem much worse).
1088 estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
1093 Form_pg_statistic stats;
1102 * Lookup info about var's relation and attribute; if none available,
1103 * return default estimate.
1105 if (var == NULL || !IsA(var, Var))
1108 relid = getrelid(var->varno, root->rtable);
1109 if (relid == InvalidOid)
1112 rel = find_base_rel(root, var->varno);
1114 if (rel->tuples <= 0.0 || rel->rows <= 0.0)
1115 return 0.1; /* ensure we can divide below */
1117 tuple = SearchSysCache(STATRELATT,
1118 ObjectIdGetDatum(relid),
1119 Int16GetDatum(var->varattno),
1121 if (!HeapTupleIsValid(tuple))
1124 * Perhaps the Var is a system attribute; if so, it will have no
1125 * entry in pg_statistic, but we may be able to guess something
1126 * about its distribution anyway.
1128 switch (var->varattno)
1130 case ObjectIdAttributeNumber:
1131 case SelfItemPointerAttributeNumber:
1132 /* these are unique, so buckets should be well-distributed */
1133 return 1.0 / (double) nbuckets;
1134 case TableOidAttributeNumber:
1135 /* hashing this is a terrible idea... */
1140 stats = (Form_pg_statistic) GETSTRUCT(tuple);
1143 * Obtain number of distinct data values in raw relation.
1145 ndistinct = stats->stadistinct;
1146 if (ndistinct < 0.0)
1147 ndistinct = -ndistinct * rel->tuples;
1149 if (ndistinct <= 0.0) /* ensure we can divide */
1151 ReleaseSysCache(tuple);
1155 /* Also compute avg freq of all distinct data values in raw relation */
1156 avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
1159 * Adjust ndistinct to account for restriction clauses. Observe we
1160 * are assuming that the data distribution is affected uniformly by
1161 * the restriction clauses!
1163 * XXX Possibly better way, but much more expensive: multiply by
1164 * selectivity of rel's restriction clauses that mention the target
1167 ndistinct *= rel->rows / rel->tuples;
1170 * Initial estimate of bucketsize fraction is 1/nbuckets as long as
1171 * the number of buckets is less than the expected number of distinct
1172 * values; otherwise it is 1/ndistinct.
1174 if (ndistinct > (double) nbuckets)
1175 estfract = 1.0 / (double) nbuckets;
1177 estfract = 1.0 / ndistinct;
1180 * Look up the frequency of the most common value, if available.
1184 if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
1185 STATISTIC_KIND_MCV, InvalidOid,
1186 NULL, NULL, &numbers, &nnumbers))
1189 * The first MCV stat is for the most common value.
1192 mcvfreq = numbers[0];
1193 free_attstatsslot(var->vartype, NULL, 0,
1198 * Adjust estimated bucketsize upward to account for skewed
1201 if (avgfreq > 0.0 && mcvfreq > avgfreq)
1202 estfract *= mcvfreq / avgfreq;
1205 * Clamp bucketsize to sane range (the above adjustment could easily
1206 * produce an out-of-range result). We set the lower bound a little
1207 * above zero, since zero isn't a very sane result.
1209 if (estfract < 1.0e-6)
1211 else if (estfract > 1.0)
1214 ReleaseSysCache(tuple);
1216 return (Selectivity) estfract;
1222 * Estimate the CPU costs of evaluating a WHERE clause.
1223 * The input can be either an implicitly-ANDed list of boolean
1224 * expressions, or a list of RestrictInfo nodes.
1225 * The result includes both a one-time (startup) component,
1226 * and a per-evaluation component.
1229 cost_qual_eval(QualCost *cost, List *quals)
1234 cost->per_tuple = 0;
1236 /* We don't charge any cost for the implicit ANDing at top level ... */
1240 Node *qual = (Node *) lfirst(l);
1243 * RestrictInfo nodes contain an eval_cost field reserved for this
1244 * routine's use, so that it's not necessary to evaluate the qual
1245 * clause's cost more than once. If the clause's cost hasn't been
1246 * computed yet, the field's startup value will contain -1.
1248 if (qual && IsA(qual, RestrictInfo))
1250 RestrictInfo *restrictinfo = (RestrictInfo *) qual;
1252 if (restrictinfo->eval_cost.startup < 0)
1254 restrictinfo->eval_cost.startup = 0;
1255 restrictinfo->eval_cost.per_tuple = 0;
1256 cost_qual_eval_walker((Node *) restrictinfo->clause,
1257 &restrictinfo->eval_cost);
1259 cost->startup += restrictinfo->eval_cost.startup;
1260 cost->per_tuple += restrictinfo->eval_cost.per_tuple;
1264 /* If it's a bare expression, must always do it the hard way */
1265 cost_qual_eval_walker(qual, cost);
1271 cost_qual_eval_walker(Node *node, QualCost *total)
1277 * Our basic strategy is to charge one cpu_operator_cost for each
1278 * operator or function node in the given tree. Vars and Consts are
1279 * charged zero, and so are boolean operators (AND, OR, NOT).
1280 * Simplistic, but a lot better than no model at all.
1282 * Should we try to account for the possibility of short-circuit
1283 * evaluation of AND/OR?
1285 if (IsA(node, FuncExpr) ||
1286 IsA(node, OpExpr) ||
1287 IsA(node, DistinctExpr))
1289 total->per_tuple += cpu_operator_cost;
1291 else if (IsA(node, SubLink))
1293 /* This routine should not be applied to un-planned expressions */
1294 elog(ERROR, "cost_qual_eval: can't handle unplanned sub-select");
1296 else if (IsA(node, SubPlan))
1299 * A subplan node in an expression typically indicates that the
1300 * subplan will be executed on each evaluation, so charge accordingly.
1301 * (Sub-selects that can be executed as InitPlans have already been
1302 * removed from the expression.)
1304 * An exception occurs when we have decided we can implement the
1305 * subplan by hashing.
1308 SubPlan *subplan = (SubPlan *) node;
1309 Plan *plan = subplan->plan;
1311 if (subplan->useHashTable)
1314 * If we are using a hash table for the subquery outputs, then
1315 * the cost of evaluating the query is a one-time cost.
1316 * We charge one cpu_operator_cost per tuple for the work of
1317 * loading the hashtable, too.
1319 total->startup += plan->total_cost +
1320 cpu_operator_cost * plan->plan_rows;
1322 * The per-tuple costs include the cost of evaluating the
1323 * lefthand expressions, plus the cost of probing the hashtable.
1324 * Recursion into the exprs list will handle the lefthand
1325 * expressions properly, and will count one cpu_operator_cost
1326 * for each comparison operator. That is probably too low for
1327 * the probing cost, but it's hard to make a better estimate,
1328 * so live with it for now.
1334 * Otherwise we will be rescanning the subplan output on each
1335 * evaluation. We need to estimate how much of the output
1336 * we will actually need to scan. NOTE: this logic should
1337 * agree with the estimates used by make_subplan() in
1340 Cost plan_run_cost = plan->total_cost - plan->startup_cost;
1342 if (subplan->subLinkType == EXISTS_SUBLINK)
1344 /* we only need to fetch 1 tuple */
1345 total->per_tuple += plan_run_cost / plan->plan_rows;
1347 else if (subplan->subLinkType == ALL_SUBLINK ||
1348 subplan->subLinkType == ANY_SUBLINK)
1350 /* assume we need 50% of the tuples */
1351 total->per_tuple += 0.50 * plan_run_cost;
1352 /* also charge a cpu_operator_cost per row examined */
1353 total->per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost;
1357 /* assume we need all tuples */
1358 total->per_tuple += plan_run_cost;
1361 * Also account for subplan's startup cost.
1362 * If the subplan is uncorrelated or undirect correlated,
1363 * AND its topmost node is a Sort or Material node, assume
1364 * that we'll only need to pay its startup cost once;
1365 * otherwise assume we pay the startup cost every time.
1367 if (subplan->parParam == NIL &&
1369 IsA(plan, Material)))
1371 total->startup += plan->startup_cost;
1375 total->per_tuple += plan->startup_cost;
1380 return expression_tree_walker(node, cost_qual_eval_walker,
1386 * approx_selectivity
1387 * Quick-and-dirty estimation of clause selectivities.
1388 * The input can be either an implicitly-ANDed list of boolean
1389 * expressions, or a list of RestrictInfo nodes (typically the latter).
1391 * The "quick" part comes from caching the selectivity estimates so we can
1392 * avoid recomputing them later. (Since the same clauses are typically
1393 * examined over and over in different possible join trees, this makes a
1396 * The "dirty" part comes from the fact that the selectivities of multiple
1397 * clauses are estimated independently and multiplied together. Now
1398 * clauselist_selectivity often can't do any better than that anyhow, but
1399 * for some situations (such as range constraints) it is smarter.
1401 * Since we are only using the results to estimate how many potential
1402 * output tuples are generated and passed through qpqual checking, it
1403 * seems OK to live with the approximation.
1406 approx_selectivity(Query *root, List *quals)
1408 Selectivity total = 1.0;
1413 Node *qual = (Node *) lfirst(l);
1417 * RestrictInfo nodes contain a this_selec field reserved for this
1418 * routine's use, so that it's not necessary to evaluate the qual
1419 * clause's selectivity more than once. If the clause's
1420 * selectivity hasn't been computed yet, the field will contain
1423 if (qual && IsA(qual, RestrictInfo))
1425 RestrictInfo *restrictinfo = (RestrictInfo *) qual;
1427 if (restrictinfo->this_selec < 0)
1428 restrictinfo->this_selec =
1429 clause_selectivity(root,
1430 (Node *) restrictinfo->clause,
1432 selec = restrictinfo->this_selec;
1436 /* If it's a bare expression, must always do it the hard way */
1437 selec = clause_selectivity(root, qual, 0);
1446 * set_baserel_size_estimates
1447 * Set the size estimates for the given base relation.
1449 * The rel's targetlist and restrictinfo list must have been constructed
1452 * We set the following fields of the rel node:
1453 * rows: the estimated number of output tuples (after applying
1454 * restriction clauses).
1455 * width: the estimated average output tuple width in bytes.
1456 * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
1459 set_baserel_size_estimates(Query *root, RelOptInfo *rel)
1463 /* Should only be applied to base relations */
1464 Assert(length(rel->relids) == 1);
1466 temp = rel->tuples *
1467 restrictlist_selectivity(root,
1468 rel->baserestrictinfo,
1469 lfirsti(rel->relids));
1472 * Force estimate to be at least one row, to make explain output look
1473 * better and to avoid possible divide-by-zero when interpolating
1474 * cost. Make it an integer, too.
1483 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
1485 set_rel_width(root, rel);
1489 * set_joinrel_size_estimates
1490 * Set the size estimates for the given join relation.
1492 * The rel's targetlist must have been constructed already, and a
1493 * restriction clause list that matches the given component rels must
1496 * Since there is more than one way to make a joinrel for more than two
1497 * base relations, the results we get here could depend on which component
1498 * rel pair is provided. In theory we should get the same answers no matter
1499 * which pair is provided; in practice, since the selectivity estimation
1500 * routines don't handle all cases equally well, we might not. But there's
1501 * not much to be done about it. (Would it make sense to repeat the
1502 * calculations for each pair of input rels that's encountered, and somehow
1503 * average the results? Probably way more trouble than it's worth.)
1505 * We set the same relnode fields as set_baserel_size_estimates() does.
1508 set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
1509 RelOptInfo *outer_rel,
1510 RelOptInfo *inner_rel,
1519 * Compute joinclause selectivity. Note that we are only considering
1520 * clauses that become restriction clauses at this join level; we are
1521 * not double-counting them because they were not considered in
1522 * estimating the sizes of the component rels.
1524 selec = restrictlist_selectivity(root,
1529 * Normally, we multiply size of Cartesian product by selectivity.
1530 * But for JOIN_IN, we just multiply the lefthand size by the selectivity
1531 * (is that really right?). For UNIQUE_OUTER or UNIQUE_INNER, use
1532 * the estimated number of distinct rows (again, is that right?)
1534 * If we are doing an outer join, take that into account: the output
1535 * must be at least as large as the non-nullable input. (Is there any
1536 * chance of being even smarter?)
1541 temp = outer_rel->rows * inner_rel->rows * selec;
1544 temp = outer_rel->rows * inner_rel->rows * selec;
1545 if (temp < outer_rel->rows)
1546 temp = outer_rel->rows;
1549 temp = outer_rel->rows * inner_rel->rows * selec;
1550 if (temp < inner_rel->rows)
1551 temp = inner_rel->rows;
1554 temp = outer_rel->rows * inner_rel->rows * selec;
1555 if (temp < outer_rel->rows)
1556 temp = outer_rel->rows;
1557 if (temp < inner_rel->rows)
1558 temp = inner_rel->rows;
1561 temp = outer_rel->rows * selec;
1563 case JOIN_REVERSE_IN:
1564 temp = inner_rel->rows * selec;
1566 case JOIN_UNIQUE_OUTER:
1567 upath = create_unique_path(root, outer_rel,
1568 outer_rel->cheapest_total_path);
1569 temp = upath->rows * inner_rel->rows * selec;
1571 case JOIN_UNIQUE_INNER:
1572 upath = create_unique_path(root, inner_rel,
1573 inner_rel->cheapest_total_path);
1574 temp = outer_rel->rows * upath->rows * selec;
1577 elog(ERROR, "set_joinrel_size_estimates: unsupported join type %d",
1579 temp = 0; /* keep compiler quiet */
1584 * Force estimate to be at least one row, to make explain output look
1585 * better and to avoid possible divide-by-zero when interpolating
1586 * cost. Make it an integer, too.
1596 * We could apply set_rel_width() to compute the output tuple width
1597 * from scratch, but at present it's always just the sum of the input
1598 * widths, so why work harder than necessary? If relnode.c is ever
1599 * taught to remove unneeded columns from join targetlists, go back to
1600 * using set_rel_width here.
1602 rel->width = outer_rel->width + inner_rel->width;
1606 * set_function_size_estimates
1607 * Set the size estimates for a base relation that is a function call.
1609 * The rel's targetlist and restrictinfo list must have been constructed
1612 * We set the following fields of the rel node:
1613 * rows: the estimated number of output tuples (after applying
1614 * restriction clauses).
1615 * width: the estimated average output tuple width in bytes.
1616 * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
1619 set_function_size_estimates(Query *root, RelOptInfo *rel)
1623 /* Should only be applied to base relations that are functions */
1624 Assert(length(rel->relids) == 1);
1625 Assert(rel->rtekind == RTE_FUNCTION);
1628 * Estimate number of rows the function itself will return.
1630 * XXX no idea how to do this yet; but should at least check whether
1631 * function returns set or not...
1635 /* Now estimate number of output rows */
1636 temp = rel->tuples *
1637 restrictlist_selectivity(root,
1638 rel->baserestrictinfo,
1639 lfirsti(rel->relids));
1642 * Force estimate to be at least one row, to make explain output look
1643 * better and to avoid possible divide-by-zero when interpolating
1644 * cost. Make it an integer, too.
1653 cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
1655 set_rel_width(root, rel);
1661 * Set the estimated output width of the relation.
1663 * NB: this works best on base relations because it prefers to look at
1664 * real Vars. It will fail to make use of pg_statistic info when applied
1665 * to a subquery relation, even if the subquery outputs are simple vars
1666 * that we could have gotten info for. Is it worth trying to be smarter
1670 set_rel_width(Query *root, RelOptInfo *rel)
1672 int32 tuple_width = 0;
1675 foreach(tllist, rel->targetlist)
1677 TargetEntry *tle = (TargetEntry *) lfirst(tllist);
1681 * If it's a Var, try to get statistical info from pg_statistic.
1683 if (tle->expr && IsA(tle->expr, Var))
1685 Var *var = (Var *) tle->expr;
1688 relid = getrelid(var->varno, root->rtable);
1689 if (relid != InvalidOid)
1691 item_width = get_attavgwidth(relid, var->varattno);
1694 tuple_width += item_width;
1701 * Not a Var, or can't find statistics for it. Estimate using
1702 * just the type info.
1704 item_width = get_typavgwidth(tle->resdom->restype,
1705 tle->resdom->restypmod);
1706 Assert(item_width > 0);
1707 tuple_width += item_width;
1709 Assert(tuple_width >= 0);
1710 rel->width = tuple_width;
1714 * relation_byte_size
1715 * Estimate the storage space in bytes for a given number of tuples
1716 * of a given width (size in bytes).
1719 relation_byte_size(double tuples, int width)
1721 return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData)));
1726 * Returns an estimate of the number of pages covered by a given
1727 * number of tuples of a given width (size in bytes).
1730 page_size(double tuples, int width)
1732 return ceil(relation_byte_size(tuples, width) / BLCKSZ);