granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/costsize.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * costsize.c
   4  *        Routines to compute (and set) relation sizes and path costs
   5  *
   6  * Path costs are measured in units of disk accesses: one sequential page
   7  * fetch has cost 1.  All else is scaled relative to a page fetch, using
   8  * the scaling parameters
   9  *
  10  *      random_page_cost        Cost of a non-sequential page fetch
  11  *      cpu_tuple_cost          Cost of typical CPU time to process a tuple
  12  *      cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
  13  *      cpu_operator_cost       Cost of CPU time to process a typical WHERE operator
  14  *
  15  * We also use a rough estimate "effective_cache_size" of the number of
  16  * disk pages in Postgres + OS-level disk cache.  (We can't simply use
  17  * NBuffers for this purpose because that would ignore the effects of
  18  * the kernel's disk cache.)
  19  *
  20  * Obviously, taking constants for these values is an oversimplification,
  21  * but it's tough enough to get any useful estimates even at this level of
  22  * detail.      Note that all of these parameters are user-settable, in case
  23  * the default values are drastically off for a particular platform.
  24  *
  25  * We compute two separate costs for each path:
  26  *              total_cost: total estimated cost to fetch all tuples
  27  *              startup_cost: cost that is expended before first tuple is fetched
  28  * In some scenarios, such as when there is a LIMIT or we are implementing
  29  * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
  30  * path's result.  A caller can estimate the cost of fetching a partial
  31  * result by interpolating between startup_cost and total_cost.  In detail:
  32  *              actual_cost = startup_cost +
  33  *                      (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
  34  * Note that a base relation's rows count (and, by extension, plan_rows for
  35  * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
  36  * that this equation works properly.  (Also, these routines guarantee not to
  37  * set the rows count to zero, so there will be no zero divide.)  The LIMIT is
  38  * applied as a top-level plan node.
  39  *
  40  * For largely historical reasons, most of the routines in this module use
  41  * the passed result Path only to store their startup_cost and total_cost
  42  * results into.  All the input data they need is passed as separate
  43  * parameters, even though much of it could be extracted from the Path.
  44  * An exception is made for the cost_XXXjoin() routines, which expect all
  45  * the non-cost fields of the passed XXXPath to be filled in.
  46  *
  47  *
  48  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  49  * Portions Copyright (c) 1994, Regents of the University of California
  50  *
  51  * IDENTIFICATION
  52  *        $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.116 2003/11/29 19:51:50 pgsql Exp $
  53  *
  54  *-------------------------------------------------------------------------
  55  */
  56
  57 #include "postgres.h"
  58
  59 #include <math.h>
  60
  61 #include "catalog/pg_statistic.h"
  62 #include "executor/nodeHash.h"
  63 #include "miscadmin.h"
  64 #include "optimizer/clauses.h"
  65 #include "optimizer/cost.h"
  66 #include "optimizer/pathnode.h"
  67 #include "optimizer/plancat.h"
  68 #include "parser/parsetree.h"
  69 #include "utils/selfuncs.h"
  70 #include "utils/lsyscache.h"
  71 #include "utils/syscache.h"
  72
  73
  74 #define LOG2(x)  (log(x) / 0.693147180559945)
  75 #define LOG6(x)  (log(x) / 1.79175946922805)
  76
  77 /*
  78  * Some Paths return less than the nominal number of rows of their parent
  79  * relations; join nodes need to do this to get the correct input count:
  80  */
  81 #define PATH_ROWS(path) \
  82         (IsA(path, UniquePath) ? \
  83          ((UniquePath *) (path))->rows : \
  84          (path)->parent->rows)
  85
  86
  87 double          effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
  88 double          random_page_cost = DEFAULT_RANDOM_PAGE_COST;
  89 double          cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
  90 double          cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
  91 double          cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
  92
  93 Cost            disable_cost = 100000000.0;
  94
  95 bool            enable_seqscan = true;
  96 bool            enable_indexscan = true;
  97 bool            enable_tidscan = true;
  98 bool            enable_sort = true;
  99 bool            enable_hashagg = true;
 100 bool            enable_nestloop = true;
 101 bool            enable_mergejoin = true;
 102 bool            enable_hashjoin = true;
 103
 104
 105 static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
 106                                                  int nbuckets);
 107 static bool cost_qual_eval_walker(Node *node, QualCost *total);
 108 static Selectivity approx_selectivity(Query *root, List *quals,
 109                                    JoinType jointype);
 110 static void set_rel_width(Query *root, RelOptInfo *rel);
 111 static double relation_byte_size(double tuples, int width);
 112 static double page_size(double tuples, int width);
 113
 114
 115 /*
 116  * cost_seqscan
 117  *        Determines and returns the cost of scanning a relation sequentially.
 118  */
 119 void
 120 cost_seqscan(Path *path, Query *root,
 121                          RelOptInfo *baserel)
 122 {
 123         Cost            startup_cost = 0;
 124         Cost            run_cost = 0;
 125         Cost            cpu_per_tuple;
 126
 127         /* Should only be applied to base relations */
 128         Assert(baserel->relid > 0);
 129         Assert(baserel->rtekind == RTE_RELATION);
 130
 131         if (!enable_seqscan)
 132                 startup_cost += disable_cost;
 133
 134         /*
 135          * disk costs
 136          *
 137          * The cost of reading a page sequentially is 1.0, by definition. Note
 138          * that the Unix kernel will typically do some amount of read-ahead
 139          * optimization, so that this cost is less than the true cost of
 140          * reading a page from disk.  We ignore that issue here, but must take
 141          * it into account when estimating the cost of non-sequential
 142          * accesses!
 143          */
 144         run_cost += baserel->pages; /* sequential fetches with cost 1.0 */
 145
 146         /* CPU costs */
 147         startup_cost += baserel->baserestrictcost.startup;
 148         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 149         run_cost += cpu_per_tuple * baserel->tuples;
 150
 151         path->startup_cost = startup_cost;
 152         path->total_cost = startup_cost + run_cost;
 153 }
 154
 155 /*
 156  * cost_nonsequential_access
 157  *        Estimate the cost of accessing one page at random from a relation
 158  *        (or sort temp file) of the given size in pages.
 159  *
 160  * The simplistic model that the cost is random_page_cost is what we want
 161  * to use for large relations; but for small ones that is a serious
 162  * overestimate because of the effects of caching.      This routine tries to
 163  * account for that.
 164  *
 165  * Unfortunately we don't have any good way of estimating the effective cache
 166  * size we are working with --- we know that Postgres itself has NBuffers
 167  * internal buffers, but the size of the kernel's disk cache is uncertain,
 168  * and how much of it we get to use is even less certain.  We punt the problem
 169  * for now by assuming we are given an effective_cache_size parameter.
 170  *
 171  * Given a guesstimated cache size, we estimate the actual I/O cost per page
 172  * with the entirely ad-hoc equations:
 173  *      if relpages >= effective_cache_size:
 174  *              random_page_cost * (1 - (effective_cache_size/relpages)/2)
 175  *      if relpages < effective_cache_size:
 176  *              1 + (random_page_cost/2-1) * (relpages/effective_cache_size) ** 2
 177  * These give the right asymptotic behavior (=> 1.0 as relpages becomes
 178  * small, => random_page_cost as it becomes large) and meet in the middle
 179  * with the estimate that the cache is about 50% effective for a relation
 180  * of the same size as effective_cache_size.  (XXX this is probably all
 181  * wrong, but I haven't been able to find any theory about how effective
 182  * a disk cache should be presumed to be.)
 183  */
 184 static Cost
 185 cost_nonsequential_access(double relpages)
 186 {
 187         double          relsize;
 188
 189         /* don't crash on bad input data */
 190         if (relpages <= 0.0 || effective_cache_size <= 0.0)
 191                 return random_page_cost;
 192
 193         relsize = relpages / effective_cache_size;
 194
 195         if (relsize >= 1.0)
 196                 return random_page_cost * (1.0 - 0.5 / relsize);
 197         else
 198                 return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize;
 199 }
 200
 201 /*
 202  * cost_index
 203  *        Determines and returns the cost of scanning a relation using an index.
 204  *
 205  *        NOTE: an indexscan plan node can actually represent several passes,
 206  *        but here we consider the cost of just one pass.
 207  *
 208  * 'root' is the query root
 209  * 'baserel' is the base relation the index is for
 210  * 'index' is the index to be used
 211  * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
 212  * 'is_injoin' is T if we are considering using the index scan as the inside
 213  *              of a nestloop join (hence, some of the indexQuals are join clauses)
 214  *
 215  * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
 216  * Any additional quals evaluated as qpquals may reduce the number of returned
 217  * tuples, but they won't reduce the number of tuples we have to fetch from
 218  * the table, so they don't reduce the scan cost.
 219  */
 220 void
 221 cost_index(Path *path, Query *root,
 222                    RelOptInfo *baserel,
 223                    IndexOptInfo *index,
 224                    List *indexQuals,
 225                    bool is_injoin)
 226 {
 227         Cost            startup_cost = 0;
 228         Cost            run_cost = 0;
 229         Cost            indexStartupCost;
 230         Cost            indexTotalCost;
 231         Selectivity indexSelectivity;
 232         double          indexCorrelation,
 233                                 csquared;
 234         Cost            min_IO_cost,
 235                                 max_IO_cost;
 236         Cost            cpu_per_tuple;
 237         double          tuples_fetched;
 238         double          pages_fetched;
 239         double          T,
 240                                 b;
 241
 242         /* Should only be applied to base relations */
 243         Assert(IsA(baserel, RelOptInfo) &&
 244                    IsA(index, IndexOptInfo));
 245         Assert(baserel->relid > 0);
 246         Assert(baserel->rtekind == RTE_RELATION);
 247
 248         if (!enable_indexscan)
 249                 startup_cost += disable_cost;
 250
 251         /*
 252          * Call index-access-method-specific code to estimate the processing
 253          * cost for scanning the index, as well as the selectivity of the
 254          * index (ie, the fraction of main-table tuples we will have to
 255          * retrieve) and its correlation to the main-table tuple order.
 256          */
 257         OidFunctionCall8(index->amcostestimate,
 258                                          PointerGetDatum(root),
 259                                          PointerGetDatum(baserel),
 260                                          PointerGetDatum(index),
 261                                          PointerGetDatum(indexQuals),
 262                                          PointerGetDatum(&indexStartupCost),
 263                                          PointerGetDatum(&indexTotalCost),
 264                                          PointerGetDatum(&indexSelectivity),
 265                                          PointerGetDatum(&indexCorrelation));
 266
 267         /* all costs for touching index itself included here */
 268         startup_cost += indexStartupCost;
 269         run_cost += indexTotalCost - indexStartupCost;
 270
 271         /*----------
 272          * Estimate number of main-table tuples and pages fetched.
 273          *
 274          * When the index ordering is uncorrelated with the table ordering,
 275          * we use an approximation proposed by Mackert and Lohman, "Index Scans
 276          * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
 277          * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
 278          * The Mackert and Lohman approximation is that the number of pages
 279          * fetched is
 280          *      PF =
 281          *              min(2TNs/(2T+Ns), T)                    when T <= b
 282          *              2TNs/(2T+Ns)                                    when T > b and Ns <= 2Tb/(2T-b)
 283          *              b + (Ns - 2Tb/(2T-b))*(T-b)/T   when T > b and Ns > 2Tb/(2T-b)
 284          * where
 285          *              T = # pages in table
 286          *              N = # tuples in table
 287          *              s = selectivity = fraction of table to be scanned
 288          *              b = # buffer pages available (we include kernel space here)
 289          *
 290          * When the index ordering is exactly correlated with the table ordering
 291          * (just after a CLUSTER, for example), the number of pages fetched should
 292          * be just sT.  What's more, these will be sequential fetches, not the
 293          * random fetches that occur in the uncorrelated case.  So, depending on
 294          * the extent of correlation, we should estimate the actual I/O cost
 295          * somewhere between s * T * 1.0 and PF * random_cost.  We currently
 296          * interpolate linearly between these two endpoints based on the
 297          * correlation squared (XXX is that appropriate?).
 298          *
 299          * In any case the number of tuples fetched is Ns.
 300          *----------
 301          */
 302
 303         tuples_fetched = indexSelectivity * baserel->tuples;
 304         /* Don't believe estimates less than 1... */
 305         if (tuples_fetched < 1.0)
 306                 tuples_fetched = 1.0;
 307
 308         /* This part is the Mackert and Lohman formula */
 309
 310         T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
 311         b = (effective_cache_size > 1) ? effective_cache_size : 1.0;
 312
 313         if (T <= b)
 314         {
 315                 pages_fetched =
 316                         (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
 317                 if (pages_fetched > T)
 318                         pages_fetched = T;
 319         }
 320         else
 321         {
 322                 double          lim;
 323
 324                 lim = (2.0 * T * b) / (2.0 * T - b);
 325                 if (tuples_fetched <= lim)
 326                 {
 327                         pages_fetched =
 328                                 (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
 329                 }
 330                 else
 331                 {
 332                         pages_fetched =
 333                                 b + (tuples_fetched - lim) * (T - b) / T;
 334                 }
 335         }
 336
 337         /*
 338          * min_IO_cost corresponds to the perfectly correlated case
 339          * (csquared=1), max_IO_cost to the perfectly uncorrelated case
 340          * (csquared=0).  Note that we just charge random_page_cost per page
 341          * in the uncorrelated case, rather than using
 342          * cost_nonsequential_access, since we've already accounted for
 343          * caching effects by using the Mackert model.
 344          */
 345         min_IO_cost = ceil(indexSelectivity * T);
 346         max_IO_cost = pages_fetched * random_page_cost;
 347
 348         /*
 349          * Now interpolate based on estimated index order correlation to get
 350          * total disk I/O cost for main table accesses.
 351          */
 352         csquared = indexCorrelation * indexCorrelation;
 353
 354         run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
 355
 356         /*
 357          * Estimate CPU costs per tuple.
 358          *
 359          * Normally the indexquals will be removed from the list of restriction
 360          * clauses that we have to evaluate as qpquals, so we should subtract
 361          * their costs from baserestrictcost.  But if we are doing a join then
 362          * some of the indexquals are join clauses and shouldn't be
 363          * subtracted. Rather than work out exactly how much to subtract, we
 364          * don't subtract anything.
 365          *
 366          * XXX For a lossy index, not all the quals will be removed and so we
 367          * really shouldn't subtract their costs; but detecting that seems
 368          * more expensive than it's worth.
 369          */
 370         startup_cost += baserel->baserestrictcost.startup;
 371         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 372
 373         if (!is_injoin)
 374         {
 375                 QualCost        index_qual_cost;
 376
 377                 cost_qual_eval(&index_qual_cost, indexQuals);
 378                 cpu_per_tuple -= index_qual_cost.per_tuple;
 379         }
 380
 381         run_cost += cpu_per_tuple * tuples_fetched;
 382
 383         path->startup_cost = startup_cost;
 384         path->total_cost = startup_cost + run_cost;
 385 }
 386
 387 /*
 388  * cost_tidscan
 389  *        Determines and returns the cost of scanning a relation using TIDs.
 390  */
 391 void
 392 cost_tidscan(Path *path, Query *root,
 393                          RelOptInfo *baserel, List *tideval)
 394 {
 395         Cost            startup_cost = 0;
 396         Cost            run_cost = 0;
 397         Cost            cpu_per_tuple;
 398         int                     ntuples = length(tideval);
 399
 400         /* Should only be applied to base relations */
 401         Assert(baserel->relid > 0);
 402         Assert(baserel->rtekind == RTE_RELATION);
 403
 404         if (!enable_tidscan)
 405                 startup_cost += disable_cost;
 406
 407         /* disk costs --- assume each tuple on a different page */
 408         run_cost += random_page_cost * ntuples;
 409
 410         /* CPU costs */
 411         startup_cost += baserel->baserestrictcost.startup;
 412         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 413         run_cost += cpu_per_tuple * ntuples;
 414
 415         path->startup_cost = startup_cost;
 416         path->total_cost = startup_cost + run_cost;
 417 }
 418
 419 /*
 420  * cost_subqueryscan
 421  *        Determines and returns the cost of scanning a subquery RTE.
 422  */
 423 void
 424 cost_subqueryscan(Path *path, RelOptInfo *baserel)
 425 {
 426         Cost            startup_cost;
 427         Cost            run_cost;
 428         Cost            cpu_per_tuple;
 429
 430         /* Should only be applied to base relations that are subqueries */
 431         Assert(baserel->relid > 0);
 432         Assert(baserel->rtekind == RTE_SUBQUERY);
 433
 434         /*
 435          * Cost of path is cost of evaluating the subplan, plus cost of
 436          * evaluating any restriction clauses that will be attached to the
 437          * SubqueryScan node, plus cpu_tuple_cost to account for selection and
 438          * projection overhead.
 439          */
 440         path->startup_cost = baserel->subplan->startup_cost;
 441         path->total_cost = baserel->subplan->total_cost;
 442
 443         startup_cost = baserel->baserestrictcost.startup;
 444         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 445         run_cost = cpu_per_tuple * baserel->tuples;
 446
 447         path->startup_cost += startup_cost;
 448         path->total_cost += startup_cost + run_cost;
 449 }
 450
 451 /*
 452  * cost_functionscan
 453  *        Determines and returns the cost of scanning a function RTE.
 454  */
 455 void
 456 cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
 457 {
 458         Cost            startup_cost = 0;
 459         Cost            run_cost = 0;
 460         Cost            cpu_per_tuple;
 461
 462         /* Should only be applied to base relations that are functions */
 463         Assert(baserel->relid > 0);
 464         Assert(baserel->rtekind == RTE_FUNCTION);
 465
 466         /*
 467          * For now, estimate function's cost at one operator eval per function
 468          * call.  Someday we should revive the function cost estimate columns
 469          * in pg_proc...
 470          */
 471         cpu_per_tuple = cpu_operator_cost;
 472
 473         /* Add scanning CPU costs */
 474         startup_cost += baserel->baserestrictcost.startup;
 475         cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 476         run_cost += cpu_per_tuple * baserel->tuples;
 477
 478         path->startup_cost = startup_cost;
 479         path->total_cost = startup_cost + run_cost;
 480 }
 481
 482 /*
 483  * cost_sort
 484  *        Determines and returns the cost of sorting a relation, including
 485  *        the cost of reading the input data.
 486  *
 487  * If the total volume of data to sort is less than SortMem, we will do
 488  * an in-memory sort, which requires no I/O and about t*log2(t) tuple
 489  * comparisons for t tuples.
 490  *
 491  * If the total volume exceeds SortMem, we switch to a tape-style merge
 492  * algorithm.  There will still be about t*log2(t) tuple comparisons in
 493  * total, but we will also need to write and read each tuple once per
 494  * merge pass.  We expect about ceil(log6(r)) merge passes where r is the
 495  * number of initial runs formed (log6 because tuplesort.c uses six-tape
 496  * merging).  Since the average initial run should be about twice SortMem,
 497  * we have
 498  *              disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem)))
 499  *              cpu = comparison_cost * t * log2(t)
 500  *
 501  * The disk traffic is assumed to be half sequential and half random
 502  * accesses (XXX can't we refine that guess?)
 503  *
 504  * We charge two operator evals per tuple comparison, which should be in
 505  * the right ballpark in most cases.
 506  *
 507  * 'pathkeys' is a list of sort keys
 508  * 'input_cost' is the total cost for reading the input data
 509  * 'tuples' is the number of tuples in the relation
 510  * 'width' is the average tuple width in bytes
 511  *
 512  * NOTE: some callers currently pass NIL for pathkeys because they
 513  * can't conveniently supply the sort keys.  Since this routine doesn't
 514  * currently do anything with pathkeys anyway, that doesn't matter...
 515  * but if it ever does, it should react gracefully to lack of key data.
 516  * (Actually, the thing we'd most likely be interested in is just the number
 517  * of sort keys, which all callers *could* supply.)
 518  */
 519 void
 520 cost_sort(Path *path, Query *root,
 521                   List *pathkeys, Cost input_cost, double tuples, int width)
 522 {
 523         Cost            startup_cost = input_cost;
 524         Cost            run_cost = 0;
 525         double          nbytes = relation_byte_size(tuples, width);
 526         long            sortmembytes = SortMem * 1024L;
 527
 528         if (!enable_sort)
 529                 startup_cost += disable_cost;
 530
 531         /*
 532          * We want to be sure the cost of a sort is never estimated as zero,
 533          * even if passed-in tuple count is zero.  Besides, mustn't do
 534          * log(0)...
 535          */
 536         if (tuples < 2.0)
 537                 tuples = 2.0;
 538
 539         /*
 540          * CPU costs
 541          *
 542          * Assume about two operator evals per tuple comparison and N log2 N
 543          * comparisons
 544          */
 545         startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
 546
 547         /* disk costs */
 548         if (nbytes > sortmembytes)
 549         {
 550                 double          npages = ceil(nbytes / BLCKSZ);
 551                 double          nruns = nbytes / (sortmembytes * 2);
 552                 double          log_runs = ceil(LOG6(nruns));
 553                 double          npageaccesses;
 554
 555                 if (log_runs < 1.0)
 556                         log_runs = 1.0;
 557                 npageaccesses = 2.0 * npages * log_runs;
 558                 /* Assume half are sequential (cost 1), half are not */
 559                 startup_cost += npageaccesses *
 560                         (1.0 + cost_nonsequential_access(npages)) * 0.5;
 561         }
 562
 563         /*
 564          * Also charge a small amount (arbitrarily set equal to operator cost)
 565          * per extracted tuple.
 566          */
 567         run_cost += cpu_operator_cost * tuples;
 568
 569         path->startup_cost = startup_cost;
 570         path->total_cost = startup_cost + run_cost;
 571 }
 572
 573 /*
 574  * cost_material
 575  *        Determines and returns the cost of materializing a relation, including
 576  *        the cost of reading the input data.
 577  *
 578  * If the total volume of data to materialize exceeds SortMem, we will need
 579  * to write it to disk, so the cost is much higher in that case.
 580  */
 581 void
 582 cost_material(Path *path,
 583                           Cost input_cost, double tuples, int width)
 584 {
 585         Cost            startup_cost = input_cost;
 586         Cost            run_cost = 0;
 587         double          nbytes = relation_byte_size(tuples, width);
 588         long            sortmembytes = SortMem * 1024L;
 589
 590         /* disk costs */
 591         if (nbytes > sortmembytes)
 592         {
 593                 double          npages = ceil(nbytes / BLCKSZ);
 594
 595                 /* We'll write during startup and read during retrieval */
 596                 startup_cost += npages;
 597                 run_cost += npages;
 598         }
 599
 600         /*
 601          * Also charge a small amount per extracted tuple.      We use
 602          * cpu_tuple_cost so that it doesn't appear worthwhile to materialize
 603          * a bare seqscan.
 604          */
 605         run_cost += cpu_tuple_cost * tuples;
 606
 607         path->startup_cost = startup_cost;
 608         path->total_cost = startup_cost + run_cost;
 609 }
 610
 611 /*
 612  * cost_agg
 613  *              Determines and returns the cost of performing an Agg plan node,
 614  *              including the cost of its input.
 615  *
 616  * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
 617  * are for appropriately-sorted input.
 618  */
 619 void
 620 cost_agg(Path *path, Query *root,
 621                  AggStrategy aggstrategy, int numAggs,
 622                  int numGroupCols, double numGroups,
 623                  Cost input_startup_cost, Cost input_total_cost,
 624                  double input_tuples)
 625 {
 626         Cost            startup_cost;
 627         Cost            total_cost;
 628
 629         /*
 630          * We charge one cpu_operator_cost per aggregate function per input
 631          * tuple, and another one per output tuple (corresponding to transfn
 632          * and finalfn calls respectively).  If we are grouping, we charge an
 633          * additional cpu_operator_cost per grouping column per input tuple
 634          * for grouping comparisons.
 635          *
 636          * We will produce a single output tuple if not grouping, and a tuple per
 637          * group otherwise.
 638          *
 639          * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
 640          * same total CPU cost, but AGG_SORTED has lower startup cost.  If the
 641          * input path is already sorted appropriately, AGG_SORTED should be
 642          * preferred (since it has no risk of memory overflow).  This will
 643          * happen as long as the computed total costs are indeed exactly equal
 644          * --- but if there's roundoff error we might do the wrong thing.  So
 645          * be sure that the computations below form the same intermediate
 646          * values in the same order.
 647          */
 648         if (aggstrategy == AGG_PLAIN)
 649         {
 650                 startup_cost = input_total_cost;
 651                 startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
 652                 /* we aren't grouping */
 653                 total_cost = startup_cost;
 654         }
 655         else if (aggstrategy == AGG_SORTED)
 656         {
 657                 /* Here we are able to deliver output on-the-fly */
 658                 startup_cost = input_startup_cost;
 659                 total_cost = input_total_cost;
 660                 /* calcs phrased this way to match HASHED case, see note above */
 661                 total_cost += cpu_operator_cost * input_tuples * numGroupCols;
 662                 total_cost += cpu_operator_cost * input_tuples * numAggs;
 663                 total_cost += cpu_operator_cost * numGroups * numAggs;
 664         }
 665         else
 666         {
 667                 /* must be AGG_HASHED */
 668                 startup_cost = input_total_cost;
 669                 startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
 670                 startup_cost += cpu_operator_cost * input_tuples * numAggs;
 671                 total_cost = startup_cost;
 672                 total_cost += cpu_operator_cost * numGroups * numAggs;
 673         }
 674
 675         path->startup_cost = startup_cost;
 676         path->total_cost = total_cost;
 677 }
 678
 679 /*
 680  * cost_group
 681  *              Determines and returns the cost of performing a Group plan node,
 682  *              including the cost of its input.
 683  *
 684  * Note: caller must ensure that input costs are for appropriately-sorted
 685  * input.
 686  */
 687 void
 688 cost_group(Path *path, Query *root,
 689                    int numGroupCols, double numGroups,
 690                    Cost input_startup_cost, Cost input_total_cost,
 691                    double input_tuples)
 692 {
 693         Cost            startup_cost;
 694         Cost            total_cost;
 695
 696         startup_cost = input_startup_cost;
 697         total_cost = input_total_cost;
 698
 699         /*
 700          * Charge one cpu_operator_cost per comparison per input tuple. We
 701          * assume all columns get compared at most of the tuples.
 702          */
 703         total_cost += cpu_operator_cost * input_tuples * numGroupCols;
 704
 705         path->startup_cost = startup_cost;
 706         path->total_cost = total_cost;
 707 }
 708
 709 /*
 710  * cost_nestloop
 711  *        Determines and returns the cost of joining two relations using the
 712  *        nested loop algorithm.
 713  *
 714  * 'path' is already filled in except for the cost fields
 715  */
 716 void
 717 cost_nestloop(NestPath *path, Query *root)
 718 {
 719         Path       *outer_path = path->outerjoinpath;
 720         Path       *inner_path = path->innerjoinpath;
 721         List       *restrictlist = path->joinrestrictinfo;
 722         Cost            startup_cost = 0;
 723         Cost            run_cost = 0;
 724         Cost            cpu_per_tuple;
 725         QualCost        restrict_qual_cost;
 726         double          outer_path_rows = PATH_ROWS(outer_path);
 727         double          inner_path_rows = PATH_ROWS(inner_path);
 728         double          ntuples;
 729         Selectivity joininfactor;
 730
 731         if (!enable_nestloop)
 732                 startup_cost += disable_cost;
 733
 734         /*
 735          * If we're doing JOIN_IN then we will stop scanning inner tuples for
 736          * an outer tuple as soon as we have one match.  Account for the
 737          * effects of this by scaling down the cost estimates in proportion to
 738          * the expected output size.  (This assumes that all the quals
 739          * attached to the join are IN quals, which should be true.)
 740          *
 741          * Note: it's probably bogus to use the normal selectivity calculation
 742          * here when either the outer or inner path is a UniquePath.
 743          */
 744         if (path->jointype == JOIN_IN)
 745         {
 746                 Selectivity qual_selec = approx_selectivity(root, restrictlist,
 747                                                                                                         path->jointype);
 748                 double          qptuples;
 749
 750                 qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
 751                 if (qptuples > path->path.parent->rows)
 752                         joininfactor = path->path.parent->rows / qptuples;
 753                 else
 754                         joininfactor = 1.0;
 755         }
 756         else
 757                 joininfactor = 1.0;
 758
 759         /* cost of source data */
 760
 761         /*
 762          * NOTE: clearly, we must pay both outer and inner paths' startup_cost
 763          * before we can start returning tuples, so the join's startup cost is
 764          * their sum.  What's not so clear is whether the inner path's
 765          * startup_cost must be paid again on each rescan of the inner path.
 766          * This is not true if the inner path is materialized or is a
 767          * hashjoin, but probably is true otherwise.
 768          */
 769         startup_cost += outer_path->startup_cost + inner_path->startup_cost;
 770         run_cost += outer_path->total_cost - outer_path->startup_cost;
 771         if (IsA(inner_path, MaterialPath) ||
 772                 IsA(inner_path, HashPath))
 773         {
 774                 /* charge only run cost for each iteration of inner path */
 775         }
 776         else
 777         {
 778                 /*
 779                  * charge startup cost for each iteration of inner path, except we
 780                  * already charged the first startup_cost in our own startup
 781                  */
 782                 run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
 783         }
 784         run_cost += outer_path_rows *
 785                 (inner_path->total_cost - inner_path->startup_cost) * joininfactor;
 786
 787         /*
 788          * Compute number of tuples processed (not number emitted!). If inner
 789          * path is an indexscan, be sure to use its estimated output row
 790          * count, which may be lower than the restriction-clause-only row
 791          * count of its parent.  (We don't include this case in the PATH_ROWS
 792          * macro because it applies *only* to a nestloop's inner relation.)
 793          * Note: it is correct to use the unadjusted inner_path_rows in the
 794          * above calculation for joininfactor, since otherwise we'd be
 795          * double-counting the selectivity of the join clause being used for
 796          * the index.
 797          */
 798         if (IsA(inner_path, IndexPath))
 799                 inner_path_rows = ((IndexPath *) inner_path)->rows;
 800
 801         ntuples = inner_path_rows * outer_path_rows;
 802
 803         /* CPU costs */
 804         cost_qual_eval(&restrict_qual_cost, restrictlist);
 805         startup_cost += restrict_qual_cost.startup;
 806         cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
 807         run_cost += cpu_per_tuple * ntuples;
 808
 809         path->path.startup_cost = startup_cost;
 810         path->path.total_cost = startup_cost + run_cost;
 811 }
 812
 813 /*
 814  * cost_mergejoin
 815  *        Determines and returns the cost of joining two relations using the
 816  *        merge join algorithm.
 817  *
 818  * 'path' is already filled in except for the cost fields
 819  *
 820  * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
 821  * outersortkeys and innersortkeys are lists of the keys to be used
 822  * to sort the outer and inner relations, or NIL if no explicit
 823  * sort is needed because the source path is already ordered.
 824  */
 825 void
 826 cost_mergejoin(MergePath *path, Query *root)
 827 {
 828         Path       *outer_path = path->jpath.outerjoinpath;
 829         Path       *inner_path = path->jpath.innerjoinpath;
 830         List       *restrictlist = path->jpath.joinrestrictinfo;
 831         List       *mergeclauses = path->path_mergeclauses;
 832         List       *outersortkeys = path->outersortkeys;
 833         List       *innersortkeys = path->innersortkeys;
 834         Cost            startup_cost = 0;
 835         Cost            run_cost = 0;
 836         Cost            cpu_per_tuple;
 837         Selectivity merge_selec;
 838         Selectivity qp_selec;
 839         QualCost        merge_qual_cost;
 840         QualCost        qp_qual_cost;
 841         RestrictInfo *firstclause;
 842         List       *qpquals;
 843         double          outer_path_rows = PATH_ROWS(outer_path);
 844         double          inner_path_rows = PATH_ROWS(inner_path);
 845         double          outer_rows,
 846                                 inner_rows;
 847         double          mergejointuples,
 848                                 rescannedtuples;
 849         double          qptuples;
 850         double          rescanratio;
 851         Selectivity outerscansel,
 852                                 innerscansel;
 853         Selectivity joininfactor;
 854         Path            sort_path;              /* dummy for result of cost_sort */
 855
 856         if (!enable_mergejoin)
 857                 startup_cost += disable_cost;
 858
 859         /*
 860          * Compute cost and selectivity of the mergequals and qpquals (other
 861          * restriction clauses) separately.  We use approx_selectivity here
 862          * for speed --- in most cases, any errors won't affect the result
 863          * much.
 864          *
 865          * Note: it's probably bogus to use the normal selectivity calculation
 866          * here when either the outer or inner path is a UniquePath.
 867          */
 868         merge_selec = approx_selectivity(root, mergeclauses,
 869                                                                          path->jpath.jointype);
 870         cost_qual_eval(&merge_qual_cost, mergeclauses);
 871         qpquals = set_ptrDifference(restrictlist, mergeclauses);
 872         qp_selec = approx_selectivity(root, qpquals,
 873                                                                   path->jpath.jointype);
 874         cost_qual_eval(&qp_qual_cost, qpquals);
 875         freeList(qpquals);
 876
 877         /* approx # tuples passing the merge quals */
 878         mergejointuples = ceil(merge_selec * outer_path_rows * inner_path_rows);
 879         /* approx # tuples passing qpquals as well */
 880         qptuples = ceil(mergejointuples * qp_selec);
 881
 882         /*
 883          * When there are equal merge keys in the outer relation, the
 884          * mergejoin must rescan any matching tuples in the inner relation.
 885          * This means re-fetching inner tuples.  Our cost model for this is
 886          * that a re-fetch costs the same as an original fetch, which is
 887          * probably an overestimate; but on the other hand we ignore the
 888          * bookkeeping costs of mark/restore. Not clear if it's worth
 889          * developing a more refined model.
 890          *
 891          * The number of re-fetches can be estimated approximately as size of
 892          * merge join output minus size of inner relation.      Assume that the
 893          * distinct key values are 1, 2, ..., and denote the number of values
 894          * of each key in the outer relation as m1, m2, ...; in the inner
 895          * relation, n1, n2, ...  Then we have
 896          *
 897          * size of join = m1 * n1 + m2 * n2 + ...
 898          *
 899          * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
 900          * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
 901          * relation
 902          *
 903          * This equation works correctly for outer tuples having no inner match
 904          * (nk = 0), but not for inner tuples having no outer match (mk = 0);
 905          * we are effectively subtracting those from the number of rescanned
 906          * tuples, when we should not.  Can we do better without expensive
 907          * selectivity computations?
 908          */
 909         if (IsA(outer_path, UniquePath))
 910                 rescannedtuples = 0;
 911         else
 912         {
 913                 rescannedtuples = mergejointuples - inner_path_rows;
 914                 /* Must clamp because of possible underestimate */
 915                 if (rescannedtuples < 0)
 916                         rescannedtuples = 0;
 917         }
 918         /* We'll inflate inner run cost this much to account for rescanning */
 919         rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
 920
 921         /*
 922          * A merge join will stop as soon as it exhausts either input stream.
 923          * Estimate fraction of the left and right inputs that will actually
 924          * need to be scanned.  We use only the first (most significant) merge
 925          * clause for this purpose.
 926          *
 927          * Since this calculation is somewhat expensive, and will be the same for
 928          * all mergejoin paths associated with the merge clause, we cache the
 929          * results in the RestrictInfo node.
 930          */
 931         firstclause = (RestrictInfo *) lfirst(mergeclauses);
 932         if (firstclause->left_mergescansel < 0)         /* not computed yet? */
 933                 mergejoinscansel(root, (Node *) firstclause->clause,
 934                                                  &firstclause->left_mergescansel,
 935                                                  &firstclause->right_mergescansel);
 936
 937         if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids))
 938         {
 939                 /* left side of clause is outer */
 940                 outerscansel = firstclause->left_mergescansel;
 941                 innerscansel = firstclause->right_mergescansel;
 942         }
 943         else
 944         {
 945                 /* left side of clause is inner */
 946                 outerscansel = firstclause->right_mergescansel;
 947                 innerscansel = firstclause->left_mergescansel;
 948         }
 949
 950         /* convert selectivity to row count; must scan at least one row */
 951
 952         outer_rows = ceil(outer_path_rows * outerscansel);
 953         if (outer_rows < 1)
 954                 outer_rows = 1;
 955         inner_rows = ceil(inner_path_rows * innerscansel);
 956         if (inner_rows < 1)
 957                 inner_rows = 1;
 958
 959         /*
 960          * Readjust scan selectivities to account for above rounding.  This is
 961          * normally an insignificant effect, but when there are only a few
 962          * rows in the inputs, failing to do this makes for a large percentage
 963          * error.
 964          */
 965         outerscansel = outer_rows / outer_path_rows;
 966         innerscansel = inner_rows / inner_path_rows;
 967
 968         /* cost of source data */
 969
 970         if (outersortkeys)                      /* do we need to sort outer? */
 971         {
 972                 cost_sort(&sort_path,
 973                                   root,
 974                                   outersortkeys,
 975                                   outer_path->total_cost,
 976                                   outer_path_rows,
 977                                   outer_path->parent->width);
 978                 startup_cost += sort_path.startup_cost;
 979                 run_cost += (sort_path.total_cost - sort_path.startup_cost)
 980                         * outerscansel;
 981         }
 982         else
 983         {
 984                 startup_cost += outer_path->startup_cost;
 985                 run_cost += (outer_path->total_cost - outer_path->startup_cost)
 986                         * outerscansel;
 987         }
 988
 989         if (innersortkeys)                      /* do we need to sort inner? */
 990         {
 991                 cost_sort(&sort_path,
 992                                   root,
 993                                   innersortkeys,
 994                                   inner_path->total_cost,
 995                                   inner_path_rows,
 996                                   inner_path->parent->width);
 997                 startup_cost += sort_path.startup_cost;
 998                 run_cost += (sort_path.total_cost - sort_path.startup_cost)
 999                         * innerscansel * rescanratio;
1000         }
1001         else
1002         {
1003                 startup_cost += inner_path->startup_cost;
1004                 run_cost += (inner_path->total_cost - inner_path->startup_cost)
1005                         * innerscansel * rescanratio;
1006         }
1007
1008         /* CPU costs */
1009
1010         /*
1011          * If we're doing JOIN_IN then we will stop outputting inner tuples
1012          * for an outer tuple as soon as we have one match.  Account for the
1013          * effects of this by scaling down the cost estimates in proportion to
1014          * the expected output size.  (This assumes that all the quals
1015          * attached to the join are IN quals, which should be true.)
1016          */
1017         if (path->jpath.jointype == JOIN_IN &&
1018                 qptuples > path->jpath.path.parent->rows)
1019                 joininfactor = path->jpath.path.parent->rows / qptuples;
1020         else
1021                 joininfactor = 1.0;
1022
1023         /*
1024          * The number of tuple comparisons needed is approximately number of
1025          * outer rows plus number of inner rows plus number of rescanned
1026          * tuples (can we refine this?).  At each one, we need to evaluate the
1027          * mergejoin quals.  NOTE: JOIN_IN mode does not save any work here,
1028          * so do NOT include joininfactor.
1029          */
1030         startup_cost += merge_qual_cost.startup;
1031         run_cost += merge_qual_cost.per_tuple *
1032                 (outer_rows + inner_rows * rescanratio);
1033
1034         /*
1035          * For each tuple that gets through the mergejoin proper, we charge
1036          * cpu_tuple_cost plus the cost of evaluating additional restriction
1037          * clauses that are to be applied at the join.  (This is pessimistic
1038          * since not all of the quals may get evaluated at each tuple.)  This
1039          * work is skipped in JOIN_IN mode, so apply the factor.
1040          */
1041         startup_cost += qp_qual_cost.startup;
1042         cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
1043         run_cost += cpu_per_tuple * mergejointuples * joininfactor;
1044
1045         path->jpath.path.startup_cost = startup_cost;
1046         path->jpath.path.total_cost = startup_cost + run_cost;
1047 }
1048
1049 /*
1050  * cost_hashjoin
1051  *        Determines and returns the cost of joining two relations using the
1052  *        hash join algorithm.
1053  *
1054  * 'path' is already filled in except for the cost fields
1055  *
1056  * Note: path's hashclauses should be a subset of the joinrestrictinfo list
1057  */
1058 void
1059 cost_hashjoin(HashPath *path, Query *root)
1060 {
1061         Path       *outer_path = path->jpath.outerjoinpath;
1062         Path       *inner_path = path->jpath.innerjoinpath;
1063         List       *restrictlist = path->jpath.joinrestrictinfo;
1064         List       *hashclauses = path->path_hashclauses;
1065         Cost            startup_cost = 0;
1066         Cost            run_cost = 0;
1067         Cost            cpu_per_tuple;
1068         Selectivity hash_selec;
1069         Selectivity qp_selec;
1070         QualCost        hash_qual_cost;
1071         QualCost        qp_qual_cost;
1072         double          hashjointuples;
1073         double          qptuples;
1074         double          outer_path_rows = PATH_ROWS(outer_path);
1075         double          inner_path_rows = PATH_ROWS(inner_path);
1076         double          outerbytes = relation_byte_size(outer_path_rows,
1077                                                                                           outer_path->parent->width);
1078         double          innerbytes = relation_byte_size(inner_path_rows,
1079                                                                                           inner_path->parent->width);
1080         int                     num_hashclauses = length(hashclauses);
1081         int                     virtualbuckets;
1082         int                     physicalbuckets;
1083         int                     numbatches;
1084         Selectivity innerbucketsize;
1085         Selectivity joininfactor;
1086         List       *hcl;
1087         List       *qpquals;
1088
1089         if (!enable_hashjoin)
1090                 startup_cost += disable_cost;
1091
1092         /*
1093          * Compute cost and selectivity of the hashquals and qpquals (other
1094          * restriction clauses) separately.  We use approx_selectivity here
1095          * for speed --- in most cases, any errors won't affect the result
1096          * much.
1097          *
1098          * Note: it's probably bogus to use the normal selectivity calculation
1099          * here when either the outer or inner path is a UniquePath.
1100          */
1101         hash_selec = approx_selectivity(root, hashclauses,
1102                                                                         path->jpath.jointype);
1103         cost_qual_eval(&hash_qual_cost, hashclauses);
1104         qpquals = set_ptrDifference(restrictlist, hashclauses);
1105         qp_selec = approx_selectivity(root, qpquals,
1106                                                                   path->jpath.jointype);
1107         cost_qual_eval(&qp_qual_cost, qpquals);
1108         freeList(qpquals);
1109
1110         /* approx # tuples passing the hash quals */
1111         hashjointuples = ceil(hash_selec * outer_path_rows * inner_path_rows);
1112         /* approx # tuples passing qpquals as well */
1113         qptuples = ceil(hashjointuples * qp_selec);
1114
1115         /* cost of source data */
1116         startup_cost += outer_path->startup_cost;
1117         run_cost += outer_path->total_cost - outer_path->startup_cost;
1118         startup_cost += inner_path->total_cost;
1119
1120         /*
1121          * Cost of computing hash function: must do it once per input tuple.
1122          * We charge one cpu_operator_cost for each column's hash function.
1123          *
1124          * XXX when a hashclause is more complex than a single operator, we
1125          * really should charge the extra eval costs of the left or right
1126          * side, as appropriate, here.  This seems more work than it's worth
1127          * at the moment.
1128          */
1129         startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
1130         run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
1131
1132         /* Get hash table size that executor would use for inner relation */
1133         ExecChooseHashTableSize(inner_path_rows,
1134                                                         inner_path->parent->width,
1135                                                         &virtualbuckets,
1136                                                         &physicalbuckets,
1137                                                         &numbatches);
1138
1139         /*
1140          * Determine bucketsize fraction for inner relation.  We use the
1141          * smallest bucketsize estimated for any individual hashclause; this
1142          * is undoubtedly conservative.
1143          *
1144          * BUT: if inner relation has been unique-ified, we can assume it's good
1145          * for hashing.  This is important both because it's the right answer,
1146          * and because we avoid contaminating the cache with a value that's
1147          * wrong for non-unique-ified paths.
1148          */
1149         if (IsA(inner_path, UniquePath))
1150                 innerbucketsize = 1.0 / virtualbuckets;
1151         else
1152         {
1153                 innerbucketsize = 1.0;
1154                 foreach(hcl, hashclauses)
1155                 {
1156                         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
1157                         Selectivity thisbucketsize;
1158
1159                         Assert(IsA(restrictinfo, RestrictInfo));
1160
1161                         /*
1162                          * First we have to figure out which side of the hashjoin
1163                          * clause is the inner side.
1164                          *
1165                          * Since we tend to visit the same clauses over and over when
1166                          * planning a large query, we cache the bucketsize estimate in
1167                          * the RestrictInfo node to avoid repeated lookups of
1168                          * statistics.
1169                          */
1170                         if (bms_is_subset(restrictinfo->right_relids,
1171                                                           inner_path->parent->relids))
1172                         {
1173                                 /* righthand side is inner */
1174                                 thisbucketsize = restrictinfo->right_bucketsize;
1175                                 if (thisbucketsize < 0)
1176                                 {
1177                                         /* not cached yet */
1178                                         thisbucketsize =
1179                                                 estimate_hash_bucketsize(root,
1180                                                            (Var *) get_rightop(restrictinfo->clause),
1181                                                                                                  virtualbuckets);
1182                                         restrictinfo->right_bucketsize = thisbucketsize;
1183                                 }
1184                         }
1185                         else
1186                         {
1187                                 Assert(bms_is_subset(restrictinfo->left_relids,
1188                                                                          inner_path->parent->relids));
1189                                 /* lefthand side is inner */
1190                                 thisbucketsize = restrictinfo->left_bucketsize;
1191                                 if (thisbucketsize < 0)
1192                                 {
1193                                         /* not cached yet */
1194                                         thisbucketsize =
1195                                                 estimate_hash_bucketsize(root,
1196                                                                 (Var *) get_leftop(restrictinfo->clause),
1197                                                                                                  virtualbuckets);
1198                                         restrictinfo->left_bucketsize = thisbucketsize;
1199                                 }
1200                         }
1201
1202                         if (innerbucketsize > thisbucketsize)
1203                                 innerbucketsize = thisbucketsize;
1204                 }
1205         }
1206
1207         /*
1208          * if inner relation is too big then we will need to "batch" the join,
1209          * which implies writing and reading most of the tuples to disk an
1210          * extra time.  Charge one cost unit per page of I/O (correct since it
1211          * should be nice and sequential...).  Writing the inner rel counts as
1212          * startup cost, all the rest as run cost.
1213          */
1214         if (numbatches)
1215         {
1216                 double          outerpages = page_size(outer_path_rows,
1217                                                                                    outer_path->parent->width);
1218                 double          innerpages = page_size(inner_path_rows,
1219                                                                                    inner_path->parent->width);
1220
1221                 startup_cost += innerpages;
1222                 run_cost += innerpages + 2 * outerpages;
1223         }
1224
1225         /* CPU costs */
1226
1227         /*
1228          * If we're doing JOIN_IN then we will stop comparing inner tuples to
1229          * an outer tuple as soon as we have one match.  Account for the
1230          * effects of this by scaling down the cost estimates in proportion to
1231          * the expected output size.  (This assumes that all the quals
1232          * attached to the join are IN quals, which should be true.)
1233          */
1234         if (path->jpath.jointype == JOIN_IN &&
1235                 qptuples > path->jpath.path.parent->rows)
1236                 joininfactor = path->jpath.path.parent->rows / qptuples;
1237         else
1238                 joininfactor = 1.0;
1239
1240         /*
1241          * The number of tuple comparisons needed is the number of outer
1242          * tuples times the typical number of tuples in a hash bucket, which
1243          * is the inner relation size times its bucketsize fraction.  At each
1244          * one, we need to evaluate the hashjoin quals.
1245          */
1246         startup_cost += hash_qual_cost.startup;
1247         run_cost += hash_qual_cost.per_tuple *
1248                 outer_path_rows * ceil(inner_path_rows * innerbucketsize) *
1249                 joininfactor;
1250
1251         /*
1252          * For each tuple that gets through the hashjoin proper, we charge
1253          * cpu_tuple_cost plus the cost of evaluating additional restriction
1254          * clauses that are to be applied at the join.  (This is pessimistic
1255          * since not all of the quals may get evaluated at each tuple.)
1256          */
1257         startup_cost += qp_qual_cost.startup;
1258         cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
1259         run_cost += cpu_per_tuple * hashjointuples * joininfactor;
1260
1261         /*
1262          * Bias against putting larger relation on inside.      We don't want an
1263          * absolute prohibition, though, since larger relation might have
1264          * better bucketsize --- and we can't trust the size estimates
1265          * unreservedly, anyway.  Instead, inflate the run cost by the square
1266          * root of the size ratio.      (Why square root?  No real good reason,
1267          * but it seems reasonable...)
1268          *
1269          * Note: before 7.4 we implemented this by inflating startup cost; but if
1270          * there's a disable_cost component in the input paths' startup cost,
1271          * that unfairly penalizes the hash.  Probably it'd be better to keep
1272          * track of disable penalty separately from cost.
1273          */
1274         if (innerbytes > outerbytes && outerbytes > 0)
1275                 run_cost *= sqrt(innerbytes / outerbytes);
1276
1277         path->jpath.path.startup_cost = startup_cost;
1278         path->jpath.path.total_cost = startup_cost + run_cost;
1279 }
1280
1281 /*
1282  * Estimate hash bucketsize fraction (ie, number of entries in a bucket
1283  * divided by total tuples in relation) if the specified Var is used
1284  * as a hash key.
1285  *
1286  * XXX This is really pretty bogus since we're effectively assuming that the
1287  * distribution of hash keys will be the same after applying restriction
1288  * clauses as it was in the underlying relation.  However, we are not nearly
1289  * smart enough to figure out how the restrict clauses might change the
1290  * distribution, so this will have to do for now.
1291  *
1292  * We are passed the number of buckets the executor will use for the given
1293  * input relation.      If the data were perfectly distributed, with the same
1294  * number of tuples going into each available bucket, then the bucketsize
1295  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
1296  * only if (a) there are at least nbuckets distinct data values, and (b)
1297  * we have a not-too-skewed data distribution.  Otherwise the buckets will
1298  * be nonuniformly occupied.  If the other relation in the join has a key
1299  * distribution similar to this one's, then the most-loaded buckets are
1300  * exactly those that will be probed most often.  Therefore, the "average"
1301  * bucket size for costing purposes should really be taken as something close
1302  * to the "worst case" bucket size.  We try to estimate this by adjusting the
1303  * fraction if there are too few distinct data values, and then scaling up
1304  * by the ratio of the most common value's frequency to the average frequency.
1305  *
1306  * If no statistics are available, use a default estimate of 0.1.  This will
1307  * discourage use of a hash rather strongly if the inner relation is large,
1308  * which is what we want.  We do not want to hash unless we know that the
1309  * inner rel is well-dispersed (or the alternatives seem much worse).
1310  */
1311 static Selectivity
1312 estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
1313 {
1314         Oid                     relid;
1315         RelOptInfo *rel;
1316         HeapTuple       tuple;
1317         Form_pg_statistic stats;
1318         double          estfract,
1319                                 ndistinct,
1320                                 mcvfreq,
1321                                 avgfreq;
1322         float4     *numbers;
1323         int                     nnumbers;
1324
1325         /*
1326          * Lookup info about var's relation and attribute; if none available,
1327          * return default estimate.
1328          */
1329         if (var == NULL || !IsA(var, Var))
1330                 return 0.1;
1331
1332         relid = getrelid(var->varno, root->rtable);
1333         if (relid == InvalidOid)
1334                 return 0.1;
1335
1336         rel = find_base_rel(root, var->varno);
1337
1338         if (rel->tuples <= 0.0 || rel->rows <= 0.0)
1339                 return 0.1;                             /* ensure we can divide below */
1340
1341         tuple = SearchSysCache(STATRELATT,
1342                                                    ObjectIdGetDatum(relid),
1343                                                    Int16GetDatum(var->varattno),
1344                                                    0, 0);
1345         if (!HeapTupleIsValid(tuple))
1346         {
1347                 /*
1348                  * If the attribute is known unique because of an index,
1349                  * we can treat it as well-distributed.
1350                  */
1351                 if (has_unique_index(rel, var->varattno))
1352                         return 1.0 / (double) nbuckets;
1353
1354                 /*
1355                  * Perhaps the Var is a system attribute; if so, it will have no
1356                  * entry in pg_statistic, but we may be able to guess something
1357                  * about its distribution anyway.
1358                  */
1359                 switch (var->varattno)
1360                 {
1361                         case ObjectIdAttributeNumber:
1362                         case SelfItemPointerAttributeNumber:
1363                                 /* these are unique, so buckets should be well-distributed */
1364                                 return 1.0 / (double) nbuckets;
1365                         case TableOidAttributeNumber:
1366                                 /* hashing this is a terrible idea... */
1367                                 return 1.0;
1368                 }
1369                 return 0.1;
1370         }
1371         stats = (Form_pg_statistic) GETSTRUCT(tuple);
1372
1373         /*
1374          * Obtain number of distinct data values in raw relation.
1375          */
1376         ndistinct = stats->stadistinct;
1377         if (ndistinct < 0.0)
1378                 ndistinct = -ndistinct * rel->tuples;
1379
1380         if (ndistinct <= 0.0)           /* ensure we can divide */
1381         {
1382                 ReleaseSysCache(tuple);
1383                 return 0.1;
1384         }
1385
1386         /* Also compute avg freq of all distinct data values in raw relation */
1387         avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
1388
1389         /*
1390          * Adjust ndistinct to account for restriction clauses.  Observe we
1391          * are assuming that the data distribution is affected uniformly by
1392          * the restriction clauses!
1393          *
1394          * XXX Possibly better way, but much more expensive: multiply by
1395          * selectivity of rel's restriction clauses that mention the target
1396          * Var.
1397          */
1398         ndistinct *= rel->rows / rel->tuples;
1399
1400         /*
1401          * Initial estimate of bucketsize fraction is 1/nbuckets as long as
1402          * the number of buckets is less than the expected number of distinct
1403          * values; otherwise it is 1/ndistinct.
1404          */
1405         if (ndistinct > (double) nbuckets)
1406                 estfract = 1.0 / (double) nbuckets;
1407         else
1408                 estfract = 1.0 / ndistinct;
1409
1410         /*
1411          * Look up the frequency of the most common value, if available.
1412          */
1413         mcvfreq = 0.0;
1414
1415         if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
1416                                                  STATISTIC_KIND_MCV, InvalidOid,
1417                                                  NULL, NULL, &numbers, &nnumbers))
1418         {
1419                 /*
1420                  * The first MCV stat is for the most common value.
1421                  */
1422                 if (nnumbers > 0)
1423                         mcvfreq = numbers[0];
1424                 free_attstatsslot(var->vartype, NULL, 0,
1425                                                   numbers, nnumbers);
1426         }
1427
1428         /*
1429          * Adjust estimated bucketsize upward to account for skewed
1430          * distribution.
1431          */
1432         if (avgfreq > 0.0 && mcvfreq > avgfreq)
1433                 estfract *= mcvfreq / avgfreq;
1434
1435         /*
1436          * Clamp bucketsize to sane range (the above adjustment could easily
1437          * produce an out-of-range result).  We set the lower bound a little
1438          * above zero, since zero isn't a very sane result.
1439          */
1440         if (estfract < 1.0e-6)
1441                 estfract = 1.0e-6;
1442         else if (estfract > 1.0)
1443                 estfract = 1.0;
1444
1445         ReleaseSysCache(tuple);
1446
1447         return (Selectivity) estfract;
1448 }
1449
1450
1451 /*
1452  * cost_qual_eval
1453  *              Estimate the CPU costs of evaluating a WHERE clause.
1454  *              The input can be either an implicitly-ANDed list of boolean
1455  *              expressions, or a list of RestrictInfo nodes.
1456  *              The result includes both a one-time (startup) component,
1457  *              and a per-evaluation component.
1458  */
1459 void
1460 cost_qual_eval(QualCost *cost, List *quals)
1461 {
1462         List       *l;
1463
1464         cost->startup = 0;
1465         cost->per_tuple = 0;
1466
1467         /* We don't charge any cost for the implicit ANDing at top level ... */
1468
1469         foreach(l, quals)
1470         {
1471                 Node       *qual = (Node *) lfirst(l);
1472
1473                 /*
1474                  * RestrictInfo nodes contain an eval_cost field reserved for this
1475                  * routine's use, so that it's not necessary to evaluate the qual
1476                  * clause's cost more than once.  If the clause's cost hasn't been
1477                  * computed yet, the field's startup value will contain -1.
1478                  */
1479                 if (qual && IsA(qual, RestrictInfo))
1480                 {
1481                         RestrictInfo *restrictinfo = (RestrictInfo *) qual;
1482
1483                         if (restrictinfo->eval_cost.startup < 0)
1484                         {
1485                                 restrictinfo->eval_cost.startup = 0;
1486                                 restrictinfo->eval_cost.per_tuple = 0;
1487                                 cost_qual_eval_walker((Node *) restrictinfo->clause,
1488                                                                           &restrictinfo->eval_cost);
1489                         }
1490                         cost->startup += restrictinfo->eval_cost.startup;
1491                         cost->per_tuple += restrictinfo->eval_cost.per_tuple;
1492                 }
1493                 else
1494                 {
1495                         /* If it's a bare expression, must always do it the hard way */
1496                         cost_qual_eval_walker(qual, cost);
1497                 }
1498         }
1499 }
1500
1501 static bool
1502 cost_qual_eval_walker(Node *node, QualCost *total)
1503 {
1504         if (node == NULL)
1505                 return false;
1506
1507         /*
1508          * Our basic strategy is to charge one cpu_operator_cost for each
1509          * operator or function node in the given tree.  Vars and Consts are
1510          * charged zero, and so are boolean operators (AND, OR, NOT).
1511          * Simplistic, but a lot better than no model at all.
1512          *
1513          * Should we try to account for the possibility of short-circuit
1514          * evaluation of AND/OR?
1515          */
1516         if (IsA(node, FuncExpr) ||
1517                 IsA(node, OpExpr) ||
1518                 IsA(node, DistinctExpr) ||
1519                 IsA(node, NullIfExpr))
1520                 total->per_tuple += cpu_operator_cost;
1521         else if (IsA(node, ScalarArrayOpExpr))
1522         {
1523                 /* should charge more than 1 op cost, but how many? */
1524                 total->per_tuple += cpu_operator_cost * 10;
1525         }
1526         else if (IsA(node, SubLink))
1527         {
1528                 /* This routine should not be applied to un-planned expressions */
1529                 elog(ERROR, "cannot handle unplanned sub-select");
1530         }
1531         else if (IsA(node, SubPlan))
1532         {
1533                 /*
1534                  * A subplan node in an expression typically indicates that the
1535                  * subplan will be executed on each evaluation, so charge
1536                  * accordingly. (Sub-selects that can be executed as InitPlans
1537                  * have already been removed from the expression.)
1538                  *
1539                  * An exception occurs when we have decided we can implement the
1540                  * subplan by hashing.
1541                  *
1542                  */
1543                 SubPlan    *subplan = (SubPlan *) node;
1544                 Plan       *plan = subplan->plan;
1545
1546                 if (subplan->useHashTable)
1547                 {
1548                         /*
1549                          * If we are using a hash table for the subquery outputs, then
1550                          * the cost of evaluating the query is a one-time cost. We
1551                          * charge one cpu_operator_cost per tuple for the work of
1552                          * loading the hashtable, too.
1553                          */
1554                         total->startup += plan->total_cost +
1555                                 cpu_operator_cost * plan->plan_rows;
1556
1557                         /*
1558                          * The per-tuple costs include the cost of evaluating the
1559                          * lefthand expressions, plus the cost of probing the
1560                          * hashtable. Recursion into the exprs list will handle the
1561                          * lefthand expressions properly, and will count one
1562                          * cpu_operator_cost for each comparison operator.      That is
1563                          * probably too low for the probing cost, but it's hard to
1564                          * make a better estimate, so live with it for now.
1565                          */
1566                 }
1567                 else
1568                 {
1569                         /*
1570                          * Otherwise we will be rescanning the subplan output on each
1571                          * evaluation.  We need to estimate how much of the output we
1572                          * will actually need to scan.  NOTE: this logic should agree
1573                          * with the estimates used by make_subplan() in
1574                          * plan/subselect.c.
1575                          */
1576                         Cost            plan_run_cost = plan->total_cost - plan->startup_cost;
1577
1578                         if (subplan->subLinkType == EXISTS_SUBLINK)
1579                         {
1580                                 /* we only need to fetch 1 tuple */
1581                                 total->per_tuple += plan_run_cost / plan->plan_rows;
1582                         }
1583                         else if (subplan->subLinkType == ALL_SUBLINK ||
1584                                          subplan->subLinkType == ANY_SUBLINK)
1585                         {
1586                                 /* assume we need 50% of the tuples */
1587                                 total->per_tuple += 0.50 * plan_run_cost;
1588                                 /* also charge a cpu_operator_cost per row examined */
1589                                 total->per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost;
1590                         }
1591                         else
1592                         {
1593                                 /* assume we need all tuples */
1594                                 total->per_tuple += plan_run_cost;
1595                         }
1596
1597                         /*
1598                          * Also account for subplan's startup cost. If the subplan is
1599                          * uncorrelated or undirect correlated, AND its topmost node
1600                          * is a Sort or Material node, assume that we'll only need to
1601                          * pay its startup cost once; otherwise assume we pay the
1602                          * startup cost every time.
1603                          */
1604                         if (subplan->parParam == NIL &&
1605                                 (IsA(plan, Sort) ||
1606                                  IsA(plan, Material)))
1607                                 total->startup += plan->startup_cost;
1608                         else
1609                                 total->per_tuple += plan->startup_cost;
1610                 }
1611         }
1612
1613         return expression_tree_walker(node, cost_qual_eval_walker,
1614                                                                   (void *) total);
1615 }
1616
1617
1618 /*
1619  * approx_selectivity
1620  *              Quick-and-dirty estimation of clause selectivities.
1621  *              The input can be either an implicitly-ANDed list of boolean
1622  *              expressions, or a list of RestrictInfo nodes (typically the latter).
1623  *
1624  * The "quick" part comes from caching the selectivity estimates so we can
1625  * avoid recomputing them later.  (Since the same clauses are typically
1626  * examined over and over in different possible join trees, this makes a
1627  * big difference.)
1628  *
1629  * The "dirty" part comes from the fact that the selectivities of multiple
1630  * clauses are estimated independently and multiplied together.  Now
1631  * clauselist_selectivity often can't do any better than that anyhow, but
1632  * for some situations (such as range constraints) it is smarter.
1633  *
1634  * Since we are only using the results to estimate how many potential
1635  * output tuples are generated and passed through qpqual checking, it
1636  * seems OK to live with the approximation.
1637  */
1638 static Selectivity
1639 approx_selectivity(Query *root, List *quals, JoinType jointype)
1640 {
1641         Selectivity total = 1.0;
1642         List       *l;
1643
1644         foreach(l, quals)
1645         {
1646                 Node       *qual = (Node *) lfirst(l);
1647                 Selectivity selec;
1648
1649                 /*
1650                  * RestrictInfo nodes contain a this_selec field reserved for this
1651                  * routine's use, so that it's not necessary to evaluate the qual
1652                  * clause's selectivity more than once.  If the clause's
1653                  * selectivity hasn't been computed yet, the field will contain
1654                  * -1.
1655                  */
1656                 if (qual && IsA(qual, RestrictInfo))
1657                 {
1658                         RestrictInfo *restrictinfo = (RestrictInfo *) qual;
1659
1660                         if (restrictinfo->this_selec < 0)
1661                                 restrictinfo->this_selec =
1662                                         clause_selectivity(root,
1663                                                                            (Node *) restrictinfo->clause,
1664                                                                            0,
1665                                                                            jointype);
1666                         selec = restrictinfo->this_selec;
1667                 }
1668                 else
1669                 {
1670                         /* If it's a bare expression, must always do it the hard way */
1671                         selec = clause_selectivity(root, qual, 0, jointype);
1672                 }
1673                 total *= selec;
1674         }
1675         return total;
1676 }
1677
1678
1679 /*
1680  * set_baserel_size_estimates
1681  *              Set the size estimates for the given base relation.
1682  *
1683  * The rel's targetlist and restrictinfo list must have been constructed
1684  * already.
1685  *
1686  * We set the following fields of the rel node:
1687  *      rows: the estimated number of output tuples (after applying
1688  *                restriction clauses).
1689  *      width: the estimated average output tuple width in bytes.
1690  *      baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
1691  */
1692 void
1693 set_baserel_size_estimates(Query *root, RelOptInfo *rel)
1694 {
1695         double          temp;
1696
1697         /* Should only be applied to base relations */
1698         Assert(rel->relid > 0);
1699
1700         temp = rel->tuples *
1701                 restrictlist_selectivity(root,
1702                                                                  rel->baserestrictinfo,
1703                                                                  rel->relid,
1704                                                                  JOIN_INNER);
1705
1706         /*
1707          * Force estimate to be at least one row, to make explain output look
1708          * better and to avoid possible divide-by-zero when interpolating
1709          * cost.  Make it an integer, too.
1710          */
1711         if (temp < 1.0)
1712                 temp = 1.0;
1713         else
1714                 temp = ceil(temp);
1715
1716         rel->rows = temp;
1717
1718         cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
1719
1720         set_rel_width(root, rel);
1721 }
1722
1723 /*
1724  * set_joinrel_size_estimates
1725  *              Set the size estimates for the given join relation.
1726  *
1727  * The rel's targetlist must have been constructed already, and a
1728  * restriction clause list that matches the given component rels must
1729  * be provided.
1730  *
1731  * Since there is more than one way to make a joinrel for more than two
1732  * base relations, the results we get here could depend on which component
1733  * rel pair is provided.  In theory we should get the same answers no matter
1734  * which pair is provided; in practice, since the selectivity estimation
1735  * routines don't handle all cases equally well, we might not.  But there's
1736  * not much to be done about it.  (Would it make sense to repeat the
1737  * calculations for each pair of input rels that's encountered, and somehow
1738  * average the results?  Probably way more trouble than it's worth.)
1739  *
1740  * It's important that the results for symmetric JoinTypes be symmetric,
1741  * eg, (rel1, rel2, JOIN_LEFT) should produce the same result as (rel2,
1742  * rel1, JOIN_RIGHT).  Also, JOIN_IN should produce the same result as
1743  * JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER.
1744  *
1745  * We set the same relnode fields as set_baserel_size_estimates() does.
1746  */
1747 void
1748 set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
1749                                                    RelOptInfo *outer_rel,
1750                                                    RelOptInfo *inner_rel,
1751                                                    JoinType jointype,
1752                                                    List *restrictlist)
1753 {
1754         Selectivity selec;
1755         double          temp;
1756         UniquePath *upath;
1757
1758         /*
1759          * Compute joinclause selectivity.      Note that we are only considering
1760          * clauses that become restriction clauses at this join level; we are
1761          * not double-counting them because they were not considered in
1762          * estimating the sizes of the component rels.
1763          */
1764         selec = restrictlist_selectivity(root,
1765                                                                          restrictlist,
1766                                                                          0,
1767                                                                          jointype);
1768
1769         /*
1770          * Basically, we multiply size of Cartesian product by selectivity.
1771          *
1772          * If we are doing an outer join, take that into account: the output must
1773          * be at least as large as the non-nullable input.      (Is there any
1774          * chance of being even smarter?)
1775          *
1776          * For JOIN_IN and variants, the Cartesian product is figured with
1777          * respect to a unique-ified input, and then we can clamp to the size
1778          * of the other input.
1779          */
1780         switch (jointype)
1781         {
1782                 case JOIN_INNER:
1783                         temp = outer_rel->rows * inner_rel->rows * selec;
1784                         break;
1785                 case JOIN_LEFT:
1786                         temp = outer_rel->rows * inner_rel->rows * selec;
1787                         if (temp < outer_rel->rows)
1788                                 temp = outer_rel->rows;
1789                         break;
1790                 case JOIN_RIGHT:
1791                         temp = outer_rel->rows * inner_rel->rows * selec;
1792                         if (temp < inner_rel->rows)
1793                                 temp = inner_rel->rows;
1794                         break;
1795                 case JOIN_FULL:
1796                         temp = outer_rel->rows * inner_rel->rows * selec;
1797                         if (temp < outer_rel->rows)
1798                                 temp = outer_rel->rows;
1799                         if (temp < inner_rel->rows)
1800                                 temp = inner_rel->rows;
1801                         break;
1802                 case JOIN_IN:
1803                 case JOIN_UNIQUE_INNER:
1804                         upath = create_unique_path(root, inner_rel,
1805                                                                            inner_rel->cheapest_total_path);
1806                         temp = outer_rel->rows * upath->rows * selec;
1807                         if (temp > outer_rel->rows)
1808                                 temp = outer_rel->rows;
1809                         break;
1810                 case JOIN_REVERSE_IN:
1811                 case JOIN_UNIQUE_OUTER:
1812                         upath = create_unique_path(root, outer_rel,
1813                                                                            outer_rel->cheapest_total_path);
1814                         temp = upath->rows * inner_rel->rows * selec;
1815                         if (temp > inner_rel->rows)
1816                                 temp = inner_rel->rows;
1817                         break;
1818                 default:
1819                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
1820                         temp = 0;                       /* keep compiler quiet */
1821                         break;
1822         }
1823
1824         /*
1825          * Force estimate to be at least one row, to make explain output look
1826          * better and to avoid possible divide-by-zero when interpolating
1827          * cost.  Make it an integer, too.
1828          */
1829         if (temp < 1.0)
1830                 temp = 1.0;
1831         else
1832                 temp = ceil(temp);
1833
1834         rel->rows = temp;
1835
1836         /*
1837          * We need not compute the output width here, because
1838          * build_joinrel_tlist already did.
1839          */
1840 }
1841
1842 /*
1843  * set_function_size_estimates
1844  *              Set the size estimates for a base relation that is a function call.
1845  *
1846  * The rel's targetlist and restrictinfo list must have been constructed
1847  * already.
1848  *
1849  * We set the following fields of the rel node:
1850  *      rows: the estimated number of output tuples (after applying
1851  *                restriction clauses).
1852  *      width: the estimated average output tuple width in bytes.
1853  *      baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
1854  */
1855 void
1856 set_function_size_estimates(Query *root, RelOptInfo *rel)
1857 {
1858         double          temp;
1859
1860         /* Should only be applied to base relations that are functions */
1861         Assert(rel->relid > 0);
1862         Assert(rel->rtekind == RTE_FUNCTION);
1863
1864         /*
1865          * Estimate number of rows the function itself will return.
1866          *
1867          * XXX no idea how to do this yet; but should at least check whether
1868          * function returns set or not...
1869          */
1870         rel->tuples = 1000;
1871
1872         /* Now estimate number of output rows */
1873         temp = rel->tuples *
1874                 restrictlist_selectivity(root,
1875                                                                  rel->baserestrictinfo,
1876                                                                  rel->relid,
1877                                                                  JOIN_INNER);
1878
1879         /*
1880          * Force estimate to be at least one row, to make explain output look
1881          * better and to avoid possible divide-by-zero when interpolating
1882          * cost.  Make it an integer, too.
1883          */
1884         if (temp < 1.0)
1885                 temp = 1.0;
1886         else
1887                 temp = ceil(temp);
1888
1889         rel->rows = temp;
1890
1891         cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
1892
1893         set_rel_width(root, rel);
1894 }
1895
1896
1897 /*
1898  * set_rel_width
1899  *              Set the estimated output width of a base relation.
1900  *
1901  * NB: this works best on plain relations because it prefers to look at
1902  * real Vars.  It will fail to make use of pg_statistic info when applied
1903  * to a subquery relation, even if the subquery outputs are simple vars
1904  * that we could have gotten info for.  Is it worth trying to be smarter
1905  * about subqueries?
1906  *
1907  * The per-attribute width estimates are cached for possible re-use while
1908  * building join relations.
1909  */
1910 static void
1911 set_rel_width(Query *root, RelOptInfo *rel)
1912 {
1913         int32           tuple_width = 0;
1914         List       *tllist;
1915
1916         foreach(tllist, FastListValue(&rel->reltargetlist))
1917         {
1918                 Var                *var = (Var *) lfirst(tllist);
1919                 int                     ndx = var->varattno - rel->min_attr;
1920                 Oid                     relid;
1921                 int32           item_width;
1922
1923                 Assert(IsA(var, Var));
1924
1925                 /*
1926                  * The width probably hasn't been cached yet, but may as well
1927                  * check
1928                  */
1929                 if (rel->attr_widths[ndx] > 0)
1930                 {
1931                         tuple_width += rel->attr_widths[ndx];
1932                         continue;
1933                 }
1934
1935                 relid = getrelid(var->varno, root->rtable);
1936                 if (relid != InvalidOid)
1937                 {
1938                         item_width = get_attavgwidth(relid, var->varattno);
1939                         if (item_width > 0)
1940                         {
1941                                 rel->attr_widths[ndx] = item_width;
1942                                 tuple_width += item_width;
1943                                 continue;
1944                         }
1945                 }
1946
1947                 /*
1948                  * Not a plain relation, or can't find statistics for it. Estimate
1949                  * using just the type info.
1950                  */
1951                 item_width = get_typavgwidth(var->vartype, var->vartypmod);
1952                 Assert(item_width > 0);
1953                 rel->attr_widths[ndx] = item_width;
1954                 tuple_width += item_width;
1955         }
1956         Assert(tuple_width >= 0);
1957         rel->width = tuple_width;
1958 }
1959
1960 /*
1961  * relation_byte_size
1962  *        Estimate the storage space in bytes for a given number of tuples
1963  *        of a given width (size in bytes).
1964  */
1965 static double
1966 relation_byte_size(double tuples, int width)
1967 {
1968         return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData)));
1969 }
1970
1971 /*
1972  * page_size
1973  *        Returns an estimate of the number of pages covered by a given
1974  *        number of tuples of a given width (size in bytes).
1975  */
1976 static double
1977 page_size(double tuples, int width)
1978 {
1979         return ceil(relation_byte_size(tuples, width) / BLCKSZ);
1980 }