granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/costsize.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * costsize.c
   4  *        Routines to compute (and set) relation sizes and path costs
   5  *
   6  * Path costs are measured in units of disk accesses: one sequential page
   7  * fetch has cost 1.  All else is scaled relative to a page fetch, using
   8  * the scaling parameters
   9  *
  10  *      random_page_cost        Cost of a non-sequential page fetch
  11  *      cpu_tuple_cost          Cost of typical CPU time to process a tuple
  12  *      cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
  13  *      cpu_operator_cost       Cost of CPU time to process a typical WHERE operator
  14  *
  15  * We also use a rough estimate "effective_cache_size" of the number of
  16  * disk pages in Postgres + OS-level disk cache.  (We can't simply use
  17  * NBuffers for this purpose because that would ignore the effects of
  18  * the kernel's disk cache.)
  19  *
  20  * Obviously, taking constants for these values is an oversimplification,
  21  * but it's tough enough to get any useful estimates even at this level of
  22  * detail.      Note that all of these parameters are user-settable, in case
  23  * the default values are drastically off for a particular platform.
  24  *
  25  * We compute two separate costs for each path:
  26  *              total_cost: total estimated cost to fetch all tuples
  27  *              startup_cost: cost that is expended before first tuple is fetched
  28  * In some scenarios, such as when there is a LIMIT or we are implementing
  29  * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
  30  * path's result.  A caller can estimate the cost of fetching a partial
  31  * result by interpolating between startup_cost and total_cost.  In detail:
  32  *              actual_cost = startup_cost +
  33  *                      (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
  34  * Note that a base relation's rows count (and, by extension, plan_rows for
  35  * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
  36  * that this equation works properly.  (Also, these routines guarantee not to
  37  * set the rows count to zero, so there will be no zero divide.)  The LIMIT is
  38  * applied as a top-level plan node.
  39  *
  40  * For largely historical reasons, most of the routines in this module use
  41  * the passed result Path only to store their startup_cost and total_cost
  42  * results into.  All the input data they need is passed as separate
  43  * parameters, even though much of it could be extracted from the Path.
  44  * An exception is made for the cost_XXXjoin() routines, which expect all
  45  * the non-cost fields of the passed XXXPath to be filled in.
  46  *
  47  *
  48  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  49  * Portions Copyright (c) 1994, Regents of the University of California
  50  *
  51  * IDENTIFICATION
  52  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.112 2003/08/04 00:43:20 momjian Exp $
  53  *
  54  *-------------------------------------------------------------------------
  55  */
  56
  57 #include "postgres.h"
  58
  59 #include <math.h>
  60
  61 #include "catalog/pg_statistic.h"
  62 #include "executor/nodeHash.h"
  63 #include "miscadmin.h"
  64 #include "optimizer/clauses.h"
  65 #include "optimizer/cost.h"
  66 #include "optimizer/pathnode.h"
  67 #include "parser/parsetree.h"
  68 #include "utils/selfuncs.h"
  69 #include "utils/lsyscache.h"
  70 #include "utils/syscache.h"
  71
  72
  73 #define LOG2(x)  (log(x) / 0.693147180559945)
  74 #define LOG6(x)  (log(x) / 1.79175946922805)
  75
  76 /*
  77  * Some Paths return less than the nominal number of rows of their parent
  78  * relations; join nodes need to do this to get the correct input count:
  79  */
  80 #define PATH_ROWS(path) \
  81         (IsA(path, UniquePath) ? \
  82          ((UniquePath *) (path))->rows : \
  83          (path)->parent->rows)
  84
  85
  86 double          effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
  87 double          random_page_cost = DEFAULT_RANDOM_PAGE_COST;
  88 double          cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
  89 double          cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
  90 double          cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
  91
  92 Cost            disable_cost = 100000000.0;
  93
  94 bool            enable_seqscan = true;
  95 bool            enable_indexscan = true;
  96 bool            enable_tidscan = true;
  97 bool            enable_sort = true;
  98 bool            enable_hashagg = true;
  99 bool            enable_nestloop = true;
 100 bool            enable_mergejoin = true;
 101 bool            enable_hashjoin = true;
 102
 103
 104 static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
 105                                                  int nbuckets);
 106 static bool cost_qual_eval_walker(Node *node, QualCost * total);
 107 static Selectivity approx_selectivity(Query *root, List *quals,
 108                                    JoinType jointype);
 109 static void set_rel_width(Query *root, RelOptInfo *rel);
 110 static double relation_byte_size(double tuples, int width);
 111 static double page_size(double tuples, int width);
 112
 113
 114 /*
 115  * cost_seqscan
 116  *        Determines and returns the cost of scanning a relation sequentially.
 117  */
 118 void
 119 cost_seqscan(Path *path, Query *root,
 120                          RelOptInfo *baserel)
 121 {
 122         Cost            startup_cost = 0;
 123         Cost            run_cost = 0;
 124         Cost            cpu_per_tuple;
 125
 126         /* Should only be applied to base relations */
 127         Assert(baserel->relid > 0);
 128         Assert(baserel->rtekind == RTE_RELATION);
 129
 130         if (!enable_seqscan)
 131                 startup_cost += disable_cost;
 132
 133         /*
 134          * disk costs
 135          *
 136          * The cost of reading a page sequentially is 1.0, by definition. Note
 137          * that the Unix kernel will typically do some amount of read-ahead
 138          * optimization, so that this cost is less than the true cost of
 139          * reading a page from disk.  We ignore that issue here, but must take
 140          * it into account when estimating the cost of non-sequential
 141          * accesses!
 142          */
 143         run_cost += baserel->pages; /* sequential fetches with cost 1.0 */
 144
 145         /* CPU costs */
 146         startup_cost += baserel->baserestrictcost.startup;
 147         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 148         run_cost += cpu_per_tuple * baserel->tuples;
 149
 150         path->startup_cost = startup_cost;
 151         path->total_cost = startup_cost + run_cost;
 152 }
 153
 154 /*
 155  * cost_nonsequential_access
 156  *        Estimate the cost of accessing one page at random from a relation
 157  *        (or sort temp file) of the given size in pages.
 158  *
 159  * The simplistic model that the cost is random_page_cost is what we want
 160  * to use for large relations; but for small ones that is a serious
 161  * overestimate because of the effects of caching.      This routine tries to
 162  * account for that.
 163  *
 164  * Unfortunately we don't have any good way of estimating the effective cache
 165  * size we are working with --- we know that Postgres itself has NBuffers
 166  * internal buffers, but the size of the kernel's disk cache is uncertain,
 167  * and how much of it we get to use is even less certain.  We punt the problem
 168  * for now by assuming we are given an effective_cache_size parameter.
 169  *
 170  * Given a guesstimated cache size, we estimate the actual I/O cost per page
 171  * with the entirely ad-hoc equations:
 172  *      if relpages >= effective_cache_size:
 173  *              random_page_cost * (1 - (effective_cache_size/relpages)/2)
 174  *      if relpages < effective_cache_size:
 175  *              1 + (random_page_cost/2-1) * (relpages/effective_cache_size) ** 2
 176  * These give the right asymptotic behavior (=> 1.0 as relpages becomes
 177  * small, => random_page_cost as it becomes large) and meet in the middle
 178  * with the estimate that the cache is about 50% effective for a relation
 179  * of the same size as effective_cache_size.  (XXX this is probably all
 180  * wrong, but I haven't been able to find any theory about how effective
 181  * a disk cache should be presumed to be.)
 182  */
 183 static Cost
 184 cost_nonsequential_access(double relpages)
 185 {
 186         double          relsize;
 187
 188         /* don't crash on bad input data */
 189         if (relpages <= 0.0 || effective_cache_size <= 0.0)
 190                 return random_page_cost;
 191
 192         relsize = relpages / effective_cache_size;
 193
 194         if (relsize >= 1.0)
 195                 return random_page_cost * (1.0 - 0.5 / relsize);
 196         else
 197                 return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize;
 198 }
 199
 200 /*
 201  * cost_index
 202  *        Determines and returns the cost of scanning a relation using an index.
 203  *
 204  *        NOTE: an indexscan plan node can actually represent several passes,
 205  *        but here we consider the cost of just one pass.
 206  *
 207  * 'root' is the query root
 208  * 'baserel' is the base relation the index is for
 209  * 'index' is the index to be used
 210  * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
 211  * 'is_injoin' is T if we are considering using the index scan as the inside
 212  *              of a nestloop join (hence, some of the indexQuals are join clauses)
 213  *
 214  * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
 215  * Any additional quals evaluated as qpquals may reduce the number of returned
 216  * tuples, but they won't reduce the number of tuples we have to fetch from
 217  * the table, so they don't reduce the scan cost.
 218  */
 219 void
 220 cost_index(Path *path, Query *root,
 221                    RelOptInfo *baserel,
 222                    IndexOptInfo *index,
 223                    List *indexQuals,
 224                    bool is_injoin)
 225 {
 226         Cost            startup_cost = 0;
 227         Cost            run_cost = 0;
 228         Cost            indexStartupCost;
 229         Cost            indexTotalCost;
 230         Selectivity indexSelectivity;
 231         double          indexCorrelation,
 232                                 csquared;
 233         Cost            min_IO_cost,
 234                                 max_IO_cost;
 235         Cost            cpu_per_tuple;
 236         double          tuples_fetched;
 237         double          pages_fetched;
 238         double          T,
 239                                 b;
 240
 241         /* Should only be applied to base relations */
 242         Assert(IsA(baserel, RelOptInfo) &&
 243                    IsA(index, IndexOptInfo));
 244         Assert(baserel->relid > 0);
 245         Assert(baserel->rtekind == RTE_RELATION);
 246
 247         if (!enable_indexscan)
 248                 startup_cost += disable_cost;
 249
 250         /*
 251          * Call index-access-method-specific code to estimate the processing
 252          * cost for scanning the index, as well as the selectivity of the
 253          * index (ie, the fraction of main-table tuples we will have to
 254          * retrieve) and its correlation to the main-table tuple order.
 255          */
 256         OidFunctionCall8(index->amcostestimate,
 257                                          PointerGetDatum(root),
 258                                          PointerGetDatum(baserel),
 259                                          PointerGetDatum(index),
 260                                          PointerGetDatum(indexQuals),
 261                                          PointerGetDatum(&indexStartupCost),
 262                                          PointerGetDatum(&indexTotalCost),
 263                                          PointerGetDatum(&indexSelectivity),
 264                                          PointerGetDatum(&indexCorrelation));
 265
 266         /* all costs for touching index itself included here */
 267         startup_cost += indexStartupCost;
 268         run_cost += indexTotalCost - indexStartupCost;
 269
 270         /*----------
 271          * Estimate number of main-table tuples and pages fetched.
 272          *
 273          * When the index ordering is uncorrelated with the table ordering,
 274          * we use an approximation proposed by Mackert and Lohman, "Index Scans
 275          * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
 276          * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
 277          * The Mackert and Lohman approximation is that the number of pages
 278          * fetched is
 279          *      PF =
 280          *              min(2TNs/(2T+Ns), T)                    when T <= b
 281          *              2TNs/(2T+Ns)                                    when T > b and Ns <= 2Tb/(2T-b)
 282          *              b + (Ns - 2Tb/(2T-b))*(T-b)/T   when T > b and Ns > 2Tb/(2T-b)
 283          * where
 284          *              T = # pages in table
 285          *              N = # tuples in table
 286          *              s = selectivity = fraction of table to be scanned
 287          *              b = # buffer pages available (we include kernel space here)
 288          *
 289          * When the index ordering is exactly correlated with the table ordering
 290          * (just after a CLUSTER, for example), the number of pages fetched should
 291          * be just sT.  What's more, these will be sequential fetches, not the
 292          * random fetches that occur in the uncorrelated case.  So, depending on
 293          * the extent of correlation, we should estimate the actual I/O cost
 294          * somewhere between s * T * 1.0 and PF * random_cost.  We currently
 295          * interpolate linearly between these two endpoints based on the
 296          * correlation squared (XXX is that appropriate?).
 297          *
 298          * In any case the number of tuples fetched is Ns.
 299          *----------
 300          */
 301
 302         tuples_fetched = indexSelectivity * baserel->tuples;
 303         /* Don't believe estimates less than 1... */
 304         if (tuples_fetched < 1.0)
 305                 tuples_fetched = 1.0;
 306
 307         /* This part is the Mackert and Lohman formula */
 308
 309         T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
 310         b = (effective_cache_size > 1) ? effective_cache_size : 1.0;
 311
 312         if (T <= b)
 313         {
 314                 pages_fetched =
 315                         (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
 316                 if (pages_fetched > T)
 317                         pages_fetched = T;
 318         }
 319         else
 320         {
 321                 double          lim;
 322
 323                 lim = (2.0 * T * b) / (2.0 * T - b);
 324                 if (tuples_fetched <= lim)
 325                 {
 326                         pages_fetched =
 327                                 (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
 328                 }
 329                 else
 330                 {
 331                         pages_fetched =
 332                                 b + (tuples_fetched - lim) * (T - b) / T;
 333                 }
 334         }
 335
 336         /*
 337          * min_IO_cost corresponds to the perfectly correlated case
 338          * (csquared=1), max_IO_cost to the perfectly uncorrelated case
 339          * (csquared=0).  Note that we just charge random_page_cost per page
 340          * in the uncorrelated case, rather than using
 341          * cost_nonsequential_access, since we've already accounted for
 342          * caching effects by using the Mackert model.
 343          */
 344         min_IO_cost = ceil(indexSelectivity * T);
 345         max_IO_cost = pages_fetched * random_page_cost;
 346
 347         /*
 348          * Now interpolate based on estimated index order correlation to get
 349          * total disk I/O cost for main table accesses.
 350          */
 351         csquared = indexCorrelation * indexCorrelation;
 352
 353         run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
 354
 355         /*
 356          * Estimate CPU costs per tuple.
 357          *
 358          * Normally the indexquals will be removed from the list of restriction
 359          * clauses that we have to evaluate as qpquals, so we should subtract
 360          * their costs from baserestrictcost.  But if we are doing a join then
 361          * some of the indexquals are join clauses and shouldn't be
 362          * subtracted. Rather than work out exactly how much to subtract, we
 363          * don't subtract anything.
 364          *
 365          * XXX For a lossy index, not all the quals will be removed and so we
 366          * really shouldn't subtract their costs; but detecting that seems
 367          * more expensive than it's worth.
 368          */
 369         startup_cost += baserel->baserestrictcost.startup;
 370         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 371
 372         if (!is_injoin)
 373         {
 374                 QualCost        index_qual_cost;
 375
 376                 cost_qual_eval(&index_qual_cost, indexQuals);
 377                 cpu_per_tuple -= index_qual_cost.per_tuple;
 378         }
 379
 380         run_cost += cpu_per_tuple * tuples_fetched;
 381
 382         path->startup_cost = startup_cost;
 383         path->total_cost = startup_cost + run_cost;
 384 }
 385
 386 /*
 387  * cost_tidscan
 388  *        Determines and returns the cost of scanning a relation using TIDs.
 389  */
 390 void
 391 cost_tidscan(Path *path, Query *root,
 392                          RelOptInfo *baserel, List *tideval)
 393 {
 394         Cost            startup_cost = 0;
 395         Cost            run_cost = 0;
 396         Cost            cpu_per_tuple;
 397         int                     ntuples = length(tideval);
 398
 399         /* Should only be applied to base relations */
 400         Assert(baserel->relid > 0);
 401         Assert(baserel->rtekind == RTE_RELATION);
 402
 403         if (!enable_tidscan)
 404                 startup_cost += disable_cost;
 405
 406         /* disk costs --- assume each tuple on a different page */
 407         run_cost += random_page_cost * ntuples;
 408
 409         /* CPU costs */
 410         startup_cost += baserel->baserestrictcost.startup;
 411         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 412         run_cost += cpu_per_tuple * ntuples;
 413
 414         path->startup_cost = startup_cost;
 415         path->total_cost = startup_cost + run_cost;
 416 }
 417
 418 /*
 419  * cost_subqueryscan
 420  *        Determines and returns the cost of scanning a subquery RTE.
 421  */
 422 void
 423 cost_subqueryscan(Path *path, RelOptInfo *baserel)
 424 {
 425         Cost            startup_cost;
 426         Cost            run_cost;
 427         Cost            cpu_per_tuple;
 428
 429         /* Should only be applied to base relations that are subqueries */
 430         Assert(baserel->relid > 0);
 431         Assert(baserel->rtekind == RTE_SUBQUERY);
 432
 433         /*
 434          * Cost of path is cost of evaluating the subplan, plus cost of
 435          * evaluating any restriction clauses that will be attached to the
 436          * SubqueryScan node, plus cpu_tuple_cost to account for selection and
 437          * projection overhead.
 438          */
 439         path->startup_cost = baserel->subplan->startup_cost;
 440         path->total_cost = baserel->subplan->total_cost;
 441
 442         startup_cost = baserel->baserestrictcost.startup;
 443         cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 444         run_cost = cpu_per_tuple * baserel->tuples;
 445
 446         path->startup_cost += startup_cost;
 447         path->total_cost += startup_cost + run_cost;
 448 }
 449
 450 /*
 451  * cost_functionscan
 452  *        Determines and returns the cost of scanning a function RTE.
 453  */
 454 void
 455 cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
 456 {
 457         Cost            startup_cost = 0;
 458         Cost            run_cost = 0;
 459         Cost            cpu_per_tuple;
 460
 461         /* Should only be applied to base relations that are functions */
 462         Assert(baserel->relid > 0);
 463         Assert(baserel->rtekind == RTE_FUNCTION);
 464
 465         /*
 466          * For now, estimate function's cost at one operator eval per function
 467          * call.  Someday we should revive the function cost estimate columns
 468          * in pg_proc...
 469          */
 470         cpu_per_tuple = cpu_operator_cost;
 471
 472         /* Add scanning CPU costs */
 473         startup_cost += baserel->baserestrictcost.startup;
 474         cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
 475         run_cost += cpu_per_tuple * baserel->tuples;
 476
 477         path->startup_cost = startup_cost;
 478         path->total_cost = startup_cost + run_cost;
 479 }
 480
 481 /*
 482  * cost_sort
 483  *        Determines and returns the cost of sorting a relation, including
 484  *        the cost of reading the input data.
 485  *
 486  * If the total volume of data to sort is less than SortMem, we will do
 487  * an in-memory sort, which requires no I/O and about t*log2(t) tuple
 488  * comparisons for t tuples.
 489  *
 490  * If the total volume exceeds SortMem, we switch to a tape-style merge
 491  * algorithm.  There will still be about t*log2(t) tuple comparisons in
 492  * total, but we will also need to write and read each tuple once per
 493  * merge pass.  We expect about ceil(log6(r)) merge passes where r is the
 494  * number of initial runs formed (log6 because tuplesort.c uses six-tape
 495  * merging).  Since the average initial run should be about twice SortMem,
 496  * we have
 497  *              disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem)))
 498  *              cpu = comparison_cost * t * log2(t)
 499  *
 500  * The disk traffic is assumed to be half sequential and half random
 501  * accesses (XXX can't we refine that guess?)
 502  *
 503  * We charge two operator evals per tuple comparison, which should be in
 504  * the right ballpark in most cases.
 505  *
 506  * 'pathkeys' is a list of sort keys
 507  * 'input_cost' is the total cost for reading the input data
 508  * 'tuples' is the number of tuples in the relation
 509  * 'width' is the average tuple width in bytes
 510  *
 511  * NOTE: some callers currently pass NIL for pathkeys because they
 512  * can't conveniently supply the sort keys.  Since this routine doesn't
 513  * currently do anything with pathkeys anyway, that doesn't matter...
 514  * but if it ever does, it should react gracefully to lack of key data.
 515  * (Actually, the thing we'd most likely be interested in is just the number
 516  * of sort keys, which all callers *could* supply.)
 517  */
 518 void
 519 cost_sort(Path *path, Query *root,
 520                   List *pathkeys, Cost input_cost, double tuples, int width)
 521 {
 522         Cost            startup_cost = input_cost;
 523         Cost            run_cost = 0;
 524         double          nbytes = relation_byte_size(tuples, width);
 525         long            sortmembytes = SortMem * 1024L;
 526
 527         if (!enable_sort)
 528                 startup_cost += disable_cost;
 529
 530         /*
 531          * We want to be sure the cost of a sort is never estimated as zero,
 532          * even if passed-in tuple count is zero.  Besides, mustn't do
 533          * log(0)...
 534          */
 535         if (tuples < 2.0)
 536                 tuples = 2.0;
 537
 538         /*
 539          * CPU costs
 540          *
 541          * Assume about two operator evals per tuple comparison and N log2 N
 542          * comparisons
 543          */
 544         startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
 545
 546         /* disk costs */
 547         if (nbytes > sortmembytes)
 548         {
 549                 double          npages = ceil(nbytes / BLCKSZ);
 550                 double          nruns = nbytes / (sortmembytes * 2);
 551                 double          log_runs = ceil(LOG6(nruns));
 552                 double          npageaccesses;
 553
 554                 if (log_runs < 1.0)
 555                         log_runs = 1.0;
 556                 npageaccesses = 2.0 * npages * log_runs;
 557                 /* Assume half are sequential (cost 1), half are not */
 558                 startup_cost += npageaccesses *
 559                         (1.0 + cost_nonsequential_access(npages)) * 0.5;
 560         }
 561
 562         /*
 563          * Also charge a small amount (arbitrarily set equal to operator cost)
 564          * per extracted tuple.
 565          */
 566         run_cost += cpu_operator_cost * tuples;
 567
 568         path->startup_cost = startup_cost;
 569         path->total_cost = startup_cost + run_cost;
 570 }
 571
 572 /*
 573  * cost_material
 574  *        Determines and returns the cost of materializing a relation, including
 575  *        the cost of reading the input data.
 576  *
 577  * If the total volume of data to materialize exceeds SortMem, we will need
 578  * to write it to disk, so the cost is much higher in that case.
 579  */
 580 void
 581 cost_material(Path *path,
 582                           Cost input_cost, double tuples, int width)
 583 {
 584         Cost            startup_cost = input_cost;
 585         Cost            run_cost = 0;
 586         double          nbytes = relation_byte_size(tuples, width);
 587         long            sortmembytes = SortMem * 1024L;
 588
 589         /* disk costs */
 590         if (nbytes > sortmembytes)
 591         {
 592                 double          npages = ceil(nbytes / BLCKSZ);
 593
 594                 /* We'll write during startup and read during retrieval */
 595                 startup_cost += npages;
 596                 run_cost += npages;
 597         }
 598
 599         /*
 600          * Also charge a small amount per extracted tuple.      We use
 601          * cpu_tuple_cost so that it doesn't appear worthwhile to materialize
 602          * a bare seqscan.
 603          */
 604         run_cost += cpu_tuple_cost * tuples;
 605
 606         path->startup_cost = startup_cost;
 607         path->total_cost = startup_cost + run_cost;
 608 }
 609
 610 /*
 611  * cost_agg
 612  *              Determines and returns the cost of performing an Agg plan node,
 613  *              including the cost of its input.
 614  *
 615  * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
 616  * are for appropriately-sorted input.
 617  */
 618 void
 619 cost_agg(Path *path, Query *root,
 620                  AggStrategy aggstrategy, int numAggs,
 621                  int numGroupCols, double numGroups,
 622                  Cost input_startup_cost, Cost input_total_cost,
 623                  double input_tuples)
 624 {
 625         Cost            startup_cost;
 626         Cost            total_cost;
 627
 628         /*
 629          * We charge one cpu_operator_cost per aggregate function per input
 630          * tuple, and another one per output tuple (corresponding to transfn
 631          * and finalfn calls respectively).  If we are grouping, we charge an
 632          * additional cpu_operator_cost per grouping column per input tuple
 633          * for grouping comparisons.
 634          *
 635          * We will produce a single output tuple if not grouping, and a tuple per
 636          * group otherwise.
 637          *
 638          * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
 639          * same total CPU cost, but AGG_SORTED has lower startup cost.  If the
 640          * input path is already sorted appropriately, AGG_SORTED should be
 641          * preferred (since it has no risk of memory overflow).  This will
 642          * happen as long as the computed total costs are indeed exactly equal
 643          * --- but if there's roundoff error we might do the wrong thing.  So
 644          * be sure that the computations below form the same intermediate
 645          * values in the same order.
 646          */
 647         if (aggstrategy == AGG_PLAIN)
 648         {
 649                 startup_cost = input_total_cost;
 650                 startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
 651                 /* we aren't grouping */
 652                 total_cost = startup_cost;
 653         }
 654         else if (aggstrategy == AGG_SORTED)
 655         {
 656                 /* Here we are able to deliver output on-the-fly */
 657                 startup_cost = input_startup_cost;
 658                 total_cost = input_total_cost;
 659                 /* calcs phrased this way to match HASHED case, see note above */
 660                 total_cost += cpu_operator_cost * input_tuples * numGroupCols;
 661                 total_cost += cpu_operator_cost * input_tuples * numAggs;
 662                 total_cost += cpu_operator_cost * numGroups * numAggs;
 663         }
 664         else
 665         {
 666                 /* must be AGG_HASHED */
 667                 startup_cost = input_total_cost;
 668                 startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
 669                 startup_cost += cpu_operator_cost * input_tuples * numAggs;
 670                 total_cost = startup_cost;
 671                 total_cost += cpu_operator_cost * numGroups * numAggs;
 672         }
 673
 674         path->startup_cost = startup_cost;
 675         path->total_cost = total_cost;
 676 }
 677
 678 /*
 679  * cost_group
 680  *              Determines and returns the cost of performing a Group plan node,
 681  *              including the cost of its input.
 682  *
 683  * Note: caller must ensure that input costs are for appropriately-sorted
 684  * input.
 685  */
 686 void
 687 cost_group(Path *path, Query *root,
 688                    int numGroupCols, double numGroups,
 689                    Cost input_startup_cost, Cost input_total_cost,
 690                    double input_tuples)
 691 {
 692         Cost            startup_cost;
 693         Cost            total_cost;
 694
 695         startup_cost = input_startup_cost;
 696         total_cost = input_total_cost;
 697
 698         /*
 699          * Charge one cpu_operator_cost per comparison per input tuple. We
 700          * assume all columns get compared at most of the tuples.
 701          */
 702         total_cost += cpu_operator_cost * input_tuples * numGroupCols;
 703
 704         path->startup_cost = startup_cost;
 705         path->total_cost = total_cost;
 706 }
 707
 708 /*
 709  * cost_nestloop
 710  *        Determines and returns the cost of joining two relations using the
 711  *        nested loop algorithm.
 712  *
 713  * 'path' is already filled in except for the cost fields
 714  */
 715 void
 716 cost_nestloop(NestPath *path, Query *root)
 717 {
 718         Path       *outer_path = path->outerjoinpath;
 719         Path       *inner_path = path->innerjoinpath;
 720         List       *restrictlist = path->joinrestrictinfo;
 721         Cost            startup_cost = 0;
 722         Cost            run_cost = 0;
 723         Cost            cpu_per_tuple;
 724         QualCost        restrict_qual_cost;
 725         double          outer_path_rows = PATH_ROWS(outer_path);
 726         double          inner_path_rows = PATH_ROWS(inner_path);
 727         double          ntuples;
 728         Selectivity joininfactor;
 729
 730         if (!enable_nestloop)
 731                 startup_cost += disable_cost;
 732
 733         /*
 734          * If we're doing JOIN_IN then we will stop scanning inner tuples for
 735          * an outer tuple as soon as we have one match.  Account for the
 736          * effects of this by scaling down the cost estimates in proportion to
 737          * the expected output size.  (This assumes that all the quals
 738          * attached to the join are IN quals, which should be true.)
 739          *
 740          * Note: it's probably bogus to use the normal selectivity calculation
 741          * here when either the outer or inner path is a UniquePath.
 742          */
 743         if (path->jointype == JOIN_IN)
 744         {
 745                 Selectivity qual_selec = approx_selectivity(root, restrictlist,
 746                                                                                                         path->jointype);
 747                 double          qptuples;
 748
 749                 qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
 750                 if (qptuples > path->path.parent->rows)
 751                         joininfactor = path->path.parent->rows / qptuples;
 752                 else
 753                         joininfactor = 1.0;
 754         }
 755         else
 756                 joininfactor = 1.0;
 757
 758         /* cost of source data */
 759
 760         /*
 761          * NOTE: clearly, we must pay both outer and inner paths' startup_cost
 762          * before we can start returning tuples, so the join's startup cost is
 763          * their sum.  What's not so clear is whether the inner path's
 764          * startup_cost must be paid again on each rescan of the inner path.
 765          * This is not true if the inner path is materialized or is a
 766          * hashjoin, but probably is true otherwise.
 767          */
 768         startup_cost += outer_path->startup_cost + inner_path->startup_cost;
 769         run_cost += outer_path->total_cost - outer_path->startup_cost;
 770         if (IsA(inner_path, MaterialPath) ||
 771                 IsA(inner_path, HashPath))
 772         {
 773                 /* charge only run cost for each iteration of inner path */
 774         }
 775         else
 776         {
 777                 /*
 778                  * charge startup cost for each iteration of inner path, except we
 779                  * already charged the first startup_cost in our own startup
 780                  */
 781                 run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
 782         }
 783         run_cost += outer_path_rows *
 784                 (inner_path->total_cost - inner_path->startup_cost) * joininfactor;
 785
 786         /*
 787          * Compute number of tuples processed (not number emitted!). If inner
 788          * path is an indexscan, be sure to use its estimated output row
 789          * count, which may be lower than the restriction-clause-only row
 790          * count of its parent.  (We don't include this case in the PATH_ROWS
 791          * macro because it applies *only* to a nestloop's inner relation.)
 792          * Note: it is correct to use the unadjusted inner_path_rows in the
 793          * above calculation for joininfactor, since otherwise we'd be
 794          * double-counting the selectivity of the join clause being used for
 795          * the index.
 796          */
 797         if (IsA(inner_path, IndexPath))
 798                 inner_path_rows = ((IndexPath *) inner_path)->rows;
 799
 800         ntuples = inner_path_rows * outer_path_rows;
 801
 802         /* CPU costs */
 803         cost_qual_eval(&restrict_qual_cost, restrictlist);
 804         startup_cost += restrict_qual_cost.startup;
 805         cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
 806         run_cost += cpu_per_tuple * ntuples;
 807
 808         path->path.startup_cost = startup_cost;
 809         path->path.total_cost = startup_cost + run_cost;
 810 }
 811
 812 /*
 813  * cost_mergejoin
 814  *        Determines and returns the cost of joining two relations using the
 815  *        merge join algorithm.
 816  *
 817  * 'path' is already filled in except for the cost fields
 818  *
 819  * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
 820  * outersortkeys and innersortkeys are lists of the keys to be used
 821  * to sort the outer and inner relations, or NIL if no explicit
 822  * sort is needed because the source path is already ordered.
 823  */
 824 void
 825 cost_mergejoin(MergePath *path, Query *root)
 826 {
 827         Path       *outer_path = path->jpath.outerjoinpath;
 828         Path       *inner_path = path->jpath.innerjoinpath;
 829         List       *restrictlist = path->jpath.joinrestrictinfo;
 830         List       *mergeclauses = path->path_mergeclauses;
 831         List       *outersortkeys = path->outersortkeys;
 832         List       *innersortkeys = path->innersortkeys;
 833         Cost            startup_cost = 0;
 834         Cost            run_cost = 0;
 835         Cost            cpu_per_tuple;
 836         Selectivity merge_selec;
 837         Selectivity qp_selec;
 838         QualCost        merge_qual_cost;
 839         QualCost        qp_qual_cost;
 840         RestrictInfo *firstclause;
 841         List       *qpquals;
 842         double          outer_path_rows = PATH_ROWS(outer_path);
 843         double          inner_path_rows = PATH_ROWS(inner_path);
 844         double          outer_rows,
 845                                 inner_rows;
 846         double          mergejointuples,
 847                                 rescannedtuples;
 848         double          qptuples;
 849         double          rescanratio;
 850         Selectivity outerscansel,
 851                                 innerscansel;
 852         Selectivity joininfactor;
 853         Path            sort_path;              /* dummy for result of cost_sort */
 854
 855         if (!enable_mergejoin)
 856                 startup_cost += disable_cost;
 857
 858         /*
 859          * Compute cost and selectivity of the mergequals and qpquals (other
 860          * restriction clauses) separately.  We use approx_selectivity here
 861          * for speed --- in most cases, any errors won't affect the result
 862          * much.
 863          *
 864          * Note: it's probably bogus to use the normal selectivity calculation
 865          * here when either the outer or inner path is a UniquePath.
 866          */
 867         merge_selec = approx_selectivity(root, mergeclauses,
 868                                                                          path->jpath.jointype);
 869         cost_qual_eval(&merge_qual_cost, mergeclauses);
 870         qpquals = set_ptrDifference(restrictlist, mergeclauses);
 871         qp_selec = approx_selectivity(root, qpquals,
 872                                                                   path->jpath.jointype);
 873         cost_qual_eval(&qp_qual_cost, qpquals);
 874         freeList(qpquals);
 875
 876         /* approx # tuples passing the merge quals */
 877         mergejointuples = ceil(merge_selec * outer_path_rows * inner_path_rows);
 878         /* approx # tuples passing qpquals as well */
 879         qptuples = ceil(mergejointuples * qp_selec);
 880
 881         /*
 882          * When there are equal merge keys in the outer relation, the
 883          * mergejoin must rescan any matching tuples in the inner relation.
 884          * This means re-fetching inner tuples.  Our cost model for this is
 885          * that a re-fetch costs the same as an original fetch, which is
 886          * probably an overestimate; but on the other hand we ignore the
 887          * bookkeeping costs of mark/restore. Not clear if it's worth
 888          * developing a more refined model.
 889          *
 890          * The number of re-fetches can be estimated approximately as size of
 891          * merge join output minus size of inner relation.      Assume that the
 892          * distinct key values are 1, 2, ..., and denote the number of values
 893          * of each key in the outer relation as m1, m2, ...; in the inner
 894          * relation, n1, n2, ...  Then we have
 895          *
 896          * size of join = m1 * n1 + m2 * n2 + ...
 897          *
 898          * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
 899          * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
 900          * relation
 901          *
 902          * This equation works correctly for outer tuples having no inner match
 903          * (nk = 0), but not for inner tuples having no outer match (mk = 0);
 904          * we are effectively subtracting those from the number of rescanned
 905          * tuples, when we should not.  Can we do better without expensive
 906          * selectivity computations?
 907          */
 908         if (IsA(outer_path, UniquePath))
 909                 rescannedtuples = 0;
 910         else
 911         {
 912                 rescannedtuples = mergejointuples - inner_path_rows;
 913                 /* Must clamp because of possible underestimate */
 914                 if (rescannedtuples < 0)
 915                         rescannedtuples = 0;
 916         }
 917         /* We'll inflate inner run cost this much to account for rescanning */
 918         rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
 919
 920         /*
 921          * A merge join will stop as soon as it exhausts either input stream.
 922          * Estimate fraction of the left and right inputs that will actually
 923          * need to be scanned.  We use only the first (most significant) merge
 924          * clause for this purpose.
 925          *
 926          * Since this calculation is somewhat expensive, and will be the same for
 927          * all mergejoin paths associated with the merge clause, we cache the
 928          * results in the RestrictInfo node.
 929          */
 930         firstclause = (RestrictInfo *) lfirst(mergeclauses);
 931         if (firstclause->left_mergescansel < 0)         /* not computed yet? */
 932                 mergejoinscansel(root, (Node *) firstclause->clause,
 933                                                  &firstclause->left_mergescansel,
 934                                                  &firstclause->right_mergescansel);
 935
 936         if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids))
 937         {
 938                 /* left side of clause is outer */
 939                 outerscansel = firstclause->left_mergescansel;
 940                 innerscansel = firstclause->right_mergescansel;
 941         }
 942         else
 943         {
 944                 /* left side of clause is inner */
 945                 outerscansel = firstclause->right_mergescansel;
 946                 innerscansel = firstclause->left_mergescansel;
 947         }
 948
 949         /* convert selectivity to row count; must scan at least one row */
 950
 951         outer_rows = ceil(outer_path_rows * outerscansel);
 952         if (outer_rows < 1)
 953                 outer_rows = 1;
 954         inner_rows = ceil(inner_path_rows * innerscansel);
 955         if (inner_rows < 1)
 956                 inner_rows = 1;
 957
 958         /*
 959          * Readjust scan selectivities to account for above rounding.  This is
 960          * normally an insignificant effect, but when there are only a few
 961          * rows in the inputs, failing to do this makes for a large percentage
 962          * error.
 963          */
 964         outerscansel = outer_rows / outer_path_rows;
 965         innerscansel = inner_rows / inner_path_rows;
 966
 967         /* cost of source data */
 968
 969         if (outersortkeys)                      /* do we need to sort outer? */
 970         {
 971                 cost_sort(&sort_path,
 972                                   root,
 973                                   outersortkeys,
 974                                   outer_path->total_cost,
 975                                   outer_path_rows,
 976                                   outer_path->parent->width);
 977                 startup_cost += sort_path.startup_cost;
 978                 run_cost += (sort_path.total_cost - sort_path.startup_cost)
 979                         * outerscansel;
 980         }
 981         else
 982         {
 983                 startup_cost += outer_path->startup_cost;
 984                 run_cost += (outer_path->total_cost - outer_path->startup_cost)
 985                         * outerscansel;
 986         }
 987
 988         if (innersortkeys)                      /* do we need to sort inner? */
 989         {
 990                 cost_sort(&sort_path,
 991                                   root,
 992                                   innersortkeys,
 993                                   inner_path->total_cost,
 994                                   inner_path_rows,
 995                                   inner_path->parent->width);
 996                 startup_cost += sort_path.startup_cost;
 997                 run_cost += (sort_path.total_cost - sort_path.startup_cost)
 998                         * innerscansel * rescanratio;
 999         }
1000         else
1001         {
1002                 startup_cost += inner_path->startup_cost;
1003                 run_cost += (inner_path->total_cost - inner_path->startup_cost)
1004                         * innerscansel * rescanratio;
1005         }
1006
1007         /* CPU costs */
1008
1009         /*
1010          * If we're doing JOIN_IN then we will stop outputting inner tuples
1011          * for an outer tuple as soon as we have one match.  Account for the
1012          * effects of this by scaling down the cost estimates in proportion to
1013          * the expected output size.  (This assumes that all the quals
1014          * attached to the join are IN quals, which should be true.)
1015          */
1016         if (path->jpath.jointype == JOIN_IN &&
1017                 qptuples > path->jpath.path.parent->rows)
1018                 joininfactor = path->jpath.path.parent->rows / qptuples;
1019         else
1020                 joininfactor = 1.0;
1021
1022         /*
1023          * The number of tuple comparisons needed is approximately number of
1024          * outer rows plus number of inner rows plus number of rescanned
1025          * tuples (can we refine this?).  At each one, we need to evaluate the
1026          * mergejoin quals.  NOTE: JOIN_IN mode does not save any work here,
1027          * so do NOT include joininfactor.
1028          */
1029         startup_cost += merge_qual_cost.startup;
1030         run_cost += merge_qual_cost.per_tuple *
1031                 (outer_rows + inner_rows * rescanratio);
1032
1033         /*
1034          * For each tuple that gets through the mergejoin proper, we charge
1035          * cpu_tuple_cost plus the cost of evaluating additional restriction
1036          * clauses that are to be applied at the join.  (This is pessimistic
1037          * since not all of the quals may get evaluated at each tuple.)  This
1038          * work is skipped in JOIN_IN mode, so apply the factor.
1039          */
1040         startup_cost += qp_qual_cost.startup;
1041         cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
1042         run_cost += cpu_per_tuple * mergejointuples * joininfactor;
1043
1044         path->jpath.path.startup_cost = startup_cost;
1045         path->jpath.path.total_cost = startup_cost + run_cost;
1046 }
1047
1048 /*
1049  * cost_hashjoin
1050  *        Determines and returns the cost of joining two relations using the
1051  *        hash join algorithm.
1052  *
1053  * 'path' is already filled in except for the cost fields
1054  *
1055  * Note: path's hashclauses should be a subset of the joinrestrictinfo list
1056  */
1057 void
1058 cost_hashjoin(HashPath *path, Query *root)
1059 {
1060         Path       *outer_path = path->jpath.outerjoinpath;
1061         Path       *inner_path = path->jpath.innerjoinpath;
1062         List       *restrictlist = path->jpath.joinrestrictinfo;
1063         List       *hashclauses = path->path_hashclauses;
1064         Cost            startup_cost = 0;
1065         Cost            run_cost = 0;
1066         Cost            cpu_per_tuple;
1067         Selectivity hash_selec;
1068         Selectivity qp_selec;
1069         QualCost        hash_qual_cost;
1070         QualCost        qp_qual_cost;
1071         double          hashjointuples;
1072         double          qptuples;
1073         double          outer_path_rows = PATH_ROWS(outer_path);
1074         double          inner_path_rows = PATH_ROWS(inner_path);
1075         double          outerbytes = relation_byte_size(outer_path_rows,
1076                                                                                           outer_path->parent->width);
1077         double          innerbytes = relation_byte_size(inner_path_rows,
1078                                                                                           inner_path->parent->width);
1079         int                     num_hashclauses = length(hashclauses);
1080         int                     virtualbuckets;
1081         int                     physicalbuckets;
1082         int                     numbatches;
1083         Selectivity innerbucketsize;
1084         Selectivity joininfactor;
1085         List       *hcl;
1086         List       *qpquals;
1087
1088         if (!enable_hashjoin)
1089                 startup_cost += disable_cost;
1090
1091         /*
1092          * Compute cost and selectivity of the hashquals and qpquals (other
1093          * restriction clauses) separately.  We use approx_selectivity here
1094          * for speed --- in most cases, any errors won't affect the result
1095          * much.
1096          *
1097          * Note: it's probably bogus to use the normal selectivity calculation
1098          * here when either the outer or inner path is a UniquePath.
1099          */
1100         hash_selec = approx_selectivity(root, hashclauses,
1101                                                                         path->jpath.jointype);
1102         cost_qual_eval(&hash_qual_cost, hashclauses);
1103         qpquals = set_ptrDifference(restrictlist, hashclauses);
1104         qp_selec = approx_selectivity(root, qpquals,
1105                                                                   path->jpath.jointype);
1106         cost_qual_eval(&qp_qual_cost, qpquals);
1107         freeList(qpquals);
1108
1109         /* approx # tuples passing the hash quals */
1110         hashjointuples = ceil(hash_selec * outer_path_rows * inner_path_rows);
1111         /* approx # tuples passing qpquals as well */
1112         qptuples = ceil(hashjointuples * qp_selec);
1113
1114         /* cost of source data */
1115         startup_cost += outer_path->startup_cost;
1116         run_cost += outer_path->total_cost - outer_path->startup_cost;
1117         startup_cost += inner_path->total_cost;
1118
1119         /*
1120          * Cost of computing hash function: must do it once per input tuple.
1121          * We charge one cpu_operator_cost for each column's hash function.
1122          *
1123          * XXX when a hashclause is more complex than a single operator, we
1124          * really should charge the extra eval costs of the left or right
1125          * side, as appropriate, here.  This seems more work than it's worth
1126          * at the moment.
1127          */
1128         startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
1129         run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
1130
1131         /* Get hash table size that executor would use for inner relation */
1132         ExecChooseHashTableSize(inner_path_rows,
1133                                                         inner_path->parent->width,
1134                                                         &virtualbuckets,
1135                                                         &physicalbuckets,
1136                                                         &numbatches);
1137
1138         /*
1139          * Determine bucketsize fraction for inner relation.  We use the
1140          * smallest bucketsize estimated for any individual hashclause; this
1141          * is undoubtedly conservative.
1142          *
1143          * BUT: if inner relation has been unique-ified, we can assume it's good
1144          * for hashing.  This is important both because it's the right answer,
1145          * and because we avoid contaminating the cache with a value that's
1146          * wrong for non-unique-ified paths.
1147          */
1148         if (IsA(inner_path, UniquePath))
1149                 innerbucketsize = 1.0 / virtualbuckets;
1150         else
1151         {
1152                 innerbucketsize = 1.0;
1153                 foreach(hcl, hashclauses)
1154                 {
1155                         RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
1156                         Selectivity thisbucketsize;
1157
1158                         Assert(IsA(restrictinfo, RestrictInfo));
1159
1160                         /*
1161                          * First we have to figure out which side of the hashjoin
1162                          * clause is the inner side.
1163                          *
1164                          * Since we tend to visit the same clauses over and over when
1165                          * planning a large query, we cache the bucketsize estimate in
1166                          * the RestrictInfo node to avoid repeated lookups of
1167                          * statistics.
1168                          */
1169                         if (bms_is_subset(restrictinfo->right_relids,
1170                                                           inner_path->parent->relids))
1171                         {
1172                                 /* righthand side is inner */
1173                                 thisbucketsize = restrictinfo->right_bucketsize;
1174                                 if (thisbucketsize < 0)
1175                                 {
1176                                         /* not cached yet */
1177                                         thisbucketsize =
1178                                                 estimate_hash_bucketsize(root,
1179                                                            (Var *) get_rightop(restrictinfo->clause),
1180                                                                                                  virtualbuckets);
1181                                         restrictinfo->right_bucketsize = thisbucketsize;
1182                                 }
1183                         }
1184                         else
1185                         {
1186                                 Assert(bms_is_subset(restrictinfo->left_relids,
1187                                                                          inner_path->parent->relids));
1188                                 /* lefthand side is inner */
1189                                 thisbucketsize = restrictinfo->left_bucketsize;
1190                                 if (thisbucketsize < 0)
1191                                 {
1192                                         /* not cached yet */
1193                                         thisbucketsize =
1194                                                 estimate_hash_bucketsize(root,
1195                                                                 (Var *) get_leftop(restrictinfo->clause),
1196                                                                                                  virtualbuckets);
1197                                         restrictinfo->left_bucketsize = thisbucketsize;
1198                                 }
1199                         }
1200
1201                         if (innerbucketsize > thisbucketsize)
1202                                 innerbucketsize = thisbucketsize;
1203                 }
1204         }
1205
1206         /*
1207          * if inner relation is too big then we will need to "batch" the join,
1208          * which implies writing and reading most of the tuples to disk an
1209          * extra time.  Charge one cost unit per page of I/O (correct since it
1210          * should be nice and sequential...).  Writing the inner rel counts as
1211          * startup cost, all the rest as run cost.
1212          */
1213         if (numbatches)
1214         {
1215                 double          outerpages = page_size(outer_path_rows,
1216                                                                                    outer_path->parent->width);
1217                 double          innerpages = page_size(inner_path_rows,
1218                                                                                    inner_path->parent->width);
1219
1220                 startup_cost += innerpages;
1221                 run_cost += innerpages + 2 * outerpages;
1222         }
1223
1224         /* CPU costs */
1225
1226         /*
1227          * If we're doing JOIN_IN then we will stop comparing inner tuples to
1228          * an outer tuple as soon as we have one match.  Account for the
1229          * effects of this by scaling down the cost estimates in proportion to
1230          * the expected output size.  (This assumes that all the quals
1231          * attached to the join are IN quals, which should be true.)
1232          */
1233         if (path->jpath.jointype == JOIN_IN &&
1234                 qptuples > path->jpath.path.parent->rows)
1235                 joininfactor = path->jpath.path.parent->rows / qptuples;
1236         else
1237                 joininfactor = 1.0;
1238
1239         /*
1240          * The number of tuple comparisons needed is the number of outer
1241          * tuples times the typical number of tuples in a hash bucket, which
1242          * is the inner relation size times its bucketsize fraction.  At each
1243          * one, we need to evaluate the hashjoin quals.
1244          */
1245         startup_cost += hash_qual_cost.startup;
1246         run_cost += hash_qual_cost.per_tuple *
1247                 outer_path_rows * ceil(inner_path_rows * innerbucketsize) *
1248                 joininfactor;
1249
1250         /*
1251          * For each tuple that gets through the hashjoin proper, we charge
1252          * cpu_tuple_cost plus the cost of evaluating additional restriction
1253          * clauses that are to be applied at the join.  (This is pessimistic
1254          * since not all of the quals may get evaluated at each tuple.)
1255          */
1256         startup_cost += qp_qual_cost.startup;
1257         cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
1258         run_cost += cpu_per_tuple * hashjointuples * joininfactor;
1259
1260         /*
1261          * Bias against putting larger relation on inside.      We don't want an
1262          * absolute prohibition, though, since larger relation might have
1263          * better bucketsize --- and we can't trust the size estimates
1264          * unreservedly, anyway.  Instead, inflate the run cost by the square
1265          * root of the size ratio.      (Why square root?  No real good reason,
1266          * but it seems reasonable...)
1267          *
1268          * Note: before 7.4 we implemented this by inflating startup cost; but if
1269          * there's a disable_cost component in the input paths' startup cost,
1270          * that unfairly penalizes the hash.  Probably it'd be better to keep
1271          * track of disable penalty separately from cost.
1272          */
1273         if (innerbytes > outerbytes && outerbytes > 0)
1274                 run_cost *= sqrt(innerbytes / outerbytes);
1275
1276         path->jpath.path.startup_cost = startup_cost;
1277         path->jpath.path.total_cost = startup_cost + run_cost;
1278 }
1279
1280 /*
1281  * Estimate hash bucketsize fraction (ie, number of entries in a bucket
1282  * divided by total tuples in relation) if the specified Var is used
1283  * as a hash key.
1284  *
1285  * XXX This is really pretty bogus since we're effectively assuming that the
1286  * distribution of hash keys will be the same after applying restriction
1287  * clauses as it was in the underlying relation.  However, we are not nearly
1288  * smart enough to figure out how the restrict clauses might change the
1289  * distribution, so this will have to do for now.
1290  *
1291  * We are passed the number of buckets the executor will use for the given
1292  * input relation.      If the data were perfectly distributed, with the same
1293  * number of tuples going into each available bucket, then the bucketsize
1294  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
1295  * only if (a) there are at least nbuckets distinct data values, and (b)
1296  * we have a not-too-skewed data distribution.  Otherwise the buckets will
1297  * be nonuniformly occupied.  If the other relation in the join has a key
1298  * distribution similar to this one's, then the most-loaded buckets are
1299  * exactly those that will be probed most often.  Therefore, the "average"
1300  * bucket size for costing purposes should really be taken as something close
1301  * to the "worst case" bucket size.  We try to estimate this by adjusting the
1302  * fraction if there are too few distinct data values, and then scaling up
1303  * by the ratio of the most common value's frequency to the average frequency.
1304  *
1305  * If no statistics are available, use a default estimate of 0.1.  This will
1306  * discourage use of a hash rather strongly if the inner relation is large,
1307  * which is what we want.  We do not want to hash unless we know that the
1308  * inner rel is well-dispersed (or the alternatives seem much worse).
1309  */
1310 static Selectivity
1311 estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
1312 {
1313         Oid                     relid;
1314         RelOptInfo *rel;
1315         HeapTuple       tuple;
1316         Form_pg_statistic stats;
1317         double          estfract,
1318                                 ndistinct,
1319                                 mcvfreq,
1320                                 avgfreq;
1321         float4     *numbers;
1322         int                     nnumbers;
1323
1324         /*
1325          * Lookup info about var's relation and attribute; if none available,
1326          * return default estimate.
1327          */
1328         if (var == NULL || !IsA(var, Var))
1329                 return 0.1;
1330
1331         relid = getrelid(var->varno, root->rtable);
1332         if (relid == InvalidOid)
1333                 return 0.1;
1334
1335         rel = find_base_rel(root, var->varno);
1336
1337         if (rel->tuples <= 0.0 || rel->rows <= 0.0)
1338                 return 0.1;                             /* ensure we can divide below */
1339
1340         tuple = SearchSysCache(STATRELATT,
1341                                                    ObjectIdGetDatum(relid),
1342                                                    Int16GetDatum(var->varattno),
1343                                                    0, 0);
1344         if (!HeapTupleIsValid(tuple))
1345         {
1346                 /*
1347                  * Perhaps the Var is a system attribute; if so, it will have no
1348                  * entry in pg_statistic, but we may be able to guess something
1349                  * about its distribution anyway.
1350                  */
1351                 switch (var->varattno)
1352                 {
1353                         case ObjectIdAttributeNumber:
1354                         case SelfItemPointerAttributeNumber:
1355                                 /* these are unique, so buckets should be well-distributed */
1356                                 return 1.0 / (double) nbuckets;
1357                         case TableOidAttributeNumber:
1358                                 /* hashing this is a terrible idea... */
1359                                 return 1.0;
1360                 }
1361                 return 0.1;
1362         }
1363         stats = (Form_pg_statistic) GETSTRUCT(tuple);
1364
1365         /*
1366          * Obtain number of distinct data values in raw relation.
1367          */
1368         ndistinct = stats->stadistinct;
1369         if (ndistinct < 0.0)
1370                 ndistinct = -ndistinct * rel->tuples;
1371
1372         if (ndistinct <= 0.0)           /* ensure we can divide */
1373         {
1374                 ReleaseSysCache(tuple);
1375                 return 0.1;
1376         }
1377
1378         /* Also compute avg freq of all distinct data values in raw relation */
1379         avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
1380
1381         /*
1382          * Adjust ndistinct to account for restriction clauses.  Observe we
1383          * are assuming that the data distribution is affected uniformly by
1384          * the restriction clauses!
1385          *
1386          * XXX Possibly better way, but much more expensive: multiply by
1387          * selectivity of rel's restriction clauses that mention the target
1388          * Var.
1389          */
1390         ndistinct *= rel->rows / rel->tuples;
1391
1392         /*
1393          * Initial estimate of bucketsize fraction is 1/nbuckets as long as
1394          * the number of buckets is less than the expected number of distinct
1395          * values; otherwise it is 1/ndistinct.
1396          */
1397         if (ndistinct > (double) nbuckets)
1398                 estfract = 1.0 / (double) nbuckets;
1399         else
1400                 estfract = 1.0 / ndistinct;
1401
1402         /*
1403          * Look up the frequency of the most common value, if available.
1404          */
1405         mcvfreq = 0.0;
1406
1407         if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
1408                                                  STATISTIC_KIND_MCV, InvalidOid,
1409                                                  NULL, NULL, &numbers, &nnumbers))
1410         {
1411                 /*
1412                  * The first MCV stat is for the most common value.
1413                  */
1414                 if (nnumbers > 0)
1415                         mcvfreq = numbers[0];
1416                 free_attstatsslot(var->vartype, NULL, 0,
1417                                                   numbers, nnumbers);
1418         }
1419
1420         /*
1421          * Adjust estimated bucketsize upward to account for skewed
1422          * distribution.
1423          */
1424         if (avgfreq > 0.0 && mcvfreq > avgfreq)
1425                 estfract *= mcvfreq / avgfreq;
1426
1427         /*
1428          * Clamp bucketsize to sane range (the above adjustment could easily
1429          * produce an out-of-range result).  We set the lower bound a little
1430          * above zero, since zero isn't a very sane result.
1431          */
1432         if (estfract < 1.0e-6)
1433                 estfract = 1.0e-6;
1434         else if (estfract > 1.0)
1435                 estfract = 1.0;
1436
1437         ReleaseSysCache(tuple);
1438
1439         return (Selectivity) estfract;
1440 }
1441
1442
1443 /*
1444  * cost_qual_eval
1445  *              Estimate the CPU costs of evaluating a WHERE clause.
1446  *              The input can be either an implicitly-ANDed list of boolean
1447  *              expressions, or a list of RestrictInfo nodes.
1448  *              The result includes both a one-time (startup) component,
1449  *              and a per-evaluation component.
1450  */
1451 void
1452 cost_qual_eval(QualCost * cost, List *quals)
1453 {
1454         List       *l;
1455
1456         cost->startup = 0;
1457         cost->per_tuple = 0;
1458
1459         /* We don't charge any cost for the implicit ANDing at top level ... */
1460
1461         foreach(l, quals)
1462         {
1463                 Node       *qual = (Node *) lfirst(l);
1464
1465                 /*
1466                  * RestrictInfo nodes contain an eval_cost field reserved for this
1467                  * routine's use, so that it's not necessary to evaluate the qual
1468                  * clause's cost more than once.  If the clause's cost hasn't been
1469                  * computed yet, the field's startup value will contain -1.
1470                  */
1471                 if (qual && IsA(qual, RestrictInfo))
1472                 {
1473                         RestrictInfo *restrictinfo = (RestrictInfo *) qual;
1474
1475                         if (restrictinfo->eval_cost.startup < 0)
1476                         {
1477                                 restrictinfo->eval_cost.startup = 0;
1478                                 restrictinfo->eval_cost.per_tuple = 0;
1479                                 cost_qual_eval_walker((Node *) restrictinfo->clause,
1480                                                                           &restrictinfo->eval_cost);
1481                         }
1482                         cost->startup += restrictinfo->eval_cost.startup;
1483                         cost->per_tuple += restrictinfo->eval_cost.per_tuple;
1484                 }
1485                 else
1486                 {
1487                         /* If it's a bare expression, must always do it the hard way */
1488                         cost_qual_eval_walker(qual, cost);
1489                 }
1490         }
1491 }
1492
1493 static bool
1494 cost_qual_eval_walker(Node *node, QualCost * total)
1495 {
1496         if (node == NULL)
1497                 return false;
1498
1499         /*
1500          * Our basic strategy is to charge one cpu_operator_cost for each
1501          * operator or function node in the given tree.  Vars and Consts are
1502          * charged zero, and so are boolean operators (AND, OR, NOT).
1503          * Simplistic, but a lot better than no model at all.
1504          *
1505          * Should we try to account for the possibility of short-circuit
1506          * evaluation of AND/OR?
1507          */
1508         if (IsA(node, FuncExpr) ||
1509                 IsA(node, OpExpr) ||
1510                 IsA(node, DistinctExpr) ||
1511                 IsA(node, NullIfExpr))
1512                 total->per_tuple += cpu_operator_cost;
1513         else if (IsA(node, ScalarArrayOpExpr))
1514         {
1515                 /* should charge more than 1 op cost, but how many? */
1516                 total->per_tuple += cpu_operator_cost * 10;
1517         }
1518         else if (IsA(node, SubLink))
1519         {
1520                 /* This routine should not be applied to un-planned expressions */
1521                 elog(ERROR, "cannot handle unplanned sub-select");
1522         }
1523         else if (IsA(node, SubPlan))
1524         {
1525                 /*
1526                  * A subplan node in an expression typically indicates that the
1527                  * subplan will be executed on each evaluation, so charge
1528                  * accordingly. (Sub-selects that can be executed as InitPlans
1529                  * have already been removed from the expression.)
1530                  *
1531                  * An exception occurs when we have decided we can implement the
1532                  * subplan by hashing.
1533                  *
1534                  */
1535                 SubPlan    *subplan = (SubPlan *) node;
1536                 Plan       *plan = subplan->plan;
1537
1538                 if (subplan->useHashTable)
1539                 {
1540                         /*
1541                          * If we are using a hash table for the subquery outputs, then
1542                          * the cost of evaluating the query is a one-time cost. We
1543                          * charge one cpu_operator_cost per tuple for the work of
1544                          * loading the hashtable, too.
1545                          */
1546                         total->startup += plan->total_cost +
1547                                 cpu_operator_cost * plan->plan_rows;
1548
1549                         /*
1550                          * The per-tuple costs include the cost of evaluating the
1551                          * lefthand expressions, plus the cost of probing the
1552                          * hashtable. Recursion into the exprs list will handle the
1553                          * lefthand expressions properly, and will count one
1554                          * cpu_operator_cost for each comparison operator.      That is
1555                          * probably too low for the probing cost, but it's hard to
1556                          * make a better estimate, so live with it for now.
1557                          */
1558                 }
1559                 else
1560                 {
1561                         /*
1562                          * Otherwise we will be rescanning the subplan output on each
1563                          * evaluation.  We need to estimate how much of the output we
1564                          * will actually need to scan.  NOTE: this logic should agree
1565                          * with the estimates used by make_subplan() in
1566                          * plan/subselect.c.
1567                          */
1568                         Cost            plan_run_cost = plan->total_cost - plan->startup_cost;
1569
1570                         if (subplan->subLinkType == EXISTS_SUBLINK)
1571                         {
1572                                 /* we only need to fetch 1 tuple */
1573                                 total->per_tuple += plan_run_cost / plan->plan_rows;
1574                         }
1575                         else if (subplan->subLinkType == ALL_SUBLINK ||
1576                                          subplan->subLinkType == ANY_SUBLINK)
1577                         {
1578                                 /* assume we need 50% of the tuples */
1579                                 total->per_tuple += 0.50 * plan_run_cost;
1580                                 /* also charge a cpu_operator_cost per row examined */
1581                                 total->per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost;
1582                         }
1583                         else
1584                         {
1585                                 /* assume we need all tuples */
1586                                 total->per_tuple += plan_run_cost;
1587                         }
1588
1589                         /*
1590                          * Also account for subplan's startup cost. If the subplan is
1591                          * uncorrelated or undirect correlated, AND its topmost node
1592                          * is a Sort or Material node, assume that we'll only need to
1593                          * pay its startup cost once; otherwise assume we pay the
1594                          * startup cost every time.
1595                          */
1596                         if (subplan->parParam == NIL &&
1597                                 (IsA(plan, Sort) ||
1598                                  IsA(plan, Material)))
1599                                 total->startup += plan->startup_cost;
1600                         else
1601                                 total->per_tuple += plan->startup_cost;
1602                 }
1603         }
1604
1605         return expression_tree_walker(node, cost_qual_eval_walker,
1606                                                                   (void *) total);
1607 }
1608
1609
1610 /*
1611  * approx_selectivity
1612  *              Quick-and-dirty estimation of clause selectivities.
1613  *              The input can be either an implicitly-ANDed list of boolean
1614  *              expressions, or a list of RestrictInfo nodes (typically the latter).
1615  *
1616  * The "quick" part comes from caching the selectivity estimates so we can
1617  * avoid recomputing them later.  (Since the same clauses are typically
1618  * examined over and over in different possible join trees, this makes a
1619  * big difference.)
1620  *
1621  * The "dirty" part comes from the fact that the selectivities of multiple
1622  * clauses are estimated independently and multiplied together.  Now
1623  * clauselist_selectivity often can't do any better than that anyhow, but
1624  * for some situations (such as range constraints) it is smarter.
1625  *
1626  * Since we are only using the results to estimate how many potential
1627  * output tuples are generated and passed through qpqual checking, it
1628  * seems OK to live with the approximation.
1629  */
1630 static Selectivity
1631 approx_selectivity(Query *root, List *quals, JoinType jointype)
1632 {
1633         Selectivity total = 1.0;
1634         List       *l;
1635
1636         foreach(l, quals)
1637         {
1638                 Node       *qual = (Node *) lfirst(l);
1639                 Selectivity selec;
1640
1641                 /*
1642                  * RestrictInfo nodes contain a this_selec field reserved for this
1643                  * routine's use, so that it's not necessary to evaluate the qual
1644                  * clause's selectivity more than once.  If the clause's
1645                  * selectivity hasn't been computed yet, the field will contain
1646                  * -1.
1647                  */
1648                 if (qual && IsA(qual, RestrictInfo))
1649                 {
1650                         RestrictInfo *restrictinfo = (RestrictInfo *) qual;
1651
1652                         if (restrictinfo->this_selec < 0)
1653                                 restrictinfo->this_selec =
1654                                         clause_selectivity(root,
1655                                                                            (Node *) restrictinfo->clause,
1656                                                                            0,
1657                                                                            jointype);
1658                         selec = restrictinfo->this_selec;
1659                 }
1660                 else
1661                 {
1662                         /* If it's a bare expression, must always do it the hard way */
1663                         selec = clause_selectivity(root, qual, 0, jointype);
1664                 }
1665                 total *= selec;
1666         }
1667         return total;
1668 }
1669
1670
1671 /*
1672  * set_baserel_size_estimates
1673  *              Set the size estimates for the given base relation.
1674  *
1675  * The rel's targetlist and restrictinfo list must have been constructed
1676  * already.
1677  *
1678  * We set the following fields of the rel node:
1679  *      rows: the estimated number of output tuples (after applying
1680  *                restriction clauses).
1681  *      width: the estimated average output tuple width in bytes.
1682  *      baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
1683  */
1684 void
1685 set_baserel_size_estimates(Query *root, RelOptInfo *rel)
1686 {
1687         double          temp;
1688
1689         /* Should only be applied to base relations */
1690         Assert(rel->relid > 0);
1691
1692         temp = rel->tuples *
1693                 restrictlist_selectivity(root,
1694                                                                  rel->baserestrictinfo,
1695                                                                  rel->relid,
1696                                                                  JOIN_INNER);
1697
1698         /*
1699          * Force estimate to be at least one row, to make explain output look
1700          * better and to avoid possible divide-by-zero when interpolating
1701          * cost.  Make it an integer, too.
1702          */
1703         if (temp < 1.0)
1704                 temp = 1.0;
1705         else
1706                 temp = ceil(temp);
1707
1708         rel->rows = temp;
1709
1710         cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
1711
1712         set_rel_width(root, rel);
1713 }
1714
1715 /*
1716  * set_joinrel_size_estimates
1717  *              Set the size estimates for the given join relation.
1718  *
1719  * The rel's targetlist must have been constructed already, and a
1720  * restriction clause list that matches the given component rels must
1721  * be provided.
1722  *
1723  * Since there is more than one way to make a joinrel for more than two
1724  * base relations, the results we get here could depend on which component
1725  * rel pair is provided.  In theory we should get the same answers no matter
1726  * which pair is provided; in practice, since the selectivity estimation
1727  * routines don't handle all cases equally well, we might not.  But there's
1728  * not much to be done about it.  (Would it make sense to repeat the
1729  * calculations for each pair of input rels that's encountered, and somehow
1730  * average the results?  Probably way more trouble than it's worth.)
1731  *
1732  * It's important that the results for symmetric JoinTypes be symmetric,
1733  * eg, (rel1, rel2, JOIN_LEFT) should produce the same result as (rel2,
1734  * rel1, JOIN_RIGHT).  Also, JOIN_IN should produce the same result as
1735  * JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER.
1736  *
1737  * We set the same relnode fields as set_baserel_size_estimates() does.
1738  */
1739 void
1740 set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
1741                                                    RelOptInfo *outer_rel,
1742                                                    RelOptInfo *inner_rel,
1743                                                    JoinType jointype,
1744                                                    List *restrictlist)
1745 {
1746         Selectivity selec;
1747         double          temp;
1748         UniquePath *upath;
1749
1750         /*
1751          * Compute joinclause selectivity.      Note that we are only considering
1752          * clauses that become restriction clauses at this join level; we are
1753          * not double-counting them because they were not considered in
1754          * estimating the sizes of the component rels.
1755          */
1756         selec = restrictlist_selectivity(root,
1757                                                                          restrictlist,
1758                                                                          0,
1759                                                                          jointype);
1760
1761         /*
1762          * Basically, we multiply size of Cartesian product by selectivity.
1763          *
1764          * If we are doing an outer join, take that into account: the output must
1765          * be at least as large as the non-nullable input.      (Is there any
1766          * chance of being even smarter?)
1767          *
1768          * For JOIN_IN and variants, the Cartesian product is figured with
1769          * respect to a unique-ified input, and then we can clamp to the size
1770          * of the other input.
1771          */
1772         switch (jointype)
1773         {
1774                 case JOIN_INNER:
1775                         temp = outer_rel->rows * inner_rel->rows * selec;
1776                         break;
1777                 case JOIN_LEFT:
1778                         temp = outer_rel->rows * inner_rel->rows * selec;
1779                         if (temp < outer_rel->rows)
1780                                 temp = outer_rel->rows;
1781                         break;
1782                 case JOIN_RIGHT:
1783                         temp = outer_rel->rows * inner_rel->rows * selec;
1784                         if (temp < inner_rel->rows)
1785                                 temp = inner_rel->rows;
1786                         break;
1787                 case JOIN_FULL:
1788                         temp = outer_rel->rows * inner_rel->rows * selec;
1789                         if (temp < outer_rel->rows)
1790                                 temp = outer_rel->rows;
1791                         if (temp < inner_rel->rows)
1792                                 temp = inner_rel->rows;
1793                         break;
1794                 case JOIN_IN:
1795                 case JOIN_UNIQUE_INNER:
1796                         upath = create_unique_path(root, inner_rel,
1797                                                                            inner_rel->cheapest_total_path);
1798                         temp = outer_rel->rows * upath->rows * selec;
1799                         if (temp > outer_rel->rows)
1800                                 temp = outer_rel->rows;
1801                         break;
1802                 case JOIN_REVERSE_IN:
1803                 case JOIN_UNIQUE_OUTER:
1804                         upath = create_unique_path(root, outer_rel,
1805                                                                            outer_rel->cheapest_total_path);
1806                         temp = upath->rows * inner_rel->rows * selec;
1807                         if (temp > inner_rel->rows)
1808                                 temp = inner_rel->rows;
1809                         break;
1810                 default:
1811                         elog(ERROR, "unrecognized join type: %d", (int) jointype);
1812                         temp = 0;                       /* keep compiler quiet */
1813                         break;
1814         }
1815
1816         /*
1817          * Force estimate to be at least one row, to make explain output look
1818          * better and to avoid possible divide-by-zero when interpolating
1819          * cost.  Make it an integer, too.
1820          */
1821         if (temp < 1.0)
1822                 temp = 1.0;
1823         else
1824                 temp = ceil(temp);
1825
1826         rel->rows = temp;
1827
1828         /*
1829          * We need not compute the output width here, because
1830          * build_joinrel_tlist already did.
1831          */
1832 }
1833
1834 /*
1835  * set_function_size_estimates
1836  *              Set the size estimates for a base relation that is a function call.
1837  *
1838  * The rel's targetlist and restrictinfo list must have been constructed
1839  * already.
1840  *
1841  * We set the following fields of the rel node:
1842  *      rows: the estimated number of output tuples (after applying
1843  *                restriction clauses).
1844  *      width: the estimated average output tuple width in bytes.
1845  *      baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
1846  */
1847 void
1848 set_function_size_estimates(Query *root, RelOptInfo *rel)
1849 {
1850         double          temp;
1851
1852         /* Should only be applied to base relations that are functions */
1853         Assert(rel->relid > 0);
1854         Assert(rel->rtekind == RTE_FUNCTION);
1855
1856         /*
1857          * Estimate number of rows the function itself will return.
1858          *
1859          * XXX no idea how to do this yet; but should at least check whether
1860          * function returns set or not...
1861          */
1862         rel->tuples = 1000;
1863
1864         /* Now estimate number of output rows */
1865         temp = rel->tuples *
1866                 restrictlist_selectivity(root,
1867                                                                  rel->baserestrictinfo,
1868                                                                  rel->relid,
1869                                                                  JOIN_INNER);
1870
1871         /*
1872          * Force estimate to be at least one row, to make explain output look
1873          * better and to avoid possible divide-by-zero when interpolating
1874          * cost.  Make it an integer, too.
1875          */
1876         if (temp < 1.0)
1877                 temp = 1.0;
1878         else
1879                 temp = ceil(temp);
1880
1881         rel->rows = temp;
1882
1883         cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo);
1884
1885         set_rel_width(root, rel);
1886 }
1887
1888
1889 /*
1890  * set_rel_width
1891  *              Set the estimated output width of a base relation.
1892  *
1893  * NB: this works best on plain relations because it prefers to look at
1894  * real Vars.  It will fail to make use of pg_statistic info when applied
1895  * to a subquery relation, even if the subquery outputs are simple vars
1896  * that we could have gotten info for.  Is it worth trying to be smarter
1897  * about subqueries?
1898  *
1899  * The per-attribute width estimates are cached for possible re-use while
1900  * building join relations.
1901  */
1902 static void
1903 set_rel_width(Query *root, RelOptInfo *rel)
1904 {
1905         int32           tuple_width = 0;
1906         List       *tllist;
1907
1908         foreach(tllist, FastListValue(&rel->reltargetlist))
1909         {
1910                 Var                *var = (Var *) lfirst(tllist);
1911                 int                     ndx = var->varattno - rel->min_attr;
1912                 Oid                     relid;
1913                 int32           item_width;
1914
1915                 Assert(IsA(var, Var));
1916
1917                 /*
1918                  * The width probably hasn't been cached yet, but may as well
1919                  * check
1920                  */
1921                 if (rel->attr_widths[ndx] > 0)
1922                 {
1923                         tuple_width += rel->attr_widths[ndx];
1924                         continue;
1925                 }
1926
1927                 relid = getrelid(var->varno, root->rtable);
1928                 if (relid != InvalidOid)
1929                 {
1930                         item_width = get_attavgwidth(relid, var->varattno);
1931                         if (item_width > 0)
1932                         {
1933                                 rel->attr_widths[ndx] = item_width;
1934                                 tuple_width += item_width;
1935                                 continue;
1936                         }
1937                 }
1938
1939                 /*
1940                  * Not a plain relation, or can't find statistics for it. Estimate
1941                  * using just the type info.
1942                  */
1943                 item_width = get_typavgwidth(var->vartype, var->vartypmod);
1944                 Assert(item_width > 0);
1945                 rel->attr_widths[ndx] = item_width;
1946                 tuple_width += item_width;
1947         }
1948         Assert(tuple_width >= 0);
1949         rel->width = tuple_width;
1950 }
1951
1952 /*
1953  * relation_byte_size
1954  *        Estimate the storage space in bytes for a given number of tuples
1955  *        of a given width (size in bytes).
1956  */
1957 static double
1958 relation_byte_size(double tuples, int width)
1959 {
1960         return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData)));
1961 }
1962
1963 /*
1964  * page_size
1965  *        Returns an estimate of the number of pages covered by a given
1966  *        number of tuples of a given width (size in bytes).
1967  */
1968 static double
1969 page_size(double tuples, int width)
1970 {
1971         return ceil(relation_byte_size(tuples, width) / BLCKSZ);
1972 }