granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/costsize.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * costsize.c
   4  *        Routines to compute (and set) relation sizes and path costs
   5  *
   6  * Path costs are measured in units of disk accesses: one page fetch
   7  * has cost 1.  The other primitive unit is the CPU time required to
   8  * process one tuple, which we set at "_cpu_page_weight_" of a page
   9  * fetch.  Obviously, the CPU time per tuple depends on the query
  10  * involved, but the relative CPU and disk speeds of a given platform
  11  * are so variable that we are lucky if we can get useful numbers
  12  * at all.  _cpu_page_weight_ is user-settable, in case a particular
  13  * user is clueful enough to have a better-than-default estimate
  14  * of the ratio for his platform.  There is also _cpu_index_page_weight_,
  15  * the cost to process a tuple of an index during an index scan.
  16  *
  17  *
  18  * Copyright (c) 1994, Regents of the University of California
  19  *
  20  * IDENTIFICATION
  21  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.46 1999/11/23 20:06:54 momjian Exp $
  22  *
  23  *-------------------------------------------------------------------------
  24  */
  25
  26 #include <math.h>
  27
  28 #include "postgres.h"
  29
  30 #ifdef HAVE_LIMITS_H
  31 #include <limits.h>
  32 #ifndef MAXINT
  33 #define MAXINT            INT_MAX
  34 #endif
  35 #else
  36 #ifdef HAVE_VALUES_H
  37 #include <values.h>
  38 #endif
  39 #endif
  40
  41 #include "miscadmin.h"
  42 #include "optimizer/cost.h"
  43 #include "optimizer/internal.h"
  44 #include "optimizer/tlist.h"
  45 #include "utils/lsyscache.h"
  46
  47
  48 static int      compute_targetlist_width(List *targetlist);
  49 static int      compute_attribute_width(TargetEntry *tlistentry);
  50 static double relation_byte_size(int tuples, int width);
  51 static double base_log(double x, double b);
  52
  53
  54 int                     _disable_cost_ = 30000000;
  55
  56 bool            _enable_seqscan_ = true;
  57 bool            _enable_indexscan_ = true;
  58 bool            _enable_sort_ = true;
  59 bool            _enable_nestloop_ = true;
  60 bool            _enable_mergejoin_ = true;
  61 bool            _enable_hashjoin_ = true;
  62 bool            _enable_tidscan_ = true;
  63
  64 Cost             _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
  65 Cost            _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
  66
  67 /*
  68  * cost_seqscan
  69  *        Determines and returns the cost of scanning a relation sequentially.
  70  *        If the relation is a temporary to be materialized from a query
  71  *        embedded within a data field (determined by 'relid' containing an
  72  *        attribute reference), then a predetermined constant is returned (we
  73  *        have NO IDEA how big the result of a POSTQUEL procedure is going to
  74  *        be).
  75  *
  76  *              disk = p
  77  *              cpu = *CPU-PAGE-WEIGHT* * t
  78  *
  79  * 'relid' is the relid of the relation to be scanned
  80  * 'relpages' is the number of pages in the relation to be scanned
  81  *              (as determined from the system catalogs)
  82  * 'reltuples' is the number of tuples in the relation to be scanned
  83  *
  84  * Returns a flonum.
  85  *
  86  */
  87 Cost
  88 cost_seqscan(int relid, int relpages, int reltuples)
  89 {
  90         Cost            temp = 0;
  91
  92         if (!_enable_seqscan_)
  93                 temp += _disable_cost_;
  94
  95         if (relid < 0)
  96         {
  97
  98                 /*
  99                  * cost of sequentially scanning a materialized temporary relation
 100                  */
 101                 temp += _NONAME_SCAN_COST_;
 102         }
 103         else
 104         {
 105                 temp += relpages;
 106                 temp += _cpu_page_weight_ * reltuples;
 107         }
 108         Assert(temp >= 0);
 109         return temp;
 110 }
 111
 112
 113 /*
 114  * cost_index
 115  *        Determines and returns the cost of scanning a relation using an index.
 116  *
 117  *              disk = expected-index-pages + expected-data-pages
 118  *              cpu = *CPU-PAGE-WEIGHT* *
 119  *                              (expected-index-tuples + expected-data-tuples)
 120  *
 121  * 'indexid' is the index OID
 122  * 'expected-indexpages' is the number of index pages examined in the scan
 123  * 'selec' is the selectivity of the index
 124  * 'relpages' is the number of pages in the main relation
 125  * 'reltuples' is the number of tuples in the main relation
 126  * 'indexpages' is the number of pages in the index relation
 127  * 'indextuples' is the number of tuples in the index relation
 128  *
 129  * Returns a flonum.
 130  *
 131  */
 132 Cost
 133 cost_index(Oid indexid,
 134                    int expected_indexpages,
 135                    Cost selec,
 136                    int relpages,
 137                    int reltuples,
 138                    int indexpages,
 139                    int indextuples,
 140                    bool is_injoin)
 141 {
 142         Cost            temp = 0;
 143
 144         if (!_enable_indexscan_ && !is_injoin)
 145                 temp += _disable_cost_;
 146
 147         /*
 148          * We want to be sure we estimate the cost of an index scan as more
 149          * than the cost of a sequential scan (when selec == 1.0), even if we
 150          * don't have good stats.  So, disbelieve zero index size.
 151          */
 152         if (expected_indexpages <= 0)
 153                 expected_indexpages = 1;
 154         if (indextuples <= 0)
 155                 indextuples = 1;
 156
 157         /* expected index relation pages */
 158         temp += expected_indexpages;
 159
 160         /*
 161          * expected base relation pages XXX this isn't really right, since we
 162          * will access the table nonsequentially and might have to fetch the
 163          * same page more than once.  This calculation assumes the buffer
 164          * cache will prevent that from happening...
 165          */
 166         temp += ceil(((double) selec) * ((double) relpages));
 167
 168         /* per index tuples */
 169         temp += _cpu_index_page_weight_ * selec * indextuples;
 170
 171         /* per heap tuples */
 172         temp += _cpu_page_weight_ * selec * reltuples;
 173
 174         Assert(temp >= 0);
 175         return temp;
 176 }
 177
 178 /*
 179  * cost_tidscan
 180  *        Determines and returns the cost of scanning a relation using tid-s.
 181  *
 182  *              disk = number of tids
 183  *              cpu = *CPU-PAGE-WEIGHT* * number_of_tids
 184  *
 185  * Returns a flonum.
 186  *
 187  */
 188 Cost
 189 cost_tidscan(List *tideval)
 190 {
 191         Cost    temp = 0;
 192
 193         if (!_enable_tidscan_)
 194                 temp += _disable_cost_;
 195
 196         temp += (1.0 + _cpu_page_weight_) * length(tideval);
 197
 198         return temp;
 199 }
 200
 201 /*
 202  * cost_sort
 203  *        Determines and returns the cost of sorting a relation by considering
 204  *        the cost of doing an external sort:   XXX this is probably too low
 205  *                              disk = (p lg p)
 206  *                              cpu = *CPU-PAGE-WEIGHT* * (t lg t)
 207  *
 208  * 'pathkeys' is a list of sort keys
 209  * 'tuples' is the number of tuples in the relation
 210  * 'width' is the average tuple width in bytes
 211  *
 212  * NOTE: some callers currently pass NULL for pathkeys because they
 213  * can't conveniently supply the sort keys.  Since this routine doesn't
 214  * currently do anything with pathkeys anyway, that doesn't matter...
 215  * but if it ever does, it should react gracefully to lack of key data.
 216  *
 217  * Returns a flonum.
 218  */
 219 Cost
 220 cost_sort(List *pathkeys, int tuples, int width)
 221 {
 222         Cost            temp = 0;
 223         int                     npages = page_size(tuples, width);
 224         double          log_npages;
 225
 226         if (!_enable_sort_)
 227                 temp += _disable_cost_;
 228
 229         /*
 230          * We want to be sure the cost of a sort is never estimated as zero,
 231          * even if passed-in tuple count is zero.  Besides, mustn't do
 232          * log(0)...
 233          */
 234         if (tuples <= 0)
 235                 tuples = 1;
 236         if (npages <= 0)
 237                 npages = 1;
 238
 239         log_npages = ceil(base_log((double) npages, 2.0));
 240         if (log_npages <= 0.0)
 241                 log_npages = 1.0;
 242
 243         temp += npages * log_npages;
 244
 245         /*
 246          * could be base_log(tuples, NBuffers), but we are only doing 2-way
 247          * merges
 248          */
 249         temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
 250
 251         Assert(temp > 0);
 252
 253         return temp;
 254 }
 255
 256
 257 /*
 258  * cost_result
 259  *        Determines and returns the cost of writing a relation of 'tuples'
 260  *        tuples of 'width' bytes out to a result relation.
 261  *
 262  * Returns a flonum.
 263  *
 264  */
 265 #ifdef NOT_USED
 266 Cost
 267 cost_result(int tuples, int width)
 268 {
 269         Cost            temp = 0;
 270
 271         temp = temp + page_size(tuples, width);
 272         temp = temp + _cpu_page_weight_ * tuples;
 273         Assert(temp >= 0);
 274         return temp;
 275 }
 276
 277 #endif
 278
 279 /*
 280  * cost_nestloop
 281  *        Determines and returns the cost of joining two relations using the
 282  *        nested loop algorithm.
 283  *
 284  * 'outercost' is the (disk+cpu) cost of scanning the outer relation
 285  * 'innercost' is the (disk+cpu) cost of scanning the inner relation
 286  * 'outertuples' is the number of tuples in the outer relation
 287  *
 288  * Returns a flonum.
 289  *
 290  */
 291 Cost
 292 cost_nestloop(Cost outercost,
 293                           Cost innercost,
 294                           int outertuples,
 295                           int innertuples,
 296                           int outerpages,
 297                           bool is_indexjoin)
 298 {
 299         Cost            temp = 0;
 300
 301         if (!_enable_nestloop_)
 302                 temp += _disable_cost_;
 303         temp += outercost;
 304         temp += outertuples * innercost;
 305         Assert(temp >= 0);
 306
 307         return temp;
 308 }
 309
 310 /*
 311  * cost_mergejoin
 312  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 313  *                              outer and inner relations
 314  *        'outersortkeys' and 'innersortkeys' are lists of the keys to be used
 315  *                              to sort the outer and inner relations (or NIL if no explicit
 316  *                              sort is needed because the source path is already ordered)
 317  *        'outertuples' and 'innertuples' are the number of tuples in the outer
 318  *                              and inner relations
 319  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 320  *                              of the tuples of the outer and inner relations
 321  *
 322  * Returns a flonum.
 323  *
 324  */
 325 Cost
 326 cost_mergejoin(Cost outercost,
 327                            Cost innercost,
 328                            List *outersortkeys,
 329                            List *innersortkeys,
 330                            int outersize,
 331                            int innersize,
 332                            int outerwidth,
 333                            int innerwidth)
 334 {
 335         Cost            temp = 0;
 336
 337         if (!_enable_mergejoin_)
 338                 temp += _disable_cost_;
 339
 340         temp += outercost;
 341         temp += innercost;
 342         if (outersortkeys)                      /* do we need to sort? */
 343                 temp += cost_sort(outersortkeys, outersize, outerwidth);
 344         if (innersortkeys)                      /* do we need to sort? */
 345                 temp += cost_sort(innersortkeys, innersize, innerwidth);
 346         temp += _cpu_page_weight_ * (outersize + innersize);
 347
 348         Assert(temp >= 0);
 349
 350         return temp;
 351 }
 352
 353 /*
 354  * cost_hashjoin
 355  *
 356  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 357  *                              outer and inner relations
 358  *        'outersize' and 'innersize' are the number of tuples in the outer
 359  *                              and inner relations
 360  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 361  *                              of the tuples of the outer and inner relations
 362  *        'innerdisbursion' is an estimate of the disbursion statistic
 363  *                              for the inner hash key.
 364  *
 365  * Returns a flonum.
 366  */
 367 Cost
 368 cost_hashjoin(Cost outercost,
 369                           Cost innercost,
 370                           int outersize,
 371                           int innersize,
 372                           int outerwidth,
 373                           int innerwidth,
 374                           Cost innerdisbursion)
 375 {
 376         Cost            temp = 0;
 377         double          outerbytes = relation_byte_size(outersize, outerwidth);
 378         double          innerbytes = relation_byte_size(innersize, innerwidth);
 379         long            hashtablebytes = SortMem * 1024L;
 380
 381         if (!_enable_hashjoin_)
 382                 temp += _disable_cost_;
 383
 384         /* cost of source data */
 385         temp += outercost + innercost;
 386
 387         /* cost of computing hash function: must do it once per tuple */
 388         temp += _cpu_page_weight_ * (outersize + innersize);
 389
 390         /* the number of tuple comparisons needed is the number of outer
 391          * tuples times the typical hash bucket size, which we estimate
 392          * conservatively as the inner disbursion times the inner tuple
 393          * count.  The cost per comparison is set at _cpu_index_page_weight_;
 394          * is that reasonable, or do we need another basic parameter?
 395          */
 396         temp += _cpu_index_page_weight_ * outersize *
 397                 (innersize * innerdisbursion);
 398
 399         /*
 400          * if inner relation is too big then we will need to "batch" the join,
 401          * which implies writing and reading most of the tuples to disk an
 402          * extra time.  Charge one cost unit per page of I/O.
 403          */
 404         if (innerbytes > hashtablebytes)
 405                 temp += 2 * (page_size(outersize, outerwidth) +
 406                                          page_size(innersize, innerwidth));
 407
 408         /*
 409          * Bias against putting larger relation on inside.  We don't want
 410          * an absolute prohibition, though, since larger relation might have
 411          * better disbursion --- and we can't trust the size estimates
 412          * unreservedly, anyway.
 413          */
 414         if (innerbytes > outerbytes)
 415                 temp *= 1.1;                    /* is this an OK fudge factor? */
 416
 417         Assert(temp >= 0);
 418
 419         return temp;
 420 }
 421
 422 /*
 423  * compute_rel_size
 424  *        Computes the size of each relation in 'rel_list' (after applying
 425  *        restrictions), by multiplying the selectivity of each restriction
 426  *        by the original size of the relation.
 427  *
 428  *        Sets the 'size' field for each relation entry with this computed size.
 429  *
 430  * Returns the size.
 431  */
 432 int
 433 compute_rel_size(RelOptInfo *rel)
 434 {
 435         Cost            temp;
 436         int                     temp1;
 437
 438         temp = rel->tuples * product_selec(rel->restrictinfo);
 439         Assert(temp >= 0);
 440         if (temp >= (MAXINT - 1))
 441                 temp1 = MAXINT;
 442         else
 443                 temp1 = ceil((double) temp);
 444         Assert(temp1 >= 0);
 445         Assert(temp1 <= MAXINT);
 446         return temp1;
 447 }
 448
 449 /*
 450  * compute_rel_width
 451  *        Computes the width in bytes of a tuple from 'rel'.
 452  *
 453  * Returns the width of the tuple as a fixnum.
 454  */
 455 int
 456 compute_rel_width(RelOptInfo *rel)
 457 {
 458         return compute_targetlist_width(rel->targetlist);
 459 }
 460
 461 /*
 462  * compute_targetlist_width
 463  *        Computes the width in bytes of a tuple made from 'targetlist'.
 464  *
 465  * Returns the width of the tuple as a fixnum.
 466  */
 467 static int
 468 compute_targetlist_width(List *targetlist)
 469 {
 470         List       *temp_tl;
 471         int                     tuple_width = 0;
 472
 473         foreach(temp_tl, targetlist)
 474         {
 475                 tuple_width += compute_attribute_width(lfirst(temp_tl));
 476         }
 477         return tuple_width;
 478 }
 479
 480 /*
 481  * compute_attribute_width
 482  *        Given a target list entry, find the size in bytes of the attribute.
 483  *
 484  *        If a field is variable-length, it is assumed to be at least the size
 485  *        of a TID field.
 486  *
 487  * Returns the width of the attribute as a fixnum.
 488  */
 489 static int
 490 compute_attribute_width(TargetEntry *tlistentry)
 491 {
 492         int                     width = get_typlen(tlistentry->resdom->restype);
 493
 494         if (width < 0)
 495                 return _DEFAULT_ATTRIBUTE_WIDTH_;
 496         else
 497                 return width;
 498 }
 499
 500 /*
 501  * compute_joinrel_size
 502  *        Computes the size of the join relation 'joinrel'.
 503  *
 504  * Returns a fixnum.
 505  */
 506 int
 507 compute_joinrel_size(JoinPath *joinpath)
 508 {
 509         Cost            temp = 1.0;
 510         int                     temp1 = 0;
 511
 512         /* cartesian product */
 513         temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
 514         temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
 515
 516         temp = temp * product_selec(joinpath->pathinfo);
 517         if (temp >= (MAXINT - 1) / 2)
 518         {
 519                 /* if we exceed (MAXINT-1)/2, we switch to log scale */
 520                 /* +1 prevents log(0) */
 521                 temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
 522         }
 523         else
 524                 temp1 = ceil((double) temp);
 525         Assert(temp1 >= 0);
 526
 527         return temp1;
 528 }
 529
 530 /*
 531  * relation_byte_size
 532  *        Estimate the storage space in bytes for a given number of tuples
 533  *        of a given width (size in bytes).
 534  *        To avoid overflow with big relations, result is a double.
 535  */
 536
 537 static double
 538 relation_byte_size(int tuples, int width)
 539 {
 540         return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
 541 }
 542
 543 /*
 544  * page_size
 545  *        Returns an estimate of the number of pages covered by a given
 546  *        number of tuples of a given width (size in bytes).
 547  */
 548 int
 549 page_size(int tuples, int width)
 550 {
 551         int                     temp;
 552
 553         temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
 554         Assert(temp >= 0);
 555         return temp;
 556 }
 557
 558 static double
 559 base_log(double x, double b)
 560 {
 561         return log(x) / log(b);
 562 }