granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/costsize.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * costsize.c
   4  *        Routines to compute (and set) relation sizes and path costs
   5  *
   6  * Copyright (c) 1994, Regents of the University of California
   7  *
   8  *
   9  * IDENTIFICATION
  10  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.39 1999/07/07 09:11:15 momjian Exp $
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14
  15 #include "postgres.h"
  16
  17 #include <math.h>
  18
  19 #ifdef HAVE_LIMITS_H
  20 #include <limits.h>
  21 #ifndef MAXINT
  22 #define MAXINT            INT_MAX
  23 #endif
  24 #else
  25 #ifdef HAVE_VALUES_H
  26 #include <values.h>
  27 #endif
  28 #endif
  29
  30 #include "nodes/relation.h"
  31 #include "optimizer/cost.h"
  32 #include "optimizer/internal.h"
  33 #include "optimizer/keys.h"
  34 #include "optimizer/tlist.h"
  35 #include "utils/lsyscache.h"
  36
  37 extern int      NBuffers;
  38
  39 static int      compute_attribute_width(TargetEntry *tlistentry);
  40 static double relation_byte_size(int tuples, int width);
  41 static double base_log(double x, double b);
  42 static int      compute_targetlist_width(List *targetlist);
  43
  44 int                     _disable_cost_ = 30000000;
  45
  46 bool            _enable_seqscan_ = true;
  47 bool            _enable_indexscan_ = true;
  48 bool            _enable_sort_ = true;
  49 bool            _enable_hash_ = true;
  50 bool            _enable_nestloop_ = true;
  51 bool            _enable_mergejoin_ = true;
  52 bool            _enable_hashjoin_ = true;
  53
  54 Cost            _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
  55 Cost            _cpu_index_page_wight_ = _CPU_INDEX_PAGE_WEIGHT_;
  56
  57 /*
  58  * cost_seqscan
  59  *        Determines and returns the cost of scanning a relation sequentially.
  60  *        If the relation is a temporary to be materialized from a query
  61  *        embedded within a data field (determined by 'relid' containing an
  62  *        attribute reference), then a predetermined constant is returned (we
  63  *        have NO IDEA how big the result of a POSTQUEL procedure is going to
  64  *        be).
  65  *
  66  *              disk = p
  67  *              cpu = *CPU-PAGE-WEIGHT* * t
  68  *
  69  * 'relid' is the relid of the relation to be scanned
  70  * 'relpages' is the number of pages in the relation to be scanned
  71  *              (as determined from the system catalogs)
  72  * 'reltuples' is the number of tuples in the relation to be scanned
  73  *
  74  * Returns a flonum.
  75  *
  76  */
  77 Cost
  78 cost_seqscan(int relid, int relpages, int reltuples)
  79 {
  80         Cost            temp = 0;
  81
  82         if (!_enable_seqscan_)
  83                 temp += _disable_cost_;
  84
  85         if (relid < 0)
  86         {
  87
  88                 /*
  89                  * cost of sequentially scanning a materialized temporary relation
  90                  */
  91                 temp += _NONAME_SCAN_COST_;
  92         }
  93         else
  94         {
  95                 temp += relpages;
  96                 temp += _cpu_page_weight_ * reltuples;
  97         }
  98         Assert(temp >= 0);
  99         return temp;
 100 }
 101
 102
 103 /*
 104  * cost_index
 105  *        Determines and returns the cost of scanning a relation using an index.
 106  *
 107  *              disk = expected-index-pages + expected-data-pages
 108  *              cpu = *CPU-PAGE-WEIGHT* *
 109  *                              (expected-index-tuples + expected-data-tuples)
 110  *
 111  * 'indexid' is the index OID
 112  * 'expected-indexpages' is the number of index pages examined in the scan
 113  * 'selec' is the selectivity of the index
 114  * 'relpages' is the number of pages in the main relation
 115  * 'reltuples' is the number of tuples in the main relation
 116  * 'indexpages' is the number of pages in the index relation
 117  * 'indextuples' is the number of tuples in the index relation
 118  *
 119  * Returns a flonum.
 120  *
 121  */
 122 Cost
 123 cost_index(Oid indexid,
 124                    int expected_indexpages,
 125                    Cost selec,
 126                    int relpages,
 127                    int reltuples,
 128                    int indexpages,
 129                    int indextuples,
 130                    bool is_injoin)
 131 {
 132         Cost            temp = 0;
 133
 134         if (!_enable_indexscan_ && !is_injoin)
 135                 temp += _disable_cost_;
 136
 137         /*
 138          * We want to be sure we estimate the cost of an index scan as more
 139          * than the cost of a sequential scan (when selec == 1.0), even if we
 140          * don't have good stats.  So, disbelieve zero index size.
 141          */
 142         if (expected_indexpages <= 0)
 143                 expected_indexpages = 1;
 144         if (indextuples <= 0)
 145                 indextuples = 1;
 146
 147         /* expected index relation pages */
 148         temp += expected_indexpages;
 149
 150         /*
 151          * expected base relation pages XXX this isn't really right, since we
 152          * will access the table nonsequentially and might have to fetch the
 153          * same page more than once.  This calculation assumes the buffer
 154          * cache will prevent that from happening...
 155          */
 156         temp += ceil(((double) selec) * ((double) relpages));
 157
 158         /* per index tuples */
 159         temp += _cpu_index_page_wight_ * selec * indextuples;
 160
 161         /* per heap tuples */
 162         temp += _cpu_page_weight_ * selec * reltuples;
 163
 164         Assert(temp >= 0);
 165         return temp;
 166 }
 167
 168 /*
 169  * cost_sort
 170  *        Determines and returns the cost of sorting a relation by considering
 171  *        the cost of doing an external sort:   XXX this is probably too low
 172  *                              disk = (p lg p)
 173  *                              cpu = *CPU-PAGE-WEIGHT* * (t lg t)
 174  *
 175  * 'pathkeys' is a list of sort keys
 176  * 'tuples' is the number of tuples in the relation
 177  * 'width' is the average tuple width in bytes
 178  *
 179  * NOTE: some callers currently pass NULL for pathkeys because they
 180  * can't conveniently supply the sort keys.  Since this routine doesn't
 181  * currently do anything with pathkeys anyway, that doesn't matter...
 182  * but if it ever does, it should react gracefully to lack of key data.
 183  *
 184  * Returns a flonum.
 185  */
 186 Cost
 187 cost_sort(List *pathkeys, int tuples, int width)
 188 {
 189         Cost            temp = 0;
 190         int                     npages = page_size(tuples, width);
 191         double          log_npages;
 192
 193         if (!_enable_sort_)
 194                 temp += _disable_cost_;
 195
 196         /*
 197          * We want to be sure the cost of a sort is never estimated as zero,
 198          * even if passed-in tuple count is zero.  Besides, mustn't do
 199          * log(0)...
 200          */
 201         if (tuples <= 0)
 202                 tuples = 1;
 203         if (npages <= 0)
 204                 npages = 1;
 205
 206         log_npages = ceil(base_log((double) npages, 2.0));
 207         if (log_npages <= 0.0)
 208                 log_npages = 1.0;
 209
 210         temp += npages * log_npages;
 211
 212         /*
 213          * could be base_log(tuples, NBuffers), but we are only doing 2-way
 214          * merges
 215          */
 216         temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
 217
 218         Assert(temp > 0);
 219
 220         return temp;
 221 }
 222
 223
 224 /*
 225  * cost_result
 226  *        Determines and returns the cost of writing a relation of 'tuples'
 227  *        tuples of 'width' bytes out to a result relation.
 228  *
 229  * Returns a flonum.
 230  *
 231  */
 232 #ifdef NOT_USED
 233 Cost
 234 cost_result(int tuples, int width)
 235 {
 236         Cost            temp = 0;
 237
 238         temp = temp + page_size(tuples, width);
 239         temp = temp + _cpu_page_weight_ * tuples;
 240         Assert(temp >= 0);
 241         return temp;
 242 }
 243
 244 #endif
 245
 246 /*
 247  * cost_nestloop
 248  *        Determines and returns the cost of joining two relations using the
 249  *        nested loop algorithm.
 250  *
 251  * 'outercost' is the (disk+cpu) cost of scanning the outer relation
 252  * 'innercost' is the (disk+cpu) cost of scanning the inner relation
 253  * 'outertuples' is the number of tuples in the outer relation
 254  *
 255  * Returns a flonum.
 256  *
 257  */
 258 Cost
 259 cost_nestloop(Cost outercost,
 260                           Cost innercost,
 261                           int outertuples,
 262                           int innertuples,
 263                           int outerpages,
 264                           bool is_indexjoin)
 265 {
 266         Cost            temp = 0;
 267
 268         if (!_enable_nestloop_)
 269                 temp += _disable_cost_;
 270         temp += outercost;
 271         temp += outertuples * innercost;
 272         Assert(temp >= 0);
 273
 274         return temp;
 275 }
 276
 277 /*
 278  * cost_mergejoin
 279  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 280  *                              outer and inner relations
 281  *        'outersortkeys' and 'innersortkeys' are lists of the keys to be used
 282  *                              to sort the outer and inner relations (or NIL if no explicit
 283  *                              sort is needed because the source path is already ordered)
 284  *        'outertuples' and 'innertuples' are the number of tuples in the outer
 285  *                              and inner relations
 286  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 287  *                              of the tuples of the outer and inner relations
 288  *
 289  * Returns a flonum.
 290  *
 291  */
 292 Cost
 293 cost_mergejoin(Cost outercost,
 294                            Cost innercost,
 295                            List *outersortkeys,
 296                            List *innersortkeys,
 297                            int outersize,
 298                            int innersize,
 299                            int outerwidth,
 300                            int innerwidth)
 301 {
 302         Cost            temp = 0;
 303
 304         if (!_enable_mergejoin_)
 305                 temp += _disable_cost_;
 306
 307         temp += outercost;
 308         temp += innercost;
 309         if (outersortkeys)                      /* do we need to sort? */
 310                 temp += cost_sort(outersortkeys, outersize, outerwidth);
 311         if (innersortkeys)                      /* do we need to sort? */
 312                 temp += cost_sort(innersortkeys, innersize, innerwidth);
 313         temp += _cpu_page_weight_ * (outersize + innersize);
 314
 315         Assert(temp >= 0);
 316
 317         return temp;
 318 }
 319
 320 /*
 321  * cost_hashjoin--                              XXX HASH
 322  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 323  *                              outer and inner relations
 324  *        'outerkeys' and 'innerkeys' are lists of the keys to be used
 325  *                              to hash the outer and inner relations
 326  *        'outersize' and 'innersize' are the number of tuples in the outer
 327  *                              and inner relations
 328  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 329  *                              of the tuples of the outer and inner relations
 330  *
 331  * Returns a flonum.
 332  */
 333 Cost
 334 cost_hashjoin(Cost outercost,
 335                           Cost innercost,
 336                           List *outerkeys,
 337                           List *innerkeys,
 338                           int outersize,
 339                           int innersize,
 340                           int outerwidth,
 341                           int innerwidth)
 342 {
 343         Cost            temp = 0;
 344         int                     outerpages = page_size(outersize, outerwidth);
 345         int                     innerpages = page_size(innersize, innerwidth);
 346
 347         if (!_enable_hashjoin_)
 348                 temp += _disable_cost_;
 349
 350         /*
 351          * Bias against putting larger relation on inside.
 352          *
 353          * Code used to use "outerpages < innerpages" but that has poor
 354          * resolution when both relations are small.
 355          */
 356         if (relation_byte_size(outersize, outerwidth) <
 357                 relation_byte_size(innersize, innerwidth))
 358                 temp += _disable_cost_;
 359
 360         /* cost of source data */
 361         temp += outercost + innercost;
 362
 363         /* cost of computing hash function: must do it once per tuple */
 364         temp += _cpu_page_weight_ * (outersize + innersize);
 365
 366         /* cost of main-memory hashtable */
 367         temp += (innerpages < NBuffers) ? innerpages : NBuffers;
 368
 369         /*
 370          * if inner relation is too big then we will need to "batch" the join,
 371          * which implies writing and reading most of the tuples to disk an
 372          * extra time.
 373          */
 374         if (innerpages > NBuffers)
 375                 temp += 2 * (outerpages + innerpages);
 376
 377         Assert(temp >= 0);
 378
 379         return temp;
 380 }
 381
 382 /*
 383  * compute_rel_size
 384  *        Computes the size of each relation in 'rel_list' (after applying
 385  *        restrictions), by multiplying the selectivity of each restriction
 386  *        by the original size of the relation.
 387  *
 388  *        Sets the 'size' field for each relation entry with this computed size.
 389  *
 390  * Returns the size.
 391  */
 392 int
 393 compute_rel_size(RelOptInfo *rel)
 394 {
 395         Cost            temp;
 396         int                     temp1;
 397
 398         temp = rel->tuples * product_selec(rel->restrictinfo);
 399         Assert(temp >= 0);
 400         if (temp >= (MAXINT - 1))
 401                 temp1 = MAXINT;
 402         else
 403                 temp1 = ceil((double) temp);
 404         Assert(temp1 >= 0);
 405         Assert(temp1 <= MAXINT);
 406         return temp1;
 407 }
 408
 409 /*
 410  * compute_rel_width
 411  *        Computes the width in bytes of a tuple from 'rel'.
 412  *
 413  * Returns the width of the tuple as a fixnum.
 414  */
 415 int
 416 compute_rel_width(RelOptInfo *rel)
 417 {
 418         return compute_targetlist_width(get_actual_tlist(rel->targetlist));
 419 }
 420
 421 /*
 422  * compute_targetlist_width
 423  *        Computes the width in bytes of a tuple made from 'targetlist'.
 424  *
 425  * Returns the width of the tuple as a fixnum.
 426  */
 427 static int
 428 compute_targetlist_width(List *targetlist)
 429 {
 430         List       *temp_tl;
 431         int                     tuple_width = 0;
 432
 433         foreach(temp_tl, targetlist)
 434         {
 435                 tuple_width = tuple_width +
 436                         compute_attribute_width(lfirst(temp_tl));
 437         }
 438         return tuple_width;
 439 }
 440
 441 /*
 442  * compute_attribute_width
 443  *        Given a target list entry, find the size in bytes of the attribute.
 444  *
 445  *        If a field is variable-length, it is assumed to be at least the size
 446  *        of a TID field.
 447  *
 448  * Returns the width of the attribute as a fixnum.
 449  */
 450 static int
 451 compute_attribute_width(TargetEntry *tlistentry)
 452 {
 453         int                     width = get_typlen(tlistentry->resdom->restype);
 454
 455         if (width < 0)
 456                 return _DEFAULT_ATTRIBUTE_WIDTH_;
 457         else
 458                 return width;
 459 }
 460
 461 /*
 462  * compute_joinrel_size
 463  *        Computes the size of the join relation 'joinrel'.
 464  *
 465  * Returns a fixnum.
 466  */
 467 int
 468 compute_joinrel_size(JoinPath *joinpath)
 469 {
 470         Cost            temp = 1.0;
 471         int                     temp1 = 0;
 472
 473         /* cartesian product */
 474         temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
 475         temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
 476
 477         temp = temp * product_selec(joinpath->pathinfo);
 478         if (temp >= (MAXINT - 1) / 2)
 479         {
 480                 /* if we exceed (MAXINT-1)/2, we switch to log scale */
 481                 /* +1 prevents log(0) */
 482                 temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
 483         }
 484         else
 485                 temp1 = ceil((double) temp);
 486         Assert(temp1 >= 0);
 487
 488         return temp1;
 489 }
 490
 491 /*
 492  * relation_byte_size
 493  *        Estimate the storage space in bytes for a given number of tuples
 494  *        of a given width (size in bytes).
 495  *        To avoid overflow with big relations, result is a double.
 496  */
 497
 498 static double
 499 relation_byte_size(int tuples, int width)
 500 {
 501         return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
 502 }
 503
 504 /*
 505  * page_size
 506  *        Returns an estimate of the number of pages covered by a given
 507  *        number of tuples of a given width (size in bytes).
 508  */
 509 int
 510 page_size(int tuples, int width)
 511 {
 512         int                     temp;
 513
 514         temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
 515         Assert(temp >= 0);
 516         return temp;
 517 }
 518
 519 static double
 520 base_log(double x, double b)
 521 {
 522         return log(x) / log(b);
 523 }