granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/costsize.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * costsize.c
   4  *        Routines to compute (and set) relation sizes and path costs
   5  *
   6  * Copyright (c) 1994, Regents of the University of California
   7  *
   8  *
   9  * IDENTIFICATION
  10  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.43 1999/07/16 04:59:14 momjian Exp $
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14
  15 #include <math.h>
  16
  17 #include "postgres.h"
  18 #ifdef HAVE_LIMITS_H
  19 #include <limits.h>
  20 #ifndef MAXINT
  21 #define MAXINT            INT_MAX
  22 #endif
  23 #else
  24 #ifdef HAVE_VALUES_H
  25 #include <values.h>
  26 #endif
  27 #endif
  28
  29
  30 #include "optimizer/cost.h"
  31 #include "optimizer/internal.h"
  32 #include "optimizer/tlist.h"
  33 #include "utils/lsyscache.h"
  34
  35 extern int      NBuffers;
  36
  37 static int      compute_attribute_width(TargetEntry *tlistentry);
  38 static double relation_byte_size(int tuples, int width);
  39 static double base_log(double x, double b);
  40 static int      compute_targetlist_width(List *targetlist);
  41
  42 int                     _disable_cost_ = 30000000;
  43
  44 bool            _enable_seqscan_ = true;
  45 bool            _enable_indexscan_ = true;
  46 bool            _enable_sort_ = true;
  47 bool            _enable_hash_ = true;
  48 bool            _enable_nestloop_ = true;
  49 bool            _enable_mergejoin_ = true;
  50 bool            _enable_hashjoin_ = true;
  51
  52 Cost             _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
  53 Cost            _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
  54
  55 /*
  56  * cost_seqscan
  57  *        Determines and returns the cost of scanning a relation sequentially.
  58  *        If the relation is a temporary to be materialized from a query
  59  *        embedded within a data field (determined by 'relid' containing an
  60  *        attribute reference), then a predetermined constant is returned (we
  61  *        have NO IDEA how big the result of a POSTQUEL procedure is going to
  62  *        be).
  63  *
  64  *              disk = p
  65  *              cpu = *CPU-PAGE-WEIGHT* * t
  66  *
  67  * 'relid' is the relid of the relation to be scanned
  68  * 'relpages' is the number of pages in the relation to be scanned
  69  *              (as determined from the system catalogs)
  70  * 'reltuples' is the number of tuples in the relation to be scanned
  71  *
  72  * Returns a flonum.
  73  *
  74  */
  75 Cost
  76 cost_seqscan(int relid, int relpages, int reltuples)
  77 {
  78         Cost            temp = 0;
  79
  80         if (!_enable_seqscan_)
  81                 temp += _disable_cost_;
  82
  83         if (relid < 0)
  84         {
  85
  86                 /*
  87                  * cost of sequentially scanning a materialized temporary relation
  88                  */
  89                 temp += _NONAME_SCAN_COST_;
  90         }
  91         else
  92         {
  93                 temp += relpages;
  94                 temp += _cpu_page_weight_ * reltuples;
  95         }
  96         Assert(temp >= 0);
  97         return temp;
  98 }
  99
 100
 101 /*
 102  * cost_index
 103  *        Determines and returns the cost of scanning a relation using an index.
 104  *
 105  *              disk = expected-index-pages + expected-data-pages
 106  *              cpu = *CPU-PAGE-WEIGHT* *
 107  *                              (expected-index-tuples + expected-data-tuples)
 108  *
 109  * 'indexid' is the index OID
 110  * 'expected-indexpages' is the number of index pages examined in the scan
 111  * 'selec' is the selectivity of the index
 112  * 'relpages' is the number of pages in the main relation
 113  * 'reltuples' is the number of tuples in the main relation
 114  * 'indexpages' is the number of pages in the index relation
 115  * 'indextuples' is the number of tuples in the index relation
 116  *
 117  * Returns a flonum.
 118  *
 119  */
 120 Cost
 121 cost_index(Oid indexid,
 122                    int expected_indexpages,
 123                    Cost selec,
 124                    int relpages,
 125                    int reltuples,
 126                    int indexpages,
 127                    int indextuples,
 128                    bool is_injoin)
 129 {
 130         Cost            temp = 0;
 131
 132         if (!_enable_indexscan_ && !is_injoin)
 133                 temp += _disable_cost_;
 134
 135         /*
 136          * We want to be sure we estimate the cost of an index scan as more
 137          * than the cost of a sequential scan (when selec == 1.0), even if we
 138          * don't have good stats.  So, disbelieve zero index size.
 139          */
 140         if (expected_indexpages <= 0)
 141                 expected_indexpages = 1;
 142         if (indextuples <= 0)
 143                 indextuples = 1;
 144
 145         /* expected index relation pages */
 146         temp += expected_indexpages;
 147
 148         /*
 149          * expected base relation pages XXX this isn't really right, since we
 150          * will access the table nonsequentially and might have to fetch the
 151          * same page more than once.  This calculation assumes the buffer
 152          * cache will prevent that from happening...
 153          */
 154         temp += ceil(((double) selec) * ((double) relpages));
 155
 156         /* per index tuples */
 157         temp += _cpu_index_page_weight_ * selec * indextuples;
 158
 159         /* per heap tuples */
 160         temp += _cpu_page_weight_ * selec * reltuples;
 161
 162         Assert(temp >= 0);
 163         return temp;
 164 }
 165
 166 /*
 167  * cost_sort
 168  *        Determines and returns the cost of sorting a relation by considering
 169  *        the cost of doing an external sort:   XXX this is probably too low
 170  *                              disk = (p lg p)
 171  *                              cpu = *CPU-PAGE-WEIGHT* * (t lg t)
 172  *
 173  * 'pathkeys' is a list of sort keys
 174  * 'tuples' is the number of tuples in the relation
 175  * 'width' is the average tuple width in bytes
 176  *
 177  * NOTE: some callers currently pass NULL for pathkeys because they
 178  * can't conveniently supply the sort keys.  Since this routine doesn't
 179  * currently do anything with pathkeys anyway, that doesn't matter...
 180  * but if it ever does, it should react gracefully to lack of key data.
 181  *
 182  * Returns a flonum.
 183  */
 184 Cost
 185 cost_sort(List *pathkeys, int tuples, int width)
 186 {
 187         Cost            temp = 0;
 188         int                     npages = page_size(tuples, width);
 189         double          log_npages;
 190
 191         if (!_enable_sort_)
 192                 temp += _disable_cost_;
 193
 194         /*
 195          * We want to be sure the cost of a sort is never estimated as zero,
 196          * even if passed-in tuple count is zero.  Besides, mustn't do
 197          * log(0)...
 198          */
 199         if (tuples <= 0)
 200                 tuples = 1;
 201         if (npages <= 0)
 202                 npages = 1;
 203
 204         log_npages = ceil(base_log((double) npages, 2.0));
 205         if (log_npages <= 0.0)
 206                 log_npages = 1.0;
 207
 208         temp += npages * log_npages;
 209
 210         /*
 211          * could be base_log(tuples, NBuffers), but we are only doing 2-way
 212          * merges
 213          */
 214         temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
 215
 216         Assert(temp > 0);
 217
 218         return temp;
 219 }
 220
 221
 222 /*
 223  * cost_result
 224  *        Determines and returns the cost of writing a relation of 'tuples'
 225  *        tuples of 'width' bytes out to a result relation.
 226  *
 227  * Returns a flonum.
 228  *
 229  */
 230 #ifdef NOT_USED
 231 Cost
 232 cost_result(int tuples, int width)
 233 {
 234         Cost            temp = 0;
 235
 236         temp = temp + page_size(tuples, width);
 237         temp = temp + _cpu_page_weight_ * tuples;
 238         Assert(temp >= 0);
 239         return temp;
 240 }
 241
 242 #endif
 243
 244 /*
 245  * cost_nestloop
 246  *        Determines and returns the cost of joining two relations using the
 247  *        nested loop algorithm.
 248  *
 249  * 'outercost' is the (disk+cpu) cost of scanning the outer relation
 250  * 'innercost' is the (disk+cpu) cost of scanning the inner relation
 251  * 'outertuples' is the number of tuples in the outer relation
 252  *
 253  * Returns a flonum.
 254  *
 255  */
 256 Cost
 257 cost_nestloop(Cost outercost,
 258                           Cost innercost,
 259                           int outertuples,
 260                           int innertuples,
 261                           int outerpages,
 262                           bool is_indexjoin)
 263 {
 264         Cost            temp = 0;
 265
 266         if (!_enable_nestloop_)
 267                 temp += _disable_cost_;
 268         temp += outercost;
 269         temp += outertuples * innercost;
 270         Assert(temp >= 0);
 271
 272         return temp;
 273 }
 274
 275 /*
 276  * cost_mergejoin
 277  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 278  *                              outer and inner relations
 279  *        'outersortkeys' and 'innersortkeys' are lists of the keys to be used
 280  *                              to sort the outer and inner relations (or NIL if no explicit
 281  *                              sort is needed because the source path is already ordered)
 282  *        'outertuples' and 'innertuples' are the number of tuples in the outer
 283  *                              and inner relations
 284  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 285  *                              of the tuples of the outer and inner relations
 286  *
 287  * Returns a flonum.
 288  *
 289  */
 290 Cost
 291 cost_mergejoin(Cost outercost,
 292                            Cost innercost,
 293                            List *outersortkeys,
 294                            List *innersortkeys,
 295                            int outersize,
 296                            int innersize,
 297                            int outerwidth,
 298                            int innerwidth)
 299 {
 300         Cost            temp = 0;
 301
 302         if (!_enable_mergejoin_)
 303                 temp += _disable_cost_;
 304
 305         temp += outercost;
 306         temp += innercost;
 307         if (outersortkeys)                      /* do we need to sort? */
 308                 temp += cost_sort(outersortkeys, outersize, outerwidth);
 309         if (innersortkeys)                      /* do we need to sort? */
 310                 temp += cost_sort(innersortkeys, innersize, innerwidth);
 311         temp += _cpu_page_weight_ * (outersize + innersize);
 312
 313         Assert(temp >= 0);
 314
 315         return temp;
 316 }
 317
 318 /*
 319  * cost_hashjoin--                              XXX HASH
 320  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 321  *                              outer and inner relations
 322  *        'outerkeys' and 'innerkeys' are lists of the keys to be used
 323  *                              to hash the outer and inner relations
 324  *        'outersize' and 'innersize' are the number of tuples in the outer
 325  *                              and inner relations
 326  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 327  *                              of the tuples of the outer and inner relations
 328  *
 329  * Returns a flonum.
 330  */
 331 Cost
 332 cost_hashjoin(Cost outercost,
 333                           Cost innercost,
 334                           List *outerkeys,
 335                           List *innerkeys,
 336                           int outersize,
 337                           int innersize,
 338                           int outerwidth,
 339                           int innerwidth)
 340 {
 341         Cost            temp = 0;
 342         int                     outerpages = page_size(outersize, outerwidth);
 343         int                     innerpages = page_size(innersize, innerwidth);
 344
 345         if (!_enable_hashjoin_)
 346                 temp += _disable_cost_;
 347
 348         /*
 349          * Bias against putting larger relation on inside.
 350          *
 351          * Code used to use "outerpages < innerpages" but that has poor
 352          * resolution when both relations are small.
 353          */
 354         if (relation_byte_size(outersize, outerwidth) <
 355                 relation_byte_size(innersize, innerwidth))
 356                 temp += _disable_cost_;
 357
 358         /* cost of source data */
 359         temp += outercost + innercost;
 360
 361         /* cost of computing hash function: must do it once per tuple */
 362         temp += _cpu_page_weight_ * (outersize + innersize);
 363
 364         /* cost of main-memory hashtable */
 365         temp += (innerpages < NBuffers) ? innerpages : NBuffers;
 366
 367         /*
 368          * if inner relation is too big then we will need to "batch" the join,
 369          * which implies writing and reading most of the tuples to disk an
 370          * extra time.
 371          */
 372         if (innerpages > NBuffers)
 373                 temp += 2 * (outerpages + innerpages);
 374
 375         Assert(temp >= 0);
 376
 377         return temp;
 378 }
 379
 380 /*
 381  * compute_rel_size
 382  *        Computes the size of each relation in 'rel_list' (after applying
 383  *        restrictions), by multiplying the selectivity of each restriction
 384  *        by the original size of the relation.
 385  *
 386  *        Sets the 'size' field for each relation entry with this computed size.
 387  *
 388  * Returns the size.
 389  */
 390 int
 391 compute_rel_size(RelOptInfo *rel)
 392 {
 393         Cost            temp;
 394         int                     temp1;
 395
 396         temp = rel->tuples * product_selec(rel->restrictinfo);
 397         Assert(temp >= 0);
 398         if (temp >= (MAXINT - 1))
 399                 temp1 = MAXINT;
 400         else
 401                 temp1 = ceil((double) temp);
 402         Assert(temp1 >= 0);
 403         Assert(temp1 <= MAXINT);
 404         return temp1;
 405 }
 406
 407 /*
 408  * compute_rel_width
 409  *        Computes the width in bytes of a tuple from 'rel'.
 410  *
 411  * Returns the width of the tuple as a fixnum.
 412  */
 413 int
 414 compute_rel_width(RelOptInfo *rel)
 415 {
 416         return compute_targetlist_width(get_actual_tlist(rel->targetlist));
 417 }
 418
 419 /*
 420  * compute_targetlist_width
 421  *        Computes the width in bytes of a tuple made from 'targetlist'.
 422  *
 423  * Returns the width of the tuple as a fixnum.
 424  */
 425 static int
 426 compute_targetlist_width(List *targetlist)
 427 {
 428         List       *temp_tl;
 429         int                     tuple_width = 0;
 430
 431         foreach(temp_tl, targetlist)
 432         {
 433                 tuple_width = tuple_width +
 434                         compute_attribute_width(lfirst(temp_tl));
 435         }
 436         return tuple_width;
 437 }
 438
 439 /*
 440  * compute_attribute_width
 441  *        Given a target list entry, find the size in bytes of the attribute.
 442  *
 443  *        If a field is variable-length, it is assumed to be at least the size
 444  *        of a TID field.
 445  *
 446  * Returns the width of the attribute as a fixnum.
 447  */
 448 static int
 449 compute_attribute_width(TargetEntry *tlistentry)
 450 {
 451         int                     width = get_typlen(tlistentry->resdom->restype);
 452
 453         if (width < 0)
 454                 return _DEFAULT_ATTRIBUTE_WIDTH_;
 455         else
 456                 return width;
 457 }
 458
 459 /*
 460  * compute_joinrel_size
 461  *        Computes the size of the join relation 'joinrel'.
 462  *
 463  * Returns a fixnum.
 464  */
 465 int
 466 compute_joinrel_size(JoinPath *joinpath)
 467 {
 468         Cost            temp = 1.0;
 469         int                     temp1 = 0;
 470
 471         /* cartesian product */
 472         temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
 473         temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
 474
 475         temp = temp * product_selec(joinpath->pathinfo);
 476         if (temp >= (MAXINT - 1) / 2)
 477         {
 478                 /* if we exceed (MAXINT-1)/2, we switch to log scale */
 479                 /* +1 prevents log(0) */
 480                 temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
 481         }
 482         else
 483                 temp1 = ceil((double) temp);
 484         Assert(temp1 >= 0);
 485
 486         return temp1;
 487 }
 488
 489 /*
 490  * relation_byte_size
 491  *        Estimate the storage space in bytes for a given number of tuples
 492  *        of a given width (size in bytes).
 493  *        To avoid overflow with big relations, result is a double.
 494  */
 495
 496 static double
 497 relation_byte_size(int tuples, int width)
 498 {
 499         return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
 500 }
 501
 502 /*
 503  * page_size
 504  *        Returns an estimate of the number of pages covered by a given
 505  *        number of tuples of a given width (size in bytes).
 506  */
 507 int
 508 page_size(int tuples, int width)
 509 {
 510         int                     temp;
 511
 512         temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
 513         Assert(temp >= 0);
 514         return temp;
 515 }
 516
 517 static double
 518 base_log(double x, double b)
 519 {
 520         return log(x) / log(b);
 521 }