granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/costsize.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * costsize.c
   4  *        Routines to compute (and set) relation sizes and path costs
   5  *
   6  * Copyright (c) 1994, Regents of the University of California
   7  *
   8  *
   9  * IDENTIFICATION
  10  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.34 1999/04/05 02:07:07 tgl Exp $
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14
  15 #include "postgres.h"
  16
  17 #include <math.h>
  18
  19 #ifdef HAVE_LIMITS_H
  20 #include <limits.h>
  21 #ifndef MAXINT
  22 #define MAXINT            INT_MAX
  23 #endif
  24 #else
  25 #ifdef HAVE_VALUES_H
  26 #include <values.h>
  27 #endif
  28 #endif
  29
  30 #include "nodes/relation.h"
  31 #include "optimizer/cost.h"
  32 #include "optimizer/internal.h"
  33 #include "optimizer/keys.h"
  34 #include "optimizer/tlist.h"
  35 #include "utils/lsyscache.h"
  36
  37 extern int      NBuffers;
  38
  39 static int      compute_attribute_width(TargetEntry *tlistentry);
  40 static double relation_byte_size (int tuples, int width);
  41 static double base_log(double x, double b);
  42 static int      compute_targetlist_width(List *targetlist);
  43
  44 int                     _disable_cost_ = 30000000;
  45
  46 bool            _enable_seqscan_ = true;
  47 bool            _enable_indexscan_ = true;
  48 bool            _enable_sort_ = true;
  49 bool            _enable_hash_ = true;
  50 bool            _enable_nestloop_ = true;
  51 bool            _enable_mergejoin_ = true;
  52 bool            _enable_hashjoin_ = true;
  53
  54 Cost            _cpu_page_wight_ = _CPU_PAGE_WEIGHT_;
  55 Cost            _cpu_index_page_wight_ = _CPU_INDEX_PAGE_WEIGHT_;
  56
  57 /*
  58  * cost_seqscan
  59  *        Determines and returns the cost of scanning a relation sequentially.
  60  *        If the relation is a temporary to be materialized from a query
  61  *        embedded within a data field (determined by 'relid' containing an
  62  *        attribute reference), then a predetermined constant is returned (we
  63  *        have NO IDEA how big the result of a POSTQUEL procedure is going to
  64  *        be).
  65  *
  66  *              disk = p
  67  *              cpu = *CPU-PAGE-WEIGHT* * t
  68  *
  69  * 'relid' is the relid of the relation to be scanned
  70  * 'relpages' is the number of pages in the relation to be scanned
  71  *              (as determined from the system catalogs)
  72  * 'reltuples' is the number of tuples in the relation to be scanned
  73  *
  74  * Returns a flonum.
  75  *
  76  */
  77 Cost
  78 cost_seqscan(int relid, int relpages, int reltuples)
  79 {
  80         Cost            temp = 0;
  81
  82         if (!_enable_seqscan_)
  83                 temp += _disable_cost_;
  84
  85         if (relid < 0)
  86         {
  87
  88                 /*
  89                  * cost of sequentially scanning a materialized temporary relation
  90                  */
  91                 temp += _NONAME_SCAN_COST_;
  92         }
  93         else
  94         {
  95                 temp += relpages;
  96                 temp += _cpu_page_wight_ * reltuples;
  97         }
  98         Assert(temp >= 0);
  99         return temp;
 100 }
 101
 102
 103 /*
 104  * cost_index
 105  *        Determines and returns the cost of scanning a relation using an index.
 106  *
 107  *              disk = expected-index-pages + expected-data-pages
 108  *              cpu = *CPU-PAGE-WEIGHT* *
 109  *                              (expected-index-tuples + expected-data-tuples)
 110  *
 111  * 'indexid' is the index OID
 112  * 'expected-indexpages' is the number of index pages examined in the scan
 113  * 'selec' is the selectivity of the index
 114  * 'relpages' is the number of pages in the main relation
 115  * 'reltuples' is the number of tuples in the main relation
 116  * 'indexpages' is the number of pages in the index relation
 117  * 'indextuples' is the number of tuples in the index relation
 118  *
 119  * Returns a flonum.
 120  *
 121  */
 122 Cost
 123 cost_index(Oid indexid,
 124                    int expected_indexpages,
 125                    Cost selec,
 126                    int relpages,
 127                    int reltuples,
 128                    int indexpages,
 129                    int indextuples,
 130                    bool is_injoin)
 131 {
 132         Cost            temp;
 133         double          temp2;
 134
 135         temp = (Cost) 0;
 136
 137         if (!_enable_indexscan_ && !is_injoin)
 138                 temp += _disable_cost_;
 139
 140         /* expected index relation pages */
 141         temp += expected_indexpages;
 142
 143         /* expected base relation pages */
 144         temp2 = (reltuples == 0) ? (double) 0 : (double) relpages / reltuples;
 145         temp2 = temp2 * (double) selec *indextuples;
 146
 147         temp += Min(relpages, (int) ceil(temp2));
 148
 149         /* per index tuples */
 150         temp = temp + (_cpu_index_page_wight_ * selec * indextuples);
 151
 152         /* per heap tuples */
 153         temp = temp + (_cpu_page_wight_ * selec * reltuples);
 154
 155         Assert(temp >= 0);
 156         return temp;
 157 }
 158
 159 /*
 160  * cost_sort
 161  *        Determines and returns the cost of sorting a relation by considering
 162  *        1. the cost of doing an external sort:        XXX this is probably too low
 163  *                              disk = (p lg p)
 164  *                              cpu = *CPU-PAGE-WEIGHT* * (t lg t)
 165  *        2. the cost of reading the sort result into memory (another seqscan)
 166  *               unless 'noread' is set
 167  *
 168  * 'pathkeys' is a list of sort keys
 169  * 'tuples' is the number of tuples in the relation
 170  * 'width' is the average tuple width in bytes
 171  * 'noread' is a flag indicating that the sort result can remain on disk
 172  *                              (i.e., the sort result is the result relation)
 173  *
 174  * Returns a flonum.
 175  *
 176  */
 177 Cost
 178 cost_sort(List *pathkeys, int tuples, int width, bool noread)
 179 {
 180         Cost            temp = 0;
 181         int                     npages = page_size(tuples, width);
 182         Cost            pages = (Cost) npages;
 183         Cost            numTuples = tuples;
 184
 185         if (!_enable_sort_)
 186                 temp += _disable_cost_;
 187         if (tuples == 0 || pathkeys == NULL)
 188         {
 189                 Assert(temp >= 0);
 190                 return temp;
 191         }
 192         temp += pages * base_log((double) pages, (double) 2.0);
 193
 194         /*
 195          * could be base_log(pages, NBuffers), but we are only doing 2-way
 196          * merges
 197          */
 198         temp += _cpu_page_wight_ * numTuples *
 199                 base_log((double) pages, (double) 2.0);
 200
 201         if (!noread)
 202                 temp = temp + cost_seqscan(_NONAME_RELATION_ID_, npages, tuples);
 203         Assert(temp >= 0);
 204
 205         return temp;
 206 }
 207
 208
 209 /*
 210  * cost_result
 211  *        Determines and returns the cost of writing a relation of 'tuples'
 212  *        tuples of 'width' bytes out to a result relation.
 213  *
 214  * Returns a flonum.
 215  *
 216  */
 217 #ifdef NOT_USED
 218 Cost
 219 cost_result(int tuples, int width)
 220 {
 221         Cost            temp = 0;
 222
 223         temp = temp + page_size(tuples, width);
 224         temp = temp + _cpu_page_wight_ * tuples;
 225         Assert(temp >= 0);
 226         return temp;
 227 }
 228
 229 #endif
 230
 231 /*
 232  * cost_nestloop
 233  *        Determines and returns the cost of joining two relations using the
 234  *        nested loop algorithm.
 235  *
 236  * 'outercost' is the (disk+cpu) cost of scanning the outer relation
 237  * 'innercost' is the (disk+cpu) cost of scanning the inner relation
 238  * 'outertuples' is the number of tuples in the outer relation
 239  *
 240  * Returns a flonum.
 241  *
 242  */
 243 Cost
 244 cost_nestloop(Cost outercost,
 245                           Cost innercost,
 246                           int outertuples,
 247                           int innertuples,
 248                           int outerpages,
 249                           bool is_indexjoin)
 250 {
 251         Cost            temp = 0;
 252
 253         if (!_enable_nestloop_)
 254                 temp += _disable_cost_;
 255         temp += outercost;
 256         temp += outertuples * innercost;
 257         Assert(temp >= 0);
 258
 259         return temp;
 260 }
 261
 262 /*
 263  * cost_mergejoin
 264  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 265  *                              outer and inner relations
 266  *        'outersortkeys' and 'innersortkeys' are lists of the keys to be used
 267  *                              to sort the outer and inner relations
 268  *        'outertuples' and 'innertuples' are the number of tuples in the outer
 269  *                              and inner relations
 270  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 271  *                              of the tuples of the outer and inner relations
 272  *
 273  * Returns a flonum.
 274  *
 275  */
 276 Cost
 277 cost_mergejoin(Cost outercost,
 278                            Cost innercost,
 279                            List *outersortkeys,
 280                            List *innersortkeys,
 281                            int outersize,
 282                            int innersize,
 283                            int outerwidth,
 284                            int innerwidth)
 285 {
 286         Cost            temp = 0;
 287
 288         if (!_enable_mergejoin_)
 289                 temp += _disable_cost_;
 290
 291         temp += outercost;
 292         temp += innercost;
 293         temp += cost_sort(outersortkeys, outersize, outerwidth, false);
 294         temp += cost_sort(innersortkeys, innersize, innerwidth, false);
 295         temp += _cpu_page_wight_ * (outersize + innersize);
 296         Assert(temp >= 0);
 297
 298         return temp;
 299 }
 300
 301 /*
 302  * cost_hashjoin--                              XXX HASH
 303  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 304  *                              outer and inner relations
 305  *        'outerkeys' and 'innerkeys' are lists of the keys to be used
 306  *                              to hash the outer and inner relations
 307  *        'outersize' and 'innersize' are the number of tuples in the outer
 308  *                              and inner relations
 309  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 310  *                              of the tuples of the outer and inner relations
 311  *
 312  * Returns a flonum.
 313  */
 314 Cost
 315 cost_hashjoin(Cost outercost,
 316                           Cost innercost,
 317                           List *outerkeys,
 318                           List *innerkeys,
 319                           int outersize,
 320                           int innersize,
 321                           int outerwidth,
 322                           int innerwidth)
 323 {
 324         Cost            temp = 0;
 325         int                     outerpages = page_size(outersize, outerwidth);
 326         int                     innerpages = page_size(innersize, innerwidth);
 327
 328         if (!_enable_hashjoin_)
 329                 temp += _disable_cost_;
 330
 331         /* Bias against putting larger relation on inside.
 332          *
 333          * Code used to use "outerpages < innerpages" but that has
 334          * poor resolution when both relations are small.
 335          */
 336         if (relation_byte_size(outersize, outerwidth) <
 337                 relation_byte_size(innersize, innerwidth))
 338                 temp += _disable_cost_;
 339
 340         /* cost of source data */
 341         temp += outercost + innercost;
 342
 343         /* cost of computing hash function: must do it once per tuple */
 344         temp += _cpu_page_wight_ * (outersize + innersize);
 345
 346         /* cost of main-memory hashtable */
 347         temp += (innerpages < NBuffers) ? innerpages : NBuffers;
 348
 349         /* if inner relation is too big then we will need to "batch" the join,
 350          * which implies writing and reading most of the tuples to disk an
 351          * extra time.
 352          */
 353         if (innerpages > NBuffers)
 354                 temp += 2 * (outerpages + innerpages);
 355
 356         Assert(temp >= 0);
 357
 358         return temp;
 359 }
 360
 361 /*
 362  * compute_rel_size
 363  *        Computes the size of each relation in 'rel_list' (after applying
 364  *        restrictions), by multiplying the selectivity of each restriction
 365  *        by the original size of the relation.
 366  *
 367  *        Sets the 'size' field for each relation entry with this computed size.
 368  *
 369  * Returns the size.
 370  */
 371 int
 372 compute_rel_size(RelOptInfo *rel)
 373 {
 374         Cost            temp;
 375         int                     temp1;
 376
 377         temp = rel->tuples * product_selec(rel->restrictinfo);
 378         Assert(temp >= 0);
 379         if (temp >= (MAXINT - 1))
 380                 temp1 = MAXINT;
 381         else
 382                 temp1 = ceil((double) temp);
 383         Assert(temp1 >= 0);
 384         Assert(temp1 <= MAXINT);
 385         return temp1;
 386 }
 387
 388 /*
 389  * compute_rel_width
 390  *        Computes the width in bytes of a tuple from 'rel'.
 391  *
 392  * Returns the width of the tuple as a fixnum.
 393  */
 394 int
 395 compute_rel_width(RelOptInfo *rel)
 396 {
 397         return compute_targetlist_width(get_actual_tlist(rel->targetlist));
 398 }
 399
 400 /*
 401  * compute_targetlist_width
 402  *        Computes the width in bytes of a tuple made from 'targetlist'.
 403  *
 404  * Returns the width of the tuple as a fixnum.
 405  */
 406 static int
 407 compute_targetlist_width(List *targetlist)
 408 {
 409         List       *temp_tl;
 410         int                     tuple_width = 0;
 411
 412         foreach(temp_tl, targetlist)
 413         {
 414                 tuple_width = tuple_width +
 415                         compute_attribute_width(lfirst(temp_tl));
 416         }
 417         return tuple_width;
 418 }
 419
 420 /*
 421  * compute_attribute_width
 422  *        Given a target list entry, find the size in bytes of the attribute.
 423  *
 424  *        If a field is variable-length, it is assumed to be at least the size
 425  *        of a TID field.
 426  *
 427  * Returns the width of the attribute as a fixnum.
 428  */
 429 static int
 430 compute_attribute_width(TargetEntry *tlistentry)
 431 {
 432         int                     width = get_typlen(tlistentry->resdom->restype);
 433
 434         if (width < 0)
 435                 return _DEFAULT_ATTRIBUTE_WIDTH_;
 436         else
 437                 return width;
 438 }
 439
 440 /*
 441  * compute_joinrel_size
 442  *        Computes the size of the join relation 'joinrel'.
 443  *
 444  * Returns a fixnum.
 445  */
 446 int
 447 compute_joinrel_size(JoinPath *joinpath)
 448 {
 449         Cost            temp = 1.0;
 450         int                     temp1 = 0;
 451
 452         /* cartesian product */
 453         temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
 454         temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
 455
 456         temp = temp * product_selec(joinpath->pathinfo);
 457         if (temp >= (MAXINT-1)/2)
 458         {
 459                 /* if we exceed (MAXINT-1)/2, we switch to log scale */
 460                 /* +1 prevents log(0) */
 461                 temp1 = ceil(log(temp + 1 - (MAXINT-1)/2) + (MAXINT-1)/2);
 462         }
 463         else
 464                 temp1 = ceil((double) temp);
 465         Assert(temp1 >= 0);
 466
 467         return temp1;
 468 }
 469
 470 /*
 471  * relation_byte_size
 472  *    Estimate the storage space in bytes for a given number of tuples
 473  *    of a given width (size in bytes).
 474  *    To avoid overflow with big relations, result is a double.
 475  */
 476
 477 static double
 478 relation_byte_size (int tuples, int width)
 479 {
 480         return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
 481 }
 482
 483 /*
 484  * page_size
 485  *        Returns an estimate of the number of pages covered by a given
 486  *        number of tuples of a given width (size in bytes).
 487  */
 488 int
 489 page_size(int tuples, int width)
 490 {
 491         int                     temp;
 492
 493         temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
 494         Assert(temp >= 0);
 495         return temp;
 496 }
 497
 498 static double
 499 base_log(double x, double b)
 500 {
 501         return log(x) / log(b);
 502 }