granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/costsize.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * costsize.c
   4  *        Routines to compute (and set) relation sizes and path costs
   5  *
   6  * Copyright (c) 1994, Regents of the University of California
   7  *
   8  *
   9  * IDENTIFICATION
  10  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.32 1999/02/13 23:16:16 momjian Exp $
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14
  15 #include "postgres.h"
  16
  17 #include <math.h>
  18
  19 #ifdef HAVE_LIMITS_H
  20 #include <limits.h>
  21 #ifndef MAXINT
  22 #define MAXINT            INT_MAX
  23 #endif
  24 #else
  25 #ifdef HAVE_VALUES_H
  26 #include <values.h>
  27 #endif
  28 #endif
  29
  30 #include "nodes/relation.h"
  31 #include "optimizer/cost.h"
  32 #include "optimizer/internal.h"
  33 #include "optimizer/keys.h"
  34 #include "optimizer/tlist.h"
  35 #include "utils/lsyscache.h"
  36
  37 extern int      NBuffers;
  38
  39 static int      compute_attribute_width(TargetEntry *tlistentry);
  40 static double base_log(double x, double b);
  41 static int      compute_targetlist_width(List *targetlist);
  42
  43 int                     _disable_cost_ = 30000000;
  44
  45 bool            _enable_seqscan_ = true;
  46 bool            _enable_indexscan_ = true;
  47 bool            _enable_sort_ = true;
  48 bool            _enable_hash_ = true;
  49 bool            _enable_nestloop_ = true;
  50 bool            _enable_mergejoin_ = true;
  51 bool            _enable_hashjoin_ = true;
  52
  53 Cost            _cpu_page_wight_ = _CPU_PAGE_WEIGHT_;
  54 Cost            _cpu_index_page_wight_ = _CPU_INDEX_PAGE_WEIGHT_;
  55
  56 /*
  57  * cost_seqscan
  58  *        Determines and returns the cost of scanning a relation sequentially.
  59  *        If the relation is a temporary to be materialized from a query
  60  *        embedded within a data field (determined by 'relid' containing an
  61  *        attribute reference), then a predetermined constant is returned (we
  62  *        have NO IDEA how big the result of a POSTQUEL procedure is going to
  63  *        be).
  64  *
  65  *              disk = p
  66  *              cpu = *CPU-PAGE-WEIGHT* * t
  67  *
  68  * 'relid' is the relid of the relation to be scanned
  69  * 'relpages' is the number of pages in the relation to be scanned
  70  *              (as determined from the system catalogs)
  71  * 'reltuples' is the number of tuples in the relation to be scanned
  72  *
  73  * Returns a flonum.
  74  *
  75  */
  76 Cost
  77 cost_seqscan(int relid, int relpages, int reltuples)
  78 {
  79         Cost            temp = 0;
  80
  81         if (!_enable_seqscan_)
  82                 temp += _disable_cost_;
  83
  84         if (relid < 0)
  85         {
  86
  87                 /*
  88                  * cost of sequentially scanning a materialized temporary relation
  89                  */
  90                 temp += _NONAME_SCAN_COST_;
  91         }
  92         else
  93         {
  94                 temp += relpages;
  95                 temp += _cpu_page_wight_ * reltuples;
  96         }
  97         Assert(temp >= 0);
  98         return temp;
  99 }
 100
 101
 102 /*
 103  * cost_index
 104  *        Determines and returns the cost of scanning a relation using an index.
 105  *
 106  *              disk = expected-index-pages + expected-data-pages
 107  *              cpu = *CPU-PAGE-WEIGHT* *
 108  *                              (expected-index-tuples + expected-data-tuples)
 109  *
 110  * 'indexid' is the index OID
 111  * 'expected-indexpages' is the number of index pages examined in the scan
 112  * 'selec' is the selectivity of the index
 113  * 'relpages' is the number of pages in the main relation
 114  * 'reltuples' is the number of tuples in the main relation
 115  * 'indexpages' is the number of pages in the index relation
 116  * 'indextuples' is the number of tuples in the index relation
 117  *
 118  * Returns a flonum.
 119  *
 120  */
 121 Cost
 122 cost_index(Oid indexid,
 123                    int expected_indexpages,
 124                    Cost selec,
 125                    int relpages,
 126                    int reltuples,
 127                    int indexpages,
 128                    int indextuples,
 129                    bool is_injoin)
 130 {
 131         Cost            temp;
 132         double          temp2;
 133
 134         temp = (Cost) 0;
 135
 136         if (!_enable_indexscan_ && !is_injoin)
 137                 temp += _disable_cost_;
 138
 139         /* expected index relation pages */
 140         temp += expected_indexpages;
 141
 142         /* expected base relation pages */
 143         temp2 = (reltuples == 0) ? (double) 0 : (double) relpages / reltuples;
 144         temp2 = temp2 * (double) selec *indextuples;
 145
 146         temp += Min(relpages, (int) ceil(temp2));
 147
 148         /* per index tuples */
 149         temp = temp + (_cpu_index_page_wight_ * selec * indextuples);
 150
 151         /* per heap tuples */
 152         temp = temp + (_cpu_page_wight_ * selec * reltuples);
 153
 154         Assert(temp >= 0);
 155         return temp;
 156 }
 157
 158 /*
 159  * cost_sort
 160  *        Determines and returns the cost of sorting a relation by considering
 161  *        1. the cost of doing an external sort:        XXX this is probably too low
 162  *                              disk = (p lg p)
 163  *                              cpu = *CPU-PAGE-WEIGHT* * (t lg t)
 164  *        2. the cost of reading the sort result into memory (another seqscan)
 165  *               unless 'noread' is set
 166  *
 167  * 'pathkeys' is a list of sort keys
 168  * 'tuples' is the number of tuples in the relation
 169  * 'width' is the average tuple width in bytes
 170  * 'noread' is a flag indicating that the sort result can remain on disk
 171  *                              (i.e., the sort result is the result relation)
 172  *
 173  * Returns a flonum.
 174  *
 175  */
 176 Cost
 177 cost_sort(List *pathkeys, int tuples, int width, bool noread)
 178 {
 179         Cost            temp = 0;
 180         int                     npages = page_size(tuples, width);
 181         Cost            pages = (Cost) npages;
 182         Cost            numTuples = tuples;
 183
 184         if (!_enable_sort_)
 185                 temp += _disable_cost_;
 186         if (tuples == 0 || pathkeys == NULL)
 187         {
 188                 Assert(temp >= 0);
 189                 return temp;
 190         }
 191         temp += pages * base_log((double) pages, (double) 2.0);
 192
 193         /*
 194          * could be base_log(pages, NBuffers), but we are only doing 2-way
 195          * merges
 196          */
 197         temp += _cpu_page_wight_ * numTuples *
 198                 base_log((double) pages, (double) 2.0);
 199
 200         if (!noread)
 201                 temp = temp + cost_seqscan(_NONAME_RELATION_ID_, npages, tuples);
 202         Assert(temp >= 0);
 203
 204         return temp;
 205 }
 206
 207
 208 /*
 209  * cost_result
 210  *        Determines and returns the cost of writing a relation of 'tuples'
 211  *        tuples of 'width' bytes out to a result relation.
 212  *
 213  * Returns a flonum.
 214  *
 215  */
 216 #ifdef NOT_USED
 217 Cost
 218 cost_result(int tuples, int width)
 219 {
 220         Cost            temp = 0;
 221
 222         temp = temp + page_size(tuples, width);
 223         temp = temp + _cpu_page_wight_ * tuples;
 224         Assert(temp >= 0);
 225         return temp;
 226 }
 227
 228 #endif
 229
 230 /*
 231  * cost_nestloop
 232  *        Determines and returns the cost of joining two relations using the
 233  *        nested loop algorithm.
 234  *
 235  * 'outercost' is the (disk+cpu) cost of scanning the outer relation
 236  * 'innercost' is the (disk+cpu) cost of scanning the inner relation
 237  * 'outertuples' is the number of tuples in the outer relation
 238  *
 239  * Returns a flonum.
 240  *
 241  */
 242 Cost
 243 cost_nestloop(Cost outercost,
 244                           Cost innercost,
 245                           int outertuples,
 246                           int innertuples,
 247                           int outerpages,
 248                           bool is_indexjoin)
 249 {
 250         Cost            temp = 0;
 251
 252         if (!_enable_nestloop_)
 253                 temp += _disable_cost_;
 254         temp += outercost;
 255         temp += outertuples * innercost;
 256         Assert(temp >= 0);
 257
 258         return temp;
 259 }
 260
 261 /*
 262  * cost_mergejoin
 263  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 264  *                              outer and inner relations
 265  *        'outersortkeys' and 'innersortkeys' are lists of the keys to be used
 266  *                              to sort the outer and inner relations
 267  *        'outertuples' and 'innertuples' are the number of tuples in the outer
 268  *                              and inner relations
 269  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 270  *                              of the tuples of the outer and inner relations
 271  *
 272  * Returns a flonum.
 273  *
 274  */
 275 Cost
 276 cost_mergejoin(Cost outercost,
 277                            Cost innercost,
 278                            List *outersortkeys,
 279                            List *innersortkeys,
 280                            int outersize,
 281                            int innersize,
 282                            int outerwidth,
 283                            int innerwidth)
 284 {
 285         Cost            temp = 0;
 286
 287         if (!_enable_mergejoin_)
 288                 temp += _disable_cost_;
 289
 290         temp += outercost;
 291         temp += innercost;
 292         temp += cost_sort(outersortkeys, outersize, outerwidth, false);
 293         temp += cost_sort(innersortkeys, innersize, innerwidth, false);
 294         temp += _cpu_page_wight_ * (outersize + innersize);
 295         Assert(temp >= 0);
 296
 297         return temp;
 298 }
 299
 300 /*
 301  * cost_hashjoin--                              XXX HASH
 302  *        'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
 303  *                              outer and inner relations
 304  *        'outerkeys' and 'innerkeys' are lists of the keys to be used
 305  *                              to hash the outer and inner relations
 306  *        'outersize' and 'innersize' are the number of tuples in the outer
 307  *                              and inner relations
 308  *        'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
 309  *                              of the tuples of the outer and inner relations
 310  *
 311  * Returns a flonum.
 312  */
 313 Cost
 314 cost_hashjoin(Cost outercost,
 315                           Cost innercost,
 316                           List *outerkeys,
 317                           List *innerkeys,
 318                           int outersize,
 319                           int innersize,
 320                           int outerwidth,
 321                           int innerwidth)
 322 {
 323         Cost            temp = 0;
 324         int                     outerpages = page_size(outersize, outerwidth);
 325         int                     innerpages = page_size(innersize, innerwidth);
 326         int                     nrun = ceil((double) outerpages / (double) NBuffers);
 327
 328         if (outerpages < innerpages)
 329                 return _disable_cost_;
 330         if (!_enable_hashjoin_)
 331                 temp += _disable_cost_;
 332
 333         /*
 334          * temp += outercost + (nrun + 1) * innercost;
 335          *
 336          * the innercost shouldn't be used it.  Instead the cost of hashing the
 337          * innerpath should be used
 338          *
 339          * ASSUME innercost is 1 for now -- a horrible hack - jolly temp +=
 340          * outercost + (nrun + 1);
 341          *
 342          * But we must add innercost to result.         - vadim 04/24/97
 343          */
 344         temp += outercost + innercost + (nrun + 1);
 345
 346         temp += _cpu_page_wight_ * (outersize + nrun * innersize);
 347         Assert(temp >= 0);
 348
 349         return temp;
 350 }
 351
 352 /*
 353  * compute_rel_size
 354  *        Computes the size of each relation in 'rel_list' (after applying
 355  *        restrictions), by multiplying the selectivity of each restriction
 356  *        by the original size of the relation.
 357  *
 358  *        Sets the 'size' field for each relation entry with this computed size.
 359  *
 360  * Returns the size.
 361  */
 362 int
 363 compute_rel_size(RelOptInfo *rel)
 364 {
 365         Cost            temp;
 366         int                     temp1;
 367
 368         temp = rel->tuples * product_selec(rel->restrictinfo);
 369         Assert(temp >= 0);
 370         if (temp >= (MAXINT - 1))
 371                 temp1 = MAXINT;
 372         else
 373                 temp1 = ceil((double) temp);
 374         Assert(temp1 >= 0);
 375         Assert(temp1 <= MAXINT);
 376         return temp1;
 377 }
 378
 379 /*
 380  * compute_rel_width
 381  *        Computes the width in bytes of a tuple from 'rel'.
 382  *
 383  * Returns the width of the tuple as a fixnum.
 384  */
 385 int
 386 compute_rel_width(RelOptInfo *rel)
 387 {
 388         return compute_targetlist_width(get_actual_tlist(rel->targetlist));
 389 }
 390
 391 /*
 392  * compute_targetlist_width
 393  *        Computes the width in bytes of a tuple made from 'targetlist'.
 394  *
 395  * Returns the width of the tuple as a fixnum.
 396  */
 397 static int
 398 compute_targetlist_width(List *targetlist)
 399 {
 400         List       *temp_tl;
 401         int                     tuple_width = 0;
 402
 403         foreach(temp_tl, targetlist)
 404         {
 405                 tuple_width = tuple_width +
 406                         compute_attribute_width(lfirst(temp_tl));
 407         }
 408         return tuple_width;
 409 }
 410
 411 /*
 412  * compute_attribute_width
 413  *        Given a target list entry, find the size in bytes of the attribute.
 414  *
 415  *        If a field is variable-length, it is assumed to be at least the size
 416  *        of a TID field.
 417  *
 418  * Returns the width of the attribute as a fixnum.
 419  */
 420 static int
 421 compute_attribute_width(TargetEntry *tlistentry)
 422 {
 423         int                     width = get_typlen(tlistentry->resdom->restype);
 424
 425         if (width < 0)
 426                 return _DEFAULT_ATTRIBUTE_WIDTH_;
 427         else
 428                 return width;
 429 }
 430
 431 /*
 432  * compute_joinrel_size
 433  *        Computes the size of the join relation 'joinrel'.
 434  *
 435  * Returns a fixnum.
 436  */
 437 int
 438 compute_joinrel_size(JoinPath *joinpath)
 439 {
 440         Cost            temp = 1.0;
 441         int                     temp1 = 0;
 442
 443         temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
 444         temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
 445
 446         temp = temp * product_selec(joinpath->pathinfo);
 447         if (temp >= (MAXINT - 1))
 448                 temp1 = MAXINT;
 449         else
 450         {
 451
 452                 /*
 453                  * should be ceil here, we don't want joinrel size's of one, do
 454                  * we?
 455                  */
 456                 temp1 = ceil((double) temp);
 457         }
 458         Assert(temp1 >= 0);
 459
 460         return temp1;
 461 }
 462
 463 /*
 464  * page_size
 465  *        Returns an estimate of the number of pages covered by a given
 466  *        number of tuples of a given width (size in bytes).
 467  */
 468 int
 469 page_size(int tuples, int width)
 470 {
 471         int                     temp = 0;
 472
 473         temp = ceil((double) (tuples * (width + sizeof(HeapTupleData)))
 474                                 / BLCKSZ);
 475         Assert(temp >= 0);
 476         return temp;
 477 }
 478
 479 static double
 480 base_log(double x, double b)
 481 {
 482         return log(x) / log(b);
 483 }