granicus.if.org Git - postgresql/blob - src/backend/optimizer/util/pathnode.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * pathnode.c
   4  *        Routines to manipulate pathlists and create path nodes
   5  *
   6  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.77 2002/05/12 20:10:03 tgl Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <math.h>
  18
  19 #include "nodes/plannodes.h"
  20 #include "optimizer/cost.h"
  21 #include "optimizer/pathnode.h"
  22 #include "optimizer/paths.h"
  23 #include "optimizer/restrictinfo.h"
  24
  25
  26 /*****************************************************************************
  27  *              MISC. PATH UTILITIES
  28  *****************************************************************************/
  29
  30 /*
  31  * compare_path_costs
  32  *        Return -1, 0, or +1 according as path1 is cheaper, the same cost,
  33  *        or more expensive than path2 for the specified criterion.
  34  */
  35 int
  36 compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
  37 {
  38         if (criterion == STARTUP_COST)
  39         {
  40                 if (path1->startup_cost < path2->startup_cost)
  41                         return -1;
  42                 if (path1->startup_cost > path2->startup_cost)
  43                         return +1;
  44
  45                 /*
  46                  * If paths have the same startup cost (not at all unlikely),
  47                  * order them by total cost.
  48                  */
  49                 if (path1->total_cost < path2->total_cost)
  50                         return -1;
  51                 if (path1->total_cost > path2->total_cost)
  52                         return +1;
  53         }
  54         else
  55         {
  56                 if (path1->total_cost < path2->total_cost)
  57                         return -1;
  58                 if (path1->total_cost > path2->total_cost)
  59                         return +1;
  60
  61                 /*
  62                  * If paths have the same total cost, order them by startup cost.
  63                  */
  64                 if (path1->startup_cost < path2->startup_cost)
  65                         return -1;
  66                 if (path1->startup_cost > path2->startup_cost)
  67                         return +1;
  68         }
  69         return 0;
  70 }
  71
  72 /*
  73  * compare_path_fractional_costs
  74  *        Return -1, 0, or +1 according as path1 is cheaper, the same cost,
  75  *        or more expensive than path2 for fetching the specified fraction
  76  *        of the total tuples.
  77  *
  78  * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
  79  * path with the cheaper total_cost.
  80  */
  81 int
  82 compare_fractional_path_costs(Path *path1, Path *path2,
  83                                                           double fraction)
  84 {
  85         Cost            cost1,
  86                                 cost2;
  87
  88         if (fraction <= 0.0 || fraction >= 1.0)
  89                 return compare_path_costs(path1, path2, TOTAL_COST);
  90         cost1 = path1->startup_cost +
  91                 fraction * (path1->total_cost - path1->startup_cost);
  92         cost2 = path2->startup_cost +
  93                 fraction * (path2->total_cost - path2->startup_cost);
  94         if (cost1 < cost2)
  95                 return -1;
  96         if (cost1 > cost2)
  97                 return +1;
  98         return 0;
  99 }
 100
 101 /*
 102  * set_cheapest
 103  *        Find the minimum-cost paths from among a relation's paths,
 104  *        and save them in the rel's cheapest-path fields.
 105  *
 106  * This is normally called only after we've finished constructing the path
 107  * list for the rel node.
 108  *
 109  * If we find two paths of identical costs, try to keep the better-sorted one.
 110  * The paths might have unrelated sort orderings, in which case we can only
 111  * guess which might be better to keep, but if one is superior then we
 112  * definitely should keep it.
 113  */
 114 void
 115 set_cheapest(RelOptInfo *parent_rel)
 116 {
 117         List       *pathlist = parent_rel->pathlist;
 118         List       *p;
 119         Path       *cheapest_startup_path;
 120         Path       *cheapest_total_path;
 121
 122         Assert(IsA(parent_rel, RelOptInfo));
 123
 124         if (pathlist == NIL)
 125                 elog(ERROR, "Unable to devise a query plan for the given query");
 126
 127         cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist);
 128
 129         foreach(p, lnext(pathlist))
 130         {
 131                 Path       *path = (Path *) lfirst(p);
 132                 int                     cmp;
 133
 134                 cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
 135                 if (cmp > 0 ||
 136                         (cmp == 0 &&
 137                          compare_pathkeys(cheapest_startup_path->pathkeys,
 138                                                           path->pathkeys) == PATHKEYS_BETTER2))
 139                         cheapest_startup_path = path;
 140
 141                 cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
 142                 if (cmp > 0 ||
 143                         (cmp == 0 &&
 144                          compare_pathkeys(cheapest_total_path->pathkeys,
 145                                                           path->pathkeys) == PATHKEYS_BETTER2))
 146                         cheapest_total_path = path;
 147         }
 148
 149         parent_rel->cheapest_startup_path = cheapest_startup_path;
 150         parent_rel->cheapest_total_path = cheapest_total_path;
 151 }
 152
 153 /*
 154  * add_path
 155  *        Consider a potential implementation path for the specified parent rel,
 156  *        and add it to the rel's pathlist if it is worthy of consideration.
 157  *        A path is worthy if it has either a better sort order (better pathkeys)
 158  *        or cheaper cost (on either dimension) than any of the existing old paths.
 159  *
 160  *        Unless parent_rel->pruneable is false, we also remove from the rel's
 161  *        pathlist any old paths that are dominated by new_path --- that is,
 162  *        new_path is both cheaper and at least as well ordered.
 163  *
 164  *        The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
 165  *        at the front.  No code depends on that for correctness; it's simply
 166  *        a speed hack within this routine.  Doing it that way makes it more
 167  *        likely that we will reject an inferior path after a few comparisons,
 168  *        rather than many comparisons.
 169  *
 170  *        NOTE: discarded Path objects are immediately pfree'd to reduce planner
 171  *        memory consumption.  We dare not try to free the substructure of a Path,
 172  *        since much of it may be shared with other Paths or the query tree itself;
 173  *        but just recycling discarded Path nodes is a very useful savings in
 174  *        a large join tree.  We can recycle the List nodes of pathlist, too.
 175  *
 176  * 'parent_rel' is the relation entry to which the path corresponds.
 177  * 'new_path' is a potential path for parent_rel.
 178  *
 179  * Returns nothing, but modifies parent_rel->pathlist.
 180  */
 181 void
 182 add_path(RelOptInfo *parent_rel, Path *new_path)
 183 {
 184         bool            accept_new = true;              /* unless we find a superior old
 185                                                                                  * path */
 186         List       *insert_after = NIL;         /* where to insert new item */
 187         List       *p1_prev = NIL;
 188         List       *p1;
 189
 190         /*
 191          * Loop to check proposed new path against old paths.  Note it is
 192          * possible for more than one old path to be tossed out because
 193          * new_path dominates it.
 194          */
 195         p1 = parent_rel->pathlist;      /* cannot use foreach here */
 196         while (p1 != NIL)
 197         {
 198                 Path       *old_path = (Path *) lfirst(p1);
 199                 bool            remove_old = false; /* unless new proves superior */
 200                 int                     costcmp;
 201
 202                 costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
 203
 204                 /*
 205                  * If the two paths compare differently for startup and total
 206                  * cost, then we want to keep both, and we can skip the (much
 207                  * slower) comparison of pathkeys.      If they compare the same,
 208                  * proceed with the pathkeys comparison.  Note: this test relies
 209                  * on the fact that compare_path_costs will only return 0 if both
 210                  * costs are equal (and, therefore, there's no need to call it
 211                  * twice in that case).
 212                  */
 213                 if (costcmp == 0 ||
 214                         costcmp == compare_path_costs(new_path, old_path,
 215                                                                                   STARTUP_COST))
 216                 {
 217                         switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
 218                         {
 219                                 case PATHKEYS_EQUAL:
 220                                         if (costcmp < 0)
 221                                                 remove_old = true;              /* new dominates old */
 222                                         else
 223                                                 accept_new = false;             /* old equals or dominates
 224                                                                                                  * new */
 225                                         break;
 226                                 case PATHKEYS_BETTER1:
 227                                         if (costcmp <= 0)
 228                                                 remove_old = true;              /* new dominates old */
 229                                         break;
 230                                 case PATHKEYS_BETTER2:
 231                                         if (costcmp >= 0)
 232                                                 accept_new = false;             /* old dominates new */
 233                                         break;
 234                                 case PATHKEYS_DIFFERENT:
 235                                         /* keep both paths, since they have different ordering */
 236                                         break;
 237                         }
 238                 }
 239
 240                 /*
 241                  * Remove current element from pathlist if dominated by new,
 242                  * unless xfunc told us not to remove any paths.
 243                  */
 244                 if (remove_old && parent_rel->pruneable)
 245                 {
 246                         List       *p1_next = lnext(p1);
 247
 248                         if (p1_prev)
 249                                 lnext(p1_prev) = p1_next;
 250                         else
 251                                 parent_rel->pathlist = p1_next;
 252                         pfree(old_path);
 253                         pfree(p1);                      /* this is why we can't use foreach */
 254                         p1 = p1_next;
 255                 }
 256                 else
 257                 {
 258                         /* new belongs after this old path if it has cost >= old's */
 259                         if (costcmp >= 0)
 260                                 insert_after = p1;
 261                         p1_prev = p1;
 262                         p1 = lnext(p1);
 263                 }
 264
 265                 /*
 266                  * If we found an old path that dominates new_path, we can quit
 267                  * scanning the pathlist; we will not add new_path, and we assume
 268                  * new_path cannot dominate any other elements of the pathlist.
 269                  */
 270                 if (!accept_new)
 271                         break;
 272         }
 273
 274         if (accept_new)
 275         {
 276                 /* Accept the new path: insert it at proper place in pathlist */
 277                 if (insert_after)
 278                         lnext(insert_after) = lcons(new_path, lnext(insert_after));
 279                 else
 280                         parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
 281         }
 282         else
 283         {
 284                 /* Reject and recycle the new path */
 285                 pfree(new_path);
 286         }
 287 }
 288
 289
 290 /*****************************************************************************
 291  *              PATH NODE CREATION ROUTINES
 292  *****************************************************************************/
 293
 294 /*
 295  * create_seqscan_path
 296  *        Creates a path corresponding to a sequential scan, returning the
 297  *        pathnode.
 298  */
 299 Path *
 300 create_seqscan_path(Query *root, RelOptInfo *rel)
 301 {
 302         Path       *pathnode = makeNode(Path);
 303
 304         pathnode->pathtype = T_SeqScan;
 305         pathnode->parent = rel;
 306         pathnode->pathkeys = NIL;       /* seqscan has unordered result */
 307
 308         cost_seqscan(pathnode, root, rel);
 309
 310         return pathnode;
 311 }
 312
 313 /*
 314  * create_index_path
 315  *        Creates a path node for an index scan.
 316  *
 317  * 'rel' is the parent rel
 318  * 'index' is an index on 'rel'
 319  * 'restriction_clauses' is a list of RestrictInfo nodes
 320  *                      to be used as index qual conditions in the scan.
 321  * 'pathkeys' describes the ordering of the path.
 322  * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
 323  *                      for an ordered index, or NoMovementScanDirection for
 324  *                      an unordered index.
 325  *
 326  * Returns the new path node.
 327  */
 328 IndexPath *
 329 create_index_path(Query *root,
 330                                   RelOptInfo *rel,
 331                                   IndexOptInfo *index,
 332                                   List *restriction_clauses,
 333                                   List *pathkeys,
 334                                   ScanDirection indexscandir)
 335 {
 336         IndexPath  *pathnode = makeNode(IndexPath);
 337         List       *indexquals;
 338
 339         pathnode->path.pathtype = T_IndexScan;
 340         pathnode->path.parent = rel;
 341         pathnode->path.pathkeys = pathkeys;
 342
 343         indexquals = get_actual_clauses(restriction_clauses);
 344         /* expand special operators to indexquals the executor can handle */
 345         indexquals = expand_indexqual_conditions(indexquals);
 346
 347         /*
 348          * We are making a pathnode for a single-scan indexscan; therefore,
 349          * both indexinfo and indexqual should be single-element lists.
 350          */
 351         pathnode->indexinfo = makeList1(index);
 352         pathnode->indexqual = makeList1(indexquals);
 353
 354         pathnode->indexscandir = indexscandir;
 355
 356         /*
 357          * This routine is only used to generate "standalone" indexpaths, not
 358          * nestloop inner indexpaths.  So joinrelids is always NIL and the
 359          * number of rows is the same as the parent rel's estimate.
 360          */
 361         pathnode->joinrelids = NIL; /* no join clauses here */
 362         pathnode->alljoinquals = false;
 363         pathnode->rows = rel->rows;
 364
 365         /*
 366          * Not sure if this is necessary, but it should help if the statistics
 367          * are too far off
 368          */
 369         if (index->indpred && index->tuples < pathnode->rows)
 370                 pathnode->rows = index->tuples;
 371
 372         cost_index(&pathnode->path, root, rel, index, indexquals, false);
 373
 374         return pathnode;
 375 }
 376
 377 /*
 378  * create_tidscan_path
 379  *        Creates a path corresponding to a tid_direct scan, returning the
 380  *        pathnode.
 381  */
 382 TidPath *
 383 create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval)
 384 {
 385         TidPath    *pathnode = makeNode(TidPath);
 386
 387         pathnode->path.pathtype = T_TidScan;
 388         pathnode->path.parent = rel;
 389         pathnode->path.pathkeys = NIL;
 390         pathnode->tideval = copyObject(tideval);        /* is copy really
 391                                                                                                  * necessary? */
 392         pathnode->unjoined_relids = NIL;
 393
 394         cost_tidscan(&pathnode->path, root, rel, tideval);
 395
 396         /*
 397          * divide selectivity for each clause to get an equal selectivity as
 398          * IndexScan does OK ?
 399          */
 400
 401         return pathnode;
 402 }
 403
 404 /*
 405  * create_append_path
 406  *        Creates a path corresponding to an Append plan, returning the
 407  *        pathnode.
 408  *
 409  */
 410 AppendPath *
 411 create_append_path(RelOptInfo *rel, List *subpaths)
 412 {
 413         AppendPath *pathnode = makeNode(AppendPath);
 414         List       *l;
 415
 416         pathnode->path.pathtype = T_Append;
 417         pathnode->path.parent = rel;
 418         pathnode->path.pathkeys = NIL;          /* result is always considered
 419                                                                                  * unsorted */
 420         pathnode->subpaths = subpaths;
 421
 422         pathnode->path.startup_cost = 0;
 423         pathnode->path.total_cost = 0;
 424         foreach(l, subpaths)
 425         {
 426                 Path       *subpath = (Path *) lfirst(l);
 427
 428                 if (l == subpaths)              /* first node? */
 429                         pathnode->path.startup_cost = subpath->startup_cost;
 430                 pathnode->path.total_cost += subpath->total_cost;
 431         }
 432
 433         return pathnode;
 434 }
 435
 436 /*
 437  * create_subqueryscan_path
 438  *        Creates a path corresponding to a sequential scan of a subquery,
 439  *        returning the pathnode.
 440  */
 441 Path *
 442 create_subqueryscan_path(RelOptInfo *rel)
 443 {
 444         Path       *pathnode = makeNode(Path);
 445
 446         pathnode->pathtype = T_SubqueryScan;
 447         pathnode->parent = rel;
 448         pathnode->pathkeys = NIL;       /* for now, assume unordered result */
 449
 450         /* just copy the subplan's cost estimates */
 451         pathnode->startup_cost = rel->subplan->startup_cost;
 452         pathnode->total_cost = rel->subplan->total_cost;
 453
 454         return pathnode;
 455 }
 456
 457 /*
 458  * create_functionscan_path
 459  *        Creates a path corresponding to a sequential scan of a function,
 460  *        returning the pathnode.
 461  */
 462 Path *
 463 create_functionscan_path(Query *root, RelOptInfo *rel)
 464 {
 465         Path       *pathnode = makeNode(Path);
 466
 467         pathnode->pathtype = T_FunctionScan;
 468         pathnode->parent = rel;
 469         pathnode->pathkeys = NIL;       /* for now, assume unordered result */
 470
 471         cost_functionscan(pathnode, root, rel);
 472
 473         return pathnode;
 474 }
 475
 476 /*
 477  * create_nestloop_path
 478  *        Creates a pathnode corresponding to a nestloop join between two
 479  *        relations.
 480  *
 481  * 'joinrel' is the join relation.
 482  * 'jointype' is the type of join required
 483  * 'outer_path' is the outer path
 484  * 'inner_path' is the inner path
 485  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
 486  * 'pathkeys' are the path keys of the new join path
 487  *
 488  * Returns the resulting path node.
 489  */
 490 NestPath *
 491 create_nestloop_path(Query *root,
 492                                          RelOptInfo *joinrel,
 493                                          JoinType jointype,
 494                                          Path *outer_path,
 495                                          Path *inner_path,
 496                                          List *restrict_clauses,
 497                                          List *pathkeys)
 498 {
 499         NestPath   *pathnode = makeNode(NestPath);
 500
 501         pathnode->path.pathtype = T_NestLoop;
 502         pathnode->path.parent = joinrel;
 503         pathnode->jointype = jointype;
 504         pathnode->outerjoinpath = outer_path;
 505         pathnode->innerjoinpath = inner_path;
 506         pathnode->joinrestrictinfo = restrict_clauses;
 507         pathnode->path.pathkeys = pathkeys;
 508
 509         cost_nestloop(&pathnode->path, root, outer_path, inner_path,
 510                                   restrict_clauses);
 511
 512         return pathnode;
 513 }
 514
 515 /*
 516  * create_mergejoin_path
 517  *        Creates a pathnode corresponding to a mergejoin join between
 518  *        two relations
 519  *
 520  * 'joinrel' is the join relation
 521  * 'jointype' is the type of join required
 522  * 'outer_path' is the outer path
 523  * 'inner_path' is the inner path
 524  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
 525  * 'pathkeys' are the path keys of the new join path
 526  * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
 527  *              (this should be a subset of the restrict_clauses list)
 528  * 'outersortkeys' are the sort varkeys for the outer relation
 529  * 'innersortkeys' are the sort varkeys for the inner relation
 530  */
 531 MergePath *
 532 create_mergejoin_path(Query *root,
 533                                           RelOptInfo *joinrel,
 534                                           JoinType jointype,
 535                                           Path *outer_path,
 536                                           Path *inner_path,
 537                                           List *restrict_clauses,
 538                                           List *pathkeys,
 539                                           List *mergeclauses,
 540                                           List *outersortkeys,
 541                                           List *innersortkeys)
 542 {
 543         MergePath  *pathnode = makeNode(MergePath);
 544
 545         /*
 546          * If the given paths are already well enough ordered, we can skip
 547          * doing an explicit sort.
 548          */
 549         if (outersortkeys &&
 550                 pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
 551                 outersortkeys = NIL;
 552         if (innersortkeys &&
 553                 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
 554                 innersortkeys = NIL;
 555
 556         pathnode->jpath.path.pathtype = T_MergeJoin;
 557         pathnode->jpath.path.parent = joinrel;
 558         pathnode->jpath.jointype = jointype;
 559         pathnode->jpath.outerjoinpath = outer_path;
 560         pathnode->jpath.innerjoinpath = inner_path;
 561         pathnode->jpath.joinrestrictinfo = restrict_clauses;
 562         pathnode->jpath.path.pathkeys = pathkeys;
 563         pathnode->path_mergeclauses = mergeclauses;
 564         pathnode->outersortkeys = outersortkeys;
 565         pathnode->innersortkeys = innersortkeys;
 566
 567         cost_mergejoin(&pathnode->jpath.path,
 568                                    root,
 569                                    outer_path,
 570                                    inner_path,
 571                                    restrict_clauses,
 572                                    mergeclauses,
 573                                    outersortkeys,
 574                                    innersortkeys);
 575
 576         return pathnode;
 577 }
 578
 579 /*
 580  * create_hashjoin_path
 581  *        Creates a pathnode corresponding to a hash join between two relations.
 582  *
 583  * 'joinrel' is the join relation
 584  * 'jointype' is the type of join required
 585  * 'outer_path' is the cheapest outer path
 586  * 'inner_path' is the cheapest inner path
 587  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
 588  * 'hashclauses' is a list of the hash join clause (always a 1-element list)
 589  *              (this should be a subset of the restrict_clauses list)
 590  */
 591 HashPath *
 592 create_hashjoin_path(Query *root,
 593                                          RelOptInfo *joinrel,
 594                                          JoinType jointype,
 595                                          Path *outer_path,
 596                                          Path *inner_path,
 597                                          List *restrict_clauses,
 598                                          List *hashclauses)
 599 {
 600         HashPath   *pathnode = makeNode(HashPath);
 601
 602         pathnode->jpath.path.pathtype = T_HashJoin;
 603         pathnode->jpath.path.parent = joinrel;
 604         pathnode->jpath.jointype = jointype;
 605         pathnode->jpath.outerjoinpath = outer_path;
 606         pathnode->jpath.innerjoinpath = inner_path;
 607         pathnode->jpath.joinrestrictinfo = restrict_clauses;
 608         /* A hashjoin never has pathkeys, since its ordering is unpredictable */
 609         pathnode->jpath.path.pathkeys = NIL;
 610         pathnode->path_hashclauses = hashclauses;
 611
 612         cost_hashjoin(&pathnode->jpath.path,
 613                                   root,
 614                                   outer_path,
 615                                   inner_path,
 616                                   restrict_clauses,
 617                                   hashclauses);
 618
 619         return pathnode;
 620 }