1 /*-------------------------------------------------------------------------
4 * Routines to manipulate pathlists and create path nodes
6 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.77 2002/05/12 20:10:03 tgl Exp $
13 *-------------------------------------------------------------------------
19 #include "nodes/plannodes.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/pathnode.h"
22 #include "optimizer/paths.h"
23 #include "optimizer/restrictinfo.h"
26 /*****************************************************************************
27 * MISC. PATH UTILITIES
28 *****************************************************************************/
32 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
33 * or more expensive than path2 for the specified criterion.
36 compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
38 if (criterion == STARTUP_COST)
40 if (path1->startup_cost < path2->startup_cost)
42 if (path1->startup_cost > path2->startup_cost)
46 * If paths have the same startup cost (not at all unlikely),
47 * order them by total cost.
49 if (path1->total_cost < path2->total_cost)
51 if (path1->total_cost > path2->total_cost)
56 if (path1->total_cost < path2->total_cost)
58 if (path1->total_cost > path2->total_cost)
62 * If paths have the same total cost, order them by startup cost.
64 if (path1->startup_cost < path2->startup_cost)
66 if (path1->startup_cost > path2->startup_cost)
73 * compare_path_fractional_costs
74 * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
75 * or more expensive than path2 for fetching the specified fraction
76 * of the total tuples.
78 * If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
79 * path with the cheaper total_cost.
82 compare_fractional_path_costs(Path *path1, Path *path2,
88 if (fraction <= 0.0 || fraction >= 1.0)
89 return compare_path_costs(path1, path2, TOTAL_COST);
90 cost1 = path1->startup_cost +
91 fraction * (path1->total_cost - path1->startup_cost);
92 cost2 = path2->startup_cost +
93 fraction * (path2->total_cost - path2->startup_cost);
103 * Find the minimum-cost paths from among a relation's paths,
104 * and save them in the rel's cheapest-path fields.
106 * This is normally called only after we've finished constructing the path
107 * list for the rel node.
109 * If we find two paths of identical costs, try to keep the better-sorted one.
110 * The paths might have unrelated sort orderings, in which case we can only
111 * guess which might be better to keep, but if one is superior then we
112 * definitely should keep it.
115 set_cheapest(RelOptInfo *parent_rel)
117 List *pathlist = parent_rel->pathlist;
119 Path *cheapest_startup_path;
120 Path *cheapest_total_path;
122 Assert(IsA(parent_rel, RelOptInfo));
125 elog(ERROR, "Unable to devise a query plan for the given query");
127 cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist);
129 foreach(p, lnext(pathlist))
131 Path *path = (Path *) lfirst(p);
134 cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
137 compare_pathkeys(cheapest_startup_path->pathkeys,
138 path->pathkeys) == PATHKEYS_BETTER2))
139 cheapest_startup_path = path;
141 cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
144 compare_pathkeys(cheapest_total_path->pathkeys,
145 path->pathkeys) == PATHKEYS_BETTER2))
146 cheapest_total_path = path;
149 parent_rel->cheapest_startup_path = cheapest_startup_path;
150 parent_rel->cheapest_total_path = cheapest_total_path;
155 * Consider a potential implementation path for the specified parent rel,
156 * and add it to the rel's pathlist if it is worthy of consideration.
157 * A path is worthy if it has either a better sort order (better pathkeys)
158 * or cheaper cost (on either dimension) than any of the existing old paths.
160 * Unless parent_rel->pruneable is false, we also remove from the rel's
161 * pathlist any old paths that are dominated by new_path --- that is,
162 * new_path is both cheaper and at least as well ordered.
164 * The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
165 * at the front. No code depends on that for correctness; it's simply
166 * a speed hack within this routine. Doing it that way makes it more
167 * likely that we will reject an inferior path after a few comparisons,
168 * rather than many comparisons.
170 * NOTE: discarded Path objects are immediately pfree'd to reduce planner
171 * memory consumption. We dare not try to free the substructure of a Path,
172 * since much of it may be shared with other Paths or the query tree itself;
173 * but just recycling discarded Path nodes is a very useful savings in
174 * a large join tree. We can recycle the List nodes of pathlist, too.
176 * 'parent_rel' is the relation entry to which the path corresponds.
177 * 'new_path' is a potential path for parent_rel.
179 * Returns nothing, but modifies parent_rel->pathlist.
182 add_path(RelOptInfo *parent_rel, Path *new_path)
184 bool accept_new = true; /* unless we find a superior old
186 List *insert_after = NIL; /* where to insert new item */
191 * Loop to check proposed new path against old paths. Note it is
192 * possible for more than one old path to be tossed out because
193 * new_path dominates it.
195 p1 = parent_rel->pathlist; /* cannot use foreach here */
198 Path *old_path = (Path *) lfirst(p1);
199 bool remove_old = false; /* unless new proves superior */
202 costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
205 * If the two paths compare differently for startup and total
206 * cost, then we want to keep both, and we can skip the (much
207 * slower) comparison of pathkeys. If they compare the same,
208 * proceed with the pathkeys comparison. Note: this test relies
209 * on the fact that compare_path_costs will only return 0 if both
210 * costs are equal (and, therefore, there's no need to call it
211 * twice in that case).
214 costcmp == compare_path_costs(new_path, old_path,
217 switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
221 remove_old = true; /* new dominates old */
223 accept_new = false; /* old equals or dominates
226 case PATHKEYS_BETTER1:
228 remove_old = true; /* new dominates old */
230 case PATHKEYS_BETTER2:
232 accept_new = false; /* old dominates new */
234 case PATHKEYS_DIFFERENT:
235 /* keep both paths, since they have different ordering */
241 * Remove current element from pathlist if dominated by new,
242 * unless xfunc told us not to remove any paths.
244 if (remove_old && parent_rel->pruneable)
246 List *p1_next = lnext(p1);
249 lnext(p1_prev) = p1_next;
251 parent_rel->pathlist = p1_next;
253 pfree(p1); /* this is why we can't use foreach */
258 /* new belongs after this old path if it has cost >= old's */
266 * If we found an old path that dominates new_path, we can quit
267 * scanning the pathlist; we will not add new_path, and we assume
268 * new_path cannot dominate any other elements of the pathlist.
276 /* Accept the new path: insert it at proper place in pathlist */
278 lnext(insert_after) = lcons(new_path, lnext(insert_after));
280 parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
284 /* Reject and recycle the new path */
290 /*****************************************************************************
291 * PATH NODE CREATION ROUTINES
292 *****************************************************************************/
295 * create_seqscan_path
296 * Creates a path corresponding to a sequential scan, returning the
300 create_seqscan_path(Query *root, RelOptInfo *rel)
302 Path *pathnode = makeNode(Path);
304 pathnode->pathtype = T_SeqScan;
305 pathnode->parent = rel;
306 pathnode->pathkeys = NIL; /* seqscan has unordered result */
308 cost_seqscan(pathnode, root, rel);
315 * Creates a path node for an index scan.
317 * 'rel' is the parent rel
318 * 'index' is an index on 'rel'
319 * 'restriction_clauses' is a list of RestrictInfo nodes
320 * to be used as index qual conditions in the scan.
321 * 'pathkeys' describes the ordering of the path.
322 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
323 * for an ordered index, or NoMovementScanDirection for
324 * an unordered index.
326 * Returns the new path node.
329 create_index_path(Query *root,
332 List *restriction_clauses,
334 ScanDirection indexscandir)
336 IndexPath *pathnode = makeNode(IndexPath);
339 pathnode->path.pathtype = T_IndexScan;
340 pathnode->path.parent = rel;
341 pathnode->path.pathkeys = pathkeys;
343 indexquals = get_actual_clauses(restriction_clauses);
344 /* expand special operators to indexquals the executor can handle */
345 indexquals = expand_indexqual_conditions(indexquals);
348 * We are making a pathnode for a single-scan indexscan; therefore,
349 * both indexinfo and indexqual should be single-element lists.
351 pathnode->indexinfo = makeList1(index);
352 pathnode->indexqual = makeList1(indexquals);
354 pathnode->indexscandir = indexscandir;
357 * This routine is only used to generate "standalone" indexpaths, not
358 * nestloop inner indexpaths. So joinrelids is always NIL and the
359 * number of rows is the same as the parent rel's estimate.
361 pathnode->joinrelids = NIL; /* no join clauses here */
362 pathnode->alljoinquals = false;
363 pathnode->rows = rel->rows;
366 * Not sure if this is necessary, but it should help if the statistics
369 if (index->indpred && index->tuples < pathnode->rows)
370 pathnode->rows = index->tuples;
372 cost_index(&pathnode->path, root, rel, index, indexquals, false);
378 * create_tidscan_path
379 * Creates a path corresponding to a tid_direct scan, returning the
383 create_tidscan_path(Query *root, RelOptInfo *rel, List *tideval)
385 TidPath *pathnode = makeNode(TidPath);
387 pathnode->path.pathtype = T_TidScan;
388 pathnode->path.parent = rel;
389 pathnode->path.pathkeys = NIL;
390 pathnode->tideval = copyObject(tideval); /* is copy really
392 pathnode->unjoined_relids = NIL;
394 cost_tidscan(&pathnode->path, root, rel, tideval);
397 * divide selectivity for each clause to get an equal selectivity as
398 * IndexScan does OK ?
406 * Creates a path corresponding to an Append plan, returning the
411 create_append_path(RelOptInfo *rel, List *subpaths)
413 AppendPath *pathnode = makeNode(AppendPath);
416 pathnode->path.pathtype = T_Append;
417 pathnode->path.parent = rel;
418 pathnode->path.pathkeys = NIL; /* result is always considered
420 pathnode->subpaths = subpaths;
422 pathnode->path.startup_cost = 0;
423 pathnode->path.total_cost = 0;
426 Path *subpath = (Path *) lfirst(l);
428 if (l == subpaths) /* first node? */
429 pathnode->path.startup_cost = subpath->startup_cost;
430 pathnode->path.total_cost += subpath->total_cost;
437 * create_subqueryscan_path
438 * Creates a path corresponding to a sequential scan of a subquery,
439 * returning the pathnode.
442 create_subqueryscan_path(RelOptInfo *rel)
444 Path *pathnode = makeNode(Path);
446 pathnode->pathtype = T_SubqueryScan;
447 pathnode->parent = rel;
448 pathnode->pathkeys = NIL; /* for now, assume unordered result */
450 /* just copy the subplan's cost estimates */
451 pathnode->startup_cost = rel->subplan->startup_cost;
452 pathnode->total_cost = rel->subplan->total_cost;
458 * create_functionscan_path
459 * Creates a path corresponding to a sequential scan of a function,
460 * returning the pathnode.
463 create_functionscan_path(Query *root, RelOptInfo *rel)
465 Path *pathnode = makeNode(Path);
467 pathnode->pathtype = T_FunctionScan;
468 pathnode->parent = rel;
469 pathnode->pathkeys = NIL; /* for now, assume unordered result */
471 cost_functionscan(pathnode, root, rel);
477 * create_nestloop_path
478 * Creates a pathnode corresponding to a nestloop join between two
481 * 'joinrel' is the join relation.
482 * 'jointype' is the type of join required
483 * 'outer_path' is the outer path
484 * 'inner_path' is the inner path
485 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
486 * 'pathkeys' are the path keys of the new join path
488 * Returns the resulting path node.
491 create_nestloop_path(Query *root,
496 List *restrict_clauses,
499 NestPath *pathnode = makeNode(NestPath);
501 pathnode->path.pathtype = T_NestLoop;
502 pathnode->path.parent = joinrel;
503 pathnode->jointype = jointype;
504 pathnode->outerjoinpath = outer_path;
505 pathnode->innerjoinpath = inner_path;
506 pathnode->joinrestrictinfo = restrict_clauses;
507 pathnode->path.pathkeys = pathkeys;
509 cost_nestloop(&pathnode->path, root, outer_path, inner_path,
516 * create_mergejoin_path
517 * Creates a pathnode corresponding to a mergejoin join between
520 * 'joinrel' is the join relation
521 * 'jointype' is the type of join required
522 * 'outer_path' is the outer path
523 * 'inner_path' is the inner path
524 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
525 * 'pathkeys' are the path keys of the new join path
526 * 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
527 * (this should be a subset of the restrict_clauses list)
528 * 'outersortkeys' are the sort varkeys for the outer relation
529 * 'innersortkeys' are the sort varkeys for the inner relation
532 create_mergejoin_path(Query *root,
537 List *restrict_clauses,
543 MergePath *pathnode = makeNode(MergePath);
546 * If the given paths are already well enough ordered, we can skip
547 * doing an explicit sort.
550 pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
553 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
556 pathnode->jpath.path.pathtype = T_MergeJoin;
557 pathnode->jpath.path.parent = joinrel;
558 pathnode->jpath.jointype = jointype;
559 pathnode->jpath.outerjoinpath = outer_path;
560 pathnode->jpath.innerjoinpath = inner_path;
561 pathnode->jpath.joinrestrictinfo = restrict_clauses;
562 pathnode->jpath.path.pathkeys = pathkeys;
563 pathnode->path_mergeclauses = mergeclauses;
564 pathnode->outersortkeys = outersortkeys;
565 pathnode->innersortkeys = innersortkeys;
567 cost_mergejoin(&pathnode->jpath.path,
580 * create_hashjoin_path
581 * Creates a pathnode corresponding to a hash join between two relations.
583 * 'joinrel' is the join relation
584 * 'jointype' is the type of join required
585 * 'outer_path' is the cheapest outer path
586 * 'inner_path' is the cheapest inner path
587 * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
588 * 'hashclauses' is a list of the hash join clause (always a 1-element list)
589 * (this should be a subset of the restrict_clauses list)
592 create_hashjoin_path(Query *root,
597 List *restrict_clauses,
600 HashPath *pathnode = makeNode(HashPath);
602 pathnode->jpath.path.pathtype = T_HashJoin;
603 pathnode->jpath.path.parent = joinrel;
604 pathnode->jpath.jointype = jointype;
605 pathnode->jpath.outerjoinpath = outer_path;
606 pathnode->jpath.innerjoinpath = inner_path;
607 pathnode->jpath.joinrestrictinfo = restrict_clauses;
608 /* A hashjoin never has pathkeys, since its ordering is unpredictable */
609 pathnode->jpath.path.pathkeys = NIL;
610 pathnode->path_hashclauses = hashclauses;
612 cost_hashjoin(&pathnode->jpath.path,