1 /*-------------------------------------------------------------------------
4 * Routines to find all possible paths for processing a set of joins
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.16 1999/02/09 03:51:19 momjian Exp $
12 *-------------------------------------------------------------------------
14 #include <sys/types.h>
19 #include "storage/buf_internals.h"
21 #include "nodes/pg_list.h"
22 #include "nodes/relation.h"
23 #include "nodes/plannodes.h"
25 #include "optimizer/internal.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/pathnode.h"
28 #include "optimizer/keys.h"
29 #include "optimizer/cost.h" /* for _enable_{hashjoin,
30 * _enable_mergejoin} */
32 static Path *best_innerjoin(List *join_paths, List *outer_relid);
33 static List *sort_inner_and_outer(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
34 List *mergeinfo_list);
35 static List *match_unsorted_outer(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
36 List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin,
37 List *mergeinfo_list);
38 static List *match_unsorted_inner(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
39 List *innerpath_list, List *mergeinfo_list);
40 static bool EnoughMemoryForHashjoin(RelOptInfo * hashrel);
41 static List *hash_inner_and_outer(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
45 * find-all-join-paths--
46 * Creates all possible ways to process joins for each of the join
47 * relations in the list 'joinrels.' Each unique path will be included
48 * in the join relation's 'pathlist' field.
50 * In postgres, n-way joins are handled left-only(permuting clauseless
51 * joins doesn't usually win much).
53 * if BushyPlanFlag is true, bushy tree plans will be generated
55 * 'joinrels' is the list of relation entries to be joined
57 * Modifies the pathlist field of the appropriate rel node to contain
58 * the unique join paths.
59 * If bushy trees are considered, may modify the relid field of the
60 * join rel nodes to flatten the lists.
62 * Returns nothing of interest. (?)
63 * It does a destructive modification.
66 find_all_join_paths(Query *root, List *joinrels)
68 List *mergeinfo_list = NIL;
69 List *hashinfo_list = NIL;
70 List *temp_list = NIL;
73 while (joinrels != NIL)
75 RelOptInfo *joinrel = (RelOptInfo *) lfirst(joinrels);
83 innerrelids = lsecond(joinrel->relids);
84 outerrelids = lfirst(joinrel->relids);
87 * base relation id is an integer and join relation relid is a
90 innerrel = (length(innerrelids) == 1) ?
91 get_base_rel(root, lfirsti(innerrelids)) : get_join_rel(root, innerrelids);
92 outerrel = (length(outerrelids) == 1) ?
93 get_base_rel(root, lfirsti(outerrelids)) : get_join_rel(root, outerrelids);
95 bestinnerjoin = best_innerjoin(innerrel->innerjoin,
97 if (_enable_mergejoin_)
99 mergeinfo_list = group_clauses_by_order(joinrel->restrictinfo,
100 lfirsti(innerrel->relids));
103 if (_enable_hashjoin_)
105 hashinfo_list = group_clauses_by_hashop(joinrel->restrictinfo,
106 lfirsti(innerrel->relids));
109 /* need to flatten the relids list */
110 joinrel->relids = intAppend(outerrelids, innerrelids);
113 * 1. Consider mergejoin paths where both relations must be
116 pathlist = sort_inner_and_outer(joinrel, outerrel,
117 innerrel, mergeinfo_list);
120 * 2. Consider paths where the outer relation need not be
121 * explicitly sorted. This may include either nestloops and
122 * mergejoins where the outer path is already ordered.
124 pathlist = add_pathlist(joinrel, pathlist,
125 match_unsorted_outer(joinrel,
129 (Path *) innerrel->cheapestpath,
134 * 3. Consider paths where the inner relation need not be
135 * explicitly sorted. This may include nestloops and mergejoins
136 * the actual nestloop nodes were constructed in
137 * (match-unsorted-outer).
139 pathlist = add_pathlist(joinrel, pathlist,
140 match_unsorted_inner(joinrel, outerrel,
146 * 4. Consider paths where both outer and inner relations must be
147 * hashed before being joined.
150 pathlist = add_pathlist(joinrel, pathlist,
151 hash_inner_and_outer(joinrel, outerrel,
152 innerrel, hashinfo_list));
154 joinrel->pathlist = pathlist;
157 * 'OuterJoinCost is only valid when calling
158 * (match-unsorted-inner) with the same arguments as the previous
159 * invokation of (match-unsorted-outer), so clear the field before
162 temp_list = innerrel->pathlist;
163 foreach(path, temp_list)
168 * This gross hack is to get around an apparent optimizer bug on
169 * Sparc (or maybe it is a bug of ours?) that causes really
172 if (IsA_JoinPath(path))
173 ((Path *) lfirst(path))->outerjoincost = (Cost) 0;
176 * do it iff it is a join path, which is not always true, esp
177 * since the base level
181 joinrels = lnext(joinrels);
187 * Find the cheapest index path that has already been identified by
188 * (indexable_joinclauses) as being a possible inner path for the given
189 * outer relation in a nestloop join.
191 * 'join-paths' is a list of join nodes
192 * 'outer-relid' is the relid of the outer join relation
194 * Returns the pathnode of the selected path.
197 best_innerjoin(List *join_paths, List *outer_relids)
199 Path *cheapest = (Path *) NULL;
202 foreach(join_path, join_paths)
204 Path *path = (Path *) lfirst(join_path);
206 if (intMember(lfirsti(path->joinid), outer_relids)
207 && ((cheapest == NULL ||
208 path_is_cheaper((Path *) lfirst(join_path), cheapest))))
211 cheapest = (Path *) lfirst(join_path);
218 * sort-inner-and-outer--
219 * Create mergejoin join paths by explicitly sorting both the outer and
220 * inner join relations on each available merge ordering.
222 * 'joinrel' is the join relation
223 * 'outerrel' is the outer join relation
224 * 'innerrel' is the inner join relation
225 * 'mergeinfo-list' is a list of nodes containing info on(mergejoinable)
226 * clauses for joining the relations
228 * Returns a list of mergejoin paths.
231 sort_inner_and_outer(RelOptInfo * joinrel,
232 RelOptInfo * outerrel,
233 RelOptInfo * innerrel,
234 List *mergeinfo_list)
237 MergeInfo *xmergeinfo = (MergeInfo *) NULL;
238 MergePath *temp_node = (MergePath *) NULL;
240 List *outerkeys = NIL;
241 List *innerkeys = NIL;
242 List *merge_pathkeys = NIL;
244 foreach(i, mergeinfo_list)
246 xmergeinfo = (MergeInfo *) lfirst(i);
248 outerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
249 outerrel->targetlist,
252 innerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
253 innerrel->targetlist,
256 merge_pathkeys = new_join_pathkeys(outerkeys, joinrel->targetlist,
257 xmergeinfo->jmethod.clauses);
259 temp_node = create_mergejoin_path(joinrel,
264 (Path *) outerrel->cheapestpath,
265 (Path *) innerrel->cheapestpath,
267 xmergeinfo->m_ordering,
268 xmergeinfo->jmethod.clauses,
272 ms_list = lappend(ms_list, temp_node);
278 * match-unsorted-outer--
279 * Creates possible join paths for processing a single join relation
280 * 'joinrel' by employing either iterative substitution or
281 * mergejoining on each of its possible outer paths(assuming that the
282 * outer relation need not be explicitly sorted).
284 * 1. The inner path is the cheapest available inner path.
285 * 2. Mergejoin wherever possible. Mergejoin are considered if there
286 * are mergejoinable join clauses between the outer and inner join
287 * relations such that the outer path is keyed on the variables
288 * appearing in the clauses. The corresponding inner merge path is
289 * either a path whose keys match those of the outer path(if such a
290 * path is available) or an explicit sort on the appropriate inner
291 * join keys, whichever is cheaper.
293 * 'joinrel' is the join relation
294 * 'outerrel' is the outer join relation
295 * 'innerrel' is the inner join relation
296 * 'outerpath-list' is the list of possible outer paths
297 * 'cheapest-inner' is the cheapest inner path
298 * 'best-innerjoin' is the best inner index path(if any)
299 * 'mergeinfo-list' is a list of nodes containing info on mergejoinable
302 * Returns a list of possible join path nodes.
305 match_unsorted_outer(RelOptInfo * joinrel,
306 RelOptInfo * outerrel,
307 RelOptInfo * innerrel,
308 List *outerpath_list,
309 Path *cheapest_inner,
310 Path *best_innerjoin,
311 List *mergeinfo_list)
313 Path *outerpath = (Path *) NULL;
315 List *temp_node = NIL;
316 List *merge_pathkeys = NIL;
317 Path *nestinnerpath = (Path *) NULL;
320 PathOrder *outerpath_ordering = NULL;
322 foreach(i, outerpath_list)
325 List *matchedJoinKeys = NIL;
326 List *matchedJoinClauses = NIL;
327 MergeInfo *xmergeinfo = (MergeInfo *) NULL;
329 outerpath = (Path *) lfirst(i);
331 outerpath_ordering = outerpath->path_order;
333 if (outerpath_ordering)
335 xmergeinfo = match_order_mergeinfo(outerpath_ordering,
340 clauses = xmergeinfo->jmethod.clauses;
344 List *keys = xmergeinfo->jmethod.jmkeys;
345 List *clauses = xmergeinfo->jmethod.clauses;
347 matchedJoinKeys = match_pathkeys_joinkeys(outerpath->keys,
351 &matchedJoinClauses);
352 merge_pathkeys = new_join_pathkeys(outerpath->keys,
353 joinrel->targetlist, clauses);
356 merge_pathkeys = outerpath->keys;
358 if (best_innerjoin &&
359 path_is_cheaper(best_innerjoin, cheapest_inner))
360 nestinnerpath = best_innerjoin;
362 nestinnerpath = cheapest_inner;
364 paths = lcons(create_nestloop_path(joinrel,
371 if (clauses && matchedJoinKeys)
373 bool path_is_cheaper_than_sort;
375 Path *mergeinnerpath = match_paths_joinkeys(matchedJoinKeys,
380 path_is_cheaper_than_sort = (bool) (mergeinnerpath &&
381 (mergeinnerpath->path_cost <
382 (cheapest_inner->path_cost +
383 cost_sort(matchedJoinKeys,
387 if (!path_is_cheaper_than_sort)
389 varkeys = extract_path_keys(matchedJoinKeys,
390 innerrel->targetlist,
396 * Keep track of the cost of the outer path used with this
397 * ordered inner path for later processing in
398 * (match-unsorted-inner), since it isn't a sort and thus
399 * wouldn't otherwise be considered.
401 if (path_is_cheaper_than_sort)
402 mergeinnerpath->outerjoincost = outerpath->path_cost;
404 mergeinnerpath = cheapest_inner;
406 temp_node = lcons(create_mergejoin_path(joinrel,
414 xmergeinfo->m_ordering,
422 jp_list = nconc(jp_list, temp_node);
428 * match-unsorted-inner --
429 * Find the cheapest ordered join path for a given(ordered, unsorted)
432 * Scans through each path available on an inner join relation and tries
433 * matching its ordering keys against those of mergejoin clauses.
434 * If 1. an appropriately-ordered inner path and matching mergeclause are
436 * 2. sorting the cheapest outer path is cheaper than using an ordered
437 * but unsorted outer path(as was considered in
438 * (match-unsorted-outer)),
439 * then this merge path is considered.
441 * 'joinrel' is the join result relation
442 * 'outerrel' is the outer join relation
443 * 'innerrel' is the inner join relation
444 * 'innerpath-list' is the list of possible inner join paths
445 * 'mergeinfo-list' is a list of nodes containing info on mergejoinable
448 * Returns a list of possible merge paths.
451 match_unsorted_inner(RelOptInfo * joinrel,
452 RelOptInfo * outerrel,
453 RelOptInfo * innerrel,
454 List *innerpath_list,
455 List *mergeinfo_list)
457 Path *innerpath = (Path *) NULL;
459 List *temp_node = NIL;
460 PathOrder *innerpath_ordering = NULL;
465 foreach(i, innerpath_list)
467 MergeInfo *xmergeinfo = (MergeInfo *) NULL;
469 List *matchedJoinKeys = NIL;
470 List *matchedJoinClauses = NIL;
472 innerpath = (Path *) lfirst(i);
474 innerpath_ordering = innerpath->path_order;
476 if (innerpath_ordering)
478 xmergeinfo = match_order_mergeinfo(innerpath_ordering,
483 clauses = ((JoinMethod *) xmergeinfo)->clauses;
487 List *keys = xmergeinfo->jmethod.jmkeys;
488 List *cls = xmergeinfo->jmethod.clauses;
490 matchedJoinKeys = match_pathkeys_joinkeys(innerpath->keys,
494 &matchedJoinClauses);
498 * (match-unsorted-outer) if it is applicable. 'OuterJoinCost was
501 if (clauses && matchedJoinKeys)
503 temp1 = outerrel->cheapestpath->path_cost +
504 cost_sort(matchedJoinKeys, outerrel->size, outerrel->width,
507 temp2 = (bool) (FLOAT_IS_ZERO(innerpath->outerjoincost)
508 || (innerpath->outerjoincost > temp1));
512 List *outerkeys = extract_path_keys(matchedJoinKeys,
513 outerrel->targetlist,
515 List *merge_pathkeys = new_join_pathkeys(outerkeys,
519 temp_node = lcons(create_mergejoin_path(joinrel,
524 (Path *) outerrel->cheapestpath,
527 xmergeinfo->m_ordering,
533 mp_list = nconc(mp_list, temp_node);
542 EnoughMemoryForHashjoin(RelOptInfo * hashrel)
548 ntuples = hashrel->size;
551 tupsize = hashrel->width + sizeof(HeapTupleData);
552 pages = page_size(ntuples, tupsize);
555 * if amount of buffer space below hashjoin threshold, return false
557 if (ceil(sqrt((double) pages)) > NBuffers)
563 * hash-inner-and-outer-- XXX HASH
564 * Create hashjoin join paths by explicitly hashing both the outer and
565 * inner join relations on each available hash op.
567 * 'joinrel' is the join relation
568 * 'outerrel' is the outer join relation
569 * 'innerrel' is the inner join relation
570 * 'hashinfo-list' is a list of nodes containing info on(hashjoinable)
571 * clauses for joining the relations
573 * Returns a list of hashjoin paths.
576 hash_inner_and_outer(RelOptInfo * joinrel,
577 RelOptInfo * outerrel,
578 RelOptInfo * innerrel,
581 HashInfo *xhashinfo = (HashInfo *) NULL;
582 List *hjoin_list = NIL;
583 HashPath *temp_node = (HashPath *) NULL;
585 List *outerkeys = NIL;
586 List *innerkeys = NIL;
587 List *hash_pathkeys = NIL;
589 foreach(i, hashinfo_list)
591 xhashinfo = (HashInfo *) lfirst(i);
592 outerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
593 outerrel->targetlist,
595 innerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
596 innerrel->targetlist,
598 hash_pathkeys = new_join_pathkeys(outerkeys,
600 ((JoinMethod *) xhashinfo)->clauses);
602 if (EnoughMemoryForHashjoin(innerrel))
604 temp_node = create_hashjoin_path(joinrel,
609 (Path *) outerrel->cheapestpath,
610 (Path *) innerrel->cheapestpath,
613 ((JoinMethod *) xhashinfo)->clauses,
616 hjoin_list = lappend(hjoin_list, temp_node);