1 /*-------------------------------------------------------------------------
4 * Routines to find all possible paths for processing a set of joins
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.28 1999/02/18 00:49:19 momjian Exp $
12 *-------------------------------------------------------------------------
14 #include <sys/types.h>
19 #include "storage/buf_internals.h"
21 #include "nodes/pg_list.h"
22 #include "nodes/relation.h"
23 #include "nodes/plannodes.h"
25 #include "optimizer/internal.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/pathnode.h"
28 #include "optimizer/keys.h"
29 #include "optimizer/cost.h" /* for _enable_{hashjoin,
30 * _enable_mergejoin} */
32 static Path *best_innerjoin(List *join_paths, List *outer_relid);
33 static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
34 List *mergeinfo_list);
35 static List *match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
36 List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin,
37 List *mergeinfo_list);
38 static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
39 List *innerpath_list, List *mergeinfo_list);
40 static bool EnoughMemoryForHashjoin(RelOptInfo *hashrel);
41 static List *hash_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
45 * update_rels_pathlist_for_joins
46 * Creates all possible ways to process joins for each of the join
47 * relations in the list 'joinrels.' Each unique path will be included
48 * in the join relation's 'pathlist' field.
50 * In postgres, n-way joins are handled left-only(permuting clauseless
51 * joins doesn't usually win much).
53 * if BushyPlanFlag is true, bushy tree plans will be generated
55 * 'joinrels' is the list of relation entries to be joined
57 * Modifies the pathlist field of the appropriate rel node to contain
58 * the unique join paths.
59 * If bushy trees are considered, may modify the relid field of the
60 * join rel nodes to flatten the lists.
62 * It does a destructive modification.
65 update_rels_pathlist_for_joins(Query *root, List *joinrels)
67 List *mergeinfo_list = NIL;
68 List *hashinfo_list = NIL;
73 RelOptInfo *joinrel = (RelOptInfo *) lfirst(j);
81 /* flatten out relids later in this function */
82 innerrelids = lsecond(joinrel->relids);
83 outerrelids = lfirst(joinrel->relids);
86 * base relation id is an integer and join relation relid is a
89 innerrel = (length(innerrelids) == 1) ?
90 get_base_rel(root, lfirsti(innerrelids)) :
91 get_join_rel(root, innerrelids);
92 outerrel = (length(outerrelids) == 1) ?
93 get_base_rel(root, lfirsti(outerrelids)) :
94 get_join_rel(root, outerrelids);
96 bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
98 if (_enable_mergejoin_)
99 mergeinfo_list = group_clauses_by_order(joinrel->restrictinfo,
100 lfirsti(innerrel->relids));
102 if (_enable_hashjoin_)
103 hashinfo_list = group_clauses_by_hashop(joinrel->restrictinfo,
104 lfirsti(innerrel->relids));
106 /* need to flatten the relids list */
107 joinrel->relids = intAppend(outerrelids, innerrelids);
110 * 1. Consider mergejoin paths where both relations must be
113 pathlist = sort_inner_and_outer(joinrel, outerrel,
114 innerrel, mergeinfo_list);
117 * 2. Consider paths where the outer relation need not be
118 * explicitly sorted. This may include either nestloops and
119 * mergejoins where the outer path is already ordered.
121 pathlist = add_pathlist(joinrel, pathlist,
122 match_unsorted_outer(joinrel,
126 (Path *) innerrel->cheapestpath,
131 * 3. Consider paths where the inner relation need not be
132 * explicitly sorted. This may include nestloops and mergejoins
133 * the actual nestloop nodes were constructed in
134 * (match_unsorted_outer).
136 pathlist = add_pathlist(joinrel, pathlist,
137 match_unsorted_inner(joinrel, outerrel,
143 * 4. Consider paths where both outer and inner relations must be
144 * hashed before being joined.
146 pathlist = add_pathlist(joinrel, pathlist,
147 hash_inner_and_outer(joinrel, outerrel,
148 innerrel, hashinfo_list));
150 joinrel->pathlist = pathlist;
156 * Find the cheapest index path that has already been identified by
157 * (indexable_joinclauses) as being a possible inner path for the given
158 * outer relation in a nestloop join.
160 * 'join_paths' is a list of join nodes
161 * 'outer_relid' is the relid of the outer join relation
163 * Returns the pathnode of the selected path.
166 best_innerjoin(List *join_paths, Relids outer_relids)
168 Path *cheapest = (Path *) NULL;
171 foreach(join_path, join_paths)
173 Path *path = (Path *) lfirst(join_path);
175 if (intMember(lfirsti(path->joinid), outer_relids)
176 && ((cheapest == NULL ||
177 path_is_cheaper((Path *) lfirst(join_path), cheapest))))
180 cheapest = (Path *) lfirst(join_path);
187 * sort_inner_and_outer
188 * Create mergejoin join paths by explicitly sorting both the outer and
189 * inner join relations on each available merge ordering.
191 * 'joinrel' is the join relation
192 * 'outerrel' is the outer join relation
193 * 'innerrel' is the inner join relation
194 * 'mergeinfo_list' is a list of nodes containing info on(mergejoinable)
195 * clauses for joining the relations
197 * Returns a list of mergejoin paths.
200 sort_inner_and_outer(RelOptInfo *joinrel,
201 RelOptInfo *outerrel,
202 RelOptInfo *innerrel,
203 List *mergeinfo_list)
206 MergeInfo *xmergeinfo = (MergeInfo *) NULL;
207 MergePath *temp_node = (MergePath *) NULL;
209 List *outerkeys = NIL;
210 List *innerkeys = NIL;
211 List *merge_pathkeys = NIL;
213 foreach(i, mergeinfo_list)
215 xmergeinfo = (MergeInfo *) lfirst(i);
217 outerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
218 outerrel->targetlist,
221 innerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
222 innerrel->targetlist,
225 merge_pathkeys = new_join_pathkeys(outerkeys, joinrel->targetlist,
226 xmergeinfo->jmethod.clauses);
228 temp_node = create_mergejoin_path(joinrel,
233 (Path *) outerrel->cheapestpath,
234 (Path *) innerrel->cheapestpath,
236 xmergeinfo->m_ordering,
237 xmergeinfo->jmethod.clauses,
241 ms_list = lappend(ms_list, temp_node);
247 * match_unsorted_outer
248 * Creates possible join paths for processing a single join relation
249 * 'joinrel' by employing either iterative substitution or
250 * mergejoining on each of its possible outer paths(assuming that the
251 * outer relation need not be explicitly sorted).
253 * 1. The inner path is the cheapest available inner path.
254 * 2. Mergejoin wherever possible. Mergejoin are considered if there
255 * are mergejoinable join clauses between the outer and inner join
256 * relations such that the outer path is keyed on the variables
257 * appearing in the clauses. The corresponding inner merge path is
258 * either a path whose keys match those of the outer path(if such a
259 * path is available) or an explicit sort on the appropriate inner
260 * join keys, whichever is cheaper.
262 * 'joinrel' is the join relation
263 * 'outerrel' is the outer join relation
264 * 'innerrel' is the inner join relation
265 * 'outerpath_list' is the list of possible outer paths
266 * 'cheapest_inner' is the cheapest inner path
267 * 'best_innerjoin' is the best inner index path(if any)
268 * 'mergeinfo_list' is a list of nodes containing info on mergejoinable
271 * Returns a list of possible join path nodes.
274 match_unsorted_outer(RelOptInfo *joinrel,
275 RelOptInfo *outerrel,
276 RelOptInfo *innerrel,
277 List *outerpath_list,
278 Path *cheapest_inner,
279 Path *best_innerjoin,
280 List *mergeinfo_list)
282 Path *outerpath = (Path *) NULL;
284 List *temp_node = NIL;
285 List *merge_pathkeys = NIL;
286 Path *nestinnerpath = (Path *) NULL;
289 PathOrder *outerpath_ordering = NULL;
291 foreach(i, outerpath_list)
294 List *matchedJoinKeys = NIL;
295 List *matchedJoinClauses = NIL;
296 MergeInfo *xmergeinfo = (MergeInfo *) NULL;
298 outerpath = (Path *) lfirst(i);
300 outerpath_ordering = outerpath->pathorder;
302 if (outerpath_ordering)
303 xmergeinfo = match_order_mergeinfo(outerpath_ordering,
307 clauses = xmergeinfo->jmethod.clauses;
311 List *jmkeys = xmergeinfo->jmethod.jmkeys;
312 List *clauses = xmergeinfo->jmethod.clauses;
314 matchedJoinKeys = match_pathkeys_joinkeys(outerpath->pathkeys,
318 &matchedJoinClauses);
319 merge_pathkeys = new_join_pathkeys(outerpath->pathkeys,
320 joinrel->targetlist, clauses);
323 merge_pathkeys = outerpath->pathkeys;
325 if (best_innerjoin &&
326 path_is_cheaper(best_innerjoin, cheapest_inner))
327 nestinnerpath = best_innerjoin;
329 nestinnerpath = cheapest_inner;
331 paths = lcons(create_nestloop_path(joinrel,
338 if (clauses && matchedJoinKeys)
340 bool path_is_cheaper_than_sort;
342 Path *mergeinnerpath = match_paths_joinkeys(matchedJoinKeys,
347 /* Should we use the mergeinner, or sort the cheapest inner? */
348 path_is_cheaper_than_sort = (bool) (mergeinnerpath &&
349 (mergeinnerpath->path_cost <
350 (cheapest_inner->path_cost +
351 cost_sort(matchedJoinKeys,
355 if (!path_is_cheaper_than_sort)
357 varkeys = extract_path_keys(matchedJoinKeys,
358 innerrel->targetlist,
364 * Keep track of the cost of the outer path used with this
365 * ordered inner path for later processing in
366 * (match_unsorted_inner), since it isn't a sort and thus
367 * wouldn't otherwise be considered.
369 if (path_is_cheaper_than_sort)
370 mergeinnerpath->outerjoincost = outerpath->path_cost;
372 mergeinnerpath = cheapest_inner;
374 temp_node = lcons(create_mergejoin_path(joinrel,
382 xmergeinfo->m_ordering,
390 jp_list = nconc(jp_list, temp_node);
396 * match_unsorted_inner
397 * Find the cheapest ordered join path for a given(ordered, unsorted)
400 * Scans through each path available on an inner join relation and tries
401 * matching its ordering keys against those of mergejoin clauses.
402 * If 1. an appropriately_ordered inner path and matching mergeclause are
404 * 2. sorting the cheapest outer path is cheaper than using an ordered
405 * but unsorted outer path(as was considered in
406 * (match_unsorted_outer)), then this merge path is considered.
408 * 'joinrel' is the join result relation
409 * 'outerrel' is the outer join relation
410 * 'innerrel' is the inner join relation
411 * 'innerpath_list' is the list of possible inner join paths
412 * 'mergeinfo_list' is a list of nodes containing info on mergejoinable
415 * Returns a list of possible merge paths.
418 match_unsorted_inner(RelOptInfo *joinrel,
419 RelOptInfo *outerrel,
420 RelOptInfo *innerrel,
421 List *innerpath_list,
422 List *mergeinfo_list)
424 Path *innerpath = (Path *) NULL;
426 PathOrder *innerpath_ordering = NULL;
431 foreach(i, innerpath_list)
433 MergeInfo *xmergeinfo = (MergeInfo *) NULL;
435 List *matchedJoinKeys = NIL;
436 List *matchedJoinClauses = NIL;
438 innerpath = (Path *) lfirst(i);
440 innerpath_ordering = innerpath->pathorder;
442 if (innerpath_ordering)
444 xmergeinfo = match_order_mergeinfo(innerpath_ordering,
449 clauses = ((JoinMethod *) xmergeinfo)->clauses;
453 List *jmkeys = xmergeinfo->jmethod.jmkeys;
454 List *cls = xmergeinfo->jmethod.clauses;
456 matchedJoinKeys = match_pathkeys_joinkeys(innerpath->pathkeys,
460 &matchedJoinClauses);
464 * (match_unsorted_outer) if it is applicable. 'OuterJoinCost was
467 if (clauses && matchedJoinKeys)
469 temp1 = outerrel->cheapestpath->path_cost +
470 cost_sort(matchedJoinKeys, outerrel->size, outerrel->width,
473 temp2 = (bool) (FLOAT_IS_ZERO(innerpath->outerjoincost)
474 || (innerpath->outerjoincost > temp1));
478 List *outerkeys = extract_path_keys(matchedJoinKeys,
479 outerrel->targetlist,
481 List *merge_pathkeys = new_join_pathkeys(outerkeys,
485 mp_list = lappend(mp_list,
486 create_mergejoin_path(joinrel,
491 (Path *) outerrel->cheapestpath,
494 xmergeinfo->m_ordering,
506 EnoughMemoryForHashjoin(RelOptInfo *hashrel)
512 ntuples = hashrel->size;
515 tupsize = hashrel->width + sizeof(HeapTupleData);
516 pages = page_size(ntuples, tupsize);
519 * if amount of buffer space below hashjoin threshold, return false
521 if (ceil(sqrt((double) pages)) > NBuffers)
527 * hash_inner_and_outer-- XXX HASH
528 * Create hashjoin join paths by explicitly hashing both the outer and
529 * inner join relations on each available hash op.
531 * 'joinrel' is the join relation
532 * 'outerrel' is the outer join relation
533 * 'innerrel' is the inner join relation
534 * 'hashinfo_list' is a list of nodes containing info on(hashjoinable)
535 * clauses for joining the relations
537 * Returns a list of hashjoin paths.
540 hash_inner_and_outer(RelOptInfo *joinrel,
541 RelOptInfo *outerrel,
542 RelOptInfo *innerrel,
545 HashInfo *xhashinfo = (HashInfo *) NULL;
546 List *hjoin_list = NIL;
547 HashPath *temp_node = (HashPath *) NULL;
549 List *outerkeys = NIL;
550 List *innerkeys = NIL;
551 List *hash_pathkeys = NIL;
553 foreach(i, hashinfo_list)
555 xhashinfo = (HashInfo *) lfirst(i);
556 outerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
557 outerrel->targetlist,
559 innerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
560 innerrel->targetlist,
562 hash_pathkeys = new_join_pathkeys(outerkeys,
564 ((JoinMethod *) xhashinfo)->clauses);
566 if (EnoughMemoryForHashjoin(innerrel))
568 temp_node = create_hashjoin_path(joinrel,
573 (Path *) outerrel->cheapestpath,
574 (Path *) innerrel->cheapestpath,
577 ((JoinMethod *) xhashinfo)->clauses,
580 hjoin_list = lappend(hjoin_list, temp_node);