1 /*-------------------------------------------------------------------------
4 * Routines to find all possible paths for processing a set of joins
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.126 2009/09/19 17:48:09 tgl Exp $
13 *-------------------------------------------------------------------------
19 #include "executor/executor.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/pathnode.h"
22 #include "optimizer/paths.h"
25 static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel,
26 RelOptInfo *outerrel, RelOptInfo *innerrel,
27 List *restrictlist, JoinType jointype);
28 static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
29 RelOptInfo *outerrel);
30 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
31 RelOptInfo *outerrel, RelOptInfo *innerrel,
32 List *restrictlist, List *mergeclause_list,
33 JoinType jointype, SpecialJoinInfo *sjinfo);
34 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
35 RelOptInfo *outerrel, RelOptInfo *innerrel,
36 List *restrictlist, List *mergeclause_list,
37 JoinType jointype, SpecialJoinInfo *sjinfo);
38 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
39 RelOptInfo *outerrel, RelOptInfo *innerrel,
41 JoinType jointype, SpecialJoinInfo *sjinfo);
42 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
43 RelOptInfo *outer_rel, JoinType jointype);
44 static List *select_mergejoin_clauses(PlannerInfo *root,
53 * add_paths_to_joinrel
54 * Given a join relation and two component rels from which it can be made,
55 * consider all possible paths that use the two component rels as outer
56 * and inner rel respectively. Add these paths to the join rel's pathlist
57 * if they survive comparison with other paths (and remove any existing
58 * paths that are dominated by these paths).
60 * Modifies the pathlist field of the joinrel node to contain the best
63 * jointype is not necessarily the same as sjinfo->jointype; it might be
64 * "flipped around" if we are considering joining the rels in the opposite
65 * direction from what's indicated in sjinfo.
67 * Also, this routine and others in this module accept the special JoinTypes
68 * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
69 * unique-ify the outer or inner relation and then apply a regular inner
70 * join. These values are not allowed to propagate outside this module,
71 * however. Path cost estimation code may need to recognize that it's
72 * dealing with such a case --- the combination of nominal jointype INNER
73 * with sjinfo->jointype == JOIN_SEMI indicates that.
76 add_paths_to_joinrel(PlannerInfo *root,
81 SpecialJoinInfo *sjinfo,
84 List *mergeclause_list = NIL;
87 * 0. Consider join removal. This is always the most efficient strategy,
88 * so if it works, there's no need to consider anything further.
90 if (join_is_removable(root, joinrel, outerrel, innerrel,
91 restrictlist, jointype))
93 generate_outer_only(root, joinrel, outerrel);
98 * Find potential mergejoin clauses. We can skip this if we are not
99 * interested in doing a mergejoin. However, mergejoin is currently our
100 * only way of implementing full outer joins, so override mergejoin
101 * disable if it's a full join.
103 * Note: do this after join_is_removable(), because this sets the
104 * outer_is_left flags in the mergejoin clauses, while join_is_removable
105 * uses those flags for its own purposes. Currently, they set the flags
106 * the same way anyway, but let's avoid unnecessary entanglement.
108 if (enable_mergejoin || jointype == JOIN_FULL)
109 mergeclause_list = select_mergejoin_clauses(root,
117 * 1. Consider mergejoin paths where both relations must be explicitly
120 sort_inner_and_outer(root, joinrel, outerrel, innerrel,
121 restrictlist, mergeclause_list, jointype, sjinfo);
124 * 2. Consider paths where the outer relation need not be explicitly
125 * sorted. This includes both nestloops and mergejoins where the outer
126 * path is already ordered.
128 match_unsorted_outer(root, joinrel, outerrel, innerrel,
129 restrictlist, mergeclause_list, jointype, sjinfo);
134 * 3. Consider paths where the inner relation need not be explicitly
135 * sorted. This includes mergejoins only (nestloops were already built in
136 * match_unsorted_outer).
138 * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
139 * significant difference between the inner and outer side of a mergejoin,
140 * so match_unsorted_inner creates no paths that aren't equivalent to
141 * those made by match_unsorted_outer when add_paths_to_joinrel() is
142 * invoked with the two rels given in the other order.
144 match_unsorted_inner(root, joinrel, outerrel, innerrel,
145 restrictlist, mergeclause_list, jointype, sjinfo);
149 * 4. Consider paths where both outer and inner relations must be hashed
150 * before being joined.
153 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
154 restrictlist, jointype, sjinfo);
158 * clause_sides_match_join
159 * Determine whether a join clause is of the right form to use in this join.
161 * We already know that the clause is a binary opclause referencing only the
162 * rels in the current join. The point here is to check whether it has the
163 * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
164 * rather than mixing outer and inner vars on either side. If it matches,
165 * we set the transient flag outer_is_left to identify which side is which.
168 clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
169 RelOptInfo *innerrel)
171 if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
172 bms_is_subset(rinfo->right_relids, innerrel->relids))
174 /* lefthand side is outer */
175 rinfo->outer_is_left = true;
178 else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
179 bms_is_subset(rinfo->right_relids, outerrel->relids))
181 /* righthand side is outer */
182 rinfo->outer_is_left = false;
185 return false; /* no good for these input relations */
190 * Determine whether we need not perform the join at all, because
191 * it will just duplicate its left input.
193 * This is true for a left join for which the join condition cannot match
194 * more than one inner-side row. (There are other possibly interesting
195 * cases, but we don't have the infrastructure to prove them.)
197 * Note: there is no need to consider the symmetrical case of duplicating the
198 * right input, because add_paths_to_joinrel() will be called with each rel
202 join_is_removable(PlannerInfo *root,
204 RelOptInfo *outerrel,
205 RelOptInfo *innerrel,
209 List *clause_list = NIL;
214 * Currently, we only know how to remove left joins to a baserel with
215 * unique indexes. We can check most of these criteria pretty trivially
216 * to avoid doing useless extra work. But checking whether any of the
217 * indexes are unique would require iterating over the indexlist, so for
218 * now we just make sure there are indexes of some sort or other. If none
219 * of them are unique, join removal will still fail, just slightly later.
221 if (jointype != JOIN_LEFT ||
222 innerrel->reloptkind == RELOPT_JOINREL ||
223 innerrel->rtekind != RTE_RELATION ||
224 innerrel->indexlist == NIL)
228 * We can't remove the join if any inner-rel attributes are used above
231 * As a micro-optimization, it seems better to start with max_attr and
232 * count down rather than starting with min_attr and counting up, on the
233 * theory that the system attributes are somewhat less likely to be wanted
234 * and should be tested last.
236 for (attroff = innerrel->max_attr - innerrel->min_attr;
240 if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids))
245 * Search for mergejoinable clauses that constrain the inner rel against
246 * either the outer rel or a pseudoconstant. If an operator is
247 * mergejoinable then it behaves like equality for some btree opclass,
248 * so it's what we want. The mergejoinability test also eliminates
249 * clauses containing volatile functions, which we couldn't depend on.
251 foreach(l, restrictlist)
253 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
256 * We are always considering an outer join here, so ignore pushed-down
257 * clauses. Also ignore anything that doesn't have a mergejoinable
260 if (restrictinfo->is_pushed_down)
263 if (!restrictinfo->can_join ||
264 restrictinfo->mergeopfamilies == NIL)
265 continue; /* not mergejoinable */
268 * Check if clause has the form "outer op inner" or "inner op outer".
270 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
271 continue; /* no good for these input relations */
273 /* OK, add to list */
274 clause_list = lappend(clause_list, restrictinfo);
277 /* Now examine the rel's restriction clauses for var = const clauses */
278 foreach(l, innerrel->baserestrictinfo)
280 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
283 * Note: can_join won't be set for a restriction clause, but
284 * mergeopfamilies will be if it has a mergejoinable operator
285 * and doesn't contain volatile functions.
287 if (restrictinfo->mergeopfamilies == NIL)
288 continue; /* not mergejoinable */
291 * The clause certainly doesn't refer to anything but the given
292 * rel. If either side is pseudoconstant then we can use it.
294 if (bms_is_empty(restrictinfo->left_relids))
296 /* righthand side is inner */
297 restrictinfo->outer_is_left = true;
299 else if (bms_is_empty(restrictinfo->right_relids))
301 /* lefthand side is inner */
302 restrictinfo->outer_is_left = false;
307 /* OK, add to list */
308 clause_list = lappend(clause_list, restrictinfo);
311 /* Now examine the indexes to see if we have a matching unique index */
312 if (relation_has_unique_index_for(root, innerrel, clause_list))
316 * Some day it would be nice to check for other methods of establishing
323 * generate_outer_only
324 * Generate "join" paths when we have found the join is removable.
327 generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
328 RelOptInfo *outerrel)
333 * For the moment, replicate all of the outerrel's paths as join paths.
334 * Some of them might not really be interesting above the join, if they
335 * have sort orderings that have no real use except to do a mergejoin
336 * for the join we've just found we don't need. But distinguishing that
337 * case probably isn't worth the extra code it would take.
339 foreach(lc, outerrel->pathlist)
341 Path *outerpath = (Path *) lfirst(lc);
343 add_path(joinrel, (Path *)
344 create_noop_path(root, joinrel, outerpath));
349 * sort_inner_and_outer
350 * Create mergejoin join paths by explicitly sorting both the outer and
351 * inner join relations on each available merge ordering.
353 * 'joinrel' is the join relation
354 * 'outerrel' is the outer join relation
355 * 'innerrel' is the inner join relation
356 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
357 * clauses that apply to this join
358 * 'mergeclause_list' is a list of RestrictInfo nodes for available
359 * mergejoin clauses in this join
360 * 'jointype' is the type of join to do
361 * 'sjinfo' is extra info about the join for selectivity estimation
364 sort_inner_and_outer(PlannerInfo *root,
366 RelOptInfo *outerrel,
367 RelOptInfo *innerrel,
369 List *mergeclause_list,
371 SpecialJoinInfo *sjinfo)
380 * If we are doing a right or full join, we must use *all* the
381 * mergeclauses as join clauses, else we will not have a valid plan.
389 case JOIN_UNIQUE_OUTER:
390 case JOIN_UNIQUE_INNER:
391 useallclauses = false;
395 useallclauses = true;
398 elog(ERROR, "unrecognized join type: %d",
400 useallclauses = false; /* keep compiler quiet */
405 * We only consider the cheapest-total-cost input paths, since we are
406 * assuming here that a sort is required. We will consider
407 * cheapest-startup-cost input paths later, and only if they don't need a
410 * If unique-ification is requested, do it and then handle as a plain
413 outer_path = outerrel->cheapest_total_path;
414 inner_path = innerrel->cheapest_total_path;
415 if (jointype == JOIN_UNIQUE_OUTER)
417 outer_path = (Path *) create_unique_path(root, outerrel,
420 jointype = JOIN_INNER;
422 else if (jointype == JOIN_UNIQUE_INNER)
424 inner_path = (Path *) create_unique_path(root, innerrel,
427 jointype = JOIN_INNER;
431 * Each possible ordering of the available mergejoin clauses will generate
432 * a differently-sorted result path at essentially the same cost. We have
433 * no basis for choosing one over another at this level of joining, but
434 * some sort orders may be more useful than others for higher-level
435 * mergejoins, so it's worth considering multiple orderings.
437 * Actually, it's not quite true that every mergeclause ordering will
438 * generate a different path order, because some of the clauses may be
439 * partially redundant (refer to the same EquivalenceClasses). Therefore,
440 * what we do is convert the mergeclause list to a list of canonical
441 * pathkeys, and then consider different orderings of the pathkeys.
443 * Generating a path for *every* permutation of the pathkeys doesn't seem
444 * like a winning strategy; the cost in planning time is too high. For
445 * now, we generate one path for each pathkey, listing that pathkey first
446 * and the rest in random order. This should allow at least a one-clause
447 * mergejoin without re-sorting against any other possible mergejoin
448 * partner path. But if we've not guessed the right ordering of secondary
449 * keys, we may end up evaluating clauses as qpquals when they could have
450 * been done as mergeclauses. (In practice, it's rare that there's more
451 * than two or three mergeclauses, so expending a huge amount of thought
452 * on that is probably not worth it.)
454 * The pathkey order returned by select_outer_pathkeys_for_merge() has
455 * some heuristics behind it (see that function), so be sure to try it
456 * exactly as-is as well as making variants.
458 all_pathkeys = select_outer_pathkeys_for_merge(root,
462 foreach(l, all_pathkeys)
464 List *front_pathkey = (List *) lfirst(l);
465 List *cur_mergeclauses;
468 List *merge_pathkeys;
470 /* Make a pathkey list with this guy first */
471 if (l != list_head(all_pathkeys))
472 outerkeys = lcons(front_pathkey,
473 list_delete_ptr(list_copy(all_pathkeys),
476 outerkeys = all_pathkeys; /* no work at first one... */
478 /* Sort the mergeclauses into the corresponding ordering */
479 cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
484 /* Should have used them all... */
485 Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
487 /* Build sort pathkeys for the inner side */
488 innerkeys = make_inner_pathkeys_for_merge(root,
492 /* Build pathkeys representing output sort order */
493 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
497 * And now we can make the path.
499 * Note: it's possible that the cheapest paths will already be sorted
500 * properly. create_mergejoin_path will detect that case and suppress
501 * an explicit sort step, so we needn't do so here.
503 add_path(joinrel, (Path *)
504 create_mergejoin_path(root,
519 * match_unsorted_outer
520 * Creates possible join paths for processing a single join relation
521 * 'joinrel' by employing either iterative substitution or
522 * mergejoining on each of its possible outer paths (considering
523 * only outer paths that are already ordered well enough for merging).
525 * We always generate a nestloop path for each available outer path.
526 * In fact we may generate as many as five: one on the cheapest-total-cost
527 * inner path, one on the same with materialization, one on the
528 * cheapest-startup-cost inner path (if different), one on the
529 * cheapest-total inner-indexscan path (if any), and one on the
530 * cheapest-startup inner-indexscan path (if different).
532 * We also consider mergejoins if mergejoin clauses are available. We have
533 * two ways to generate the inner path for a mergejoin: sort the cheapest
534 * inner path, or use an inner path that is already suitably ordered for the
535 * merge. If we have several mergeclauses, it could be that there is no inner
536 * path (or only a very expensive one) for the full list of mergeclauses, but
537 * better paths exist if we truncate the mergeclause list (thereby discarding
538 * some sort key requirements). So, we consider truncations of the
539 * mergeclause list as well as the full list. (Ideally we'd consider all
540 * subsets of the mergeclause list, but that seems way too expensive.)
542 * 'joinrel' is the join relation
543 * 'outerrel' is the outer join relation
544 * 'innerrel' is the inner join relation
545 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
546 * clauses that apply to this join
547 * 'mergeclause_list' is a list of RestrictInfo nodes for available
548 * mergejoin clauses in this join
549 * 'jointype' is the type of join to do
550 * 'sjinfo' is extra info about the join for selectivity estimation
553 match_unsorted_outer(PlannerInfo *root,
555 RelOptInfo *outerrel,
556 RelOptInfo *innerrel,
558 List *mergeclause_list,
560 SpecialJoinInfo *sjinfo)
562 JoinType save_jointype = jointype;
565 Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
566 Path *inner_cheapest_total = innerrel->cheapest_total_path;
567 Path *matpath = NULL;
568 Path *index_cheapest_startup = NULL;
569 Path *index_cheapest_total = NULL;
573 * Nestloop only supports inner, left, semi, and anti joins. Also, if we
574 * are doing a right or full join, we must use *all* the mergeclauses as
575 * join clauses, else we will not have a valid plan. (Although these two
576 * flags are currently inverses, keep them separate for clarity and
577 * possible future changes.)
586 useallclauses = false;
591 useallclauses = true;
593 case JOIN_UNIQUE_OUTER:
594 case JOIN_UNIQUE_INNER:
595 jointype = JOIN_INNER;
597 useallclauses = false;
600 elog(ERROR, "unrecognized join type: %d",
602 nestjoinOK = false; /* keep compiler quiet */
603 useallclauses = false;
608 * If we need to unique-ify the inner path, we will consider only the
611 if (save_jointype == JOIN_UNIQUE_INNER)
613 inner_cheapest_total = (Path *)
614 create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
615 Assert(inner_cheapest_total);
616 inner_cheapest_startup = inner_cheapest_total;
621 * Consider materializing the cheapest inner path, unless it is one
622 * that materializes its output anyway.
624 if (!ExecMaterializesOutput(inner_cheapest_total->pathtype))
626 create_material_path(innerrel, inner_cheapest_total);
629 * Get the best innerjoin indexpaths (if any) for this outer rel.
630 * They're the same for all outer paths.
632 if (innerrel->reloptkind != RELOPT_JOINREL)
634 if (IsA(inner_cheapest_total, AppendPath))
635 index_cheapest_total = best_appendrel_indexscan(root,
639 else if (innerrel->rtekind == RTE_RELATION)
640 best_inner_indexscan(root, innerrel, outerrel, jointype,
641 &index_cheapest_startup,
642 &index_cheapest_total);
646 foreach(l, outerrel->pathlist)
648 Path *outerpath = (Path *) lfirst(l);
649 List *merge_pathkeys;
653 Path *cheapest_startup_inner;
654 Path *cheapest_total_inner;
659 * If we need to unique-ify the outer path, it's pointless to consider
660 * any but the cheapest outer.
662 if (save_jointype == JOIN_UNIQUE_OUTER)
664 if (outerpath != outerrel->cheapest_total_path)
666 outerpath = (Path *) create_unique_path(root, outerrel,
672 * The result will have this sort order (even if it is implemented as
673 * a nestloop, and even if some of the mergeclauses are implemented by
674 * qpquals rather than as true mergeclauses):
676 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
677 outerpath->pathkeys);
682 * Always consider a nestloop join with this outer and
683 * cheapest-total-cost inner. When appropriate, also consider
684 * using the materialized form of the cheapest inner, the
685 * cheapest-startup-cost inner path, and the cheapest innerjoin
688 add_path(joinrel, (Path *)
689 create_nestloop_path(root,
694 inner_cheapest_total,
698 add_path(joinrel, (Path *)
699 create_nestloop_path(root,
707 if (inner_cheapest_startup != inner_cheapest_total)
708 add_path(joinrel, (Path *)
709 create_nestloop_path(root,
714 inner_cheapest_startup,
717 if (index_cheapest_total != NULL)
718 add_path(joinrel, (Path *)
719 create_nestloop_path(root,
724 index_cheapest_total,
727 if (index_cheapest_startup != NULL &&
728 index_cheapest_startup != index_cheapest_total)
729 add_path(joinrel, (Path *)
730 create_nestloop_path(root,
735 index_cheapest_startup,
740 /* Can't do anything else if outer path needs to be unique'd */
741 if (save_jointype == JOIN_UNIQUE_OUTER)
744 /* Look for useful mergeclauses (if any) */
745 mergeclauses = find_mergeclauses_for_pathkeys(root,
751 * Done with this outer path if no chance for a mergejoin.
753 * Special corner case: for "x FULL JOIN y ON true", there will be no
754 * join clauses at all. Ordinarily we'd generate a clauseless
755 * nestloop path, but since mergejoin is our only join type that
756 * supports FULL JOIN, it's necessary to generate a clauseless
757 * mergejoin path instead.
759 if (mergeclauses == NIL)
761 if (jointype == JOIN_FULL)
762 /* okay to try for mergejoin */ ;
766 if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
769 /* Compute the required ordering of the inner path */
770 innersortkeys = make_inner_pathkeys_for_merge(root,
772 outerpath->pathkeys);
775 * Generate a mergejoin on the basis of sorting the cheapest inner.
776 * Since a sort will be needed, only cheapest total cost matters. (But
777 * create_mergejoin_path will do the right thing if
778 * inner_cheapest_total is already correctly sorted.)
780 add_path(joinrel, (Path *)
781 create_mergejoin_path(root,
786 inner_cheapest_total,
793 /* Can't do anything else if inner path needs to be unique'd */
794 if (save_jointype == JOIN_UNIQUE_INNER)
798 * Look for presorted inner paths that satisfy the innersortkey list
799 * --- or any truncation thereof, if we are allowed to build a
800 * mergejoin using a subset of the merge clauses. Here, we consider
801 * both cheap startup cost and cheap total cost.
803 * As we shorten the sortkey list, we should consider only paths that
804 * are strictly cheaper than (in particular, not the same as) any path
805 * found in an earlier iteration. Otherwise we'd be intentionally
806 * using fewer merge keys than a given path allows (treating the rest
807 * as plain joinquals), which is unlikely to be a good idea. Also,
808 * eliminating paths here on the basis of compare_path_costs is a lot
809 * cheaper than building the mergejoin path only to throw it away.
811 * If inner_cheapest_total is well enough sorted to have not required
812 * a sort in the path made above, we shouldn't make a duplicate path
813 * with it, either. We handle that case with the same logic that
814 * handles the previous consideration, by initializing the variables
815 * that track cheapest-so-far properly. Note that we do NOT reject
816 * inner_cheapest_total if we find it matches some shorter set of
817 * pathkeys. That case corresponds to using fewer mergekeys to avoid
818 * sorting inner_cheapest_total, whereas we did sort it above, so the
819 * plans being considered are different.
821 if (pathkeys_contained_in(innersortkeys,
822 inner_cheapest_total->pathkeys))
824 /* inner_cheapest_total didn't require a sort */
825 cheapest_startup_inner = inner_cheapest_total;
826 cheapest_total_inner = inner_cheapest_total;
830 /* it did require a sort, at least for the full set of keys */
831 cheapest_startup_inner = NULL;
832 cheapest_total_inner = NULL;
834 num_sortkeys = list_length(innersortkeys);
835 if (num_sortkeys > 1 && !useallclauses)
836 trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
838 trialsortkeys = innersortkeys; /* won't really truncate */
840 for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
843 List *newclauses = NIL;
846 * Look for an inner path ordered well enough for the first
847 * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
848 * destructively, which is why we made a copy...
850 trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
851 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
854 if (innerpath != NULL &&
855 (cheapest_total_inner == NULL ||
856 compare_path_costs(innerpath, cheapest_total_inner,
859 /* Found a cheap (or even-cheaper) sorted path */
860 /* Select the right mergeclauses, if we didn't already */
861 if (sortkeycnt < num_sortkeys)
864 find_mergeclauses_for_pathkeys(root,
868 Assert(newclauses != NIL);
871 newclauses = mergeclauses;
872 add_path(joinrel, (Path *)
873 create_mergejoin_path(root,
884 cheapest_total_inner = innerpath;
886 /* Same on the basis of cheapest startup cost ... */
887 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
890 if (innerpath != NULL &&
891 (cheapest_startup_inner == NULL ||
892 compare_path_costs(innerpath, cheapest_startup_inner,
895 /* Found a cheap (or even-cheaper) sorted path */
896 if (innerpath != cheapest_total_inner)
899 * Avoid rebuilding clause list if we already made one;
900 * saves memory in big join trees...
902 if (newclauses == NIL)
904 if (sortkeycnt < num_sortkeys)
907 find_mergeclauses_for_pathkeys(root,
911 Assert(newclauses != NIL);
914 newclauses = mergeclauses;
916 add_path(joinrel, (Path *)
917 create_mergejoin_path(root,
929 cheapest_startup_inner = innerpath;
933 * Don't consider truncated sortkeys if we need all clauses.
942 * hash_inner_and_outer
943 * Create hashjoin join paths by explicitly hashing both the outer and
944 * inner keys of each available hash clause.
946 * 'joinrel' is the join relation
947 * 'outerrel' is the outer join relation
948 * 'innerrel' is the inner join relation
949 * 'restrictlist' contains all of the RestrictInfo nodes for restriction
950 * clauses that apply to this join
951 * 'jointype' is the type of join to do
952 * 'sjinfo' is extra info about the join for selectivity estimation
955 hash_inner_and_outer(PlannerInfo *root,
957 RelOptInfo *outerrel,
958 RelOptInfo *innerrel,
961 SpecialJoinInfo *sjinfo)
968 * Hashjoin only supports inner, left, semi, and anti joins.
974 case JOIN_UNIQUE_OUTER:
975 case JOIN_UNIQUE_INNER:
987 * We need to build only one hashpath for any given pair of outer and
988 * inner relations; all of the hashable clauses will be used as keys.
990 * Scan the join's restrictinfo list to find hashjoinable clauses that are
991 * usable with this pair of sub-relations.
994 foreach(l, restrictlist)
996 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
999 * If processing an outer join, only use its own join clauses for
1000 * hashing. For inner joins we need not be so picky.
1002 if (isouterjoin && restrictinfo->is_pushed_down)
1005 if (!restrictinfo->can_join ||
1006 restrictinfo->hashjoinoperator == InvalidOid)
1007 continue; /* not hashjoinable */
1010 * Check if clause has the form "outer op inner" or "inner op outer".
1012 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1013 continue; /* no good for these input relations */
1015 hashclauses = lappend(hashclauses, restrictinfo);
1018 /* If we found any usable hashclauses, make a path */
1022 * We consider both the cheapest-total-cost and cheapest-startup-cost
1023 * outer paths. There's no need to consider any but the
1024 * cheapest-total-cost inner path, however.
1026 Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
1027 Path *cheapest_total_outer = outerrel->cheapest_total_path;
1028 Path *cheapest_total_inner = innerrel->cheapest_total_path;
1030 /* Unique-ify if need be */
1031 if (jointype == JOIN_UNIQUE_OUTER)
1033 cheapest_total_outer = (Path *)
1034 create_unique_path(root, outerrel,
1035 cheapest_total_outer, sjinfo);
1036 Assert(cheapest_total_outer);
1037 cheapest_startup_outer = cheapest_total_outer;
1038 jointype = JOIN_INNER;
1040 else if (jointype == JOIN_UNIQUE_INNER)
1042 cheapest_total_inner = (Path *)
1043 create_unique_path(root, innerrel,
1044 cheapest_total_inner, sjinfo);
1045 Assert(cheapest_total_inner);
1046 jointype = JOIN_INNER;
1049 add_path(joinrel, (Path *)
1050 create_hashjoin_path(root,
1054 cheapest_total_outer,
1055 cheapest_total_inner,
1058 if (cheapest_startup_outer != cheapest_total_outer)
1059 add_path(joinrel, (Path *)
1060 create_hashjoin_path(root,
1064 cheapest_startup_outer,
1065 cheapest_total_inner,
1072 * best_appendrel_indexscan
1073 * Finds the best available set of inner indexscans for a nestloop join
1074 * with the given append relation on the inside and the given outer_rel
1075 * outside. Returns an AppendPath comprising the best inner scans, or
1076 * NULL if there are no possible inner indexscans.
1078 * Note that we currently consider only cheapest-total-cost. It's not
1079 * very clear what cheapest-startup-cost might mean for an AppendPath.
1082 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
1083 RelOptInfo *outer_rel, JoinType jointype)
1085 int parentRTindex = rel->relid;
1086 List *append_paths = NIL;
1087 bool found_indexscan = false;
1090 foreach(l, root->append_rel_list)
1092 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1094 RelOptInfo *childrel;
1095 Path *index_cheapest_startup;
1096 Path *index_cheapest_total;
1098 /* append_rel_list contains all append rels; ignore others */
1099 if (appinfo->parent_relid != parentRTindex)
1102 childRTindex = appinfo->child_relid;
1103 childrel = find_base_rel(root, childRTindex);
1104 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
1107 * Check to see if child was rejected by constraint exclusion. If so,
1108 * it will have a cheapest_total_path that's a "dummy" path.
1110 if (IS_DUMMY_PATH(childrel->cheapest_total_path))
1111 continue; /* OK, we can ignore it */
1114 * Get the best innerjoin indexpaths (if any) for this child rel.
1116 best_inner_indexscan(root, childrel, outer_rel, jointype,
1117 &index_cheapest_startup, &index_cheapest_total);
1120 * If no luck on an indexpath for this rel, we'll still consider an
1121 * Append substituting the cheapest-total inner path. However we must
1122 * find at least one indexpath, else there's not going to be any
1123 * improvement over the base path for the appendrel.
1125 if (index_cheapest_total)
1126 found_indexscan = true;
1128 index_cheapest_total = childrel->cheapest_total_path;
1130 append_paths = lappend(append_paths, index_cheapest_total);
1133 if (!found_indexscan)
1136 /* Form and return the completed Append path. */
1137 return (Path *) create_append_path(rel, append_paths);
1141 * select_mergejoin_clauses
1142 * Select mergejoin clauses that are usable for a particular join.
1143 * Returns a list of RestrictInfo nodes for those clauses.
1145 * We also mark each selected RestrictInfo to show which side is currently
1146 * being considered as outer. These are transient markings that are only
1147 * good for the duration of the current add_paths_to_joinrel() call!
1149 * We examine each restrictinfo clause known for the join to see
1150 * if it is mergejoinable and involves vars from the two sub-relations
1151 * currently of interest.
1154 select_mergejoin_clauses(PlannerInfo *root,
1155 RelOptInfo *joinrel,
1156 RelOptInfo *outerrel,
1157 RelOptInfo *innerrel,
1161 List *result_list = NIL;
1162 bool isouterjoin = IS_OUTER_JOIN(jointype);
1163 bool have_nonmergeable_joinclause = false;
1166 foreach(l, restrictlist)
1168 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1171 * If processing an outer join, only use its own join clauses in the
1172 * merge. For inner joins we can use pushed-down clauses too. (Note:
1173 * we don't set have_nonmergeable_joinclause here because pushed-down
1174 * clauses will become otherquals not joinquals.)
1176 if (isouterjoin && restrictinfo->is_pushed_down)
1179 if (!restrictinfo->can_join ||
1180 restrictinfo->mergeopfamilies == NIL)
1182 have_nonmergeable_joinclause = true;
1183 continue; /* not mergejoinable */
1187 * Check if clause has the form "outer op inner" or "inner op outer".
1189 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1191 have_nonmergeable_joinclause = true;
1192 continue; /* no good for these input relations */
1196 * Insist that each side have a non-redundant eclass. This
1197 * restriction is needed because various bits of the planner expect
1198 * that each clause in a merge be associatable with some pathkey in a
1199 * canonical pathkey list, but redundant eclasses can't appear in
1200 * canonical sort orderings. (XXX it might be worth relaxing this,
1201 * but not enough time to address it for 8.3.)
1203 * Note: it would be bad if this condition failed for an otherwise
1204 * mergejoinable FULL JOIN clause, since that would result in
1205 * undesirable planner failure. I believe that is not possible
1206 * however; a variable involved in a full join could only appear in
1207 * below_outer_join eclasses, which aren't considered redundant.
1209 * This case *can* happen for left/right join clauses: the outer-side
1210 * variable could be equated to a constant. Because we will propagate
1211 * that constant across the join clause, the loss of ability to do a
1212 * mergejoin is not really all that big a deal, and so it's not clear
1213 * that improving this is important.
1215 cache_mergeclause_eclasses(root, restrictinfo);
1217 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
1218 EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
1220 have_nonmergeable_joinclause = true;
1221 continue; /* can't handle redundant eclasses */
1224 result_list = lappend(result_list, restrictinfo);
1228 * If it is a right/full join then *all* the explicit join clauses must be
1229 * mergejoinable, else the executor will fail. If we are asked for a right
1230 * join then just return NIL to indicate no mergejoin is possible (we can
1231 * handle it as a left join instead). If we are asked for a full join then
1232 * emit an error, because there is no fallback.
1234 if (have_nonmergeable_joinclause)
1239 return NIL; /* not mergejoinable */
1242 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1243 errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
1246 /* otherwise, it's OK to have nonmergeable join quals */