1 /*-------------------------------------------------------------------------
4 * Routines to determine which relations should be joined
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.93 2008/08/14 18:47:59 tgl Exp $
13 *-------------------------------------------------------------------------
17 #include "optimizer/joininfo.h"
18 #include "optimizer/pathnode.h"
19 #include "optimizer/paths.h"
22 static List *make_rels_by_clause_joins(PlannerInfo *root,
24 ListCell *other_rels);
25 static List *make_rels_by_clauseless_joins(PlannerInfo *root,
27 ListCell *other_rels);
28 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
29 static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
30 static bool is_dummy_rel(RelOptInfo *rel);
31 static void mark_dummy_join(RelOptInfo *rel);
35 * join_search_one_level
36 * Consider ways to produce join relations containing exactly 'level'
37 * jointree items. (This is one step of the dynamic-programming method
38 * embodied in standard_join_search.) Join rel nodes for each feasible
39 * combination of lower-level rels are created and returned in a list.
40 * Implementation paths are created for each such joinrel, too.
42 * level: level of rels we want to make this time.
43 * joinrels[j], 1 <= j < level, is a list of rels containing j items.
46 join_search_one_level(PlannerInfo *root, int level, List **joinrels)
48 List *result_rels = NIL;
54 * First, consider left-sided and right-sided plans, in which rels of
55 * exactly level-1 member relations are joined against initial relations.
56 * We prefer to join using join clauses, but if we find a rel of level-1
57 * members that has no join clauses, we will generate Cartesian-product
58 * joins against all initial rels not already contained in it.
60 * In the first pass (level == 2), we try to join each initial rel to each
61 * initial rel that appears later in joinrels[1]. (The mirror-image joins
62 * are handled automatically by make_join_rel.) In later passes, we try
63 * to join rels of size level-1 from joinrels[level-1] to each initial rel
66 foreach(r, joinrels[level - 1])
68 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
72 other_rels = lnext(r); /* only consider remaining initial
75 other_rels = list_head(joinrels[1]); /* consider all initial
78 if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
79 has_join_restriction(root, old_rel))
82 * Note that if all available join clauses for this rel require
83 * more than one other rel, we will fail to make any joins against
84 * it here. In most cases that's OK; it'll be considered by
85 * "bushy plan" join code in a higher-level pass where we have
86 * those other rels collected into a join rel.
88 * See also the last-ditch case below.
90 new_rels = make_rels_by_clause_joins(root,
97 * Oops, we have a relation that is not joined to any other
98 * relation, either directly or by join-order restrictions.
99 * Cartesian product time.
101 new_rels = make_rels_by_clauseless_joins(root,
107 * At levels above 2 we will generate the same joined relation in
108 * multiple ways --- for example (a join b) join c is the same
109 * RelOptInfo as (b join c) join a, though the second case will add a
110 * different set of Paths to it. To avoid making extra work for
111 * subsequent passes, do not enter the same RelOptInfo into our output
112 * list multiple times.
114 result_rels = list_concat_unique_ptr(result_rels, new_rels);
118 * Now, consider "bushy plans" in which relations of k initial rels are
119 * joined to relations of level-k initial rels, for 2 <= k <= level-2.
121 * We only consider bushy-plan joins for pairs of rels where there is a
122 * suitable join clause (or join order restriction), in order to avoid
123 * unreasonable growth of planning time.
127 int other_level = level - k;
130 * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
131 * need to go as far as the halfway point.
136 foreach(r, joinrels[k])
138 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
139 ListCell *other_rels;
143 * We can ignore clauseless joins here, *except* when they
144 * participate in join-order restrictions --- then we might have
145 * to force a bushy join plan.
147 if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
148 !has_join_restriction(root, old_rel))
151 if (k == other_level)
152 other_rels = lnext(r); /* only consider remaining rels */
154 other_rels = list_head(joinrels[other_level]);
156 for_each_cell(r2, other_rels)
158 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
160 if (!bms_overlap(old_rel->relids, new_rel->relids))
163 * OK, we can build a rel of the right level from this
164 * pair of rels. Do so if there is at least one usable
165 * join clause or a relevant join restriction.
167 if (have_relevant_joinclause(root, old_rel, new_rel) ||
168 have_join_order_restriction(root, old_rel, new_rel))
172 jrel = make_join_rel(root, old_rel, new_rel);
173 /* Avoid making duplicate entries ... */
175 result_rels = list_append_unique_ptr(result_rels,
184 * Last-ditch effort: if we failed to find any usable joins so far, force
185 * a set of cartesian-product joins to be generated. This handles the
186 * special case where all the available rels have join clauses but we
187 * cannot use any of the joins yet. An example is
189 * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0;
191 * The join clause will be usable at level 3, but at level 2 we have no
192 * choice but to make cartesian joins. We consider only left-sided and
193 * right-sided cartesian joins in this case (no bushy).
195 if (result_rels == NIL)
198 * This loop is just like the first one, except we always call
199 * make_rels_by_clauseless_joins().
201 foreach(r, joinrels[level - 1])
203 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
204 ListCell *other_rels;
207 other_rels = lnext(r); /* only consider remaining initial
210 other_rels = list_head(joinrels[1]); /* consider all initial
213 new_rels = make_rels_by_clauseless_joins(root,
217 result_rels = list_concat_unique_ptr(result_rels, new_rels);
221 * When special joins are involved, there may be no legal way
222 * to make an N-way join for some values of N. For example consider
224 * SELECT ... FROM t1 WHERE
225 * x IN (SELECT ... FROM t2,t3 WHERE ...) AND
226 * y IN (SELECT ... FROM t4,t5 WHERE ...)
228 * We will flatten this query to a 5-way join problem, but there are
229 * no 4-way joins that join_is_legal() will consider legal. We have
230 * to accept failure at level 4 and go on to discover a workable
231 * bushy plan at level 5.
233 * However, if there are no special joins then join_is_legal() should
234 * never fail, and so the following sanity check is useful.
237 if (result_rels == NIL && root->join_info_list == NIL)
238 elog(ERROR, "failed to build any %d-way joins", level);
245 * make_rels_by_clause_joins
246 * Build joins between the given relation 'old_rel' and other relations
247 * that participate in join clauses that 'old_rel' also participates in
248 * (or participate in join-order restrictions with it).
249 * The join rel nodes are returned in a list.
251 * 'old_rel' is the relation entry for the relation to be joined
252 * 'other_rels': the first cell in a linked list containing the other
253 * rels to be considered for joining
255 * Currently, this is only used with initial rels in other_rels, but it
256 * will work for joining to joinrels too.
259 make_rels_by_clause_joins(PlannerInfo *root,
261 ListCell *other_rels)
266 for_each_cell(l, other_rels)
268 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
270 if (!bms_overlap(old_rel->relids, other_rel->relids) &&
271 (have_relevant_joinclause(root, old_rel, other_rel) ||
272 have_join_order_restriction(root, old_rel, other_rel)))
276 jrel = make_join_rel(root, old_rel, other_rel);
278 result = lcons(jrel, result);
286 * make_rels_by_clauseless_joins
287 * Given a relation 'old_rel' and a list of other relations
288 * 'other_rels', create a join relation between 'old_rel' and each
289 * member of 'other_rels' that isn't already included in 'old_rel'.
290 * The join rel nodes are returned in a list.
292 * 'old_rel' is the relation entry for the relation to be joined
293 * 'other_rels': the first cell of a linked list containing the
294 * other rels to be considered for joining
296 * Currently, this is only used with initial rels in other_rels, but it would
297 * work for joining to joinrels too.
300 make_rels_by_clauseless_joins(PlannerInfo *root,
302 ListCell *other_rels)
307 for_each_cell(i, other_rels)
309 RelOptInfo *other_rel = (RelOptInfo *) lfirst(i);
311 if (!bms_overlap(other_rel->relids, old_rel->relids))
315 jrel = make_join_rel(root, old_rel, other_rel);
318 * As long as given other_rels are distinct, don't need to test to
319 * see if jrel is already part of output list.
322 result = lcons(jrel, result);
332 * Determine whether a proposed join is legal given the query's
333 * join order constraints; and if it is, determine the join type.
335 * Caller must supply not only the two rels, but the union of their relids.
336 * (We could simplify the API by computing joinrelids locally, but this
337 * would be redundant work in the normal path through make_join_rel.)
339 * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
340 * else it's set to point to the associated SpecialJoinInfo node. Also,
341 * *reversed_p is set TRUE if the given relations need to be swapped to
342 * match the SpecialJoinInfo node.
345 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
347 SpecialJoinInfo **sjinfo_p, bool *reversed_p)
349 SpecialJoinInfo *match_sjinfo;
355 * Ensure output params are set on failure return. This is just to
356 * suppress uninitialized-variable warnings from overly anal compilers.
362 * If we have any special joins, the proposed join might be illegal; and
363 * in any case we have to determine its join type. Scan the join info
364 * list for conflicts.
368 is_valid_inner = true;
370 foreach(l, root->join_info_list)
372 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
375 * This special join is not relevant unless its RHS overlaps the
376 * proposed join. (Check this first as a fast path for dismissing
377 * most irrelevant SJs quickly.)
379 if (!bms_overlap(sjinfo->min_righthand, joinrelids))
383 * Also, not relevant if proposed join is fully contained within RHS
384 * (ie, we're still building up the RHS).
386 if (bms_is_subset(joinrelids, sjinfo->min_righthand))
390 * Also, not relevant if SJ is already done within either input.
392 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
393 bms_is_subset(sjinfo->min_righthand, rel1->relids))
395 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
396 bms_is_subset(sjinfo->min_righthand, rel2->relids))
400 * If one input contains min_lefthand and the other contains
401 * min_righthand, then we can perform the SJ at this join.
403 * Barf if we get matches to more than one SJ (is that possible?)
405 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
406 bms_is_subset(sjinfo->min_righthand, rel2->relids))
409 return false; /* invalid join path */
410 match_sjinfo = sjinfo;
413 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
414 bms_is_subset(sjinfo->min_righthand, rel1->relids))
417 return false; /* invalid join path */
418 match_sjinfo = sjinfo;
424 * Otherwise, the proposed join overlaps the RHS but isn't
425 * a valid implementation of this SJ. It might still be
426 * a legal join, however. If both inputs overlap the RHS,
427 * assume that it's OK. Since the inputs presumably got past
428 * this function's checks previously, they can't overlap the
429 * LHS and their violations of the RHS boundary must represent
430 * SJs that have been determined to commute with this one.
431 * We have to allow this to work correctly in cases like
432 * (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
433 * when the c/d join has been determined to commute with the join
434 * to a, and hence d is not part of min_righthand for the upper
435 * join. It should be legal to join b to c/d but this will appear
436 * as a violation of the upper join's RHS.
437 * Furthermore, if one input overlaps the RHS and the other does
438 * not, we should still allow the join if it is a valid
439 * implementation of some other SJ. We have to allow this to
440 * support the associative identity
441 * (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
442 * since joining B directly to C violates the lower SJ's RHS.
443 * We assume that make_outerjoininfo() set things up correctly
444 * so that we'll only match to some SJ if the join is valid.
445 * Set flag here to check at bottom of loop.
448 if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
449 bms_overlap(rel2->relids, sjinfo->min_righthand))
452 Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
455 is_valid_inner = false;
459 /* Fail if violated some SJ's RHS and didn't match to another SJ */
460 if (match_sjinfo == NULL && !is_valid_inner)
461 return false; /* invalid join path */
463 /* Otherwise, it's a valid join */
464 *sjinfo_p = match_sjinfo;
465 *reversed_p = reversed;
472 * Find or create a join RelOptInfo that represents the join of
473 * the two given rels, and add to it path information for paths
474 * created with the two rels as outer and inner rel.
475 * (The join rel may already contain paths generated from other
476 * pairs of rels that add up to the same set of base rels.)
478 * NB: will return NULL if attempted join is not valid. This can happen
479 * when working with outer joins, or with IN or EXISTS clauses that have been
483 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
486 SpecialJoinInfo *sjinfo;
488 SpecialJoinInfo sjinfo_data;
492 /* We should never try to join two overlapping sets of rels. */
493 Assert(!bms_overlap(rel1->relids, rel2->relids));
495 /* Construct Relids set that identifies the joinrel. */
496 joinrelids = bms_union(rel1->relids, rel2->relids);
498 /* Check validity and determine join type. */
499 if (!join_is_legal(root, rel1, rel2, joinrelids,
502 /* invalid join path */
503 bms_free(joinrelids);
507 /* Swap rels if needed to match the join info. */
510 RelOptInfo *trel = rel1;
517 * If it's a plain inner join, then we won't have found anything in
518 * join_info_list. Make up a SpecialJoinInfo so that selectivity
519 * estimation functions will know what's being joined.
523 sjinfo = &sjinfo_data;
524 sjinfo->type = T_SpecialJoinInfo;
525 sjinfo->min_lefthand = rel1->relids;
526 sjinfo->min_righthand = rel2->relids;
527 sjinfo->syn_lefthand = rel1->relids;
528 sjinfo->syn_righthand = rel2->relids;
529 sjinfo->jointype = JOIN_INNER;
530 /* we don't bother trying to make the remaining fields valid */
531 sjinfo->lhs_strict = false;
532 sjinfo->delay_upper_joins = false;
533 sjinfo->join_quals = NIL;
537 * Find or build the join RelOptInfo, and compute the restrictlist that
538 * goes with this particular joining.
540 joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
544 * If we've already proven this join is empty, we needn't consider
545 * any more paths for it.
547 if (is_dummy_rel(joinrel))
549 bms_free(joinrelids);
554 * Consider paths using each rel as both outer and inner. Depending
555 * on the join type, a provably empty outer or inner rel might mean
556 * the join is provably empty too; in which case throw away any
557 * previously computed paths and mark the join as dummy. (We do it
558 * this way since it's conceivable that dummy-ness of a multi-element
559 * join might only be noticeable for certain construction paths.)
561 * We need only consider the jointypes that appear in join_info_list,
564 switch (sjinfo->jointype)
567 if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
569 mark_dummy_join(joinrel);
572 add_paths_to_joinrel(root, joinrel, rel1, rel2,
575 add_paths_to_joinrel(root, joinrel, rel2, rel1,
580 if (is_dummy_rel(rel1))
582 mark_dummy_join(joinrel);
585 add_paths_to_joinrel(root, joinrel, rel1, rel2,
588 add_paths_to_joinrel(root, joinrel, rel2, rel1,
593 if (is_dummy_rel(rel1) && is_dummy_rel(rel2))
595 mark_dummy_join(joinrel);
598 add_paths_to_joinrel(root, joinrel, rel1, rel2,
601 add_paths_to_joinrel(root, joinrel, rel2, rel1,
606 if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
608 mark_dummy_join(joinrel);
611 add_paths_to_joinrel(root, joinrel, rel1, rel2,
616 * If we know how to unique-ify the RHS and one input rel is
617 * exactly the RHS (not a superset) we can consider unique-ifying
618 * it and then doing a regular join.
620 if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
621 create_unique_path(root, rel2, rel2->cheapest_total_path,
624 add_paths_to_joinrel(root, joinrel, rel1, rel2,
625 JOIN_UNIQUE_INNER, sjinfo,
627 add_paths_to_joinrel(root, joinrel, rel2, rel1,
628 JOIN_UNIQUE_OUTER, sjinfo,
633 if (is_dummy_rel(rel1))
635 mark_dummy_join(joinrel);
638 add_paths_to_joinrel(root, joinrel, rel1, rel2,
643 /* other values not expected here */
644 elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
648 bms_free(joinrelids);
655 * have_join_order_restriction
656 * Detect whether the two relations should be joined to satisfy
657 * a join-order restriction arising from special joins.
659 * In practice this is always used with have_relevant_joinclause(), and so
660 * could be merged with that function, but it seems clearer to separate the
661 * two concerns. We need these tests because there are degenerate cases where
662 * a clauseless join must be performed to satisfy join-order restrictions.
664 * Note: this is only a problem if one side of a degenerate outer join
665 * contains multiple rels, or a clauseless join is required within an
666 * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
667 * join_search_one_level(). We could dispense with this test if we were
668 * willing to try bushy plans in the "last ditch" case, but that seems much
672 have_join_order_restriction(PlannerInfo *root,
673 RelOptInfo *rel1, RelOptInfo *rel2)
679 * It's possible that the rels correspond to the left and right sides of a
680 * degenerate outer join, that is, one with no joinclause mentioning the
681 * non-nullable side; in which case we should force the join to occur.
683 * Also, the two rels could represent a clauseless join that has to be
684 * completed to build up the LHS or RHS of an outer join.
686 foreach(l, root->join_info_list)
688 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
690 /* ignore full joins --- other mechanisms handle them */
691 if (sjinfo->jointype == JOIN_FULL)
694 /* Can we perform the SJ with these rels? */
695 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
696 bms_is_subset(sjinfo->min_righthand, rel2->relids))
701 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
702 bms_is_subset(sjinfo->min_righthand, rel1->relids))
709 * Might we need to join these rels to complete the RHS? We have to
710 * use "overlap" tests since either rel might include a lower SJ that
711 * has been proven to commute with this one.
713 if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
714 bms_overlap(sjinfo->min_righthand, rel2->relids))
720 /* Likewise for the LHS. */
721 if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
722 bms_overlap(sjinfo->min_lefthand, rel2->relids))
730 * We do not force the join to occur if either input rel can legally be
731 * joined to anything else using joinclauses. This essentially means that
732 * clauseless bushy joins are put off as long as possible. The reason is
733 * that when there is a join order restriction high up in the join tree
734 * (that is, with many rels inside the LHS or RHS), we would otherwise
735 * expend lots of effort considering very stupid join combinations within
740 if (has_legal_joinclause(root, rel1) ||
741 has_legal_joinclause(root, rel2))
750 * has_join_restriction
751 * Detect whether the specified relation has join-order restrictions
752 * due to being inside an outer join or an IN (sub-SELECT).
754 * Essentially, this tests whether have_join_order_restriction() could
755 * succeed with this rel and some other one. It's OK if we sometimes
756 * say "true" incorrectly. (Therefore, we don't bother with the relatively
757 * expensive has_legal_joinclause test.)
760 has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
764 foreach(l, root->join_info_list)
766 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
768 /* ignore full joins --- other mechanisms preserve their ordering */
769 if (sjinfo->jointype == JOIN_FULL)
772 /* ignore if SJ is already contained in rel */
773 if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
774 bms_is_subset(sjinfo->min_righthand, rel->relids))
777 /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
778 if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
779 bms_overlap(sjinfo->min_righthand, rel->relids))
788 * has_legal_joinclause
789 * Detect whether the specified relation can legally be joined
790 * to any other rels using join clauses.
792 * We consider only joins to single other relations in the current
793 * initial_rels list. This is sufficient to get a "true" result in most real
794 * queries, and an occasional erroneous "false" will only cost a bit more
795 * planning time. The reason for this limitation is that considering joins to
796 * other joins would require proving that the other join rel can legally be
797 * formed, which seems like too much trouble for something that's only a
798 * heuristic to save planning time. (Note: we must look at initial_rels
799 * and not all of the query, since when we are planning a sub-joinlist we
800 * may be forced to make clauseless joins within initial_rels even though
801 * there are join clauses linking to other parts of the query.)
804 has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
808 foreach(lc, root->initial_rels)
810 RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc);
812 /* ignore rels that are already in "rel" */
813 if (bms_overlap(rel->relids, rel2->relids))
816 if (have_relevant_joinclause(root, rel, rel2))
819 SpecialJoinInfo *sjinfo;
822 /* join_is_legal needs relids of the union */
823 joinrelids = bms_union(rel->relids, rel2->relids);
825 if (join_is_legal(root, rel, rel2, joinrelids,
828 /* Yes, this will work */
829 bms_free(joinrelids);
833 bms_free(joinrelids);
842 * is_dummy_rel --- has relation been proven empty?
844 * If so, it will have a single path that is dummy.
847 is_dummy_rel(RelOptInfo *rel)
849 return (rel->cheapest_total_path != NULL &&
850 IS_DUMMY_PATH(rel->cheapest_total_path));
854 * Mark a joinrel as proven empty.
857 mark_dummy_join(RelOptInfo *rel)
859 /* Set dummy size estimate */
862 /* Evict any previously chosen paths */
865 /* Set up the dummy path */
866 add_path(rel, (Path *) create_append_path(rel, NIL));
869 * Although set_cheapest will be done again later, we do it immediately
870 * in order to keep is_dummy_rel as cheap as possible (ie, not have
871 * to examine the pathlist).