1 /*-------------------------------------------------------------------------
4 * Routines to determine which relations should be joined
6 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.68 2004/05/26 04:41:22 neilc Exp $
13 *-------------------------------------------------------------------------
17 #include "optimizer/pathnode.h"
18 #include "optimizer/paths.h"
21 static List *make_rels_by_clause_joins(Query *root,
23 ListCell *other_rels);
24 static List *make_rels_by_clauseless_joins(Query *root,
26 ListCell *other_rels);
27 static bool is_inside_IN(Query *root, RelOptInfo *rel);
32 * Consider ways to produce join relations containing exactly 'level'
33 * jointree items. (This is one step of the dynamic-programming method
34 * embodied in make_one_rel_by_joins.) Join rel nodes for each feasible
35 * combination of lower-level rels are created and returned in a list.
36 * Implementation paths are created for each such joinrel, too.
38 * level: level of rels we want to make this time.
39 * joinrels[j], 1 <= j < level, is a list of rels containing j items.
42 make_rels_by_joins(Query *root, int level, List **joinrels)
44 List *result_rels = NIL;
51 * First, consider left-sided and right-sided plans, in which rels of
52 * exactly level-1 member relations are joined against initial
53 * relations. We prefer to join using join clauses, but if we find a
54 * rel of level-1 members that has no join clauses, we will generate
55 * Cartesian-product joins against all initial rels not already
58 * In the first pass (level == 2), we try to join each initial rel to
59 * each initial rel that appears later in joinrels[1]. (The
60 * mirror-image joins are handled automatically by make_join_rel.) In
61 * later passes, we try to join rels of size level-1 from
62 * joinrels[level-1] to each initial rel in joinrels[1].
64 foreach(r, joinrels[level - 1])
66 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
70 other_rels = lnext(r); /* only consider remaining initial
73 other_rels = list_head(joinrels[1]); /* consider all initial rels */
75 if (old_rel->joininfo != NIL)
78 * Note that if all available join clauses for this rel
79 * require more than one other rel, we will fail to make any
80 * joins against it here. In most cases that's OK; it'll be
81 * considered by "bushy plan" join code in a higher-level pass
82 * where we have those other rels collected into a join rel.
84 new_rels = make_rels_by_clause_joins(root,
88 * An exception occurs when there is a clauseless join inside an
89 * IN (sub-SELECT) construct. Here, the members of the subselect
90 * all have join clauses (against the stuff outside the IN), but
91 * they *must* be joined to each other before we can make use of
92 * those join clauses. So do the clauseless join bit.
94 * See also the last-ditch case below.
96 if (new_rels == NIL && is_inside_IN(root, old_rel))
97 new_rels = make_rels_by_clauseless_joins(root,
104 * Oops, we have a relation that is not joined to any other
105 * relation. Cartesian product time.
107 new_rels = make_rels_by_clauseless_joins(root,
113 * At levels above 2 we will generate the same joined relation in
114 * multiple ways --- for example (a join b) join c is the same
115 * RelOptInfo as (b join c) join a, though the second case will
116 * add a different set of Paths to it. To avoid making extra work
117 * for subsequent passes, do not enter the same RelOptInfo into
118 * our output list multiple times.
120 foreach(nr, new_rels)
122 RelOptInfo *jrel = (RelOptInfo *) lfirst(nr);
124 if (!ptrMember(jrel, result_rels))
125 result_rels = lcons(jrel, result_rels);
130 * Now, consider "bushy plans" in which relations of k initial rels
131 * are joined to relations of level-k initial rels, for 2 <= k <=
134 * We only consider bushy-plan joins for pairs of rels where there is a
135 * suitable join clause, in order to avoid unreasonable growth of
140 int other_level = level - k;
143 * Since make_join_rel(x, y) handles both x,y and y,x cases, we
144 * only need to go as far as the halfway point.
149 foreach(r, joinrels[k])
151 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
152 ListCell *other_rels;
155 if (old_rel->joininfo == NIL)
156 continue; /* we ignore clauseless joins here */
158 if (k == other_level)
159 other_rels = lnext(r); /* only consider remaining rels */
161 other_rels = list_head(joinrels[other_level]);
163 for_each_cell(r2, other_rels)
165 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
167 if (!bms_overlap(old_rel->relids, new_rel->relids))
172 * OK, we can build a rel of the right level from this
173 * pair of rels. Do so if there is at least one
174 * usable join clause.
176 foreach(i, old_rel->joininfo)
178 JoinInfo *joininfo = (JoinInfo *) lfirst(i);
180 if (bms_is_subset(joininfo->unjoined_relids,
185 jrel = make_join_rel(root, old_rel, new_rel,
187 /* Avoid making duplicate entries ... */
188 if (jrel && !ptrMember(jrel, result_rels))
189 result_rels = lcons(jrel, result_rels);
190 break; /* need not consider more
200 * Last-ditch effort: if we failed to find any usable joins so far,
201 * force a set of cartesian-product joins to be generated. This
202 * handles the special case where all the available rels have join
203 * clauses but we cannot use any of the joins yet. An example is
205 * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0;
207 * The join clause will be usable at level 3, but at level 2 we have no
208 * choice but to make cartesian joins. We consider only left-sided
209 * and right-sided cartesian joins in this case (no bushy).
211 if (result_rels == NIL)
214 * This loop is just like the first one, except we always call
215 * make_rels_by_clauseless_joins().
217 foreach(r, joinrels[level - 1])
219 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
220 ListCell *other_rels;
223 other_rels = lnext(r); /* only consider remaining initial
226 other_rels = list_head(joinrels[1]); /* consider all initial
229 new_rels = make_rels_by_clauseless_joins(root,
233 foreach(nr, new_rels)
235 RelOptInfo *jrel = (RelOptInfo *) lfirst(nr);
237 if (!ptrMember(jrel, result_rels))
238 result_rels = lcons(jrel, result_rels);
243 * When IN clauses are involved, there may be no legal way to make
244 * an N-way join for some values of N. For example consider
246 * SELECT ... FROM t1 WHERE
247 * x IN (SELECT ... FROM t2,t3 WHERE ...) AND
248 * y IN (SELECT ... FROM t4,t5 WHERE ...)
250 * We will flatten this query to a 5-way join problem, but there are
251 * no 4-way joins that make_join_rel() will consider legal. We have
252 * to accept failure at level 4 and go on to discover a workable
253 * bushy plan at level 5.
255 * However, if there are no IN clauses then make_join_rel() should
256 * never fail, and so the following sanity check is useful.
259 if (result_rels == NIL && root->in_info_list == NIL)
260 elog(ERROR, "failed to build any %d-way joins", level);
267 * make_rels_by_clause_joins
268 * Build joins between the given relation 'old_rel' and other relations
269 * that are mentioned within old_rel's joininfo nodes (i.e., relations
270 * that participate in join clauses that 'old_rel' also participates in).
271 * The join rel nodes are returned in a list.
273 * 'old_rel' is the relation entry for the relation to be joined
274 * 'other_rels': the first cell in a linked list containing the other
275 * rels to be considered for joining
277 * Currently, this is only used with initial rels in other_rels, but it
278 * will work for joining to joinrels too, if the caller ensures there is no
279 * membership overlap between old_rel and the rels in other_rels. (We need
280 * no extra test for overlap for initial rels, since the is_subset test can
281 * only succeed when other_rel is not already part of old_rel.)
284 make_rels_by_clause_joins(Query *root,
286 ListCell *other_rels)
292 foreach(i, old_rel->joininfo)
294 JoinInfo *joininfo = (JoinInfo *) lfirst(i);
295 Relids unjoined_relids = joininfo->unjoined_relids;
297 for_each_cell(j, other_rels)
299 RelOptInfo *other_rel = (RelOptInfo *) lfirst(j);
301 if (bms_is_subset(unjoined_relids, other_rel->relids))
305 jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
308 * Avoid entering same joinrel into our output list more
311 if (jrel && !ptrMember(jrel, result))
312 result = lcons(jrel, result);
321 * make_rels_by_clauseless_joins
322 * Given a relation 'old_rel' and a list of other relations
323 * 'other_rels', create a join relation between 'old_rel' and each
324 * member of 'other_rels' that isn't already included in 'old_rel'.
325 * The join rel nodes are returned in a list.
327 * 'old_rel' is the relation entry for the relation to be joined
328 * 'other_rels': the first cell of a linked list containing the
329 * other rels to be considered for joining
331 * Currently, this is only used with initial rels in other_rels, but it would
332 * work for joining to joinrels too.
335 make_rels_by_clauseless_joins(Query *root,
337 ListCell *other_rels)
342 for_each_cell(i, other_rels)
344 RelOptInfo *other_rel = (RelOptInfo *) lfirst(i);
346 if (!bms_overlap(other_rel->relids, old_rel->relids))
350 jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
353 * As long as given other_rels are distinct, don't need to
354 * test to see if jrel is already part of output list.
357 result = lcons(jrel, result);
367 * Detect whether the specified relation is inside an IN (sub-SELECT).
369 * Note that we are actually only interested in rels that have been pulled up
370 * out of an IN, so the routine name is a slight misnomer.
373 is_inside_IN(Query *root, RelOptInfo *rel)
377 foreach(l, root->in_info_list)
379 InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
381 if (bms_is_subset(rel->relids, ininfo->righthand))
390 * Find or build a RelOptInfo join rel representing a specific
391 * jointree item. For JoinExprs, we only consider the construction
392 * path that corresponds exactly to what the user wrote.
395 make_jointree_rel(Query *root, Node *jtnode)
397 if (IsA(jtnode, RangeTblRef))
399 int varno = ((RangeTblRef *) jtnode)->rtindex;
401 return find_base_rel(root, varno);
403 else if (IsA(jtnode, FromExpr))
405 FromExpr *f = (FromExpr *) jtnode;
407 /* Recurse back to multi-way-join planner */
408 return make_fromexpr_rel(root, f);
410 else if (IsA(jtnode, JoinExpr))
412 JoinExpr *j = (JoinExpr *) jtnode;
418 lrel = make_jointree_rel(root, j->larg);
419 rrel = make_jointree_rel(root, j->rarg);
421 /* Make this join rel */
422 rel = make_join_rel(root, lrel, rrel, j->jointype);
424 if (rel == NULL) /* oops */
425 elog(ERROR, "invalid join order");
428 * Since we are only going to consider this one way to do it,
429 * we're done generating Paths for this joinrel and can now select
430 * the cheapest. In fact we *must* do so now, since next level up
435 #ifdef OPTIMIZER_DEBUG
436 debug_print_rel(root, rel);
442 elog(ERROR, "unrecognized node type: %d",
443 (int) nodeTag(jtnode));
444 return NULL; /* keep compiler quiet */
450 * Find or create a join RelOptInfo that represents the join of
451 * the two given rels, and add to it path information for paths
452 * created with the two rels as outer and inner rel.
453 * (The join rel may already contain paths generated from other
454 * pairs of rels that add up to the same set of base rels.)
456 * NB: will return NULL if attempted join is not valid. This can only
457 * happen when working with IN clauses that have been turned into joins.
460 make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
467 /* We should never try to join two overlapping sets of rels. */
468 Assert(!bms_overlap(rel1->relids, rel2->relids));
470 /* Construct Relids set that identifies the joinrel. */
471 joinrelids = bms_union(rel1->relids, rel2->relids);
474 * If we are implementing IN clauses as joins, there are some joins
475 * that are illegal. Check to see if the proposed join is trouble. We
476 * can skip the work if looking at an outer join, however, because
477 * only top-level joins might be affected.
479 if (jointype == JOIN_INNER)
483 foreach(l, root->in_info_list)
485 InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
488 * This IN clause is not relevant unless its RHS overlaps the
489 * proposed join. (Check this first as a fast path for dismissing
490 * most irrelevant INs quickly.)
492 if (!bms_overlap(ininfo->righthand, joinrelids))
496 * If we are still building the IN clause's RHS, then this IN
497 * clause isn't relevant yet.
499 if (bms_is_subset(joinrelids, ininfo->righthand))
503 * Cannot join if proposed join contains rels not in the RHS
504 * *and* contains only part of the RHS. We must build the
505 * complete RHS (subselect's join) before it can be joined to
506 * rels outside the subselect.
508 if (!bms_is_subset(ininfo->righthand, joinrelids))
510 bms_free(joinrelids);
515 * At this point we are considering a join of the IN's RHS to
518 * If we already joined IN's RHS to any other rels in either
519 * input path, then this join is not constrained (the necessary
520 * work was done at the lower level where that join occurred).
522 if (bms_is_subset(ininfo->righthand, rel1->relids) &&
523 !bms_equal(ininfo->righthand, rel1->relids))
525 if (bms_is_subset(ininfo->righthand, rel2->relids) &&
526 !bms_equal(ininfo->righthand, rel2->relids))
530 * JOIN_IN technique will work if outerrel includes LHS and
531 * innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles
534 * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS;
535 * conversely JOIN_UNIQUE_INNER will work if innerrel is
538 * But none of these will work if we already found another IN
539 * that needs to trigger here.
541 if (jointype != JOIN_INNER)
543 bms_free(joinrelids);
546 if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
547 bms_equal(ininfo->righthand, rel2->relids))
549 else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
550 bms_equal(ininfo->righthand, rel1->relids))
551 jointype = JOIN_REVERSE_IN;
552 else if (bms_equal(ininfo->righthand, rel1->relids))
553 jointype = JOIN_UNIQUE_OUTER;
554 else if (bms_equal(ininfo->righthand, rel2->relids))
555 jointype = JOIN_UNIQUE_INNER;
558 /* invalid join path */
559 bms_free(joinrelids);
566 * Find or build the join RelOptInfo, and compute the restrictlist
567 * that goes with this particular joining.
569 joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
573 * Consider paths using each rel as both outer and inner.
578 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER,
580 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER,
584 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT,
586 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT,
590 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL,
592 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL,
596 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT,
598 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
602 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
604 /* REVERSE_IN isn't supported by joinpath.c */
605 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
607 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
610 case JOIN_REVERSE_IN:
611 /* REVERSE_IN isn't supported by joinpath.c */
612 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
614 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
616 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
619 case JOIN_UNIQUE_OUTER:
620 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
622 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
625 case JOIN_UNIQUE_INNER:
626 add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
628 add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
632 elog(ERROR, "unrecognized join type: %d",
637 bms_free(joinrelids);