]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/joinrels.c
Refactor planner's pathkeys data structure to create a separate, explicit
[postgresql] / src / backend / optimizer / path / joinrels.c
1 /*-------------------------------------------------------------------------
2  *
3  * joinrels.c
4  *        Routines to determine which relations should be joined
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.84 2007/01/20 20:45:39 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "optimizer/joininfo.h"
18 #include "optimizer/pathnode.h"
19 #include "optimizer/paths.h"
20
21
22 static List *make_rels_by_clause_joins(PlannerInfo *root,
23                                                   RelOptInfo *old_rel,
24                                                   ListCell *other_rels);
25 static List *make_rels_by_clauseless_joins(PlannerInfo *root,
26                                                           RelOptInfo *old_rel,
27                                                           ListCell *other_rels);
28 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
29
30
31 /*
32  * make_rels_by_joins
33  *        Consider ways to produce join relations containing exactly 'level'
34  *        jointree items.  (This is one step of the dynamic-programming method
35  *        embodied in make_one_rel_by_joins.)  Join rel nodes for each feasible
36  *        combination of lower-level rels are created and returned in a list.
37  *        Implementation paths are created for each such joinrel, too.
38  *
39  * level: level of rels we want to make this time.
40  * joinrels[j], 1 <= j < level, is a list of rels containing j items.
41  */
42 List *
43 make_rels_by_joins(PlannerInfo *root, int level, List **joinrels)
44 {
45         List       *result_rels = NIL;
46         List       *new_rels;
47         ListCell   *r;
48         int                     k;
49
50         /*
51          * First, consider left-sided and right-sided plans, in which rels of
52          * exactly level-1 member relations are joined against initial relations.
53          * We prefer to join using join clauses, but if we find a rel of level-1
54          * members that has no join clauses, we will generate Cartesian-product
55          * joins against all initial rels not already contained in it.
56          *
57          * In the first pass (level == 2), we try to join each initial rel to each
58          * initial rel that appears later in joinrels[1].  (The mirror-image joins
59          * are handled automatically by make_join_rel.)  In later passes, we try
60          * to join rels of size level-1 from joinrels[level-1] to each initial rel
61          * in joinrels[1].
62          */
63         foreach(r, joinrels[level - 1])
64         {
65                 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
66                 ListCell   *other_rels;
67
68                 if (level == 2)
69                         other_rels = lnext(r);          /* only consider remaining initial
70                                                                                  * rels */
71                 else
72                         other_rels = list_head(joinrels[1]);            /* consider all initial
73                                                                                                                  * rels */
74
75                 if (old_rel->joininfo != NIL || old_rel->has_eclass_joins)
76                 {
77                         /*
78                          * Note that if all available join clauses for this rel require
79                          * more than one other rel, we will fail to make any joins against
80                          * it here.  In most cases that's OK; it'll be considered by
81                          * "bushy plan" join code in a higher-level pass where we have
82                          * those other rels collected into a join rel.
83                          */
84                         new_rels = make_rels_by_clause_joins(root,
85                                                                                                  old_rel,
86                                                                                                  other_rels);
87
88                         /*
89                          * An exception occurs when there is a clauseless join inside a
90                          * construct that restricts join order, i.e., an outer join or
91                          * an IN (sub-SELECT) construct.  Here, the rel may well have join
92                          * clauses against stuff outside its OJ side or IN sub-SELECT, but
93                          * the clauseless join *must* be done before we can make use of
94                          * those join clauses.   So do the clauseless join bit.
95                          *
96                          * See also the last-ditch case below.
97                          */
98                         if (new_rels == NIL && has_join_restriction(root, old_rel))
99                                 new_rels = make_rels_by_clauseless_joins(root,
100                                                                                                                  old_rel,
101                                                                                                                  other_rels);
102                 }
103                 else
104                 {
105                         /*
106                          * Oops, we have a relation that is not joined to any other
107                          * relation.  Cartesian product time.
108                          */
109                         new_rels = make_rels_by_clauseless_joins(root,
110                                                                                                          old_rel,
111                                                                                                          other_rels);
112                 }
113
114                 /*
115                  * At levels above 2 we will generate the same joined relation in
116                  * multiple ways --- for example (a join b) join c is the same
117                  * RelOptInfo as (b join c) join a, though the second case will add a
118                  * different set of Paths to it.  To avoid making extra work for
119                  * subsequent passes, do not enter the same RelOptInfo into our output
120                  * list multiple times.
121                  */
122                 result_rels = list_concat_unique_ptr(result_rels, new_rels);
123         }
124
125         /*
126          * Now, consider "bushy plans" in which relations of k initial rels are
127          * joined to relations of level-k initial rels, for 2 <= k <= level-2.
128          *
129          * We only consider bushy-plan joins for pairs of rels where there is a
130          * suitable join clause, in order to avoid unreasonable growth of planning
131          * time.
132          */
133         for (k = 2;; k++)
134         {
135                 int                     other_level = level - k;
136
137                 /*
138                  * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
139                  * need to go as far as the halfway point.
140                  */
141                 if (k > other_level)
142                         break;
143
144                 foreach(r, joinrels[k])
145                 {
146                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
147                         ListCell   *other_rels;
148                         ListCell   *r2;
149
150                         /*
151                          * We can ignore clauseless joins here, *except* when there are
152                          * outer joins --- then we might have to force a bushy outer
153                          * join.  See have_relevant_joinclause().
154                          */
155                         if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
156                                 root->oj_info_list == NIL)
157                                 continue;
158
159                         if (k == other_level)
160                                 other_rels = lnext(r);  /* only consider remaining rels */
161                         else
162                                 other_rels = list_head(joinrels[other_level]);
163
164                         for_each_cell(r2, other_rels)
165                         {
166                                 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
167
168                                 if (!bms_overlap(old_rel->relids, new_rel->relids))
169                                 {
170                                         /*
171                                          * OK, we can build a rel of the right level from this
172                                          * pair of rels.  Do so if there is at least one usable
173                                          * join clause.
174                                          */
175                                         if (have_relevant_joinclause(root, old_rel, new_rel))
176                                         {
177                                                 RelOptInfo *jrel;
178
179                                                 jrel = make_join_rel(root, old_rel, new_rel);
180                                                 /* Avoid making duplicate entries ... */
181                                                 if (jrel)
182                                                         result_rels = list_append_unique_ptr(result_rels,
183                                                                                                                                  jrel);
184                                         }
185                                 }
186                         }
187                 }
188         }
189
190         /*
191          * Last-ditch effort: if we failed to find any usable joins so far, force
192          * a set of cartesian-product joins to be generated.  This handles the
193          * special case where all the available rels have join clauses but we
194          * cannot use any of the joins yet.  An example is
195          *
196          * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0;
197          *
198          * The join clause will be usable at level 3, but at level 2 we have no
199          * choice but to make cartesian joins.  We consider only left-sided and
200          * right-sided cartesian joins in this case (no bushy).
201          */
202         if (result_rels == NIL)
203         {
204                 /*
205                  * This loop is just like the first one, except we always call
206                  * make_rels_by_clauseless_joins().
207                  */
208                 foreach(r, joinrels[level - 1])
209                 {
210                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
211                         ListCell   *other_rels;
212
213                         if (level == 2)
214                                 other_rels = lnext(r);  /* only consider remaining initial
215                                                                                  * rels */
216                         else
217                                 other_rels = list_head(joinrels[1]);    /* consider all initial
218                                                                                                                  * rels */
219
220                         new_rels = make_rels_by_clauseless_joins(root,
221                                                                                                          old_rel,
222                                                                                                          other_rels);
223
224                         result_rels = list_concat_unique_ptr(result_rels, new_rels);
225                 }
226
227                 /*----------
228                  * When OJs or IN clauses are involved, there may be no legal way
229                  * to make an N-way join for some values of N.  For example consider
230                  *
231                  * SELECT ... FROM t1 WHERE
232                  *       x IN (SELECT ... FROM t2,t3 WHERE ...) AND
233                  *       y IN (SELECT ... FROM t4,t5 WHERE ...)
234                  *
235                  * We will flatten this query to a 5-way join problem, but there are
236                  * no 4-way joins that make_join_rel() will consider legal.  We have
237                  * to accept failure at level 4 and go on to discover a workable
238                  * bushy plan at level 5.
239                  *
240                  * However, if there are no such clauses then make_join_rel() should
241                  * never fail, and so the following sanity check is useful.
242                  *----------
243                  */
244                 if (result_rels == NIL &&
245                         root->oj_info_list == NIL && root->in_info_list == NIL)
246                         elog(ERROR, "failed to build any %d-way joins", level);
247         }
248
249         return result_rels;
250 }
251
252 /*
253  * make_rels_by_clause_joins
254  *        Build joins between the given relation 'old_rel' and other relations
255  *        that participate in join clauses that 'old_rel' also participates in.
256  *        The join rel nodes are returned in a list.
257  *
258  * 'old_rel' is the relation entry for the relation to be joined
259  * 'other_rels': the first cell in a linked list containing the other
260  * rels to be considered for joining
261  *
262  * Currently, this is only used with initial rels in other_rels, but it
263  * will work for joining to joinrels too.
264  */
265 static List *
266 make_rels_by_clause_joins(PlannerInfo *root,
267                                                   RelOptInfo *old_rel,
268                                                   ListCell *other_rels)
269 {
270         List       *result = NIL;
271         ListCell   *l;
272
273         for_each_cell(l, other_rels)
274         {
275                 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
276
277                 if (!bms_overlap(old_rel->relids, other_rel->relids) &&
278                         have_relevant_joinclause(root, old_rel, other_rel))
279                 {
280                         RelOptInfo *jrel;
281
282                         jrel = make_join_rel(root, old_rel, other_rel);
283                         if (jrel)
284                                 result = lcons(jrel, result);
285                 }
286         }
287
288         return result;
289 }
290
291 /*
292  * make_rels_by_clauseless_joins
293  *        Given a relation 'old_rel' and a list of other relations
294  *        'other_rels', create a join relation between 'old_rel' and each
295  *        member of 'other_rels' that isn't already included in 'old_rel'.
296  *        The join rel nodes are returned in a list.
297  *
298  * 'old_rel' is the relation entry for the relation to be joined
299  * 'other_rels': the first cell of a linked list containing the
300  * other rels to be considered for joining
301  *
302  * Currently, this is only used with initial rels in other_rels, but it would
303  * work for joining to joinrels too.
304  */
305 static List *
306 make_rels_by_clauseless_joins(PlannerInfo *root,
307                                                           RelOptInfo *old_rel,
308                                                           ListCell *other_rels)
309 {
310         List       *result = NIL;
311         ListCell   *i;
312
313         for_each_cell(i, other_rels)
314         {
315                 RelOptInfo *other_rel = (RelOptInfo *) lfirst(i);
316
317                 if (!bms_overlap(other_rel->relids, old_rel->relids))
318                 {
319                         RelOptInfo *jrel;
320
321                         jrel = make_join_rel(root, old_rel, other_rel);
322
323                         /*
324                          * As long as given other_rels are distinct, don't need to test to
325                          * see if jrel is already part of output list.
326                          */
327                         if (jrel)
328                                 result = lcons(jrel, result);
329                 }
330         }
331
332         return result;
333 }
334
335
336 /*
337  * has_join_restriction
338  *              Detect whether the specified relation has join-order restrictions
339  *              due to being inside an outer join or an IN (sub-SELECT).
340  */
341 static bool
342 has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
343 {
344         ListCell   *l;
345
346         foreach(l, root->oj_info_list)
347         {
348                 OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
349
350                 /* ignore full joins --- other mechanisms preserve their ordering */
351                 if (ojinfo->is_full_join)
352                         continue;
353                 /* anything inside the RHS is definitely restricted */
354                 if (bms_is_subset(rel->relids, ojinfo->min_righthand))
355                         return true;
356                 /* if it's a proper subset of the LHS, it's also restricted */
357                 if (bms_is_subset(rel->relids, ojinfo->min_lefthand) &&
358                         !bms_equal(rel->relids, ojinfo->min_lefthand))
359                         return true;
360         }
361
362         foreach(l, root->in_info_list)
363         {
364                 InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
365
366                 if (bms_is_subset(rel->relids, ininfo->righthand))
367                         return true;
368         }
369         return false;
370 }
371
372
373 /*
374  * make_join_rel
375  *         Find or create a join RelOptInfo that represents the join of
376  *         the two given rels, and add to it path information for paths
377  *         created with the two rels as outer and inner rel.
378  *         (The join rel may already contain paths generated from other
379  *         pairs of rels that add up to the same set of base rels.)
380  *
381  * NB: will return NULL if attempted join is not valid.  This can happen
382  * when working with outer joins, or with IN clauses that have been turned
383  * into joins.
384  */
385 RelOptInfo *
386 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
387 {
388         Relids          joinrelids;
389         JoinType        jointype;
390         bool            is_valid_inner;
391         RelOptInfo *joinrel;
392         List       *restrictlist;
393         ListCell   *l;
394
395         /* We should never try to join two overlapping sets of rels. */
396         Assert(!bms_overlap(rel1->relids, rel2->relids));
397
398         /* Construct Relids set that identifies the joinrel. */
399         joinrelids = bms_union(rel1->relids, rel2->relids);
400
401         /*
402          * If we have any outer joins, the proposed join might be illegal; and in
403          * any case we have to determine its join type.  Scan the OJ list for
404          * conflicts.
405          */
406         jointype = JOIN_INNER;          /* default if no match to an OJ */
407         is_valid_inner = true;
408
409         foreach(l, root->oj_info_list)
410         {
411                 OuterJoinInfo *ojinfo = (OuterJoinInfo *) lfirst(l);
412
413                 /*
414                  * This OJ is not relevant unless its RHS overlaps the proposed join.
415                  * (Check this first as a fast path for dismissing most irrelevant OJs
416                  * quickly.)
417                  */
418                 if (!bms_overlap(ojinfo->min_righthand, joinrelids))
419                         continue;
420
421                 /*
422                  * Also, not relevant if proposed join is fully contained within RHS
423                  * (ie, we're still building up the RHS).
424                  */
425                 if (bms_is_subset(joinrelids, ojinfo->min_righthand))
426                         continue;
427
428                 /*
429                  * Also, not relevant if OJ is already done within either input.
430                  */
431                 if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
432                         bms_is_subset(ojinfo->min_righthand, rel1->relids))
433                         continue;
434                 if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
435                         bms_is_subset(ojinfo->min_righthand, rel2->relids))
436                         continue;
437
438                 /*
439                  * If one input contains min_lefthand and the other contains
440                  * min_righthand, then we can perform the OJ at this join.
441                  *
442                  * Barf if we get matches to more than one OJ (is that possible?)
443                  */
444                 if (bms_is_subset(ojinfo->min_lefthand, rel1->relids) &&
445                         bms_is_subset(ojinfo->min_righthand, rel2->relids))
446                 {
447                         if (jointype != JOIN_INNER)
448                         {
449                                 /* invalid join path */
450                                 bms_free(joinrelids);
451                                 return NULL;
452                         }
453                         jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_LEFT;
454                 }
455                 else if (bms_is_subset(ojinfo->min_lefthand, rel2->relids) &&
456                                  bms_is_subset(ojinfo->min_righthand, rel1->relids))
457                 {
458                         if (jointype != JOIN_INNER)
459                         {
460                                 /* invalid join path */
461                                 bms_free(joinrelids);
462                                 return NULL;
463                         }
464                         jointype = ojinfo->is_full_join ? JOIN_FULL : JOIN_RIGHT;
465                 }
466                 else
467                 {
468                         /*----------
469                          * Otherwise, the proposed join overlaps the RHS but isn't
470                          * a valid implementation of this OJ.  It might still be
471                          * a valid implementation of some other OJ, however.  We have
472                          * to allow this to support the associative identity
473                          *      (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
474                          * since joining B directly to C violates the lower OJ's RHS.
475                          * We assume that make_outerjoininfo() set things up correctly
476                          * so that we'll only match to the upper OJ if the transformation
477                          * is valid.  Set flag here to check at bottom of loop.
478                          *----------
479                          */
480                         is_valid_inner = false;
481                 }
482         }
483
484         /* Fail if violated some OJ's RHS and didn't match to another OJ */
485         if (jointype == JOIN_INNER && !is_valid_inner)
486         {
487                 /* invalid join path */
488                 bms_free(joinrelids);
489                 return NULL;
490         }
491
492         /*
493          * Similarly, if we are implementing IN clauses as joins, check for
494          * illegal join path and detect whether we need a non-default join type.
495          */
496         foreach(l, root->in_info_list)
497         {
498                 InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
499
500                 /*
501                  * This IN clause is not relevant unless its RHS overlaps the proposed
502                  * join.  (Check this first as a fast path for dismissing most
503                  * irrelevant INs quickly.)
504                  */
505                 if (!bms_overlap(ininfo->righthand, joinrelids))
506                         continue;
507
508                 /*
509                  * If we are still building the IN clause's RHS, then this IN clause
510                  * isn't relevant yet.
511                  */
512                 if (bms_is_subset(joinrelids, ininfo->righthand))
513                         continue;
514
515                 /*
516                  * Cannot join if proposed join contains rels not in the RHS *and*
517                  * contains only part of the RHS.  We must build the complete RHS
518                  * (subselect's join) before it can be joined to rels outside the
519                  * subselect.
520                  */
521                 if (!bms_is_subset(ininfo->righthand, joinrelids))
522                 {
523                         bms_free(joinrelids);
524                         return NULL;
525                 }
526
527                 /*
528                  * At this point we are considering a join of the IN's RHS to some
529                  * other rel(s).
530                  *
531                  * If we already joined IN's RHS to any other rels in either input
532                  * path, then this join is not constrained (the necessary work was
533                  * done at the lower level where that join occurred).
534                  */
535                 if (bms_is_subset(ininfo->righthand, rel1->relids) &&
536                         !bms_equal(ininfo->righthand, rel1->relids))
537                         continue;
538                 if (bms_is_subset(ininfo->righthand, rel2->relids) &&
539                         !bms_equal(ininfo->righthand, rel2->relids))
540                         continue;
541
542                 /*
543                  * JOIN_IN technique will work if outerrel includes LHS and innerrel
544                  * is exactly RHS; conversely JOIN_REVERSE_IN handles RHS/LHS.
545                  *
546                  * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS; conversely
547                  * JOIN_UNIQUE_INNER will work if innerrel is exactly RHS.
548                  *
549                  * But none of these will work if we already found an OJ or another IN
550                  * that needs to trigger here.
551                  */
552                 if (jointype != JOIN_INNER)
553                 {
554                         bms_free(joinrelids);
555                         return NULL;
556                 }
557                 if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
558                         bms_equal(ininfo->righthand, rel2->relids))
559                         jointype = JOIN_IN;
560                 else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
561                                  bms_equal(ininfo->righthand, rel1->relids))
562                         jointype = JOIN_REVERSE_IN;
563                 else if (bms_equal(ininfo->righthand, rel1->relids))
564                         jointype = JOIN_UNIQUE_OUTER;
565                 else if (bms_equal(ininfo->righthand, rel2->relids))
566                         jointype = JOIN_UNIQUE_INNER;
567                 else
568                 {
569                         /* invalid join path */
570                         bms_free(joinrelids);
571                         return NULL;
572                 }
573         }
574
575         /*
576          * Find or build the join RelOptInfo, and compute the restrictlist that
577          * goes with this particular joining.
578          */
579         joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
580                                                          &restrictlist);
581
582         /*
583          * Consider paths using each rel as both outer and inner.
584          */
585         switch (jointype)
586         {
587                 case JOIN_INNER:
588                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER,
589                                                                  restrictlist);
590                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER,
591                                                                  restrictlist);
592                         break;
593                 case JOIN_LEFT:
594                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT,
595                                                                  restrictlist);
596                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT,
597                                                                  restrictlist);
598                         break;
599                 case JOIN_FULL:
600                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL,
601                                                                  restrictlist);
602                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL,
603                                                                  restrictlist);
604                         break;
605                 case JOIN_RIGHT:
606                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT,
607                                                                  restrictlist);
608                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
609                                                                  restrictlist);
610                         break;
611                 case JOIN_IN:
612                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
613                                                                  restrictlist);
614                         /* REVERSE_IN isn't supported by joinpath.c */
615                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
616                                                                  restrictlist);
617                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
618                                                                  restrictlist);
619                         break;
620                 case JOIN_REVERSE_IN:
621                         /* REVERSE_IN isn't supported by joinpath.c */
622                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
623                                                                  restrictlist);
624                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
625                                                                  restrictlist);
626                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
627                                                                  restrictlist);
628                         break;
629                 case JOIN_UNIQUE_OUTER:
630                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
631                                                                  restrictlist);
632                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
633                                                                  restrictlist);
634                         break;
635                 case JOIN_UNIQUE_INNER:
636                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
637                                                                  restrictlist);
638                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
639                                                                  restrictlist);
640                         break;
641                 default:
642                         elog(ERROR, "unrecognized join type: %d",
643                                  (int) jointype);
644                         break;
645         }
646
647         bms_free(joinrelids);
648
649         return joinrel;
650 }