]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/joinrels.c
Reimplement the linked list data structure used throughout the backend.
[postgresql] / src / backend / optimizer / path / joinrels.c
1 /*-------------------------------------------------------------------------
2  *
3  * joinrels.c
4  *        Routines to determine which relations should be joined
5  *
6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.68 2004/05/26 04:41:22 neilc Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "optimizer/pathnode.h"
18 #include "optimizer/paths.h"
19
20
21 static List *make_rels_by_clause_joins(Query *root,
22                                                   RelOptInfo *old_rel,
23                                                   ListCell *other_rels);
24 static List *make_rels_by_clauseless_joins(Query *root,
25                                                           RelOptInfo *old_rel,
26                                                           ListCell *other_rels);
27 static bool is_inside_IN(Query *root, RelOptInfo *rel);
28
29
30 /*
31  * make_rels_by_joins
32  *        Consider ways to produce join relations containing exactly 'level'
33  *        jointree items.  (This is one step of the dynamic-programming method
34  *        embodied in make_one_rel_by_joins.)  Join rel nodes for each feasible
35  *        combination of lower-level rels are created and returned in a list.
36  *        Implementation paths are created for each such joinrel, too.
37  *
38  * level: level of rels we want to make this time.
39  * joinrels[j], 1 <= j < level, is a list of rels containing j items.
40  */
41 List *
42 make_rels_by_joins(Query *root, int level, List **joinrels)
43 {
44         List       *result_rels = NIL;
45         List       *new_rels;
46         ListCell   *nr;
47         ListCell   *r;
48         int                     k;
49
50         /*
51          * First, consider left-sided and right-sided plans, in which rels of
52          * exactly level-1 member relations are joined against initial
53          * relations. We prefer to join using join clauses, but if we find a
54          * rel of level-1 members that has no join clauses, we will generate
55          * Cartesian-product joins against all initial rels not already
56          * contained in it.
57          *
58          * In the first pass (level == 2), we try to join each initial rel to
59          * each initial rel that appears later in joinrels[1].  (The
60          * mirror-image joins are handled automatically by make_join_rel.)      In
61          * later passes, we try to join rels of size level-1 from
62          * joinrels[level-1] to each initial rel in joinrels[1].
63          */
64         foreach(r, joinrels[level - 1])
65         {
66                 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
67                 ListCell   *other_rels;
68
69                 if (level == 2)
70                         other_rels = lnext(r);          /* only consider remaining initial
71                                                                                  * rels */
72                 else
73                         other_rels = list_head(joinrels[1]);    /* consider all initial rels */
74
75                 if (old_rel->joininfo != NIL)
76                 {
77                         /*
78                          * Note that if all available join clauses for this rel
79                          * require more than one other rel, we will fail to make any
80                          * joins against it here.  In most cases that's OK; it'll be
81                          * considered by "bushy plan" join code in a higher-level pass
82                          * where we have those other rels collected into a join rel.
83                          */
84                         new_rels = make_rels_by_clause_joins(root,
85                                                                                                  old_rel,
86                                                                                                  other_rels);
87                         /*
88                          * An exception occurs when there is a clauseless join inside an
89                          * IN (sub-SELECT) construct.  Here, the members of the subselect
90                          * all have join clauses (against the stuff outside the IN), but
91                          * they *must* be joined to each other before we can make use of
92                          * those join clauses.  So do the clauseless join bit.
93                          *
94                          * See also the last-ditch case below.
95                          */
96                         if (new_rels == NIL && is_inside_IN(root, old_rel))
97                                 new_rels = make_rels_by_clauseless_joins(root,
98                                                                                                                  old_rel,
99                                                                                                                  other_rels);
100                 }
101                 else
102                 {
103                         /*
104                          * Oops, we have a relation that is not joined to any other
105                          * relation.  Cartesian product time.
106                          */
107                         new_rels = make_rels_by_clauseless_joins(root,
108                                                                                                          old_rel,
109                                                                                                          other_rels);
110                 }
111
112                 /*
113                  * At levels above 2 we will generate the same joined relation in
114                  * multiple ways --- for example (a join b) join c is the same
115                  * RelOptInfo as (b join c) join a, though the second case will
116                  * add a different set of Paths to it.  To avoid making extra work
117                  * for subsequent passes, do not enter the same RelOptInfo into
118                  * our output list multiple times.
119                  */
120                 foreach(nr, new_rels)
121                 {
122                         RelOptInfo *jrel = (RelOptInfo *) lfirst(nr);
123
124                         if (!ptrMember(jrel, result_rels))
125                                 result_rels = lcons(jrel, result_rels);
126                 }
127         }
128
129         /*
130          * Now, consider "bushy plans" in which relations of k initial rels
131          * are joined to relations of level-k initial rels, for 2 <= k <=
132          * level-2.
133          *
134          * We only consider bushy-plan joins for pairs of rels where there is a
135          * suitable join clause, in order to avoid unreasonable growth of
136          * planning time.
137          */
138         for (k = 2;; k++)
139         {
140                 int                     other_level = level - k;
141
142                 /*
143                  * Since make_join_rel(x, y) handles both x,y and y,x cases, we
144                  * only need to go as far as the halfway point.
145                  */
146                 if (k > other_level)
147                         break;
148
149                 foreach(r, joinrels[k])
150                 {
151                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
152                         ListCell   *other_rels;
153                         ListCell   *r2;
154
155                         if (old_rel->joininfo == NIL)
156                                 continue;               /* we ignore clauseless joins here */
157
158                         if (k == other_level)
159                                 other_rels = lnext(r);  /* only consider remaining rels */
160                         else
161                                 other_rels = list_head(joinrels[other_level]);
162
163                         for_each_cell(r2, other_rels)
164                         {
165                                 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
166
167                                 if (!bms_overlap(old_rel->relids, new_rel->relids))
168                                 {
169                                         ListCell   *i;
170
171                                         /*
172                                          * OK, we can build a rel of the right level from this
173                                          * pair of rels.  Do so if there is at least one
174                                          * usable join clause.
175                                          */
176                                         foreach(i, old_rel->joininfo)
177                                         {
178                                                 JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
179
180                                                 if (bms_is_subset(joininfo->unjoined_relids,
181                                                                                   new_rel->relids))
182                                                 {
183                                                         RelOptInfo *jrel;
184
185                                                         jrel = make_join_rel(root, old_rel, new_rel,
186                                                                                                  JOIN_INNER);
187                                                         /* Avoid making duplicate entries ... */
188                                                         if (jrel && !ptrMember(jrel, result_rels))
189                                                                 result_rels = lcons(jrel, result_rels);
190                                                         break;          /* need not consider more
191                                                                                  * joininfos */
192                                                 }
193                                         }
194                                 }
195                         }
196                 }
197         }
198
199         /*
200          * Last-ditch effort: if we failed to find any usable joins so far,
201          * force a set of cartesian-product joins to be generated.      This
202          * handles the special case where all the available rels have join
203          * clauses but we cannot use any of the joins yet.      An example is
204          *
205          * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0;
206          *
207          * The join clause will be usable at level 3, but at level 2 we have no
208          * choice but to make cartesian joins.  We consider only left-sided
209          * and right-sided cartesian joins in this case (no bushy).
210          */
211         if (result_rels == NIL)
212         {
213                 /*
214                  * This loop is just like the first one, except we always call
215                  * make_rels_by_clauseless_joins().
216                  */
217                 foreach(r, joinrels[level - 1])
218                 {
219                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
220                         ListCell   *other_rels;
221
222                         if (level == 2)
223                                 other_rels = lnext(r);  /* only consider remaining initial
224                                                                                  * rels */
225                         else
226                                 other_rels = list_head(joinrels[1]); /* consider all initial
227                                                                                                           * rels */
228
229                         new_rels = make_rels_by_clauseless_joins(root,
230                                                                                                          old_rel,
231                                                                                                          other_rels);
232
233                         foreach(nr, new_rels)
234                         {
235                                 RelOptInfo *jrel = (RelOptInfo *) lfirst(nr);
236
237                                 if (!ptrMember(jrel, result_rels))
238                                         result_rels = lcons(jrel, result_rels);
239                         }
240                 }
241
242                 /*----------
243                  * When IN clauses are involved, there may be no legal way to make
244                  * an N-way join for some values of N.  For example consider
245                  *
246                  * SELECT ... FROM t1 WHERE
247                  *   x IN (SELECT ... FROM t2,t3 WHERE ...) AND
248                  *   y IN (SELECT ... FROM t4,t5 WHERE ...)
249                  *
250                  * We will flatten this query to a 5-way join problem, but there are
251                  * no 4-way joins that make_join_rel() will consider legal.  We have
252                  * to accept failure at level 4 and go on to discover a workable
253                  * bushy plan at level 5.
254                  *
255                  * However, if there are no IN clauses then make_join_rel() should
256                  * never fail, and so the following sanity check is useful.
257                  *----------
258                  */
259                 if (result_rels == NIL && root->in_info_list == NIL)
260                         elog(ERROR, "failed to build any %d-way joins", level);
261         }
262
263         return result_rels;
264 }
265
266 /*
267  * make_rels_by_clause_joins
268  *        Build joins between the given relation 'old_rel' and other relations
269  *        that are mentioned within old_rel's joininfo nodes (i.e., relations
270  *        that participate in join clauses that 'old_rel' also participates in).
271  *        The join rel nodes are returned in a list.
272  *
273  * 'old_rel' is the relation entry for the relation to be joined
274  * 'other_rels': the first cell in a linked list containing the other
275  * rels to be considered for joining
276  *
277  * Currently, this is only used with initial rels in other_rels, but it
278  * will work for joining to joinrels too, if the caller ensures there is no
279  * membership overlap between old_rel and the rels in other_rels.  (We need
280  * no extra test for overlap for initial rels, since the is_subset test can
281  * only succeed when other_rel is not already part of old_rel.)
282  */
283 static List *
284 make_rels_by_clause_joins(Query *root,
285                                                   RelOptInfo *old_rel,
286                                                   ListCell *other_rels)
287 {
288         List       *result = NIL;
289         ListCell   *i,
290                            *j;
291
292         foreach(i, old_rel->joininfo)
293         {
294                 JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
295                 Relids          unjoined_relids = joininfo->unjoined_relids;
296
297                 for_each_cell(j, other_rels)
298                 {
299                         RelOptInfo *other_rel = (RelOptInfo *) lfirst(j);
300
301                         if (bms_is_subset(unjoined_relids, other_rel->relids))
302                         {
303                                 RelOptInfo *jrel;
304
305                                 jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
306
307                                 /*
308                                  * Avoid entering same joinrel into our output list more
309                                  * than once.
310                                  */
311                                 if (jrel && !ptrMember(jrel, result))
312                                         result = lcons(jrel, result);
313                         }
314                 }
315         }
316
317         return result;
318 }
319
320 /*
321  * make_rels_by_clauseless_joins
322  *        Given a relation 'old_rel' and a list of other relations
323  *        'other_rels', create a join relation between 'old_rel' and each
324  *        member of 'other_rels' that isn't already included in 'old_rel'.
325  *        The join rel nodes are returned in a list.
326  *
327  * 'old_rel' is the relation entry for the relation to be joined
328  * 'other_rels': the first cell of a linked list containing the
329  * other rels to be considered for joining
330  *
331  * Currently, this is only used with initial rels in other_rels, but it would
332  * work for joining to joinrels too.
333  */
334 static List *
335 make_rels_by_clauseless_joins(Query *root,
336                                                           RelOptInfo *old_rel,
337                                                           ListCell *other_rels)
338 {
339         List       *result = NIL;
340         ListCell   *i;
341
342         for_each_cell(i, other_rels)
343         {
344                 RelOptInfo *other_rel = (RelOptInfo *) lfirst(i);
345
346                 if (!bms_overlap(other_rel->relids, old_rel->relids))
347                 {
348                         RelOptInfo *jrel;
349
350                         jrel = make_join_rel(root, old_rel, other_rel, JOIN_INNER);
351
352                         /*
353                          * As long as given other_rels are distinct, don't need to
354                          * test to see if jrel is already part of output list.
355                          */
356                         if (jrel)
357                                 result = lcons(jrel, result);
358                 }
359         }
360
361         return result;
362 }
363
364
365 /*
366  * is_inside_IN
367  *              Detect whether the specified relation is inside an IN (sub-SELECT).
368  *
369  * Note that we are actually only interested in rels that have been pulled up
370  * out of an IN, so the routine name is a slight misnomer.
371  */
372 static bool
373 is_inside_IN(Query *root, RelOptInfo *rel)
374 {
375         ListCell   *l;
376
377         foreach(l, root->in_info_list)
378         {
379                 InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
380
381                 if (bms_is_subset(rel->relids, ininfo->righthand))
382                         return true;
383         }
384         return false;
385 }
386
387
388 /*
389  * make_jointree_rel
390  *              Find or build a RelOptInfo join rel representing a specific
391  *              jointree item.  For JoinExprs, we only consider the construction
392  *              path that corresponds exactly to what the user wrote.
393  */
394 RelOptInfo *
395 make_jointree_rel(Query *root, Node *jtnode)
396 {
397         if (IsA(jtnode, RangeTblRef))
398         {
399                 int                     varno = ((RangeTblRef *) jtnode)->rtindex;
400
401                 return find_base_rel(root, varno);
402         }
403         else if (IsA(jtnode, FromExpr))
404         {
405                 FromExpr   *f = (FromExpr *) jtnode;
406
407                 /* Recurse back to multi-way-join planner */
408                 return make_fromexpr_rel(root, f);
409         }
410         else if (IsA(jtnode, JoinExpr))
411         {
412                 JoinExpr   *j = (JoinExpr *) jtnode;
413                 RelOptInfo *rel,
414                                    *lrel,
415                                    *rrel;
416
417                 /* Recurse */
418                 lrel = make_jointree_rel(root, j->larg);
419                 rrel = make_jointree_rel(root, j->rarg);
420
421                 /* Make this join rel */
422                 rel = make_join_rel(root, lrel, rrel, j->jointype);
423
424                 if (rel == NULL)                /* oops */
425                         elog(ERROR, "invalid join order");
426
427                 /*
428                  * Since we are only going to consider this one way to do it,
429                  * we're done generating Paths for this joinrel and can now select
430                  * the cheapest.  In fact we *must* do so now, since next level up
431                  * will need it!
432                  */
433                 set_cheapest(rel);
434
435 #ifdef OPTIMIZER_DEBUG
436                 debug_print_rel(root, rel);
437 #endif
438
439                 return rel;
440         }
441         else
442                 elog(ERROR, "unrecognized node type: %d",
443                          (int) nodeTag(jtnode));
444         return NULL;                            /* keep compiler quiet */
445 }
446
447
448 /*
449  * make_join_rel
450  *         Find or create a join RelOptInfo that represents the join of
451  *         the two given rels, and add to it path information for paths
452  *         created with the two rels as outer and inner rel.
453  *         (The join rel may already contain paths generated from other
454  *         pairs of rels that add up to the same set of base rels.)
455  *
456  * NB: will return NULL if attempted join is not valid.  This can only
457  * happen when working with IN clauses that have been turned into joins.
458  */
459 RelOptInfo *
460 make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
461                           JoinType jointype)
462 {
463         Relids          joinrelids;
464         RelOptInfo *joinrel;
465         List       *restrictlist;
466
467         /* We should never try to join two overlapping sets of rels. */
468         Assert(!bms_overlap(rel1->relids, rel2->relids));
469
470         /* Construct Relids set that identifies the joinrel. */
471         joinrelids = bms_union(rel1->relids, rel2->relids);
472
473         /*
474          * If we are implementing IN clauses as joins, there are some joins
475          * that are illegal.  Check to see if the proposed join is trouble. We
476          * can skip the work if looking at an outer join, however, because
477          * only top-level joins might be affected.
478          */
479         if (jointype == JOIN_INNER)
480         {
481                 ListCell   *l;
482
483                 foreach(l, root->in_info_list)
484                 {
485                         InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
486
487                         /*
488                          * This IN clause is not relevant unless its RHS overlaps the
489                          * proposed join.  (Check this first as a fast path for dismissing
490                          * most irrelevant INs quickly.)
491                          */
492                         if (!bms_overlap(ininfo->righthand, joinrelids))
493                                 continue;
494
495                         /*
496                          * If we are still building the IN clause's RHS, then this IN
497                          * clause isn't relevant yet.
498                          */
499                         if (bms_is_subset(joinrelids, ininfo->righthand))
500                                 continue;
501
502                         /*
503                          * Cannot join if proposed join contains rels not in the RHS
504                          * *and* contains only part of the RHS.  We must build the
505                          * complete RHS (subselect's join) before it can be joined to
506                          * rels outside the subselect.
507                          */
508                         if (!bms_is_subset(ininfo->righthand, joinrelids))
509                         {
510                                 bms_free(joinrelids);
511                                 return NULL;
512                         }
513
514                         /*
515                          * At this point we are considering a join of the IN's RHS to
516                          * some other rel(s).
517                          *
518                          * If we already joined IN's RHS to any other rels in either
519                          * input path, then this join is not constrained (the necessary
520                          * work was done at the lower level where that join occurred).
521                          */
522                         if (bms_is_subset(ininfo->righthand, rel1->relids) &&
523                                 !bms_equal(ininfo->righthand, rel1->relids))
524                                 continue;
525                         if (bms_is_subset(ininfo->righthand, rel2->relids) &&
526                                 !bms_equal(ininfo->righthand, rel2->relids))
527                                 continue;
528
529                         /*
530                          * JOIN_IN technique will work if outerrel includes LHS and
531                          * innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles
532                          * RHS/LHS.
533                          *
534                          * JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS;
535                          * conversely JOIN_UNIQUE_INNER will work if innerrel is
536                          * exactly RHS.
537                          *
538                          * But none of these will work if we already found another IN
539                          * that needs to trigger here.
540                          */
541                         if (jointype != JOIN_INNER)
542                         {
543                                 bms_free(joinrelids);
544                                 return NULL;
545                         }
546                         if (bms_is_subset(ininfo->lefthand, rel1->relids) &&
547                                 bms_equal(ininfo->righthand, rel2->relids))
548                                 jointype = JOIN_IN;
549                         else if (bms_is_subset(ininfo->lefthand, rel2->relids) &&
550                                          bms_equal(ininfo->righthand, rel1->relids))
551                                 jointype = JOIN_REVERSE_IN;
552                         else if (bms_equal(ininfo->righthand, rel1->relids))
553                                 jointype = JOIN_UNIQUE_OUTER;
554                         else if (bms_equal(ininfo->righthand, rel2->relids))
555                                 jointype = JOIN_UNIQUE_INNER;
556                         else
557                         {
558                                 /* invalid join path */
559                                 bms_free(joinrelids);
560                                 return NULL;
561                         }
562                 }
563         }
564
565         /*
566          * Find or build the join RelOptInfo, and compute the restrictlist
567          * that goes with this particular joining.
568          */
569         joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
570                                                          &restrictlist);
571
572         /*
573          * Consider paths using each rel as both outer and inner.
574          */
575         switch (jointype)
576         {
577                 case JOIN_INNER:
578                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_INNER,
579                                                                  restrictlist);
580                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_INNER,
581                                                                  restrictlist);
582                         break;
583                 case JOIN_LEFT:
584                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_LEFT,
585                                                                  restrictlist);
586                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_RIGHT,
587                                                                  restrictlist);
588                         break;
589                 case JOIN_FULL:
590                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_FULL,
591                                                                  restrictlist);
592                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_FULL,
593                                                                  restrictlist);
594                         break;
595                 case JOIN_RIGHT:
596                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_RIGHT,
597                                                                  restrictlist);
598                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
599                                                                  restrictlist);
600                         break;
601                 case JOIN_IN:
602                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
603                                                                  restrictlist);
604                         /* REVERSE_IN isn't supported by joinpath.c */
605                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
606                                                                  restrictlist);
607                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
608                                                                  restrictlist);
609                         break;
610                 case JOIN_REVERSE_IN:
611                         /* REVERSE_IN isn't supported by joinpath.c */
612                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
613                                                                  restrictlist);
614                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
615                                                                  restrictlist);
616                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
617                                                                  restrictlist);
618                         break;
619                 case JOIN_UNIQUE_OUTER:
620                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
621                                                                  restrictlist);
622                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
623                                                                  restrictlist);
624                         break;
625                 case JOIN_UNIQUE_INNER:
626                         add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
627                                                                  restrictlist);
628                         add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
629                                                                  restrictlist);
630                         break;
631                 default:
632                         elog(ERROR, "unrecognized join type: %d",
633                                  (int) jointype);
634                         break;
635         }
636
637         bms_free(joinrelids);
638
639         return joinrel;
640 }