]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/joinrels.c
Implement SEMI and ANTI joins in the planner and executor. (Semijoins replace
[postgresql] / src / backend / optimizer / path / joinrels.c
1 /*-------------------------------------------------------------------------
2  *
3  * joinrels.c
4  *        Routines to determine which relations should be joined
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/joinrels.c,v 1.93 2008/08/14 18:47:59 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "optimizer/joininfo.h"
18 #include "optimizer/pathnode.h"
19 #include "optimizer/paths.h"
20
21
22 static List *make_rels_by_clause_joins(PlannerInfo *root,
23                                                   RelOptInfo *old_rel,
24                                                   ListCell *other_rels);
25 static List *make_rels_by_clauseless_joins(PlannerInfo *root,
26                                                           RelOptInfo *old_rel,
27                                                           ListCell *other_rels);
28 static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
29 static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
30 static bool is_dummy_rel(RelOptInfo *rel);
31 static void mark_dummy_join(RelOptInfo *rel);
32
33
34 /*
35  * join_search_one_level
36  *        Consider ways to produce join relations containing exactly 'level'
37  *        jointree items.  (This is one step of the dynamic-programming method
38  *        embodied in standard_join_search.)  Join rel nodes for each feasible
39  *        combination of lower-level rels are created and returned in a list.
40  *        Implementation paths are created for each such joinrel, too.
41  *
42  * level: level of rels we want to make this time.
43  * joinrels[j], 1 <= j < level, is a list of rels containing j items.
44  */
45 List *
46 join_search_one_level(PlannerInfo *root, int level, List **joinrels)
47 {
48         List       *result_rels = NIL;
49         List       *new_rels;
50         ListCell   *r;
51         int                     k;
52
53         /*
54          * First, consider left-sided and right-sided plans, in which rels of
55          * exactly level-1 member relations are joined against initial relations.
56          * We prefer to join using join clauses, but if we find a rel of level-1
57          * members that has no join clauses, we will generate Cartesian-product
58          * joins against all initial rels not already contained in it.
59          *
60          * In the first pass (level == 2), we try to join each initial rel to each
61          * initial rel that appears later in joinrels[1].  (The mirror-image joins
62          * are handled automatically by make_join_rel.)  In later passes, we try
63          * to join rels of size level-1 from joinrels[level-1] to each initial rel
64          * in joinrels[1].
65          */
66         foreach(r, joinrels[level - 1])
67         {
68                 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
69                 ListCell   *other_rels;
70
71                 if (level == 2)
72                         other_rels = lnext(r);          /* only consider remaining initial
73                                                                                  * rels */
74                 else
75                         other_rels = list_head(joinrels[1]);            /* consider all initial
76                                                                                                                  * rels */
77
78                 if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
79                         has_join_restriction(root, old_rel))
80                 {
81                         /*
82                          * Note that if all available join clauses for this rel require
83                          * more than one other rel, we will fail to make any joins against
84                          * it here.  In most cases that's OK; it'll be considered by
85                          * "bushy plan" join code in a higher-level pass where we have
86                          * those other rels collected into a join rel.
87                          *
88                          * See also the last-ditch case below.
89                          */
90                         new_rels = make_rels_by_clause_joins(root,
91                                                                                                  old_rel,
92                                                                                                  other_rels);
93                 }
94                 else
95                 {
96                         /*
97                          * Oops, we have a relation that is not joined to any other
98                          * relation, either directly or by join-order restrictions.
99                          * Cartesian product time.
100                          */
101                         new_rels = make_rels_by_clauseless_joins(root,
102                                                                                                          old_rel,
103                                                                                                          other_rels);
104                 }
105
106                 /*
107                  * At levels above 2 we will generate the same joined relation in
108                  * multiple ways --- for example (a join b) join c is the same
109                  * RelOptInfo as (b join c) join a, though the second case will add a
110                  * different set of Paths to it.  To avoid making extra work for
111                  * subsequent passes, do not enter the same RelOptInfo into our output
112                  * list multiple times.
113                  */
114                 result_rels = list_concat_unique_ptr(result_rels, new_rels);
115         }
116
117         /*
118          * Now, consider "bushy plans" in which relations of k initial rels are
119          * joined to relations of level-k initial rels, for 2 <= k <= level-2.
120          *
121          * We only consider bushy-plan joins for pairs of rels where there is a
122          * suitable join clause (or join order restriction), in order to avoid
123          * unreasonable growth of planning time.
124          */
125         for (k = 2;; k++)
126         {
127                 int                     other_level = level - k;
128
129                 /*
130                  * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
131                  * need to go as far as the halfway point.
132                  */
133                 if (k > other_level)
134                         break;
135
136                 foreach(r, joinrels[k])
137                 {
138                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
139                         ListCell   *other_rels;
140                         ListCell   *r2;
141
142                         /*
143                          * We can ignore clauseless joins here, *except* when they
144                          * participate in join-order restrictions --- then we might have
145                          * to force a bushy join plan.
146                          */
147                         if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
148                                 !has_join_restriction(root, old_rel))
149                                 continue;
150
151                         if (k == other_level)
152                                 other_rels = lnext(r);  /* only consider remaining rels */
153                         else
154                                 other_rels = list_head(joinrels[other_level]);
155
156                         for_each_cell(r2, other_rels)
157                         {
158                                 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
159
160                                 if (!bms_overlap(old_rel->relids, new_rel->relids))
161                                 {
162                                         /*
163                                          * OK, we can build a rel of the right level from this
164                                          * pair of rels.  Do so if there is at least one usable
165                                          * join clause or a relevant join restriction.
166                                          */
167                                         if (have_relevant_joinclause(root, old_rel, new_rel) ||
168                                                 have_join_order_restriction(root, old_rel, new_rel))
169                                         {
170                                                 RelOptInfo *jrel;
171
172                                                 jrel = make_join_rel(root, old_rel, new_rel);
173                                                 /* Avoid making duplicate entries ... */
174                                                 if (jrel)
175                                                         result_rels = list_append_unique_ptr(result_rels,
176                                                                                                                                  jrel);
177                                         }
178                                 }
179                         }
180                 }
181         }
182
183         /*
184          * Last-ditch effort: if we failed to find any usable joins so far, force
185          * a set of cartesian-product joins to be generated.  This handles the
186          * special case where all the available rels have join clauses but we
187          * cannot use any of the joins yet.  An example is
188          *
189          * SELECT * FROM a,b,c WHERE (a.f1 + b.f2 + c.f3) = 0;
190          *
191          * The join clause will be usable at level 3, but at level 2 we have no
192          * choice but to make cartesian joins.  We consider only left-sided and
193          * right-sided cartesian joins in this case (no bushy).
194          */
195         if (result_rels == NIL)
196         {
197                 /*
198                  * This loop is just like the first one, except we always call
199                  * make_rels_by_clauseless_joins().
200                  */
201                 foreach(r, joinrels[level - 1])
202                 {
203                         RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
204                         ListCell   *other_rels;
205
206                         if (level == 2)
207                                 other_rels = lnext(r);  /* only consider remaining initial
208                                                                                  * rels */
209                         else
210                                 other_rels = list_head(joinrels[1]);    /* consider all initial
211                                                                                                                  * rels */
212
213                         new_rels = make_rels_by_clauseless_joins(root,
214                                                                                                          old_rel,
215                                                                                                          other_rels);
216
217                         result_rels = list_concat_unique_ptr(result_rels, new_rels);
218                 }
219
220                 /*----------
221                  * When special joins are involved, there may be no legal way
222                  * to make an N-way join for some values of N.  For example consider
223                  *
224                  * SELECT ... FROM t1 WHERE
225                  *       x IN (SELECT ... FROM t2,t3 WHERE ...) AND
226                  *       y IN (SELECT ... FROM t4,t5 WHERE ...)
227                  *
228                  * We will flatten this query to a 5-way join problem, but there are
229                  * no 4-way joins that join_is_legal() will consider legal.  We have
230                  * to accept failure at level 4 and go on to discover a workable
231                  * bushy plan at level 5.
232                  *
233                  * However, if there are no special joins then join_is_legal() should
234                  * never fail, and so the following sanity check is useful.
235                  *----------
236                  */
237                 if (result_rels == NIL && root->join_info_list == NIL)
238                         elog(ERROR, "failed to build any %d-way joins", level);
239         }
240
241         return result_rels;
242 }
243
244 /*
245  * make_rels_by_clause_joins
246  *        Build joins between the given relation 'old_rel' and other relations
247  *        that participate in join clauses that 'old_rel' also participates in
248  *        (or participate in join-order restrictions with it).
249  *        The join rel nodes are returned in a list.
250  *
251  * 'old_rel' is the relation entry for the relation to be joined
252  * 'other_rels': the first cell in a linked list containing the other
253  * rels to be considered for joining
254  *
255  * Currently, this is only used with initial rels in other_rels, but it
256  * will work for joining to joinrels too.
257  */
258 static List *
259 make_rels_by_clause_joins(PlannerInfo *root,
260                                                   RelOptInfo *old_rel,
261                                                   ListCell *other_rels)
262 {
263         List       *result = NIL;
264         ListCell   *l;
265
266         for_each_cell(l, other_rels)
267         {
268                 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
269
270                 if (!bms_overlap(old_rel->relids, other_rel->relids) &&
271                         (have_relevant_joinclause(root, old_rel, other_rel) ||
272                          have_join_order_restriction(root, old_rel, other_rel)))
273                 {
274                         RelOptInfo *jrel;
275
276                         jrel = make_join_rel(root, old_rel, other_rel);
277                         if (jrel)
278                                 result = lcons(jrel, result);
279                 }
280         }
281
282         return result;
283 }
284
285 /*
286  * make_rels_by_clauseless_joins
287  *        Given a relation 'old_rel' and a list of other relations
288  *        'other_rels', create a join relation between 'old_rel' and each
289  *        member of 'other_rels' that isn't already included in 'old_rel'.
290  *        The join rel nodes are returned in a list.
291  *
292  * 'old_rel' is the relation entry for the relation to be joined
293  * 'other_rels': the first cell of a linked list containing the
294  * other rels to be considered for joining
295  *
296  * Currently, this is only used with initial rels in other_rels, but it would
297  * work for joining to joinrels too.
298  */
299 static List *
300 make_rels_by_clauseless_joins(PlannerInfo *root,
301                                                           RelOptInfo *old_rel,
302                                                           ListCell *other_rels)
303 {
304         List       *result = NIL;
305         ListCell   *i;
306
307         for_each_cell(i, other_rels)
308         {
309                 RelOptInfo *other_rel = (RelOptInfo *) lfirst(i);
310
311                 if (!bms_overlap(other_rel->relids, old_rel->relids))
312                 {
313                         RelOptInfo *jrel;
314
315                         jrel = make_join_rel(root, old_rel, other_rel);
316
317                         /*
318                          * As long as given other_rels are distinct, don't need to test to
319                          * see if jrel is already part of output list.
320                          */
321                         if (jrel)
322                                 result = lcons(jrel, result);
323                 }
324         }
325
326         return result;
327 }
328
329
330 /*
331  * join_is_legal
332  *         Determine whether a proposed join is legal given the query's
333  *         join order constraints; and if it is, determine the join type.
334  *
335  * Caller must supply not only the two rels, but the union of their relids.
336  * (We could simplify the API by computing joinrelids locally, but this
337  * would be redundant work in the normal path through make_join_rel.)
338  *
339  * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
340  * else it's set to point to the associated SpecialJoinInfo node.  Also,
341  * *reversed_p is set TRUE if the given relations need to be swapped to
342  * match the SpecialJoinInfo node.
343  */
344 static bool
345 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
346                           Relids joinrelids,
347                           SpecialJoinInfo **sjinfo_p, bool *reversed_p)
348 {
349         SpecialJoinInfo *match_sjinfo;
350         bool            reversed;
351         bool            is_valid_inner;
352         ListCell   *l;
353
354         /*
355          * Ensure output params are set on failure return.  This is just to
356          * suppress uninitialized-variable warnings from overly anal compilers.
357          */
358         *sjinfo_p = NULL;
359         *reversed_p = false;
360
361         /*
362          * If we have any special joins, the proposed join might be illegal; and
363          * in any case we have to determine its join type.  Scan the join info
364          * list for conflicts.
365          */
366         match_sjinfo = NULL;
367         reversed = false;
368         is_valid_inner = true;
369
370         foreach(l, root->join_info_list)
371         {
372                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
373
374                 /*
375                  * This special join is not relevant unless its RHS overlaps the
376                  * proposed join.  (Check this first as a fast path for dismissing
377                  * most irrelevant SJs quickly.)
378                  */
379                 if (!bms_overlap(sjinfo->min_righthand, joinrelids))
380                         continue;
381
382                 /*
383                  * Also, not relevant if proposed join is fully contained within RHS
384                  * (ie, we're still building up the RHS).
385                  */
386                 if (bms_is_subset(joinrelids, sjinfo->min_righthand))
387                         continue;
388
389                 /*
390                  * Also, not relevant if SJ is already done within either input.
391                  */
392                 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
393                         bms_is_subset(sjinfo->min_righthand, rel1->relids))
394                         continue;
395                 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
396                         bms_is_subset(sjinfo->min_righthand, rel2->relids))
397                         continue;
398
399                 /*
400                  * If one input contains min_lefthand and the other contains
401                  * min_righthand, then we can perform the SJ at this join.
402                  *
403                  * Barf if we get matches to more than one SJ (is that possible?)
404                  */
405                 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
406                         bms_is_subset(sjinfo->min_righthand, rel2->relids))
407                 {
408                         if (match_sjinfo)
409                                 return false;   /* invalid join path */
410                         match_sjinfo = sjinfo;
411                         reversed = false;
412                 }
413                 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
414                                  bms_is_subset(sjinfo->min_righthand, rel1->relids))
415                 {
416                         if (match_sjinfo)
417                                 return false;   /* invalid join path */
418                         match_sjinfo = sjinfo;
419                         reversed = true;
420                 }
421                 else
422                 {
423                         /*----------
424                          * Otherwise, the proposed join overlaps the RHS but isn't
425                          * a valid implementation of this SJ.  It might still be
426                          * a legal join, however.  If both inputs overlap the RHS,
427                          * assume that it's OK.  Since the inputs presumably got past
428                          * this function's checks previously, they can't overlap the
429                          * LHS and their violations of the RHS boundary must represent
430                          * SJs that have been determined to commute with this one.
431                          * We have to allow this to work correctly in cases like
432                          *              (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
433                          * when the c/d join has been determined to commute with the join
434                          * to a, and hence d is not part of min_righthand for the upper
435                          * join.  It should be legal to join b to c/d but this will appear
436                          * as a violation of the upper join's RHS.
437                          * Furthermore, if one input overlaps the RHS and the other does
438                          * not, we should still allow the join if it is a valid
439                          * implementation of some other SJ.  We have to allow this to
440                          * support the associative identity
441                          *              (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
442                          * since joining B directly to C violates the lower SJ's RHS.
443                          * We assume that make_outerjoininfo() set things up correctly
444                          * so that we'll only match to some SJ if the join is valid.
445                          * Set flag here to check at bottom of loop.
446                          *----------
447                          */
448                         if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
449                                 bms_overlap(rel2->relids, sjinfo->min_righthand))
450                         {
451                                 /* seems OK */
452                                 Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
453                         }
454                         else
455                                 is_valid_inner = false;
456                 }
457         }
458
459         /* Fail if violated some SJ's RHS and didn't match to another SJ */
460         if (match_sjinfo == NULL && !is_valid_inner)
461                 return false;                   /* invalid join path */
462
463         /* Otherwise, it's a valid join */
464         *sjinfo_p = match_sjinfo;
465         *reversed_p = reversed;
466         return true;
467 }
468
469
470 /*
471  * make_join_rel
472  *         Find or create a join RelOptInfo that represents the join of
473  *         the two given rels, and add to it path information for paths
474  *         created with the two rels as outer and inner rel.
475  *         (The join rel may already contain paths generated from other
476  *         pairs of rels that add up to the same set of base rels.)
477  *
478  * NB: will return NULL if attempted join is not valid.  This can happen
479  * when working with outer joins, or with IN or EXISTS clauses that have been
480  * turned into joins.
481  */
482 RelOptInfo *
483 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
484 {
485         Relids          joinrelids;
486         SpecialJoinInfo *sjinfo;
487         bool            reversed;
488         SpecialJoinInfo sjinfo_data;
489         RelOptInfo *joinrel;
490         List       *restrictlist;
491
492         /* We should never try to join two overlapping sets of rels. */
493         Assert(!bms_overlap(rel1->relids, rel2->relids));
494
495         /* Construct Relids set that identifies the joinrel. */
496         joinrelids = bms_union(rel1->relids, rel2->relids);
497
498         /* Check validity and determine join type. */
499         if (!join_is_legal(root, rel1, rel2, joinrelids,
500                                            &sjinfo, &reversed))
501         {
502                 /* invalid join path */
503                 bms_free(joinrelids);
504                 return NULL;
505         }
506
507         /* Swap rels if needed to match the join info. */
508         if (reversed)
509         {
510                 RelOptInfo *trel = rel1;
511
512                 rel1 = rel2;
513                 rel2 = trel;
514         }
515
516         /*
517          * If it's a plain inner join, then we won't have found anything in
518          * join_info_list.  Make up a SpecialJoinInfo so that selectivity
519          * estimation functions will know what's being joined.
520          */
521         if (sjinfo == NULL)
522         {
523                 sjinfo = &sjinfo_data;
524                 sjinfo->type = T_SpecialJoinInfo;
525                 sjinfo->min_lefthand = rel1->relids;
526                 sjinfo->min_righthand = rel2->relids;
527                 sjinfo->syn_lefthand = rel1->relids;
528                 sjinfo->syn_righthand = rel2->relids;
529                 sjinfo->jointype = JOIN_INNER;
530                 /* we don't bother trying to make the remaining fields valid */
531                 sjinfo->lhs_strict = false;
532                 sjinfo->delay_upper_joins = false;
533                 sjinfo->join_quals = NIL;
534         }
535
536         /*
537          * Find or build the join RelOptInfo, and compute the restrictlist that
538          * goes with this particular joining.
539          */
540         joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
541                                                          &restrictlist);
542
543         /*
544          * If we've already proven this join is empty, we needn't consider
545          * any more paths for it.
546          */
547         if (is_dummy_rel(joinrel))
548         {
549                 bms_free(joinrelids);
550                 return joinrel;
551         }
552
553         /*
554          * Consider paths using each rel as both outer and inner.  Depending
555          * on the join type, a provably empty outer or inner rel might mean
556          * the join is provably empty too; in which case throw away any
557          * previously computed paths and mark the join as dummy.  (We do it
558          * this way since it's conceivable that dummy-ness of a multi-element
559          * join might only be noticeable for certain construction paths.)
560          *
561          * We need only consider the jointypes that appear in join_info_list,
562          * plus JOIN_INNER.
563          */
564         switch (sjinfo->jointype)
565         {
566                 case JOIN_INNER:
567                         if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
568                         {
569                                 mark_dummy_join(joinrel);
570                                 break;
571                         }
572                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
573                                                                  JOIN_INNER, sjinfo,
574                                                                  restrictlist);
575                         add_paths_to_joinrel(root, joinrel, rel2, rel1,
576                                                                  JOIN_INNER, sjinfo,
577                                                                  restrictlist);
578                         break;
579                 case JOIN_LEFT:
580                         if (is_dummy_rel(rel1))
581                         {
582                                 mark_dummy_join(joinrel);
583                                 break;
584                         }
585                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
586                                                                  JOIN_LEFT, sjinfo,
587                                                                  restrictlist);
588                         add_paths_to_joinrel(root, joinrel, rel2, rel1,
589                                                                  JOIN_RIGHT, sjinfo,
590                                                                  restrictlist);
591                         break;
592                 case JOIN_FULL:
593                         if (is_dummy_rel(rel1) && is_dummy_rel(rel2))
594                         {
595                                 mark_dummy_join(joinrel);
596                                 break;
597                         }
598                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
599                                                                  JOIN_FULL, sjinfo,
600                                                                  restrictlist);
601                         add_paths_to_joinrel(root, joinrel, rel2, rel1,
602                                                                  JOIN_FULL, sjinfo,
603                                                                  restrictlist);
604                         break;
605                 case JOIN_SEMI:
606                         if (is_dummy_rel(rel1) || is_dummy_rel(rel2))
607                         {
608                                 mark_dummy_join(joinrel);
609                                 break;
610                         }
611                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
612                                                                  JOIN_SEMI, sjinfo,
613                                                                  restrictlist);
614
615                         /*
616                          * If we know how to unique-ify the RHS and one input rel is
617                          * exactly the RHS (not a superset) we can consider unique-ifying
618                          * it and then doing a regular join.
619                          */
620                         if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
621                                 create_unique_path(root, rel2, rel2->cheapest_total_path,
622                                                                    sjinfo) != NULL)
623                         {
624                                 add_paths_to_joinrel(root, joinrel, rel1, rel2,
625                                                                          JOIN_UNIQUE_INNER, sjinfo,
626                                                                          restrictlist);
627                                 add_paths_to_joinrel(root, joinrel, rel2, rel1,
628                                                                          JOIN_UNIQUE_OUTER, sjinfo,
629                                                                          restrictlist);
630                         }
631                         break;
632                 case JOIN_ANTI:
633                         if (is_dummy_rel(rel1))
634                         {
635                                 mark_dummy_join(joinrel);
636                                 break;
637                         }
638                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
639                                                                  JOIN_ANTI, sjinfo,
640                                                                  restrictlist);
641                         break;
642                 default:
643                         /* other values not expected here */
644                         elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
645                         break;
646         }
647
648         bms_free(joinrelids);
649
650         return joinrel;
651 }
652
653
654 /*
655  * have_join_order_restriction
656  *              Detect whether the two relations should be joined to satisfy
657  *              a join-order restriction arising from special joins.
658  *
659  * In practice this is always used with have_relevant_joinclause(), and so
660  * could be merged with that function, but it seems clearer to separate the
661  * two concerns.  We need these tests because there are degenerate cases where
662  * a clauseless join must be performed to satisfy join-order restrictions.
663  *
664  * Note: this is only a problem if one side of a degenerate outer join
665  * contains multiple rels, or a clauseless join is required within an
666  * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
667  * join_search_one_level().  We could dispense with this test if we were
668  * willing to try bushy plans in the "last ditch" case, but that seems much
669  * less efficient.
670  */
671 bool
672 have_join_order_restriction(PlannerInfo *root,
673                                                         RelOptInfo *rel1, RelOptInfo *rel2)
674 {
675         bool            result = false;
676         ListCell   *l;
677
678         /*
679          * It's possible that the rels correspond to the left and right sides of a
680          * degenerate outer join, that is, one with no joinclause mentioning the
681          * non-nullable side; in which case we should force the join to occur.
682          *
683          * Also, the two rels could represent a clauseless join that has to be
684          * completed to build up the LHS or RHS of an outer join.
685          */
686         foreach(l, root->join_info_list)
687         {
688                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
689
690                 /* ignore full joins --- other mechanisms handle them */
691                 if (sjinfo->jointype == JOIN_FULL)
692                         continue;
693
694                 /* Can we perform the SJ with these rels? */
695                 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
696                         bms_is_subset(sjinfo->min_righthand, rel2->relids))
697                 {
698                         result = true;
699                         break;
700                 }
701                 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
702                         bms_is_subset(sjinfo->min_righthand, rel1->relids))
703                 {
704                         result = true;
705                         break;
706                 }
707
708                 /*
709                  * Might we need to join these rels to complete the RHS?  We have to
710                  * use "overlap" tests since either rel might include a lower SJ that
711                  * has been proven to commute with this one.
712                  */
713                 if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
714                         bms_overlap(sjinfo->min_righthand, rel2->relids))
715                 {
716                         result = true;
717                         break;
718                 }
719
720                 /* Likewise for the LHS. */
721                 if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
722                         bms_overlap(sjinfo->min_lefthand, rel2->relids))
723                 {
724                         result = true;
725                         break;
726                 }
727         }
728
729         /*
730          * We do not force the join to occur if either input rel can legally be
731          * joined to anything else using joinclauses.  This essentially means that
732          * clauseless bushy joins are put off as long as possible. The reason is
733          * that when there is a join order restriction high up in the join tree
734          * (that is, with many rels inside the LHS or RHS), we would otherwise
735          * expend lots of effort considering very stupid join combinations within
736          * its LHS or RHS.
737          */
738         if (result)
739         {
740                 if (has_legal_joinclause(root, rel1) ||
741                         has_legal_joinclause(root, rel2))
742                         result = false;
743         }
744
745         return result;
746 }
747
748
749 /*
750  * has_join_restriction
751  *              Detect whether the specified relation has join-order restrictions
752  *              due to being inside an outer join or an IN (sub-SELECT).
753  *
754  * Essentially, this tests whether have_join_order_restriction() could
755  * succeed with this rel and some other one.  It's OK if we sometimes
756  * say "true" incorrectly.      (Therefore, we don't bother with the relatively
757  * expensive has_legal_joinclause test.)
758  */
759 static bool
760 has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
761 {
762         ListCell   *l;
763
764         foreach(l, root->join_info_list)
765         {
766                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
767
768                 /* ignore full joins --- other mechanisms preserve their ordering */
769                 if (sjinfo->jointype == JOIN_FULL)
770                         continue;
771
772                 /* ignore if SJ is already contained in rel */
773                 if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
774                         bms_is_subset(sjinfo->min_righthand, rel->relids))
775                         continue;
776
777                 /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
778                 if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
779                         bms_overlap(sjinfo->min_righthand, rel->relids))
780                         return true;
781         }
782
783         return false;
784 }
785
786
787 /*
788  * has_legal_joinclause
789  *              Detect whether the specified relation can legally be joined
790  *              to any other rels using join clauses.
791  *
792  * We consider only joins to single other relations in the current
793  * initial_rels list.  This is sufficient to get a "true" result in most real
794  * queries, and an occasional erroneous "false" will only cost a bit more
795  * planning time.  The reason for this limitation is that considering joins to
796  * other joins would require proving that the other join rel can legally be
797  * formed, which seems like too much trouble for something that's only a
798  * heuristic to save planning time.  (Note: we must look at initial_rels
799  * and not all of the query, since when we are planning a sub-joinlist we
800  * may be forced to make clauseless joins within initial_rels even though
801  * there are join clauses linking to other parts of the query.)
802  */
803 static bool
804 has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
805 {
806         ListCell   *lc;
807
808         foreach(lc, root->initial_rels)
809         {
810                 RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc);
811
812                 /* ignore rels that are already in "rel" */
813                 if (bms_overlap(rel->relids, rel2->relids))
814                         continue;
815
816                 if (have_relevant_joinclause(root, rel, rel2))
817                 {
818                         Relids          joinrelids;
819                         SpecialJoinInfo *sjinfo;
820                         bool            reversed;
821
822                         /* join_is_legal needs relids of the union */
823                         joinrelids = bms_union(rel->relids, rel2->relids);
824
825                         if (join_is_legal(root, rel, rel2, joinrelids,
826                                                           &sjinfo, &reversed))
827                         {
828                                 /* Yes, this will work */
829                                 bms_free(joinrelids);
830                                 return true;
831                         }
832
833                         bms_free(joinrelids);
834                 }
835         }
836
837         return false;
838 }
839
840
841 /*
842  * is_dummy_rel --- has relation been proven empty?
843  *
844  * If so, it will have a single path that is dummy.
845  */
846 static bool
847 is_dummy_rel(RelOptInfo *rel)
848 {
849         return (rel->cheapest_total_path != NULL &&
850                         IS_DUMMY_PATH(rel->cheapest_total_path));
851 }
852
853 /*
854  * Mark a joinrel as proven empty.
855  */
856 static void
857 mark_dummy_join(RelOptInfo *rel)
858 {
859         /* Set dummy size estimate */
860         rel->rows = 0;
861
862         /* Evict any previously chosen paths */
863         rel->pathlist = NIL;
864
865         /* Set up the dummy path */
866         add_path(rel, (Path *) create_append_path(rel, NIL));
867
868         /*
869          * Although set_cheapest will be done again later, we do it immediately
870          * in order to keep is_dummy_rel as cheap as possible (ie, not have
871          * to examine the pathlist).
872          */
873         set_cheapest(rel);
874 }