]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/joinpath.c
Rename new subroutine, per discussion with Robert Haas.
[postgresql] / src / backend / optimizer / path / joinpath.c
1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  *        Routines to find all possible paths for processing a set of joins
5  *
6  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.126 2009/09/19 17:48:09 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <math.h>
18
19 #include "executor/executor.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/pathnode.h"
22 #include "optimizer/paths.h"
23
24
25 static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel,
26                                   RelOptInfo *outerrel, RelOptInfo *innerrel,
27                                   List *restrictlist, JoinType jointype);
28 static void generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
29                                         RelOptInfo *outerrel);
30 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
31                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
32                                          List *restrictlist, List *mergeclause_list,
33                                          JoinType jointype, SpecialJoinInfo *sjinfo);
34 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
35                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
36                                          List *restrictlist, List *mergeclause_list,
37                                          JoinType jointype, SpecialJoinInfo *sjinfo);
38 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
39                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
40                                          List *restrictlist,
41                                          JoinType jointype, SpecialJoinInfo *sjinfo);
42 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
43                                                  RelOptInfo *outer_rel, JoinType jointype);
44 static List *select_mergejoin_clauses(PlannerInfo *root,
45                                                  RelOptInfo *joinrel,
46                                                  RelOptInfo *outerrel,
47                                                  RelOptInfo *innerrel,
48                                                  List *restrictlist,
49                                                  JoinType jointype);
50
51
52 /*
53  * add_paths_to_joinrel
54  *        Given a join relation and two component rels from which it can be made,
55  *        consider all possible paths that use the two component rels as outer
56  *        and inner rel respectively.  Add these paths to the join rel's pathlist
57  *        if they survive comparison with other paths (and remove any existing
58  *        paths that are dominated by these paths).
59  *
60  * Modifies the pathlist field of the joinrel node to contain the best
61  * paths found so far.
62  *
63  * jointype is not necessarily the same as sjinfo->jointype; it might be
64  * "flipped around" if we are considering joining the rels in the opposite
65  * direction from what's indicated in sjinfo.
66  *
67  * Also, this routine and others in this module accept the special JoinTypes
68  * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
69  * unique-ify the outer or inner relation and then apply a regular inner
70  * join.  These values are not allowed to propagate outside this module,
71  * however.  Path cost estimation code may need to recognize that it's
72  * dealing with such a case --- the combination of nominal jointype INNER
73  * with sjinfo->jointype == JOIN_SEMI indicates that.
74  */
75 void
76 add_paths_to_joinrel(PlannerInfo *root,
77                                          RelOptInfo *joinrel,
78                                          RelOptInfo *outerrel,
79                                          RelOptInfo *innerrel,
80                                          JoinType jointype,
81                                          SpecialJoinInfo *sjinfo,
82                                          List *restrictlist)
83 {
84         List       *mergeclause_list = NIL;
85
86         /*
87          * 0. Consider join removal.  This is always the most efficient strategy,
88          * so if it works, there's no need to consider anything further.
89          */
90         if (join_is_removable(root, joinrel, outerrel, innerrel,
91                                                   restrictlist, jointype))
92         {
93                 generate_outer_only(root, joinrel, outerrel);
94                 return;
95         }
96
97         /*
98          * Find potential mergejoin clauses.  We can skip this if we are not
99          * interested in doing a mergejoin.  However, mergejoin is currently our
100          * only way of implementing full outer joins, so override mergejoin
101          * disable if it's a full join.
102          *
103          * Note: do this after join_is_removable(), because this sets the
104          * outer_is_left flags in the mergejoin clauses, while join_is_removable
105          * uses those flags for its own purposes.  Currently, they set the flags
106          * the same way anyway, but let's avoid unnecessary entanglement.
107          */
108         if (enable_mergejoin || jointype == JOIN_FULL)
109                 mergeclause_list = select_mergejoin_clauses(root,
110                                                                                                         joinrel,
111                                                                                                         outerrel,
112                                                                                                         innerrel,
113                                                                                                         restrictlist,
114                                                                                                         jointype);
115
116         /*
117          * 1. Consider mergejoin paths where both relations must be explicitly
118          * sorted.
119          */
120         sort_inner_and_outer(root, joinrel, outerrel, innerrel,
121                                                  restrictlist, mergeclause_list, jointype, sjinfo);
122
123         /*
124          * 2. Consider paths where the outer relation need not be explicitly
125          * sorted. This includes both nestloops and mergejoins where the outer
126          * path is already ordered.
127          */
128         match_unsorted_outer(root, joinrel, outerrel, innerrel,
129                                                  restrictlist, mergeclause_list, jointype, sjinfo);
130
131 #ifdef NOT_USED
132
133         /*
134          * 3. Consider paths where the inner relation need not be explicitly
135          * sorted.      This includes mergejoins only (nestloops were already built in
136          * match_unsorted_outer).
137          *
138          * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
139          * significant difference between the inner and outer side of a mergejoin,
140          * so match_unsorted_inner creates no paths that aren't equivalent to
141          * those made by match_unsorted_outer when add_paths_to_joinrel() is
142          * invoked with the two rels given in the other order.
143          */
144         match_unsorted_inner(root, joinrel, outerrel, innerrel,
145                                                  restrictlist, mergeclause_list, jointype, sjinfo);
146 #endif
147
148         /*
149          * 4. Consider paths where both outer and inner relations must be hashed
150          * before being joined.
151          */
152         if (enable_hashjoin)
153                 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
154                                                          restrictlist, jointype, sjinfo);
155 }
156
157 /*
158  * clause_sides_match_join
159  *        Determine whether a join clause is of the right form to use in this join.
160  *
161  * We already know that the clause is a binary opclause referencing only the
162  * rels in the current join.  The point here is to check whether it has the
163  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
164  * rather than mixing outer and inner vars on either side.  If it matches,
165  * we set the transient flag outer_is_left to identify which side is which.
166  */
167 static inline bool
168 clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
169                                                 RelOptInfo *innerrel)
170 {
171         if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
172                 bms_is_subset(rinfo->right_relids, innerrel->relids))
173         {
174                 /* lefthand side is outer */
175                 rinfo->outer_is_left = true;
176                 return true;
177         }
178         else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
179                          bms_is_subset(rinfo->right_relids, outerrel->relids))
180         {
181                 /* righthand side is outer */
182                 rinfo->outer_is_left = false;
183                 return true;
184         }
185         return false;                           /* no good for these input relations */
186 }
187
188 /*
189  * join_is_removable
190  *        Determine whether we need not perform the join at all, because
191  *        it will just duplicate its left input.
192  *
193  * This is true for a left join for which the join condition cannot match
194  * more than one inner-side row.  (There are other possibly interesting
195  * cases, but we don't have the infrastructure to prove them.)
196  *
197  * Note: there is no need to consider the symmetrical case of duplicating the
198  * right input, because add_paths_to_joinrel() will be called with each rel
199  * on the outer side.
200  */
201 static bool
202 join_is_removable(PlannerInfo *root,
203                                   RelOptInfo *joinrel,
204                                   RelOptInfo *outerrel,
205                                   RelOptInfo *innerrel,
206                                   List *restrictlist,
207                                   JoinType jointype)
208 {
209         List       *clause_list = NIL;
210         ListCell   *l;
211         int                     attroff;
212
213         /*
214          * Currently, we only know how to remove left joins to a baserel with
215          * unique indexes.  We can check most of these criteria pretty trivially
216          * to avoid doing useless extra work.  But checking whether any of the
217          * indexes are unique would require iterating over the indexlist, so for
218          * now we just make sure there are indexes of some sort or other.  If none
219          * of them are unique, join removal will still fail, just slightly later.
220          */
221         if (jointype != JOIN_LEFT ||
222                 innerrel->reloptkind == RELOPT_JOINREL ||
223                 innerrel->rtekind != RTE_RELATION ||
224                 innerrel->indexlist == NIL)
225                 return false;
226
227         /*
228          * We can't remove the join if any inner-rel attributes are used above
229          * the join.
230          *
231          * As a micro-optimization, it seems better to start with max_attr and
232          * count down rather than starting with min_attr and counting up, on the
233          * theory that the system attributes are somewhat less likely to be wanted
234          * and should be tested last.
235          */
236         for (attroff = innerrel->max_attr - innerrel->min_attr;
237                  attroff >= 0;
238                  attroff--)
239         {
240                 if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids))
241                         return false;
242         }
243
244         /*
245          * Search for mergejoinable clauses that constrain the inner rel against
246          * either the outer rel or a pseudoconstant.  If an operator is
247          * mergejoinable then it behaves like equality for some btree opclass,
248          * so it's what we want.  The mergejoinability test also eliminates
249          * clauses containing volatile functions, which we couldn't depend on.
250          */
251         foreach(l, restrictlist)
252         {
253                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
254
255                 /*
256                  * We are always considering an outer join here, so ignore pushed-down
257                  * clauses.  Also ignore anything that doesn't have a mergejoinable
258                  * operator.
259                  */
260                 if (restrictinfo->is_pushed_down)
261                         continue;
262
263                 if (!restrictinfo->can_join ||
264                         restrictinfo->mergeopfamilies == NIL)
265                         continue;                       /* not mergejoinable */
266
267                 /*
268                  * Check if clause has the form "outer op inner" or "inner op outer".
269                  */
270                 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
271                         continue;                       /* no good for these input relations */
272
273                 /* OK, add to list */
274                 clause_list = lappend(clause_list, restrictinfo);
275         }
276
277         /* Now examine the rel's restriction clauses for var = const clauses */
278         foreach(l, innerrel->baserestrictinfo)
279         {
280                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
281
282                 /*
283                  * Note: can_join won't be set for a restriction clause, but
284                  * mergeopfamilies will be if it has a mergejoinable operator
285                  * and doesn't contain volatile functions.
286                  */
287                 if (restrictinfo->mergeopfamilies == NIL)
288                         continue;                       /* not mergejoinable */
289
290                 /*
291                  * The clause certainly doesn't refer to anything but the given
292                  * rel.  If either side is pseudoconstant then we can use it.
293                  */
294                 if (bms_is_empty(restrictinfo->left_relids))
295                 {
296                         /* righthand side is inner */
297                         restrictinfo->outer_is_left = true;
298                 }
299                 else if (bms_is_empty(restrictinfo->right_relids))
300                 {
301                         /* lefthand side is inner */
302                         restrictinfo->outer_is_left = false;
303                 }
304                 else
305                         continue;
306
307                 /* OK, add to list */
308                 clause_list = lappend(clause_list, restrictinfo);
309         }
310
311         /* Now examine the indexes to see if we have a matching unique index */
312         if (relation_has_unique_index_for(root, innerrel, clause_list))
313                 return true;
314
315         /*
316          * Some day it would be nice to check for other methods of establishing
317          * distinctness.
318          */
319         return false;
320 }
321
322 /*
323  * generate_outer_only
324  *        Generate "join" paths when we have found the join is removable.
325  */
326 static void
327 generate_outer_only(PlannerInfo *root, RelOptInfo *joinrel,
328                                         RelOptInfo *outerrel)
329 {
330         ListCell   *lc;
331
332         /*
333          * For the moment, replicate all of the outerrel's paths as join paths.
334          * Some of them might not really be interesting above the join, if they
335          * have sort orderings that have no real use except to do a mergejoin
336          * for the join we've just found we don't need.  But distinguishing that
337          * case probably isn't worth the extra code it would take.
338          */
339         foreach(lc, outerrel->pathlist)
340         {
341                 Path   *outerpath = (Path *) lfirst(lc);
342
343                 add_path(joinrel, (Path *)
344                                  create_noop_path(root, joinrel, outerpath));
345         }
346 }
347
348 /*
349  * sort_inner_and_outer
350  *        Create mergejoin join paths by explicitly sorting both the outer and
351  *        inner join relations on each available merge ordering.
352  *
353  * 'joinrel' is the join relation
354  * 'outerrel' is the outer join relation
355  * 'innerrel' is the inner join relation
356  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
357  *              clauses that apply to this join
358  * 'mergeclause_list' is a list of RestrictInfo nodes for available
359  *              mergejoin clauses in this join
360  * 'jointype' is the type of join to do
361  * 'sjinfo' is extra info about the join for selectivity estimation
362  */
363 static void
364 sort_inner_and_outer(PlannerInfo *root,
365                                          RelOptInfo *joinrel,
366                                          RelOptInfo *outerrel,
367                                          RelOptInfo *innerrel,
368                                          List *restrictlist,
369                                          List *mergeclause_list,
370                                          JoinType jointype,
371                                          SpecialJoinInfo *sjinfo)
372 {
373         bool            useallclauses;
374         Path       *outer_path;
375         Path       *inner_path;
376         List       *all_pathkeys;
377         ListCell   *l;
378
379         /*
380          * If we are doing a right or full join, we must use *all* the
381          * mergeclauses as join clauses, else we will not have a valid plan.
382          */
383         switch (jointype)
384         {
385                 case JOIN_INNER:
386                 case JOIN_LEFT:
387                 case JOIN_SEMI:
388                 case JOIN_ANTI:
389                 case JOIN_UNIQUE_OUTER:
390                 case JOIN_UNIQUE_INNER:
391                         useallclauses = false;
392                         break;
393                 case JOIN_RIGHT:
394                 case JOIN_FULL:
395                         useallclauses = true;
396                         break;
397                 default:
398                         elog(ERROR, "unrecognized join type: %d",
399                                  (int) jointype);
400                         useallclauses = false;          /* keep compiler quiet */
401                         break;
402         }
403
404         /*
405          * We only consider the cheapest-total-cost input paths, since we are
406          * assuming here that a sort is required.  We will consider
407          * cheapest-startup-cost input paths later, and only if they don't need a
408          * sort.
409          *
410          * If unique-ification is requested, do it and then handle as a plain
411          * inner join.
412          */
413         outer_path = outerrel->cheapest_total_path;
414         inner_path = innerrel->cheapest_total_path;
415         if (jointype == JOIN_UNIQUE_OUTER)
416         {
417                 outer_path = (Path *) create_unique_path(root, outerrel,
418                                                                                                  outer_path, sjinfo);
419                 Assert(outer_path);
420                 jointype = JOIN_INNER;
421         }
422         else if (jointype == JOIN_UNIQUE_INNER)
423         {
424                 inner_path = (Path *) create_unique_path(root, innerrel,
425                                                                                                  inner_path, sjinfo);
426                 Assert(inner_path);
427                 jointype = JOIN_INNER;
428         }
429
430         /*
431          * Each possible ordering of the available mergejoin clauses will generate
432          * a differently-sorted result path at essentially the same cost.  We have
433          * no basis for choosing one over another at this level of joining, but
434          * some sort orders may be more useful than others for higher-level
435          * mergejoins, so it's worth considering multiple orderings.
436          *
437          * Actually, it's not quite true that every mergeclause ordering will
438          * generate a different path order, because some of the clauses may be
439          * partially redundant (refer to the same EquivalenceClasses).  Therefore,
440          * what we do is convert the mergeclause list to a list of canonical
441          * pathkeys, and then consider different orderings of the pathkeys.
442          *
443          * Generating a path for *every* permutation of the pathkeys doesn't seem
444          * like a winning strategy; the cost in planning time is too high. For
445          * now, we generate one path for each pathkey, listing that pathkey first
446          * and the rest in random order.  This should allow at least a one-clause
447          * mergejoin without re-sorting against any other possible mergejoin
448          * partner path.  But if we've not guessed the right ordering of secondary
449          * keys, we may end up evaluating clauses as qpquals when they could have
450          * been done as mergeclauses.  (In practice, it's rare that there's more
451          * than two or three mergeclauses, so expending a huge amount of thought
452          * on that is probably not worth it.)
453          *
454          * The pathkey order returned by select_outer_pathkeys_for_merge() has
455          * some heuristics behind it (see that function), so be sure to try it
456          * exactly as-is as well as making variants.
457          */
458         all_pathkeys = select_outer_pathkeys_for_merge(root,
459                                                                                                    mergeclause_list,
460                                                                                                    joinrel);
461
462         foreach(l, all_pathkeys)
463         {
464                 List       *front_pathkey = (List *) lfirst(l);
465                 List       *cur_mergeclauses;
466                 List       *outerkeys;
467                 List       *innerkeys;
468                 List       *merge_pathkeys;
469
470                 /* Make a pathkey list with this guy first */
471                 if (l != list_head(all_pathkeys))
472                         outerkeys = lcons(front_pathkey,
473                                                           list_delete_ptr(list_copy(all_pathkeys),
474                                                                                           front_pathkey));
475                 else
476                         outerkeys = all_pathkeys;       /* no work at first one... */
477
478                 /* Sort the mergeclauses into the corresponding ordering */
479                 cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
480                                                                                                                   outerkeys,
481                                                                                                                   true,
482                                                                                                                   mergeclause_list);
483
484                 /* Should have used them all... */
485                 Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
486
487                 /* Build sort pathkeys for the inner side */
488                 innerkeys = make_inner_pathkeys_for_merge(root,
489                                                                                                   cur_mergeclauses,
490                                                                                                   outerkeys);
491
492                 /* Build pathkeys representing output sort order */
493                 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
494                                                                                          outerkeys);
495
496                 /*
497                  * And now we can make the path.
498                  *
499                  * Note: it's possible that the cheapest paths will already be sorted
500                  * properly.  create_mergejoin_path will detect that case and suppress
501                  * an explicit sort step, so we needn't do so here.
502                  */
503                 add_path(joinrel, (Path *)
504                                  create_mergejoin_path(root,
505                                                                            joinrel,
506                                                                            jointype,
507                                                                            sjinfo,
508                                                                            outer_path,
509                                                                            inner_path,
510                                                                            restrictlist,
511                                                                            merge_pathkeys,
512                                                                            cur_mergeclauses,
513                                                                            outerkeys,
514                                                                            innerkeys));
515         }
516 }
517
518 /*
519  * match_unsorted_outer
520  *        Creates possible join paths for processing a single join relation
521  *        'joinrel' by employing either iterative substitution or
522  *        mergejoining on each of its possible outer paths (considering
523  *        only outer paths that are already ordered well enough for merging).
524  *
525  * We always generate a nestloop path for each available outer path.
526  * In fact we may generate as many as five: one on the cheapest-total-cost
527  * inner path, one on the same with materialization, one on the
528  * cheapest-startup-cost inner path (if different), one on the
529  * cheapest-total inner-indexscan path (if any), and one on the
530  * cheapest-startup inner-indexscan path (if different).
531  *
532  * We also consider mergejoins if mergejoin clauses are available.      We have
533  * two ways to generate the inner path for a mergejoin: sort the cheapest
534  * inner path, or use an inner path that is already suitably ordered for the
535  * merge.  If we have several mergeclauses, it could be that there is no inner
536  * path (or only a very expensive one) for the full list of mergeclauses, but
537  * better paths exist if we truncate the mergeclause list (thereby discarding
538  * some sort key requirements).  So, we consider truncations of the
539  * mergeclause list as well as the full list.  (Ideally we'd consider all
540  * subsets of the mergeclause list, but that seems way too expensive.)
541  *
542  * 'joinrel' is the join relation
543  * 'outerrel' is the outer join relation
544  * 'innerrel' is the inner join relation
545  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
546  *              clauses that apply to this join
547  * 'mergeclause_list' is a list of RestrictInfo nodes for available
548  *              mergejoin clauses in this join
549  * 'jointype' is the type of join to do
550  * 'sjinfo' is extra info about the join for selectivity estimation
551  */
552 static void
553 match_unsorted_outer(PlannerInfo *root,
554                                          RelOptInfo *joinrel,
555                                          RelOptInfo *outerrel,
556                                          RelOptInfo *innerrel,
557                                          List *restrictlist,
558                                          List *mergeclause_list,
559                                          JoinType jointype,
560                                          SpecialJoinInfo *sjinfo)
561 {
562         JoinType        save_jointype = jointype;
563         bool            nestjoinOK;
564         bool            useallclauses;
565         Path       *inner_cheapest_startup = innerrel->cheapest_startup_path;
566         Path       *inner_cheapest_total = innerrel->cheapest_total_path;
567         Path       *matpath = NULL;
568         Path       *index_cheapest_startup = NULL;
569         Path       *index_cheapest_total = NULL;
570         ListCell   *l;
571
572         /*
573          * Nestloop only supports inner, left, semi, and anti joins.  Also, if we
574          * are doing a right or full join, we must use *all* the mergeclauses as
575          * join clauses, else we will not have a valid plan.  (Although these two
576          * flags are currently inverses, keep them separate for clarity and
577          * possible future changes.)
578          */
579         switch (jointype)
580         {
581                 case JOIN_INNER:
582                 case JOIN_LEFT:
583                 case JOIN_SEMI:
584                 case JOIN_ANTI:
585                         nestjoinOK = true;
586                         useallclauses = false;
587                         break;
588                 case JOIN_RIGHT:
589                 case JOIN_FULL:
590                         nestjoinOK = false;
591                         useallclauses = true;
592                         break;
593                 case JOIN_UNIQUE_OUTER:
594                 case JOIN_UNIQUE_INNER:
595                         jointype = JOIN_INNER;
596                         nestjoinOK = true;
597                         useallclauses = false;
598                         break;
599                 default:
600                         elog(ERROR, "unrecognized join type: %d",
601                                  (int) jointype);
602                         nestjoinOK = false; /* keep compiler quiet */
603                         useallclauses = false;
604                         break;
605         }
606
607         /*
608          * If we need to unique-ify the inner path, we will consider only the
609          * cheapest inner.
610          */
611         if (save_jointype == JOIN_UNIQUE_INNER)
612         {
613                 inner_cheapest_total = (Path *)
614                         create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
615                 Assert(inner_cheapest_total);
616                 inner_cheapest_startup = inner_cheapest_total;
617         }
618         else if (nestjoinOK)
619         {
620                 /*
621                  * Consider materializing the cheapest inner path, unless it is one
622                  * that materializes its output anyway.
623                  */
624                 if (!ExecMaterializesOutput(inner_cheapest_total->pathtype))
625                         matpath = (Path *)
626                                 create_material_path(innerrel, inner_cheapest_total);
627
628                 /*
629                  * Get the best innerjoin indexpaths (if any) for this outer rel.
630                  * They're the same for all outer paths.
631                  */
632                 if (innerrel->reloptkind != RELOPT_JOINREL)
633                 {
634                         if (IsA(inner_cheapest_total, AppendPath))
635                                 index_cheapest_total = best_appendrel_indexscan(root,
636                                                                                                                                 innerrel,
637                                                                                                                                 outerrel,
638                                                                                                                                 jointype);
639                         else if (innerrel->rtekind == RTE_RELATION)
640                                 best_inner_indexscan(root, innerrel, outerrel, jointype,
641                                                                          &index_cheapest_startup,
642                                                                          &index_cheapest_total);
643                 }
644         }
645
646         foreach(l, outerrel->pathlist)
647         {
648                 Path       *outerpath = (Path *) lfirst(l);
649                 List       *merge_pathkeys;
650                 List       *mergeclauses;
651                 List       *innersortkeys;
652                 List       *trialsortkeys;
653                 Path       *cheapest_startup_inner;
654                 Path       *cheapest_total_inner;
655                 int                     num_sortkeys;
656                 int                     sortkeycnt;
657
658                 /*
659                  * If we need to unique-ify the outer path, it's pointless to consider
660                  * any but the cheapest outer.
661                  */
662                 if (save_jointype == JOIN_UNIQUE_OUTER)
663                 {
664                         if (outerpath != outerrel->cheapest_total_path)
665                                 continue;
666                         outerpath = (Path *) create_unique_path(root, outerrel,
667                                                                                                         outerpath, sjinfo);
668                         Assert(outerpath);
669                 }
670
671                 /*
672                  * The result will have this sort order (even if it is implemented as
673                  * a nestloop, and even if some of the mergeclauses are implemented by
674                  * qpquals rather than as true mergeclauses):
675                  */
676                 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
677                                                                                          outerpath->pathkeys);
678
679                 if (nestjoinOK)
680                 {
681                         /*
682                          * Always consider a nestloop join with this outer and
683                          * cheapest-total-cost inner.  When appropriate, also consider
684                          * using the materialized form of the cheapest inner, the
685                          * cheapest-startup-cost inner path, and the cheapest innerjoin
686                          * indexpaths.
687                          */
688                         add_path(joinrel, (Path *)
689                                          create_nestloop_path(root,
690                                                                                   joinrel,
691                                                                                   jointype,
692                                                                                   sjinfo,
693                                                                                   outerpath,
694                                                                                   inner_cheapest_total,
695                                                                                   restrictlist,
696                                                                                   merge_pathkeys));
697                         if (matpath != NULL)
698                                 add_path(joinrel, (Path *)
699                                                  create_nestloop_path(root,
700                                                                                           joinrel,
701                                                                                           jointype,
702                                                                                           sjinfo,
703                                                                                           outerpath,
704                                                                                           matpath,
705                                                                                           restrictlist,
706                                                                                           merge_pathkeys));
707                         if (inner_cheapest_startup != inner_cheapest_total)
708                                 add_path(joinrel, (Path *)
709                                                  create_nestloop_path(root,
710                                                                                           joinrel,
711                                                                                           jointype,
712                                                                                           sjinfo,
713                                                                                           outerpath,
714                                                                                           inner_cheapest_startup,
715                                                                                           restrictlist,
716                                                                                           merge_pathkeys));
717                         if (index_cheapest_total != NULL)
718                                 add_path(joinrel, (Path *)
719                                                  create_nestloop_path(root,
720                                                                                           joinrel,
721                                                                                           jointype,
722                                                                                           sjinfo,
723                                                                                           outerpath,
724                                                                                           index_cheapest_total,
725                                                                                           restrictlist,
726                                                                                           merge_pathkeys));
727                         if (index_cheapest_startup != NULL &&
728                                 index_cheapest_startup != index_cheapest_total)
729                                 add_path(joinrel, (Path *)
730                                                  create_nestloop_path(root,
731                                                                                           joinrel,
732                                                                                           jointype,
733                                                                                           sjinfo,
734                                                                                           outerpath,
735                                                                                           index_cheapest_startup,
736                                                                                           restrictlist,
737                                                                                           merge_pathkeys));
738                 }
739
740                 /* Can't do anything else if outer path needs to be unique'd */
741                 if (save_jointype == JOIN_UNIQUE_OUTER)
742                         continue;
743
744                 /* Look for useful mergeclauses (if any) */
745                 mergeclauses = find_mergeclauses_for_pathkeys(root,
746                                                                                                           outerpath->pathkeys,
747                                                                                                           true,
748                                                                                                           mergeclause_list);
749
750                 /*
751                  * Done with this outer path if no chance for a mergejoin.
752                  *
753                  * Special corner case: for "x FULL JOIN y ON true", there will be no
754                  * join clauses at all.  Ordinarily we'd generate a clauseless
755                  * nestloop path, but since mergejoin is our only join type that
756                  * supports FULL JOIN, it's necessary to generate a clauseless
757                  * mergejoin path instead.
758                  */
759                 if (mergeclauses == NIL)
760                 {
761                         if (jointype == JOIN_FULL)
762                                  /* okay to try for mergejoin */ ;
763                         else
764                                 continue;
765                 }
766                 if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
767                         continue;
768
769                 /* Compute the required ordering of the inner path */
770                 innersortkeys = make_inner_pathkeys_for_merge(root,
771                                                                                                           mergeclauses,
772                                                                                                           outerpath->pathkeys);
773
774                 /*
775                  * Generate a mergejoin on the basis of sorting the cheapest inner.
776                  * Since a sort will be needed, only cheapest total cost matters. (But
777                  * create_mergejoin_path will do the right thing if
778                  * inner_cheapest_total is already correctly sorted.)
779                  */
780                 add_path(joinrel, (Path *)
781                                  create_mergejoin_path(root,
782                                                                            joinrel,
783                                                                            jointype,
784                                                                            sjinfo,
785                                                                            outerpath,
786                                                                            inner_cheapest_total,
787                                                                            restrictlist,
788                                                                            merge_pathkeys,
789                                                                            mergeclauses,
790                                                                            NIL,
791                                                                            innersortkeys));
792
793                 /* Can't do anything else if inner path needs to be unique'd */
794                 if (save_jointype == JOIN_UNIQUE_INNER)
795                         continue;
796
797                 /*
798                  * Look for presorted inner paths that satisfy the innersortkey list
799                  * --- or any truncation thereof, if we are allowed to build a
800                  * mergejoin using a subset of the merge clauses.  Here, we consider
801                  * both cheap startup cost and cheap total cost.
802                  *
803                  * As we shorten the sortkey list, we should consider only paths that
804                  * are strictly cheaper than (in particular, not the same as) any path
805                  * found in an earlier iteration.  Otherwise we'd be intentionally
806                  * using fewer merge keys than a given path allows (treating the rest
807                  * as plain joinquals), which is unlikely to be a good idea.  Also,
808                  * eliminating paths here on the basis of compare_path_costs is a lot
809                  * cheaper than building the mergejoin path only to throw it away.
810                  *
811                  * If inner_cheapest_total is well enough sorted to have not required
812                  * a sort in the path made above, we shouldn't make a duplicate path
813                  * with it, either.  We handle that case with the same logic that
814                  * handles the previous consideration, by initializing the variables
815                  * that track cheapest-so-far properly.  Note that we do NOT reject
816                  * inner_cheapest_total if we find it matches some shorter set of
817                  * pathkeys.  That case corresponds to using fewer mergekeys to avoid
818                  * sorting inner_cheapest_total, whereas we did sort it above, so the
819                  * plans being considered are different.
820                  */
821                 if (pathkeys_contained_in(innersortkeys,
822                                                                   inner_cheapest_total->pathkeys))
823                 {
824                         /* inner_cheapest_total didn't require a sort */
825                         cheapest_startup_inner = inner_cheapest_total;
826                         cheapest_total_inner = inner_cheapest_total;
827                 }
828                 else
829                 {
830                         /* it did require a sort, at least for the full set of keys */
831                         cheapest_startup_inner = NULL;
832                         cheapest_total_inner = NULL;
833                 }
834                 num_sortkeys = list_length(innersortkeys);
835                 if (num_sortkeys > 1 && !useallclauses)
836                         trialsortkeys = list_copy(innersortkeys);       /* need modifiable copy */
837                 else
838                         trialsortkeys = innersortkeys;          /* won't really truncate */
839
840                 for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
841                 {
842                         Path       *innerpath;
843                         List       *newclauses = NIL;
844
845                         /*
846                          * Look for an inner path ordered well enough for the first
847                          * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
848                          * destructively, which is why we made a copy...
849                          */
850                         trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
851                         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
852                                                                                                            trialsortkeys,
853                                                                                                            TOTAL_COST);
854                         if (innerpath != NULL &&
855                                 (cheapest_total_inner == NULL ||
856                                  compare_path_costs(innerpath, cheapest_total_inner,
857                                                                         TOTAL_COST) < 0))
858                         {
859                                 /* Found a cheap (or even-cheaper) sorted path */
860                                 /* Select the right mergeclauses, if we didn't already */
861                                 if (sortkeycnt < num_sortkeys)
862                                 {
863                                         newclauses =
864                                                 find_mergeclauses_for_pathkeys(root,
865                                                                                                            trialsortkeys,
866                                                                                                            false,
867                                                                                                            mergeclauses);
868                                         Assert(newclauses != NIL);
869                                 }
870                                 else
871                                         newclauses = mergeclauses;
872                                 add_path(joinrel, (Path *)
873                                                  create_mergejoin_path(root,
874                                                                                            joinrel,
875                                                                                            jointype,
876                                                                                            sjinfo,
877                                                                                            outerpath,
878                                                                                            innerpath,
879                                                                                            restrictlist,
880                                                                                            merge_pathkeys,
881                                                                                            newclauses,
882                                                                                            NIL,
883                                                                                            NIL));
884                                 cheapest_total_inner = innerpath;
885                         }
886                         /* Same on the basis of cheapest startup cost ... */
887                         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
888                                                                                                            trialsortkeys,
889                                                                                                            STARTUP_COST);
890                         if (innerpath != NULL &&
891                                 (cheapest_startup_inner == NULL ||
892                                  compare_path_costs(innerpath, cheapest_startup_inner,
893                                                                         STARTUP_COST) < 0))
894                         {
895                                 /* Found a cheap (or even-cheaper) sorted path */
896                                 if (innerpath != cheapest_total_inner)
897                                 {
898                                         /*
899                                          * Avoid rebuilding clause list if we already made one;
900                                          * saves memory in big join trees...
901                                          */
902                                         if (newclauses == NIL)
903                                         {
904                                                 if (sortkeycnt < num_sortkeys)
905                                                 {
906                                                         newclauses =
907                                                                 find_mergeclauses_for_pathkeys(root,
908                                                                                                                            trialsortkeys,
909                                                                                                                            false,
910                                                                                                                            mergeclauses);
911                                                         Assert(newclauses != NIL);
912                                                 }
913                                                 else
914                                                         newclauses = mergeclauses;
915                                         }
916                                         add_path(joinrel, (Path *)
917                                                          create_mergejoin_path(root,
918                                                                                                    joinrel,
919                                                                                                    jointype,
920                                                                                                    sjinfo,
921                                                                                                    outerpath,
922                                                                                                    innerpath,
923                                                                                                    restrictlist,
924                                                                                                    merge_pathkeys,
925                                                                                                    newclauses,
926                                                                                                    NIL,
927                                                                                                    NIL));
928                                 }
929                                 cheapest_startup_inner = innerpath;
930                         }
931
932                         /*
933                          * Don't consider truncated sortkeys if we need all clauses.
934                          */
935                         if (useallclauses)
936                                 break;
937                 }
938         }
939 }
940
941 /*
942  * hash_inner_and_outer
943  *        Create hashjoin join paths by explicitly hashing both the outer and
944  *        inner keys of each available hash clause.
945  *
946  * 'joinrel' is the join relation
947  * 'outerrel' is the outer join relation
948  * 'innerrel' is the inner join relation
949  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
950  *              clauses that apply to this join
951  * 'jointype' is the type of join to do
952  * 'sjinfo' is extra info about the join for selectivity estimation
953  */
954 static void
955 hash_inner_and_outer(PlannerInfo *root,
956                                          RelOptInfo *joinrel,
957                                          RelOptInfo *outerrel,
958                                          RelOptInfo *innerrel,
959                                          List *restrictlist,
960                                          JoinType jointype,
961                                          SpecialJoinInfo *sjinfo)
962 {
963         bool            isouterjoin;
964         List       *hashclauses;
965         ListCell   *l;
966
967         /*
968          * Hashjoin only supports inner, left, semi, and anti joins.
969          */
970         switch (jointype)
971         {
972                 case JOIN_INNER:
973                 case JOIN_SEMI:
974                 case JOIN_UNIQUE_OUTER:
975                 case JOIN_UNIQUE_INNER:
976                         isouterjoin = false;
977                         break;
978                 case JOIN_LEFT:
979                 case JOIN_ANTI:
980                         isouterjoin = true;
981                         break;
982                 default:
983                         return;
984         }
985
986         /*
987          * We need to build only one hashpath for any given pair of outer and
988          * inner relations; all of the hashable clauses will be used as keys.
989          *
990          * Scan the join's restrictinfo list to find hashjoinable clauses that are
991          * usable with this pair of sub-relations.
992          */
993         hashclauses = NIL;
994         foreach(l, restrictlist)
995         {
996                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
997
998                 /*
999                  * If processing an outer join, only use its own join clauses for
1000                  * hashing.  For inner joins we need not be so picky.
1001                  */
1002                 if (isouterjoin && restrictinfo->is_pushed_down)
1003                         continue;
1004
1005                 if (!restrictinfo->can_join ||
1006                         restrictinfo->hashjoinoperator == InvalidOid)
1007                         continue;                       /* not hashjoinable */
1008
1009                 /*
1010                  * Check if clause has the form "outer op inner" or "inner op outer".
1011                  */
1012                 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1013                         continue;                       /* no good for these input relations */
1014
1015                 hashclauses = lappend(hashclauses, restrictinfo);
1016         }
1017
1018         /* If we found any usable hashclauses, make a path */
1019         if (hashclauses)
1020         {
1021                 /*
1022                  * We consider both the cheapest-total-cost and cheapest-startup-cost
1023                  * outer paths.  There's no need to consider any but the
1024                  * cheapest-total-cost inner path, however.
1025                  */
1026                 Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
1027                 Path       *cheapest_total_outer = outerrel->cheapest_total_path;
1028                 Path       *cheapest_total_inner = innerrel->cheapest_total_path;
1029
1030                 /* Unique-ify if need be */
1031                 if (jointype == JOIN_UNIQUE_OUTER)
1032                 {
1033                         cheapest_total_outer = (Path *)
1034                                 create_unique_path(root, outerrel,
1035                                                                    cheapest_total_outer, sjinfo);
1036                         Assert(cheapest_total_outer);
1037                         cheapest_startup_outer = cheapest_total_outer;
1038                         jointype = JOIN_INNER;
1039                 }
1040                 else if (jointype == JOIN_UNIQUE_INNER)
1041                 {
1042                         cheapest_total_inner = (Path *)
1043                                 create_unique_path(root, innerrel,
1044                                                                    cheapest_total_inner, sjinfo);
1045                         Assert(cheapest_total_inner);
1046                         jointype = JOIN_INNER;
1047                 }
1048
1049                 add_path(joinrel, (Path *)
1050                                  create_hashjoin_path(root,
1051                                                                           joinrel,
1052                                                                           jointype,
1053                                                                           sjinfo,
1054                                                                           cheapest_total_outer,
1055                                                                           cheapest_total_inner,
1056                                                                           restrictlist,
1057                                                                           hashclauses));
1058                 if (cheapest_startup_outer != cheapest_total_outer)
1059                         add_path(joinrel, (Path *)
1060                                          create_hashjoin_path(root,
1061                                                                                   joinrel,
1062                                                                                   jointype,
1063                                                                                   sjinfo,
1064                                                                                   cheapest_startup_outer,
1065                                                                                   cheapest_total_inner,
1066                                                                                   restrictlist,
1067                                                                                   hashclauses));
1068         }
1069 }
1070
1071 /*
1072  * best_appendrel_indexscan
1073  *        Finds the best available set of inner indexscans for a nestloop join
1074  *        with the given append relation on the inside and the given outer_rel
1075  *        outside.      Returns an AppendPath comprising the best inner scans, or
1076  *        NULL if there are no possible inner indexscans.
1077  *
1078  * Note that we currently consider only cheapest-total-cost.  It's not
1079  * very clear what cheapest-startup-cost might mean for an AppendPath.
1080  */
1081 static Path *
1082 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
1083                                                  RelOptInfo *outer_rel, JoinType jointype)
1084 {
1085         int                     parentRTindex = rel->relid;
1086         List       *append_paths = NIL;
1087         bool            found_indexscan = false;
1088         ListCell   *l;
1089
1090         foreach(l, root->append_rel_list)
1091         {
1092                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
1093                 int                     childRTindex;
1094                 RelOptInfo *childrel;
1095                 Path       *index_cheapest_startup;
1096                 Path       *index_cheapest_total;
1097
1098                 /* append_rel_list contains all append rels; ignore others */
1099                 if (appinfo->parent_relid != parentRTindex)
1100                         continue;
1101
1102                 childRTindex = appinfo->child_relid;
1103                 childrel = find_base_rel(root, childRTindex);
1104                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
1105
1106                 /*
1107                  * Check to see if child was rejected by constraint exclusion. If so,
1108                  * it will have a cheapest_total_path that's a "dummy" path.
1109                  */
1110                 if (IS_DUMMY_PATH(childrel->cheapest_total_path))
1111                         continue;                       /* OK, we can ignore it */
1112
1113                 /*
1114                  * Get the best innerjoin indexpaths (if any) for this child rel.
1115                  */
1116                 best_inner_indexscan(root, childrel, outer_rel, jointype,
1117                                                          &index_cheapest_startup, &index_cheapest_total);
1118
1119                 /*
1120                  * If no luck on an indexpath for this rel, we'll still consider an
1121                  * Append substituting the cheapest-total inner path.  However we must
1122                  * find at least one indexpath, else there's not going to be any
1123                  * improvement over the base path for the appendrel.
1124                  */
1125                 if (index_cheapest_total)
1126                         found_indexscan = true;
1127                 else
1128                         index_cheapest_total = childrel->cheapest_total_path;
1129
1130                 append_paths = lappend(append_paths, index_cheapest_total);
1131         }
1132
1133         if (!found_indexscan)
1134                 return NULL;
1135
1136         /* Form and return the completed Append path. */
1137         return (Path *) create_append_path(rel, append_paths);
1138 }
1139
1140 /*
1141  * select_mergejoin_clauses
1142  *        Select mergejoin clauses that are usable for a particular join.
1143  *        Returns a list of RestrictInfo nodes for those clauses.
1144  *
1145  * We also mark each selected RestrictInfo to show which side is currently
1146  * being considered as outer.  These are transient markings that are only
1147  * good for the duration of the current add_paths_to_joinrel() call!
1148  *
1149  * We examine each restrictinfo clause known for the join to see
1150  * if it is mergejoinable and involves vars from the two sub-relations
1151  * currently of interest.
1152  */
1153 static List *
1154 select_mergejoin_clauses(PlannerInfo *root,
1155                                                  RelOptInfo *joinrel,
1156                                                  RelOptInfo *outerrel,
1157                                                  RelOptInfo *innerrel,
1158                                                  List *restrictlist,
1159                                                  JoinType jointype)
1160 {
1161         List       *result_list = NIL;
1162         bool            isouterjoin = IS_OUTER_JOIN(jointype);
1163         bool            have_nonmergeable_joinclause = false;
1164         ListCell   *l;
1165
1166         foreach(l, restrictlist)
1167         {
1168                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1169
1170                 /*
1171                  * If processing an outer join, only use its own join clauses in the
1172                  * merge.  For inner joins we can use pushed-down clauses too. (Note:
1173                  * we don't set have_nonmergeable_joinclause here because pushed-down
1174                  * clauses will become otherquals not joinquals.)
1175                  */
1176                 if (isouterjoin && restrictinfo->is_pushed_down)
1177                         continue;
1178
1179                 if (!restrictinfo->can_join ||
1180                         restrictinfo->mergeopfamilies == NIL)
1181                 {
1182                         have_nonmergeable_joinclause = true;
1183                         continue;                       /* not mergejoinable */
1184                 }
1185
1186                 /*
1187                  * Check if clause has the form "outer op inner" or "inner op outer".
1188                  */
1189                 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1190                 {
1191                         have_nonmergeable_joinclause = true;
1192                         continue;                       /* no good for these input relations */
1193                 }
1194
1195                 /*
1196                  * Insist that each side have a non-redundant eclass.  This
1197                  * restriction is needed because various bits of the planner expect
1198                  * that each clause in a merge be associatable with some pathkey in a
1199                  * canonical pathkey list, but redundant eclasses can't appear in
1200                  * canonical sort orderings.  (XXX it might be worth relaxing this,
1201                  * but not enough time to address it for 8.3.)
1202                  *
1203                  * Note: it would be bad if this condition failed for an otherwise
1204                  * mergejoinable FULL JOIN clause, since that would result in
1205                  * undesirable planner failure.  I believe that is not possible
1206                  * however; a variable involved in a full join could only appear in
1207                  * below_outer_join eclasses, which aren't considered redundant.
1208                  *
1209                  * This case *can* happen for left/right join clauses: the outer-side
1210                  * variable could be equated to a constant.  Because we will propagate
1211                  * that constant across the join clause, the loss of ability to do a
1212                  * mergejoin is not really all that big a deal, and so it's not clear
1213                  * that improving this is important.
1214                  */
1215                 cache_mergeclause_eclasses(root, restrictinfo);
1216
1217                 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
1218                         EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
1219                 {
1220                         have_nonmergeable_joinclause = true;
1221                         continue;                       /* can't handle redundant eclasses */
1222                 }
1223
1224                 result_list = lappend(result_list, restrictinfo);
1225         }
1226
1227         /*
1228          * If it is a right/full join then *all* the explicit join clauses must be
1229          * mergejoinable, else the executor will fail. If we are asked for a right
1230          * join then just return NIL to indicate no mergejoin is possible (we can
1231          * handle it as a left join instead). If we are asked for a full join then
1232          * emit an error, because there is no fallback.
1233          */
1234         if (have_nonmergeable_joinclause)
1235         {
1236                 switch (jointype)
1237                 {
1238                         case JOIN_RIGHT:
1239                                 return NIL;             /* not mergejoinable */
1240                         case JOIN_FULL:
1241                                 ereport(ERROR,
1242                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1243                                                  errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
1244                                 break;
1245                         default:
1246                                 /* otherwise, it's OK to have nonmergeable join quals */
1247                                 break;
1248                 }
1249         }
1250
1251         return result_list;
1252 }