]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/joinpath.c
Change the planner-to-executor API so that the planner tells the executor
[postgresql] / src / backend / optimizer / path / joinpath.c
1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  *        Routines to find all possible paths for processing a set of joins
5  *
6  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include <math.h>
18
19 #include "access/skey.h"
20 #include "optimizer/cost.h"
21 #include "optimizer/pathnode.h"
22 #include "optimizer/paths.h"
23
24
25 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
26                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
27                                          List *restrictlist, List *mergeclause_list,
28                                          JoinType jointype);
29 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
30                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
31                                          List *restrictlist, List *mergeclause_list,
32                                          JoinType jointype);
33 static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
34                                          RelOptInfo *outerrel, RelOptInfo *innerrel,
35                                          List *restrictlist, JoinType jointype);
36 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
37                                                  RelOptInfo *outer_rel, JoinType jointype);
38 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
39                                                  RelOptInfo *outerrel,
40                                                  RelOptInfo *innerrel,
41                                                  List *restrictlist,
42                                                  JoinType jointype);
43 static void build_mergejoin_strat_arrays(List *mergeclauses,
44                                                                                  Oid **mergefamilies,
45                                                                                  int **mergestrategies,
46                                                                                  bool **mergenullsfirst);
47
48
49 /*
50  * add_paths_to_joinrel
51  *        Given a join relation and two component rels from which it can be made,
52  *        consider all possible paths that use the two component rels as outer
53  *        and inner rel respectively.  Add these paths to the join rel's pathlist
54  *        if they survive comparison with other paths (and remove any existing
55  *        paths that are dominated by these paths).
56  *
57  * Modifies the pathlist field of the joinrel node to contain the best
58  * paths found so far.
59  */
60 void
61 add_paths_to_joinrel(PlannerInfo *root,
62                                          RelOptInfo *joinrel,
63                                          RelOptInfo *outerrel,
64                                          RelOptInfo *innerrel,
65                                          JoinType jointype,
66                                          List *restrictlist)
67 {
68         List       *mergeclause_list = NIL;
69
70         /*
71          * Find potential mergejoin clauses.  We can skip this if we are not
72          * interested in doing a mergejoin.  However, mergejoin is currently our
73          * only way of implementing full outer joins, so override mergejoin
74          * disable if it's a full join.
75          */
76         if (enable_mergejoin || jointype == JOIN_FULL)
77                 mergeclause_list = select_mergejoin_clauses(joinrel,
78                                                                                                         outerrel,
79                                                                                                         innerrel,
80                                                                                                         restrictlist,
81                                                                                                         jointype);
82
83         /*
84          * 1. Consider mergejoin paths where both relations must be explicitly
85          * sorted.
86          */
87         sort_inner_and_outer(root, joinrel, outerrel, innerrel,
88                                                  restrictlist, mergeclause_list, jointype);
89
90         /*
91          * 2. Consider paths where the outer relation need not be explicitly
92          * sorted. This includes both nestloops and mergejoins where the outer
93          * path is already ordered.
94          */
95         match_unsorted_outer(root, joinrel, outerrel, innerrel,
96                                                  restrictlist, mergeclause_list, jointype);
97
98 #ifdef NOT_USED
99
100         /*
101          * 3. Consider paths where the inner relation need not be explicitly
102          * sorted.      This includes mergejoins only (nestloops were already built in
103          * match_unsorted_outer).
104          *
105          * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
106          * significant difference between the inner and outer side of a mergejoin,
107          * so match_unsorted_inner creates no paths that aren't equivalent to
108          * those made by match_unsorted_outer when add_paths_to_joinrel() is
109          * invoked with the two rels given in the other order.
110          */
111         match_unsorted_inner(root, joinrel, outerrel, innerrel,
112                                                  restrictlist, mergeclause_list, jointype);
113 #endif
114
115         /*
116          * 4. Consider paths where both outer and inner relations must be hashed
117          * before being joined.
118          */
119         if (enable_hashjoin)
120                 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
121                                                          restrictlist, jointype);
122 }
123
124 /*
125  * sort_inner_and_outer
126  *        Create mergejoin join paths by explicitly sorting both the outer and
127  *        inner join relations on each available merge ordering.
128  *
129  * 'joinrel' is the join relation
130  * 'outerrel' is the outer join relation
131  * 'innerrel' is the inner join relation
132  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
133  *              clauses that apply to this join
134  * 'mergeclause_list' is a list of RestrictInfo nodes for available
135  *              mergejoin clauses in this join
136  * 'jointype' is the type of join to do
137  */
138 static void
139 sort_inner_and_outer(PlannerInfo *root,
140                                          RelOptInfo *joinrel,
141                                          RelOptInfo *outerrel,
142                                          RelOptInfo *innerrel,
143                                          List *restrictlist,
144                                          List *mergeclause_list,
145                                          JoinType jointype)
146 {
147         bool            useallclauses;
148         Path       *outer_path;
149         Path       *inner_path;
150         List       *all_pathkeys;
151         ListCell   *l;
152
153         /*
154          * If we are doing a right or full join, we must use *all* the
155          * mergeclauses as join clauses, else we will not have a valid plan.
156          */
157         switch (jointype)
158         {
159                 case JOIN_INNER:
160                 case JOIN_LEFT:
161                 case JOIN_IN:
162                 case JOIN_UNIQUE_OUTER:
163                 case JOIN_UNIQUE_INNER:
164                         useallclauses = false;
165                         break;
166                 case JOIN_RIGHT:
167                 case JOIN_FULL:
168                         useallclauses = true;
169                         break;
170                 default:
171                         elog(ERROR, "unrecognized join type: %d",
172                                  (int) jointype);
173                         useallclauses = false;          /* keep compiler quiet */
174                         break;
175         }
176
177         /*
178          * We only consider the cheapest-total-cost input paths, since we are
179          * assuming here that a sort is required.  We will consider
180          * cheapest-startup-cost input paths later, and only if they don't need a
181          * sort.
182          *
183          * If unique-ification is requested, do it and then handle as a plain
184          * inner join.
185          */
186         outer_path = outerrel->cheapest_total_path;
187         inner_path = innerrel->cheapest_total_path;
188         if (jointype == JOIN_UNIQUE_OUTER)
189         {
190                 outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
191                 jointype = JOIN_INNER;
192         }
193         else if (jointype == JOIN_UNIQUE_INNER)
194         {
195                 inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
196                 jointype = JOIN_INNER;
197         }
198
199         /*
200          * Each possible ordering of the available mergejoin clauses will generate
201          * a differently-sorted result path at essentially the same cost.  We have
202          * no basis for choosing one over another at this level of joining, but
203          * some sort orders may be more useful than others for higher-level
204          * mergejoins, so it's worth considering multiple orderings.
205          *
206          * Actually, it's not quite true that every mergeclause ordering will
207          * generate a different path order, because some of the clauses may be
208          * redundant.  Therefore, what we do is convert the mergeclause list to a
209          * list of canonical pathkeys, and then consider different orderings of
210          * the pathkeys.
211          *
212          * Generating a path for *every* permutation of the pathkeys doesn't seem
213          * like a winning strategy; the cost in planning time is too high. For
214          * now, we generate one path for each pathkey, listing that pathkey first
215          * and the rest in random order.  This should allow at least a one-clause
216          * mergejoin without re-sorting against any other possible mergejoin
217          * partner path.  But if we've not guessed the right ordering of secondary
218          * keys, we may end up evaluating clauses as qpquals when they could have
219          * been done as mergeclauses. We need to figure out a better way.  (Two
220          * possible approaches: look at all the relevant index relations to
221          * suggest plausible sort orders, or make just one output path and somehow
222          * mark it as having a sort-order that can be rearranged freely.)
223          */
224         all_pathkeys = make_pathkeys_for_mergeclauses(root,
225                                                                                                   mergeclause_list,
226                                                                                                   outerrel);
227
228         foreach(l, all_pathkeys)
229         {
230                 List       *front_pathkey = (List *) lfirst(l);
231                 List       *cur_pathkeys;
232                 List       *cur_mergeclauses;
233                 Oid                *mergefamilies;
234                 int                *mergestrategies;
235                 bool       *mergenullsfirst;
236                 List       *outerkeys;
237                 List       *innerkeys;
238                 List       *merge_pathkeys;
239
240                 /* Make a pathkey list with this guy first. */
241                 if (l != list_head(all_pathkeys))
242                         cur_pathkeys = lcons(front_pathkey,
243                                                                  list_delete_ptr(list_copy(all_pathkeys),
244                                                                                                  front_pathkey));
245                 else
246                         cur_pathkeys = all_pathkeys;            /* no work at first one... */
247
248                 /*
249                  * Select mergeclause(s) that match this sort ordering.  If we had
250                  * redundant merge clauses then we will get a subset of the original
251                  * clause list.  There had better be some match, however...
252                  */
253                 cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
254                                                                                                                   cur_pathkeys,
255                                                                                                                   mergeclause_list);
256                 Assert(cur_mergeclauses != NIL);
257
258                 /* Forget it if can't use all the clauses in right/full join */
259                 if (useallclauses &&
260                         list_length(cur_mergeclauses) != list_length(mergeclause_list))
261                         continue;
262
263                 /*
264                  * Build sort pathkeys for both sides.
265                  *
266                  * Note: it's possible that the cheapest paths will already be sorted
267                  * properly.  create_mergejoin_path will detect that case and suppress
268                  * an explicit sort step, so we needn't do so here.
269                  */
270                 outerkeys = make_pathkeys_for_mergeclauses(root,
271                                                                                                    cur_mergeclauses,
272                                                                                                    outerrel);
273                 innerkeys = make_pathkeys_for_mergeclauses(root,
274                                                                                                    cur_mergeclauses,
275                                                                                                    innerrel);
276                 /* Build pathkeys representing output sort order. */
277                 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
278                                                                                          outerkeys);
279
280                 /* Build opfamily info for execution */
281                 build_mergejoin_strat_arrays(cur_mergeclauses,
282                                                                          &mergefamilies,
283                                                                          &mergestrategies,
284                                                                          &mergenullsfirst);
285
286                 /*
287                  * And now we can make the path.
288                  */
289                 add_path(joinrel, (Path *)
290                                  create_mergejoin_path(root,
291                                                                            joinrel,
292                                                                            jointype,
293                                                                            outer_path,
294                                                                            inner_path,
295                                                                            restrictlist,
296                                                                            merge_pathkeys,
297                                                                            cur_mergeclauses,
298                                                                            mergefamilies,
299                                                                            mergestrategies,
300                                                                            mergenullsfirst,
301                                                                            outerkeys,
302                                                                            innerkeys));
303         }
304 }
305
306 /*
307  * match_unsorted_outer
308  *        Creates possible join paths for processing a single join relation
309  *        'joinrel' by employing either iterative substitution or
310  *        mergejoining on each of its possible outer paths (considering
311  *        only outer paths that are already ordered well enough for merging).
312  *
313  * We always generate a nestloop path for each available outer path.
314  * In fact we may generate as many as four: one on the cheapest-total-cost
315  * inner path, one on the same with materialization, one on the
316  * cheapest-startup-cost inner path (if different),
317  * and one on the best inner-indexscan path (if any).
318  *
319  * We also consider mergejoins if mergejoin clauses are available.      We have
320  * two ways to generate the inner path for a mergejoin: sort the cheapest
321  * inner path, or use an inner path that is already suitably ordered for the
322  * merge.  If we have several mergeclauses, it could be that there is no inner
323  * path (or only a very expensive one) for the full list of mergeclauses, but
324  * better paths exist if we truncate the mergeclause list (thereby discarding
325  * some sort key requirements).  So, we consider truncations of the
326  * mergeclause list as well as the full list.  (Ideally we'd consider all
327  * subsets of the mergeclause list, but that seems way too expensive.)
328  *
329  * 'joinrel' is the join relation
330  * 'outerrel' is the outer join relation
331  * 'innerrel' is the inner join relation
332  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
333  *              clauses that apply to this join
334  * 'mergeclause_list' is a list of RestrictInfo nodes for available
335  *              mergejoin clauses in this join
336  * 'jointype' is the type of join to do
337  */
338 static void
339 match_unsorted_outer(PlannerInfo *root,
340                                          RelOptInfo *joinrel,
341                                          RelOptInfo *outerrel,
342                                          RelOptInfo *innerrel,
343                                          List *restrictlist,
344                                          List *mergeclause_list,
345                                          JoinType jointype)
346 {
347         JoinType        save_jointype = jointype;
348         bool            nestjoinOK;
349         bool            useallclauses;
350         Path       *inner_cheapest_startup = innerrel->cheapest_startup_path;
351         Path       *inner_cheapest_total = innerrel->cheapest_total_path;
352         Path       *matpath = NULL;
353         Path       *bestinnerjoin = NULL;
354         ListCell   *l;
355
356         /*
357          * Nestloop only supports inner, left, and IN joins.  Also, if we are
358          * doing a right or full join, we must use *all* the mergeclauses as join
359          * clauses, else we will not have a valid plan.  (Although these two flags
360          * are currently inverses, keep them separate for clarity and possible
361          * future changes.)
362          */
363         switch (jointype)
364         {
365                 case JOIN_INNER:
366                 case JOIN_LEFT:
367                 case JOIN_IN:
368                 case JOIN_UNIQUE_OUTER:
369                 case JOIN_UNIQUE_INNER:
370                         nestjoinOK = true;
371                         useallclauses = false;
372                         break;
373                 case JOIN_RIGHT:
374                 case JOIN_FULL:
375                         nestjoinOK = false;
376                         useallclauses = true;
377                         break;
378                 default:
379                         elog(ERROR, "unrecognized join type: %d",
380                                  (int) jointype);
381                         nestjoinOK = false; /* keep compiler quiet */
382                         useallclauses = false;
383                         break;
384         }
385
386         /*
387          * If we need to unique-ify the inner path, we will consider only the
388          * cheapest inner.
389          */
390         if (jointype == JOIN_UNIQUE_INNER)
391         {
392                 inner_cheapest_total = (Path *)
393                         create_unique_path(root, innerrel, inner_cheapest_total);
394                 inner_cheapest_startup = inner_cheapest_total;
395                 jointype = JOIN_INNER;
396         }
397         else if (nestjoinOK)
398         {
399                 /*
400                  * If the cheapest inner path is a join or seqscan, we should consider
401                  * materializing it.  (This is a heuristic: we could consider it
402                  * always, but for inner indexscans it's probably a waste of time.)
403                  */
404                 if (!(IsA(inner_cheapest_total, IndexPath) ||
405                           IsA(inner_cheapest_total, BitmapHeapPath) ||
406                           IsA(inner_cheapest_total, TidPath)))
407                         matpath = (Path *)
408                                 create_material_path(innerrel, inner_cheapest_total);
409
410                 /*
411                  * Get the best innerjoin indexpath (if any) for this outer rel. It's
412                  * the same for all outer paths.
413                  */
414                 if (innerrel->reloptkind != RELOPT_JOINREL)
415                 {
416                         if (IsA(inner_cheapest_total, AppendPath))
417                                 bestinnerjoin = best_appendrel_indexscan(root, innerrel,
418                                                                                                                  outerrel, jointype);
419                         else if (innerrel->rtekind == RTE_RELATION)
420                                 bestinnerjoin = best_inner_indexscan(root, innerrel,
421                                                                                                          outerrel, jointype);
422                 }
423         }
424
425         foreach(l, outerrel->pathlist)
426         {
427                 Path       *outerpath = (Path *) lfirst(l);
428                 List       *merge_pathkeys;
429                 List       *mergeclauses;
430                 Oid                *mergefamilies;
431                 int                *mergestrategies;
432                 bool       *mergenullsfirst;
433                 List       *innersortkeys;
434                 List       *trialsortkeys;
435                 Path       *cheapest_startup_inner;
436                 Path       *cheapest_total_inner;
437                 int                     num_sortkeys;
438                 int                     sortkeycnt;
439
440                 /*
441                  * If we need to unique-ify the outer path, it's pointless to consider
442                  * any but the cheapest outer.
443                  */
444                 if (save_jointype == JOIN_UNIQUE_OUTER)
445                 {
446                         if (outerpath != outerrel->cheapest_total_path)
447                                 continue;
448                         outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
449                         jointype = JOIN_INNER;
450                 }
451
452                 /*
453                  * The result will have this sort order (even if it is implemented as
454                  * a nestloop, and even if some of the mergeclauses are implemented by
455                  * qpquals rather than as true mergeclauses):
456                  */
457                 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
458                                                                                          outerpath->pathkeys);
459
460                 if (nestjoinOK)
461                 {
462                         /*
463                          * Always consider a nestloop join with this outer and
464                          * cheapest-total-cost inner.  When appropriate, also consider
465                          * using the materialized form of the cheapest inner, the
466                          * cheapest-startup-cost inner path, and the best innerjoin
467                          * indexpath.
468                          */
469                         add_path(joinrel, (Path *)
470                                          create_nestloop_path(root,
471                                                                                   joinrel,
472                                                                                   jointype,
473                                                                                   outerpath,
474                                                                                   inner_cheapest_total,
475                                                                                   restrictlist,
476                                                                                   merge_pathkeys));
477                         if (matpath != NULL)
478                                 add_path(joinrel, (Path *)
479                                                  create_nestloop_path(root,
480                                                                                           joinrel,
481                                                                                           jointype,
482                                                                                           outerpath,
483                                                                                           matpath,
484                                                                                           restrictlist,
485                                                                                           merge_pathkeys));
486                         if (inner_cheapest_startup != inner_cheapest_total)
487                                 add_path(joinrel, (Path *)
488                                                  create_nestloop_path(root,
489                                                                                           joinrel,
490                                                                                           jointype,
491                                                                                           outerpath,
492                                                                                           inner_cheapest_startup,
493                                                                                           restrictlist,
494                                                                                           merge_pathkeys));
495                         if (bestinnerjoin != NULL)
496                                 add_path(joinrel, (Path *)
497                                                  create_nestloop_path(root,
498                                                                                           joinrel,
499                                                                                           jointype,
500                                                                                           outerpath,
501                                                                                           bestinnerjoin,
502                                                                                           restrictlist,
503                                                                                           merge_pathkeys));
504                 }
505
506                 /* Can't do anything else if outer path needs to be unique'd */
507                 if (save_jointype == JOIN_UNIQUE_OUTER)
508                         continue;
509
510                 /* Look for useful mergeclauses (if any) */
511                 mergeclauses = find_mergeclauses_for_pathkeys(root,
512                                                                                                           outerpath->pathkeys,
513                                                                                                           mergeclause_list);
514
515                 /*
516                  * Done with this outer path if no chance for a mergejoin.
517                  *
518                  * Special corner case: for "x FULL JOIN y ON true", there will be no
519                  * join clauses at all.  Ordinarily we'd generate a clauseless
520                  * nestloop path, but since mergejoin is our only join type that
521                  * supports FULL JOIN, it's necessary to generate a clauseless
522                  * mergejoin path instead.
523                  */
524                 if (mergeclauses == NIL)
525                 {
526                         if (jointype == JOIN_FULL)
527                                  /* okay to try for mergejoin */ ;
528                         else
529                                 continue;
530                 }
531                 if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
532                         continue;
533
534                 /* Compute the required ordering of the inner path */
535                 innersortkeys = make_pathkeys_for_mergeclauses(root,
536                                                                                                            mergeclauses,
537                                                                                                            innerrel);
538
539                 /* Build opfamily info for execution */
540                 build_mergejoin_strat_arrays(mergeclauses,
541                                                                          &mergefamilies,
542                                                                          &mergestrategies,
543                                                                          &mergenullsfirst);
544
545                 /*
546                  * Generate a mergejoin on the basis of sorting the cheapest inner.
547                  * Since a sort will be needed, only cheapest total cost matters. (But
548                  * create_mergejoin_path will do the right thing if
549                  * inner_cheapest_total is already correctly sorted.)
550                  */
551                 add_path(joinrel, (Path *)
552                                  create_mergejoin_path(root,
553                                                                            joinrel,
554                                                                            jointype,
555                                                                            outerpath,
556                                                                            inner_cheapest_total,
557                                                                            restrictlist,
558                                                                            merge_pathkeys,
559                                                                            mergeclauses,
560                                                                            mergefamilies,
561                                                                            mergestrategies,
562                                                                            mergenullsfirst,
563                                                                            NIL,
564                                                                            innersortkeys));
565
566                 /* Can't do anything else if inner path needs to be unique'd */
567                 if (save_jointype == JOIN_UNIQUE_INNER)
568                         continue;
569
570                 /*
571                  * Look for presorted inner paths that satisfy the innersortkey list
572                  * --- or any truncation thereof, if we are allowed to build a
573                  * mergejoin using a subset of the merge clauses.  Here, we consider
574                  * both cheap startup cost and cheap total cost.  We can ignore
575                  * inner_cheapest_total on the first iteration, since we already made
576                  * a path with it --- but not on later iterations with shorter sort
577                  * keys, because then we are considering a different situation, viz
578                  * using a simpler mergejoin to avoid a sort of the inner rel.
579                  */
580                 num_sortkeys = list_length(innersortkeys);
581                 if (num_sortkeys > 1 && !useallclauses)
582                         trialsortkeys = list_copy(innersortkeys);       /* need modifiable copy */
583                 else
584                         trialsortkeys = innersortkeys;          /* won't really truncate */
585                 cheapest_startup_inner = NULL;
586                 cheapest_total_inner = NULL;
587
588                 for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
589                 {
590                         Path       *innerpath;
591                         List       *newclauses = NIL;
592
593                         /*
594                          * Look for an inner path ordered well enough for the first
595                          * 'sortkeycnt' innersortkeys.  NB: trialsortkeys list is modified
596                          * destructively, which is why we made a copy...
597                          */
598                         trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
599                         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
600                                                                                                            trialsortkeys,
601                                                                                                            TOTAL_COST);
602                         if (innerpath != NULL &&
603                                 (innerpath != inner_cheapest_total ||
604                                  sortkeycnt < num_sortkeys) &&
605                                 (cheapest_total_inner == NULL ||
606                                  compare_path_costs(innerpath, cheapest_total_inner,
607                                                                         TOTAL_COST) < 0))
608                         {
609                                 /* Found a cheap (or even-cheaper) sorted path */
610                                 /* Select the right mergeclauses, if we didn't already */
611                                 if (sortkeycnt < num_sortkeys)
612                                 {
613                                         newclauses =
614                                                 find_mergeclauses_for_pathkeys(root,
615                                                                                                            trialsortkeys,
616                                                                                                            mergeclauses);
617                                         Assert(newclauses != NIL);
618                                 }
619                                 else
620                                         newclauses = mergeclauses;
621
622                                 /* Build opfamily info for execution */
623                                 build_mergejoin_strat_arrays(newclauses,
624                                                                                          &mergefamilies,
625                                                                                          &mergestrategies,
626                                                                                          &mergenullsfirst);
627
628                                 add_path(joinrel, (Path *)
629                                                  create_mergejoin_path(root,
630                                                                                            joinrel,
631                                                                                            jointype,
632                                                                                            outerpath,
633                                                                                            innerpath,
634                                                                                            restrictlist,
635                                                                                            merge_pathkeys,
636                                                                                            newclauses,
637                                                                                            mergefamilies,
638                                                                                            mergestrategies,
639                                                                                            mergenullsfirst,
640                                                                                            NIL,
641                                                                                            NIL));
642                                 cheapest_total_inner = innerpath;
643                         }
644                         /* Same on the basis of cheapest startup cost ... */
645                         innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
646                                                                                                            trialsortkeys,
647                                                                                                            STARTUP_COST);
648                         if (innerpath != NULL &&
649                                 (innerpath != inner_cheapest_total ||
650                                  sortkeycnt < num_sortkeys) &&
651                                 (cheapest_startup_inner == NULL ||
652                                  compare_path_costs(innerpath, cheapest_startup_inner,
653                                                                         STARTUP_COST) < 0))
654                         {
655                                 /* Found a cheap (or even-cheaper) sorted path */
656                                 if (innerpath != cheapest_total_inner)
657                                 {
658                                         /*
659                                          * Avoid rebuilding clause list if we already made one;
660                                          * saves memory in big join trees...
661                                          */
662                                         if (newclauses == NIL)
663                                         {
664                                                 if (sortkeycnt < num_sortkeys)
665                                                 {
666                                                         newclauses =
667                                                                 find_mergeclauses_for_pathkeys(root,
668                                                                                                                            trialsortkeys,
669                                                                                                                            mergeclauses);
670                                                         Assert(newclauses != NIL);
671                                                 }
672                                                 else
673                                                         newclauses = mergeclauses;
674                                         }
675
676                                         /* Build opfamily info for execution */
677                                         build_mergejoin_strat_arrays(newclauses,
678                                                                                                  &mergefamilies,
679                                                                                                  &mergestrategies,
680                                                                                                  &mergenullsfirst);
681
682                                         add_path(joinrel, (Path *)
683                                                          create_mergejoin_path(root,
684                                                                                                    joinrel,
685                                                                                                    jointype,
686                                                                                                    outerpath,
687                                                                                                    innerpath,
688                                                                                                    restrictlist,
689                                                                                                    merge_pathkeys,
690                                                                                                    newclauses,
691                                                                                                    mergefamilies,
692                                                                                                    mergestrategies,
693                                                                                                    mergenullsfirst,
694                                                                                                    NIL,
695                                                                                                    NIL));
696                                 }
697                                 cheapest_startup_inner = innerpath;
698                         }
699
700                         /*
701                          * Don't consider truncated sortkeys if we need all clauses.
702                          */
703                         if (useallclauses)
704                                 break;
705                 }
706         }
707 }
708
709 /*
710  * hash_inner_and_outer
711  *        Create hashjoin join paths by explicitly hashing both the outer and
712  *        inner keys of each available hash clause.
713  *
714  * 'joinrel' is the join relation
715  * 'outerrel' is the outer join relation
716  * 'innerrel' is the inner join relation
717  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
718  *              clauses that apply to this join
719  * 'jointype' is the type of join to do
720  */
721 static void
722 hash_inner_and_outer(PlannerInfo *root,
723                                          RelOptInfo *joinrel,
724                                          RelOptInfo *outerrel,
725                                          RelOptInfo *innerrel,
726                                          List *restrictlist,
727                                          JoinType jointype)
728 {
729         bool            isouterjoin;
730         List       *hashclauses;
731         ListCell   *l;
732
733         /*
734          * Hashjoin only supports inner, left, and IN joins.
735          */
736         switch (jointype)
737         {
738                 case JOIN_INNER:
739                 case JOIN_IN:
740                 case JOIN_UNIQUE_OUTER:
741                 case JOIN_UNIQUE_INNER:
742                         isouterjoin = false;
743                         break;
744                 case JOIN_LEFT:
745                         isouterjoin = true;
746                         break;
747                 default:
748                         return;
749         }
750
751         /*
752          * We need to build only one hashpath for any given pair of outer and
753          * inner relations; all of the hashable clauses will be used as keys.
754          *
755          * Scan the join's restrictinfo list to find hashjoinable clauses that are
756          * usable with this pair of sub-relations.
757          */
758         hashclauses = NIL;
759         foreach(l, restrictlist)
760         {
761                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
762
763                 if (!restrictinfo->can_join ||
764                         restrictinfo->hashjoinoperator == InvalidOid)
765                         continue;                       /* not hashjoinable */
766
767                 /*
768                  * If processing an outer join, only use its own join clauses for
769                  * hashing.  For inner joins we need not be so picky.
770                  */
771                 if (isouterjoin && restrictinfo->is_pushed_down)
772                         continue;
773
774                 /*
775                  * Check if clause is usable with these input rels.
776                  */
777                 if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
778                         bms_is_subset(restrictinfo->right_relids, innerrel->relids))
779                 {
780                         /* righthand side is inner */
781                 }
782                 else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
783                                  bms_is_subset(restrictinfo->right_relids, outerrel->relids))
784                 {
785                         /* lefthand side is inner */
786                 }
787                 else
788                         continue;                       /* no good for these input relations */
789
790                 hashclauses = lappend(hashclauses, restrictinfo);
791         }
792
793         /* If we found any usable hashclauses, make a path */
794         if (hashclauses)
795         {
796                 /*
797                  * We consider both the cheapest-total-cost and cheapest-startup-cost
798                  * outer paths.  There's no need to consider any but the
799                  * cheapest-total-cost inner path, however.
800                  */
801                 Path       *cheapest_startup_outer = outerrel->cheapest_startup_path;
802                 Path       *cheapest_total_outer = outerrel->cheapest_total_path;
803                 Path       *cheapest_total_inner = innerrel->cheapest_total_path;
804
805                 /* Unique-ify if need be */
806                 if (jointype == JOIN_UNIQUE_OUTER)
807                 {
808                         cheapest_total_outer = (Path *)
809                                 create_unique_path(root, outerrel, cheapest_total_outer);
810                         cheapest_startup_outer = cheapest_total_outer;
811                         jointype = JOIN_INNER;
812                 }
813                 else if (jointype == JOIN_UNIQUE_INNER)
814                 {
815                         cheapest_total_inner = (Path *)
816                                 create_unique_path(root, innerrel, cheapest_total_inner);
817                         jointype = JOIN_INNER;
818                 }
819
820                 add_path(joinrel, (Path *)
821                                  create_hashjoin_path(root,
822                                                                           joinrel,
823                                                                           jointype,
824                                                                           cheapest_total_outer,
825                                                                           cheapest_total_inner,
826                                                                           restrictlist,
827                                                                           hashclauses));
828                 if (cheapest_startup_outer != cheapest_total_outer)
829                         add_path(joinrel, (Path *)
830                                          create_hashjoin_path(root,
831                                                                                   joinrel,
832                                                                                   jointype,
833                                                                                   cheapest_startup_outer,
834                                                                                   cheapest_total_inner,
835                                                                                   restrictlist,
836                                                                                   hashclauses));
837         }
838 }
839
840 /*
841  * best_appendrel_indexscan
842  *        Finds the best available set of inner indexscans for a nestloop join
843  *        with the given append relation on the inside and the given outer_rel
844  *        outside.      Returns an AppendPath comprising the best inner scans, or
845  *        NULL if there are no possible inner indexscans.
846  */
847 static Path *
848 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
849                                                  RelOptInfo *outer_rel, JoinType jointype)
850 {
851         int                     parentRTindex = rel->relid;
852         List       *append_paths = NIL;
853         bool            found_indexscan = false;
854         ListCell   *l;
855
856         foreach(l, root->append_rel_list)
857         {
858                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
859                 int                     childRTindex;
860                 RelOptInfo *childrel;
861                 Path       *bestinnerjoin;
862
863                 /* append_rel_list contains all append rels; ignore others */
864                 if (appinfo->parent_relid != parentRTindex)
865                         continue;
866
867                 childRTindex = appinfo->child_relid;
868                 childrel = find_base_rel(root, childRTindex);
869                 Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
870
871                 /*
872                  * Check to see if child was rejected by constraint exclusion. If so,
873                  * it will have a cheapest_total_path that's an Append path with no
874                  * members (see set_plain_rel_pathlist).
875                  */
876                 if (IsA(childrel->cheapest_total_path, AppendPath) &&
877                         ((AppendPath *) childrel->cheapest_total_path)->subpaths == NIL)
878                         continue;                       /* OK, we can ignore it */
879
880                 /*
881                  * Get the best innerjoin indexpath (if any) for this child rel.
882                  */
883                 bestinnerjoin = best_inner_indexscan(root, childrel,
884                                                                                          outer_rel, jointype);
885
886                 /*
887                  * If no luck on an indexpath for this rel, we'll still consider an
888                  * Append substituting the cheapest-total inner path.  However we must
889                  * find at least one indexpath, else there's not going to be any
890                  * improvement over the base path for the appendrel.
891                  */
892                 if (bestinnerjoin)
893                         found_indexscan = true;
894                 else
895                         bestinnerjoin = childrel->cheapest_total_path;
896
897                 append_paths = lappend(append_paths, bestinnerjoin);
898         }
899
900         if (!found_indexscan)
901                 return NULL;
902
903         /* Form and return the completed Append path. */
904         return (Path *) create_append_path(rel, append_paths);
905 }
906
907 /*
908  * select_mergejoin_clauses
909  *        Select mergejoin clauses that are usable for a particular join.
910  *        Returns a list of RestrictInfo nodes for those clauses.
911  *
912  * We examine each restrictinfo clause known for the join to see
913  * if it is mergejoinable and involves vars from the two sub-relations
914  * currently of interest.
915  */
916 static List *
917 select_mergejoin_clauses(RelOptInfo *joinrel,
918                                                  RelOptInfo *outerrel,
919                                                  RelOptInfo *innerrel,
920                                                  List *restrictlist,
921                                                  JoinType jointype)
922 {
923         List       *result_list = NIL;
924         bool            isouterjoin = IS_OUTER_JOIN(jointype);
925         bool            have_nonmergeable_joinclause = false;
926         ListCell   *l;
927
928         foreach(l, restrictlist)
929         {
930                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
931
932                 /*
933                  * If processing an outer join, only use its own join clauses in the
934                  * merge.  For inner joins we can use pushed-down clauses too. (Note:
935                  * we don't set have_nonmergeable_joinclause here because pushed-down
936                  * clauses will become otherquals not joinquals.)
937                  */
938                 if (isouterjoin && restrictinfo->is_pushed_down)
939                         continue;
940
941                 if (!restrictinfo->can_join ||
942                         restrictinfo->mergejoinoperator == InvalidOid)
943                 {
944                         have_nonmergeable_joinclause = true;
945                         continue;                       /* not mergejoinable */
946                 }
947
948                 /*
949                  * Check if clause is usable with these input rels.  All the vars
950                  * needed on each side of the clause must be available from one or the
951                  * other of the input rels.
952                  */
953                 if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
954                         bms_is_subset(restrictinfo->right_relids, innerrel->relids))
955                 {
956                         /* righthand side is inner */
957                 }
958                 else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
959                                  bms_is_subset(restrictinfo->right_relids, outerrel->relids))
960                 {
961                         /* lefthand side is inner */
962                 }
963                 else
964                 {
965                         have_nonmergeable_joinclause = true;
966                         continue;                       /* no good for these input relations */
967                 }
968
969                 result_list = lcons(restrictinfo, result_list);
970         }
971
972         /*
973          * If it is a right/full join then *all* the explicit join clauses must be
974          * mergejoinable, else the executor will fail. If we are asked for a right
975          * join then just return NIL to indicate no mergejoin is possible (we can
976          * handle it as a left join instead). If we are asked for a full join then
977          * emit an error, because there is no fallback.
978          */
979         if (have_nonmergeable_joinclause)
980         {
981                 switch (jointype)
982                 {
983                         case JOIN_RIGHT:
984                                 return NIL;             /* not mergejoinable */
985                         case JOIN_FULL:
986                                 ereport(ERROR,
987                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
988                                                  errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
989                                 break;
990                         default:
991                                 /* otherwise, it's OK to have nonmergeable join quals */
992                                 break;
993                 }
994         }
995
996         return result_list;
997 }
998
999 /*
1000  * Temporary hack to build opfamily and strategy info needed for mergejoin
1001  * by the executor.  We need to rethink the planner's handling of merge
1002  * planning so that it can deal with multiple possible merge orders, but
1003  * that's not done yet.
1004  */
1005 static void
1006 build_mergejoin_strat_arrays(List *mergeclauses,
1007                                                          Oid **mergefamilies,
1008                                                          int **mergestrategies,
1009                                                          bool **mergenullsfirst)
1010 {
1011         int                     nClauses = list_length(mergeclauses);
1012         int                     i;
1013         ListCell   *l;
1014
1015         *mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid));
1016         *mergestrategies = (int *) palloc(nClauses * sizeof(int));
1017         *mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool));
1018
1019         i = 0;
1020         foreach(l, mergeclauses)
1021         {
1022                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1023
1024                 /*
1025                  * We do not need to worry about whether the mergeclause will be
1026                  * commuted at runtime --- it's the same opfamily either way.
1027                  */
1028                 (*mergefamilies)[i] = restrictinfo->mergeopfamily;
1029                 /*
1030                  * For the moment, strategy must always be LessThan --- see
1031                  * hack version of get_op_mergejoin_info
1032                  */
1033                 (*mergestrategies)[i] = BTLessStrategyNumber;
1034
1035                 /* And we only allow NULLS LAST, too */
1036                 (*mergenullsfirst)[i] = false;
1037
1038                 i++;
1039         }
1040 }