]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/joinpath.c
Major optimizer improvement for joining a large number of tables.
[postgresql] / src / backend / optimizer / path / joinpath.c
1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c--
4  *        Routines to find all possible paths for processing a set of joins
5  *
6  * Copyright (c) 1994, Regents of the University of California
7  *
8  *
9  * IDENTIFICATION
10  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.16 1999/02/09 03:51:19 momjian Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include <sys/types.h>
15 #include <math.h>
16
17 #include "postgres.h"
18
19 #include "storage/buf_internals.h"
20
21 #include "nodes/pg_list.h"
22 #include "nodes/relation.h"
23 #include "nodes/plannodes.h"
24
25 #include "optimizer/internal.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/pathnode.h"
28 #include "optimizer/keys.h"
29 #include "optimizer/cost.h"             /* for _enable_{hashjoin,
30                                                                  * _enable_mergejoin} */
31
32 static Path *best_innerjoin(List *join_paths, List *outer_relid);
33 static List *sort_inner_and_outer(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
34                                          List *mergeinfo_list);
35 static List *match_unsorted_outer(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
36                 List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin,
37                                          List *mergeinfo_list);
38 static List *match_unsorted_inner(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
39                                          List *innerpath_list, List *mergeinfo_list);
40 static bool EnoughMemoryForHashjoin(RelOptInfo * hashrel);
41 static List *hash_inner_and_outer(RelOptInfo * joinrel, RelOptInfo * outerrel, RelOptInfo * innerrel,
42                                          List *hashinfo_list);
43
44 /*
45  * find-all-join-paths--
46  *        Creates all possible ways to process joins for each of the join
47  *        relations in the list 'joinrels.'  Each unique path will be included
48  *        in the join relation's 'pathlist' field.
49  *
50  *        In postgres, n-way joins are handled left-only(permuting clauseless
51  *        joins doesn't usually win much).
52  *
53  *        if BushyPlanFlag is true, bushy tree plans will be generated
54  *
55  * 'joinrels' is the list of relation entries to be joined
56  *
57  * Modifies the pathlist field of the appropriate rel node to contain
58  * the unique join paths.
59  * If bushy trees are considered, may modify the relid field of the
60  * join rel nodes to flatten the lists.
61  *
62  * Returns nothing of interest. (?)
63  * It does a destructive modification.
64  */
65 void
66 find_all_join_paths(Query *root, List *joinrels)
67 {
68         List       *mergeinfo_list = NIL;
69         List       *hashinfo_list = NIL;
70         List       *temp_list = NIL;
71         List       *path = NIL;
72
73         while (joinrels != NIL)
74         {
75                 RelOptInfo *joinrel = (RelOptInfo *) lfirst(joinrels);
76                 List       *innerrelids;
77                 List       *outerrelids;
78                 RelOptInfo *innerrel;
79                 RelOptInfo *outerrel;
80                 Path       *bestinnerjoin;
81                 List       *pathlist = NIL;
82
83                 innerrelids = lsecond(joinrel->relids);
84                 outerrelids = lfirst(joinrel->relids);
85
86                 /*
87                  * base relation id is an integer and join relation relid is a
88                  * list of integers.
89                  */
90                 innerrel = (length(innerrelids) == 1) ?
91                         get_base_rel(root, lfirsti(innerrelids)) : get_join_rel(root, innerrelids);
92                 outerrel = (length(outerrelids) == 1) ?
93                         get_base_rel(root, lfirsti(outerrelids)) : get_join_rel(root, outerrelids);
94
95                 bestinnerjoin = best_innerjoin(innerrel->innerjoin,
96                                                                            outerrel->relids);
97                 if (_enable_mergejoin_)
98                 {
99                         mergeinfo_list = group_clauses_by_order(joinrel->restrictinfo,
100                                                                            lfirsti(innerrel->relids));
101                 }
102
103                 if (_enable_hashjoin_)
104                 {
105                         hashinfo_list = group_clauses_by_hashop(joinrel->restrictinfo,
106                                                                                 lfirsti(innerrel->relids));
107                 }
108
109                 /* need to flatten the relids list */
110                 joinrel->relids = intAppend(outerrelids, innerrelids);
111
112                 /*
113                  * 1. Consider mergejoin paths where both relations must be
114                  * explicitly sorted.
115                  */
116                 pathlist = sort_inner_and_outer(joinrel, outerrel,
117                                                                                 innerrel, mergeinfo_list);
118
119                 /*
120                  * 2. Consider paths where the outer relation need not be
121                  * explicitly sorted. This may include either nestloops and
122                  * mergejoins where the outer path is already ordered.
123                  */
124                 pathlist = add_pathlist(joinrel, pathlist,
125                                                  match_unsorted_outer(joinrel,
126                                                                                           outerrel,
127                                                                                           innerrel,
128                                                                                           outerrel->pathlist,
129                                                                                  (Path *) innerrel->cheapestpath,
130                                                                                           bestinnerjoin,
131                                                                                           mergeinfo_list));
132
133                 /*
134                  * 3. Consider paths where the inner relation need not be
135                  * explicitly sorted.  This may include nestloops and mergejoins
136                  * the actual nestloop nodes were constructed in
137                  * (match-unsorted-outer).
138                  */
139                 pathlist = add_pathlist(joinrel, pathlist,
140                                                  match_unsorted_inner(joinrel, outerrel,
141                                                                                           innerrel,
142                                                                                           innerrel->pathlist,
143                                                                                           mergeinfo_list));
144
145                 /*
146                  * 4. Consider paths where both outer and inner relations must be
147                  * hashed before being joined.
148                  */
149
150                 pathlist = add_pathlist(joinrel, pathlist,
151                                                  hash_inner_and_outer(joinrel, outerrel,
152                                                                                           innerrel, hashinfo_list));
153
154                 joinrel->pathlist = pathlist;
155
156                 /*
157                  * 'OuterJoinCost is only valid when calling
158                  * (match-unsorted-inner) with the same arguments as the previous
159                  * invokation of (match-unsorted-outer), so clear the field before
160                  * going on.
161                  */
162                 temp_list = innerrel->pathlist;
163                 foreach(path, temp_list)
164                 {
165                         /*
166                          * XXX
167                          *
168                          * This gross hack is to get around an apparent optimizer bug on
169                          * Sparc (or maybe it is a bug of ours?) that causes really
170                          * wierd behavior.
171                          */
172                         if (IsA_JoinPath(path))
173                                 ((Path *) lfirst(path))->outerjoincost = (Cost) 0;
174
175                         /*
176                          * do it iff it is a join path, which is not always true, esp
177                          * since the base level
178                          */
179                 }
180
181                 joinrels = lnext(joinrels);
182         }
183 }
184
185 /*
186  * best-innerjoin--
187  *        Find the cheapest index path that has already been identified by
188  *        (indexable_joinclauses) as being a possible inner path for the given
189  *        outer relation in a nestloop join.
190  *
191  * 'join-paths' is a list of join nodes
192  * 'outer-relid' is the relid of the outer join relation
193  *
194  * Returns the pathnode of the selected path.
195  */
196 static Path *
197 best_innerjoin(List *join_paths, List *outer_relids)
198 {
199         Path       *cheapest = (Path *) NULL;
200         List       *join_path;
201
202         foreach(join_path, join_paths)
203         {
204                 Path       *path = (Path *) lfirst(join_path);
205
206                 if (intMember(lfirsti(path->joinid), outer_relids)
207                         && ((cheapest == NULL ||
208                                  path_is_cheaper((Path *) lfirst(join_path), cheapest))))
209                 {
210
211                         cheapest = (Path *) lfirst(join_path);
212                 }
213         }
214         return cheapest;
215 }
216
217 /*
218  * sort-inner-and-outer--
219  *        Create mergejoin join paths by explicitly sorting both the outer and
220  *        inner join relations on each available merge ordering.
221  *
222  * 'joinrel' is the join relation
223  * 'outerrel' is the outer join relation
224  * 'innerrel' is the inner join relation
225  * 'mergeinfo-list' is a list of nodes containing info on(mergejoinable)
226  *                              clauses for joining the relations
227  *
228  * Returns a list of mergejoin paths.
229  */
230 static List *
231 sort_inner_and_outer(RelOptInfo * joinrel,
232                                          RelOptInfo * outerrel,
233                                          RelOptInfo * innerrel,
234                                          List *mergeinfo_list)
235 {
236         List       *ms_list = NIL;
237         MergeInfo          *xmergeinfo = (MergeInfo *) NULL;
238         MergePath  *temp_node = (MergePath *) NULL;
239         List       *i;
240         List       *outerkeys = NIL;
241         List       *innerkeys = NIL;
242         List       *merge_pathkeys = NIL;
243
244         foreach(i, mergeinfo_list)
245         {
246                 xmergeinfo = (MergeInfo *) lfirst(i);
247
248                 outerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
249                                                           outerrel->targetlist,
250                                                           OUTER);
251
252                 innerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
253                                                           innerrel->targetlist,
254                                                           INNER);
255
256                 merge_pathkeys = new_join_pathkeys(outerkeys, joinrel->targetlist,
257                                                           xmergeinfo->jmethod.clauses);
258
259                 temp_node = create_mergejoin_path(joinrel,
260                                                                   outerrel->size,
261                                                                   innerrel->size,
262                                                                   outerrel->width,
263                                                                   innerrel->width,
264                                                                   (Path *) outerrel->cheapestpath,
265                                                                   (Path *) innerrel->cheapestpath,
266                                                                   merge_pathkeys,
267                                                                   xmergeinfo->m_ordering,
268                                                                   xmergeinfo->jmethod.clauses,
269                                                                   outerkeys,
270                                                                   innerkeys);
271
272                 ms_list = lappend(ms_list, temp_node);
273         }
274         return ms_list;
275 }
276
277 /*
278  * match-unsorted-outer--
279  *        Creates possible join paths for processing a single join relation
280  *        'joinrel' by employing either iterative substitution or
281  *        mergejoining on each of its possible outer paths(assuming that the
282  *        outer relation need not be explicitly sorted).
283  *
284  *        1. The inner path is the cheapest available inner path.
285  *        2. Mergejoin wherever possible.  Mergejoin are considered if there
286  *               are mergejoinable join clauses between the outer and inner join
287  *               relations such that the outer path is keyed on the variables
288  *               appearing in the clauses.      The corresponding inner merge path is
289  *               either a path whose keys match those of the outer path(if such a
290  *               path is available) or an explicit sort on the appropriate inner
291  *               join keys, whichever is cheaper.
292  *
293  * 'joinrel' is the join relation
294  * 'outerrel' is the outer join relation
295  * 'innerrel' is the inner join relation
296  * 'outerpath-list' is the list of possible outer paths
297  * 'cheapest-inner' is the cheapest inner path
298  * 'best-innerjoin' is the best inner index path(if any)
299  * 'mergeinfo-list' is a list of nodes containing info on mergejoinable
300  *              clauses
301  *
302  * Returns a list of possible join path nodes.
303  */
304 static List *
305 match_unsorted_outer(RelOptInfo * joinrel,
306                                          RelOptInfo * outerrel,
307                                          RelOptInfo * innerrel,
308                                          List *outerpath_list,
309                                          Path *cheapest_inner,
310                                          Path *best_innerjoin,
311                                          List *mergeinfo_list)
312 {
313         Path       *outerpath = (Path *) NULL;
314         List       *jp_list = NIL;
315         List       *temp_node = NIL;
316         List       *merge_pathkeys = NIL;
317         Path       *nestinnerpath = (Path *) NULL;
318         List       *paths = NIL;
319         List       *i = NIL;
320         PathOrder  *outerpath_ordering = NULL;
321
322         foreach(i, outerpath_list)
323         {
324                 List       *clauses = NIL;
325                 List       *matchedJoinKeys = NIL;
326                 List       *matchedJoinClauses = NIL;
327                 MergeInfo  *xmergeinfo = (MergeInfo *) NULL;
328
329                 outerpath = (Path *) lfirst(i);
330
331                 outerpath_ordering = outerpath->path_order;
332
333                 if (outerpath_ordering)
334                 {
335                         xmergeinfo = match_order_mergeinfo(outerpath_ordering,
336                                                                           mergeinfo_list);
337                 }
338
339                 if (xmergeinfo)
340                         clauses = xmergeinfo->jmethod.clauses;
341
342                 if (clauses)
343                 {
344                         List       *keys = xmergeinfo->jmethod.jmkeys;
345                         List       *clauses = xmergeinfo->jmethod.clauses;
346
347                         matchedJoinKeys = match_pathkeys_joinkeys(outerpath->keys,
348                                                                                 keys,
349                                                                                 clauses,
350                                                                                 OUTER,
351                                                                                 &matchedJoinClauses);
352                         merge_pathkeys = new_join_pathkeys(outerpath->keys,
353                                                                   joinrel->targetlist, clauses);
354                 }
355                 else
356                         merge_pathkeys = outerpath->keys;
357
358                 if (best_innerjoin &&
359                         path_is_cheaper(best_innerjoin, cheapest_inner))
360                         nestinnerpath = best_innerjoin;
361                 else
362                         nestinnerpath = cheapest_inner;
363
364                 paths = lcons(create_nestloop_path(joinrel,
365                                                                                    outerrel,
366                                                                                    outerpath,
367                                                                                    nestinnerpath,
368                                                                                    merge_pathkeys),
369                                           NIL);
370
371                 if (clauses && matchedJoinKeys)
372                 {
373                         bool            path_is_cheaper_than_sort;
374                         List       *varkeys = NIL;
375                         Path       *mergeinnerpath = match_paths_joinkeys(matchedJoinKeys,
376                                                                  outerpath_ordering,
377                                                                  innerrel->pathlist,
378                                                                  INNER);
379
380                         path_is_cheaper_than_sort = (bool) (mergeinnerpath &&
381                                                 (mergeinnerpath->path_cost <
382                                                  (cheapest_inner->path_cost +
383                                                   cost_sort(matchedJoinKeys,
384                                                                         innerrel->size,
385                                                                         innerrel->width,
386                                                                         false))));
387                         if (!path_is_cheaper_than_sort)
388                         {
389                                 varkeys = extract_path_keys(matchedJoinKeys,
390                                                                           innerrel->targetlist,
391                                                                           INNER);
392                         }
393
394
395                         /*
396                          * Keep track of the cost of the outer path used with this
397                          * ordered inner path for later processing in
398                          * (match-unsorted-inner), since it isn't a sort and thus
399                          * wouldn't otherwise be considered.
400                          */
401                         if (path_is_cheaper_than_sort)
402                                 mergeinnerpath->outerjoincost = outerpath->path_cost;
403                         else
404                                 mergeinnerpath = cheapest_inner;
405
406                         temp_node = lcons(create_mergejoin_path(joinrel,
407                                                                                         outerrel->size,
408                                                                                         innerrel->size,
409                                                                                         outerrel->width,
410                                                                                         innerrel->width,
411                                                                                         outerpath,
412                                                                                         mergeinnerpath,
413                                                                                         merge_pathkeys,
414                                                                                         xmergeinfo->m_ordering,
415                                                                                         matchedJoinClauses,
416                                                                                         NIL,
417                                                                                         varkeys),
418                                           paths);
419                 }
420                 else
421                         temp_node = paths;
422                 jp_list = nconc(jp_list, temp_node);
423         }
424         return jp_list;
425 }
426
427 /*
428  * match-unsorted-inner --
429  *        Find the cheapest ordered join path for a given(ordered, unsorted)
430  *        inner join path.
431  *
432  *        Scans through each path available on an inner join relation and tries
433  *        matching its ordering keys against those of mergejoin clauses.
434  *        If 1. an appropriately-ordered inner path and matching mergeclause are
435  *                      found, and
436  *               2. sorting the cheapest outer path is cheaper than using an ordered
437  *                        but unsorted outer path(as was considered in
438  *                        (match-unsorted-outer)),
439  *        then this merge path is considered.
440  *
441  * 'joinrel' is the join result relation
442  * 'outerrel' is the outer join relation
443  * 'innerrel' is the inner join relation
444  * 'innerpath-list' is the list of possible inner join paths
445  * 'mergeinfo-list' is a list of nodes containing info on mergejoinable
446  *                              clauses
447  *
448  * Returns a list of possible merge paths.
449  */
450 static List *
451 match_unsorted_inner(RelOptInfo * joinrel,
452                                          RelOptInfo * outerrel,
453                                          RelOptInfo * innerrel,
454                                          List *innerpath_list,
455                                          List *mergeinfo_list)
456 {
457         Path       *innerpath = (Path *) NULL;
458         List       *mp_list = NIL;
459         List       *temp_node = NIL;
460         PathOrder  *innerpath_ordering = NULL;
461         Cost            temp1 = 0.0;
462         bool            temp2 = false;
463         List       *i = NIL;
464
465         foreach(i, innerpath_list)
466         {
467                 MergeInfo  *xmergeinfo = (MergeInfo *) NULL;
468                 List       *clauses = NIL;
469                 List       *matchedJoinKeys = NIL;
470                 List       *matchedJoinClauses = NIL;
471
472                 innerpath = (Path *) lfirst(i);
473
474                 innerpath_ordering = innerpath->path_order;
475
476                 if (innerpath_ordering)
477                 {
478                         xmergeinfo = match_order_mergeinfo(innerpath_ordering,
479                                                                           mergeinfo_list);
480                 }
481
482                 if (xmergeinfo)
483                         clauses = ((JoinMethod *) xmergeinfo)->clauses;
484
485                 if (clauses)
486                 {
487                         List       *keys = xmergeinfo->jmethod.jmkeys;
488                         List       *cls = xmergeinfo->jmethod.clauses;
489
490                         matchedJoinKeys = match_pathkeys_joinkeys(innerpath->keys,
491                                                                                 keys,
492                                                                                 cls,
493                                                                                 INNER,
494                                                                                 &matchedJoinClauses);
495                 }
496
497                 /*
498                  * (match-unsorted-outer) if it is applicable. 'OuterJoinCost was
499                  * set above in
500                  */
501                 if (clauses && matchedJoinKeys)
502                 {
503                         temp1 = outerrel->cheapestpath->path_cost +
504                                 cost_sort(matchedJoinKeys, outerrel->size, outerrel->width,
505                                                   false);
506
507                         temp2 = (bool) (FLOAT_IS_ZERO(innerpath->outerjoincost)
508                                                         || (innerpath->outerjoincost > temp1));
509
510                         if (temp2)
511                         {
512                                 List       *outerkeys = extract_path_keys(matchedJoinKeys,
513                                                                   outerrel->targetlist,
514                                                                   OUTER);
515                                 List       *merge_pathkeys = new_join_pathkeys(outerkeys,
516                                                                   joinrel->targetlist,
517                                                                   clauses);
518
519                                 temp_node = lcons(create_mergejoin_path(joinrel,
520                                                                                                 outerrel->size,
521                                                                                                 innerrel->size,
522                                                                                                 outerrel->width,
523                                                                                                 innerrel->width,
524                                                                                  (Path *) outerrel->cheapestpath,
525                                                                                                 innerpath,
526                                                                                                 merge_pathkeys,
527                                                                                                 xmergeinfo->m_ordering,
528                                                                                                 matchedJoinClauses,
529                                                                                                 outerkeys,
530                                                                                                 NIL),
531                                                   NIL);
532
533                                 mp_list = nconc(mp_list, temp_node);
534                         }
535                 }
536         }
537         return mp_list;
538
539 }
540
541 static bool
542 EnoughMemoryForHashjoin(RelOptInfo * hashrel)
543 {
544         int                     ntuples;
545         int                     tupsize;
546         int                     pages;
547
548         ntuples = hashrel->size;
549         if (ntuples == 0)
550                 ntuples = 1000;
551         tupsize = hashrel->width + sizeof(HeapTupleData);
552         pages = page_size(ntuples, tupsize);
553
554         /*
555          * if amount of buffer space below hashjoin threshold, return false
556          */
557         if (ceil(sqrt((double) pages)) > NBuffers)
558                 return false;
559         return true;
560 }
561
562 /*
563  * hash-inner-and-outer--                               XXX HASH
564  *        Create hashjoin join paths by explicitly hashing both the outer and
565  *        inner join relations on each available hash op.
566  *
567  * 'joinrel' is the join relation
568  * 'outerrel' is the outer join relation
569  * 'innerrel' is the inner join relation
570  * 'hashinfo-list' is a list of nodes containing info on(hashjoinable)
571  *              clauses for joining the relations
572  *
573  * Returns a list of hashjoin paths.
574  */
575 static List *
576 hash_inner_and_outer(RelOptInfo * joinrel,
577                                          RelOptInfo * outerrel,
578                                          RelOptInfo * innerrel,
579                                          List *hashinfo_list)
580 {
581         HashInfo   *xhashinfo = (HashInfo *) NULL;
582         List       *hjoin_list = NIL;
583         HashPath   *temp_node = (HashPath *) NULL;
584         List       *i = NIL;
585         List       *outerkeys = NIL;
586         List       *innerkeys = NIL;
587         List       *hash_pathkeys = NIL;
588
589         foreach(i, hashinfo_list)
590         {
591                 xhashinfo = (HashInfo *) lfirst(i);
592                 outerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
593                                                           outerrel->targetlist,
594                                                           OUTER);
595                 innerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
596                                                           innerrel->targetlist,
597                                                           INNER);
598                 hash_pathkeys = new_join_pathkeys(outerkeys,
599                                                           joinrel->targetlist,
600                                                           ((JoinMethod *) xhashinfo)->clauses);
601
602                 if (EnoughMemoryForHashjoin(innerrel))
603                 {
604                         temp_node = create_hashjoin_path(joinrel,
605                                                                                          outerrel->size,
606                                                                                          innerrel->size,
607                                                                                          outerrel->width,
608                                                                                          innerrel->width,
609                                                                                  (Path *) outerrel->cheapestpath,
610                                                                                  (Path *) innerrel->cheapestpath,
611                                                                                          hash_pathkeys,
612                                                                                          xhashinfo->hashop,
613                                                                          ((JoinMethod *) xhashinfo)->clauses,
614                                                                                          outerkeys,
615                                                                                          innerkeys);
616                         hjoin_list = lappend(hjoin_list, temp_node);
617                 }
618         }
619         return hjoin_list;
620 }