]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/path/joinpath.c
8a62c44b855fc3af57fb97bbb719a5c90b502e16
[postgresql] / src / backend / optimizer / path / joinpath.c
1 /*-------------------------------------------------------------------------
2  *
3  * joinpath.c
4  *        Routines to find all possible paths for processing a set of joins
5  *
6  * Copyright (c) 1994, Regents of the University of California
7  *
8  *
9  * IDENTIFICATION
10  *        $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.28 1999/02/18 00:49:19 momjian Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include <sys/types.h>
15 #include <math.h>
16
17 #include "postgres.h"
18
19 #include "storage/buf_internals.h"
20
21 #include "nodes/pg_list.h"
22 #include "nodes/relation.h"
23 #include "nodes/plannodes.h"
24
25 #include "optimizer/internal.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/pathnode.h"
28 #include "optimizer/keys.h"
29 #include "optimizer/cost.h"             /* for _enable_{hashjoin,
30                                                                  * _enable_mergejoin} */
31
32 static Path *best_innerjoin(List *join_paths, List *outer_relid);
33 static List *sort_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
34                                          List *mergeinfo_list);
35 static List *match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
36                 List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin,
37                                          List *mergeinfo_list);
38 static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
39                                          List *innerpath_list, List *mergeinfo_list);
40 static bool EnoughMemoryForHashjoin(RelOptInfo *hashrel);
41 static List *hash_inner_and_outer(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel,
42                                          List *hashinfo_list);
43
44 /*
45  * update_rels_pathlist_for_joins
46  *        Creates all possible ways to process joins for each of the join
47  *        relations in the list 'joinrels.'  Each unique path will be included
48  *        in the join relation's 'pathlist' field.
49  *
50  *        In postgres, n-way joins are handled left-only(permuting clauseless
51  *        joins doesn't usually win much).
52  *
53  *        if BushyPlanFlag is true, bushy tree plans will be generated
54  *
55  * 'joinrels' is the list of relation entries to be joined
56  *
57  * Modifies the pathlist field of the appropriate rel node to contain
58  * the unique join paths.
59  * If bushy trees are considered, may modify the relid field of the
60  * join rel nodes to flatten the lists.
61  *
62  * It does a destructive modification.
63  */
64 void
65 update_rels_pathlist_for_joins(Query *root, List *joinrels)
66 {
67         List       *mergeinfo_list = NIL;
68         List       *hashinfo_list = NIL;
69         List       *j;
70
71         foreach(j, joinrels)
72         {
73                 RelOptInfo *joinrel = (RelOptInfo *) lfirst(j);
74                 Relids          innerrelids;
75                 Relids          outerrelids;
76                 RelOptInfo *innerrel;
77                 RelOptInfo *outerrel;
78                 Path       *bestinnerjoin;
79                 List       *pathlist = NIL;
80
81                 /* flatten out relids later in this function */
82                 innerrelids = lsecond(joinrel->relids);
83                 outerrelids = lfirst(joinrel->relids);
84
85                 /*
86                  * base relation id is an integer and join relation relid is a
87                  * list of integers.
88                  */
89                 innerrel = (length(innerrelids) == 1) ?
90                         get_base_rel(root, lfirsti(innerrelids)) :
91                         get_join_rel(root, innerrelids);
92                 outerrel = (length(outerrelids) == 1) ?
93                         get_base_rel(root, lfirsti(outerrelids)) :
94                         get_join_rel(root, outerrelids);
95
96                 bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
97
98                 if (_enable_mergejoin_)
99                         mergeinfo_list = group_clauses_by_order(joinrel->restrictinfo,
100                                                                            lfirsti(innerrel->relids));
101
102                 if (_enable_hashjoin_)
103                         hashinfo_list = group_clauses_by_hashop(joinrel->restrictinfo,
104                                                                                 lfirsti(innerrel->relids));
105
106                 /* need to flatten the relids list */
107                 joinrel->relids = intAppend(outerrelids, innerrelids);
108
109                 /*
110                  * 1. Consider mergejoin paths where both relations must be
111                  * explicitly sorted.
112                  */
113                 pathlist = sort_inner_and_outer(joinrel, outerrel,
114                                                                                 innerrel, mergeinfo_list);
115
116                 /*
117                  * 2. Consider paths where the outer relation need not be
118                  * explicitly sorted. This may include either nestloops and
119                  * mergejoins where the outer path is already ordered.
120                  */
121                 pathlist = add_pathlist(joinrel, pathlist,
122                                                  match_unsorted_outer(joinrel,
123                                                                                           outerrel,
124                                                                                           innerrel,
125                                                                                           outerrel->pathlist,
126                                                                                  (Path *) innerrel->cheapestpath,
127                                                                                           bestinnerjoin,
128                                                                                           mergeinfo_list));
129
130                 /*
131                  * 3. Consider paths where the inner relation need not be
132                  * explicitly sorted.  This may include nestloops and mergejoins
133                  * the actual nestloop nodes were constructed in
134                  * (match_unsorted_outer).
135                  */
136                 pathlist = add_pathlist(joinrel, pathlist,
137                                                  match_unsorted_inner(joinrel, outerrel,
138                                                                                           innerrel,
139                                                                                           innerrel->pathlist,
140                                                                                           mergeinfo_list));
141
142                 /*
143                  * 4. Consider paths where both outer and inner relations must be
144                  * hashed before being joined.
145                  */
146                 pathlist = add_pathlist(joinrel, pathlist,
147                                                  hash_inner_and_outer(joinrel, outerrel,
148                                                                                           innerrel, hashinfo_list));
149
150                 joinrel->pathlist = pathlist;
151         }
152 }
153
154 /*
155  * best_innerjoin
156  *        Find the cheapest index path that has already been identified by
157  *        (indexable_joinclauses) as being a possible inner path for the given
158  *        outer relation in a nestloop join.
159  *
160  * 'join_paths' is a list of join nodes
161  * 'outer_relid' is the relid of the outer join relation
162  *
163  * Returns the pathnode of the selected path.
164  */
165 static Path *
166 best_innerjoin(List *join_paths, Relids outer_relids)
167 {
168         Path       *cheapest = (Path *) NULL;
169         List       *join_path;
170
171         foreach(join_path, join_paths)
172         {
173                 Path       *path = (Path *) lfirst(join_path);
174
175                 if (intMember(lfirsti(path->joinid), outer_relids)
176                         && ((cheapest == NULL ||
177                                  path_is_cheaper((Path *) lfirst(join_path), cheapest))))
178                 {
179
180                         cheapest = (Path *) lfirst(join_path);
181                 }
182         }
183         return cheapest;
184 }
185
186 /*
187  * sort_inner_and_outer
188  *        Create mergejoin join paths by explicitly sorting both the outer and
189  *        inner join relations on each available merge ordering.
190  *
191  * 'joinrel' is the join relation
192  * 'outerrel' is the outer join relation
193  * 'innerrel' is the inner join relation
194  * 'mergeinfo_list' is a list of nodes containing info on(mergejoinable)
195  *                              clauses for joining the relations
196  *
197  * Returns a list of mergejoin paths.
198  */
199 static List *
200 sort_inner_and_outer(RelOptInfo *joinrel,
201                                          RelOptInfo *outerrel,
202                                          RelOptInfo *innerrel,
203                                          List *mergeinfo_list)
204 {
205         List       *ms_list = NIL;
206         MergeInfo          *xmergeinfo = (MergeInfo *) NULL;
207         MergePath  *temp_node = (MergePath *) NULL;
208         List       *i;
209         List       *outerkeys = NIL;
210         List       *innerkeys = NIL;
211         List       *merge_pathkeys = NIL;
212
213         foreach(i, mergeinfo_list)
214         {
215                 xmergeinfo = (MergeInfo *) lfirst(i);
216
217                 outerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
218                                                           outerrel->targetlist,
219                                                           OUTER);
220
221                 innerkeys = extract_path_keys(xmergeinfo->jmethod.jmkeys,
222                                                           innerrel->targetlist,
223                                                           INNER);
224
225                 merge_pathkeys = new_join_pathkeys(outerkeys, joinrel->targetlist,
226                                                           xmergeinfo->jmethod.clauses);
227
228                 temp_node = create_mergejoin_path(joinrel,
229                                                                   outerrel->size,
230                                                                   innerrel->size,
231                                                                   outerrel->width,
232                                                                   innerrel->width,
233                                                                   (Path *) outerrel->cheapestpath,
234                                                                   (Path *) innerrel->cheapestpath,
235                                                                   merge_pathkeys,
236                                                                   xmergeinfo->m_ordering,
237                                                                   xmergeinfo->jmethod.clauses,
238                                                                   outerkeys,
239                                                                   innerkeys);
240
241                 ms_list = lappend(ms_list, temp_node);
242         }
243         return ms_list;
244 }
245
246 /*
247  * match_unsorted_outer
248  *        Creates possible join paths for processing a single join relation
249  *        'joinrel' by employing either iterative substitution or
250  *        mergejoining on each of its possible outer paths(assuming that the
251  *        outer relation need not be explicitly sorted).
252  *
253  *        1. The inner path is the cheapest available inner path.
254  *        2. Mergejoin wherever possible.  Mergejoin are considered if there
255  *               are mergejoinable join clauses between the outer and inner join
256  *               relations such that the outer path is keyed on the variables
257  *               appearing in the clauses.      The corresponding inner merge path is
258  *               either a path whose keys match those of the outer path(if such a
259  *               path is available) or an explicit sort on the appropriate inner
260  *               join keys, whichever is cheaper.
261  *
262  * 'joinrel' is the join relation
263  * 'outerrel' is the outer join relation
264  * 'innerrel' is the inner join relation
265  * 'outerpath_list' is the list of possible outer paths
266  * 'cheapest_inner' is the cheapest inner path
267  * 'best_innerjoin' is the best inner index path(if any)
268  * 'mergeinfo_list' is a list of nodes containing info on mergejoinable
269  *              clauses
270  *
271  * Returns a list of possible join path nodes.
272  */
273 static List *
274 match_unsorted_outer(RelOptInfo *joinrel,
275                                          RelOptInfo *outerrel,
276                                          RelOptInfo *innerrel,
277                                          List *outerpath_list,
278                                          Path *cheapest_inner,
279                                          Path *best_innerjoin,
280                                          List *mergeinfo_list)
281 {
282         Path       *outerpath = (Path *) NULL;
283         List       *jp_list = NIL;
284         List       *temp_node = NIL;
285         List       *merge_pathkeys = NIL;
286         Path       *nestinnerpath = (Path *) NULL;
287         List       *paths = NIL;
288         List       *i = NIL;
289         PathOrder  *outerpath_ordering = NULL;
290
291         foreach(i, outerpath_list)
292         {
293                 List       *clauses = NIL;
294                 List       *matchedJoinKeys = NIL;
295                 List       *matchedJoinClauses = NIL;
296                 MergeInfo  *xmergeinfo = (MergeInfo *) NULL;
297
298                 outerpath = (Path *) lfirst(i);
299
300                 outerpath_ordering = outerpath->pathorder;
301
302                 if (outerpath_ordering)
303                         xmergeinfo = match_order_mergeinfo(outerpath_ordering,
304                                                                                                 mergeinfo_list);
305
306                 if (xmergeinfo)
307                         clauses = xmergeinfo->jmethod.clauses;
308
309                 if (clauses)
310                 {
311                         List       *jmkeys = xmergeinfo->jmethod.jmkeys;
312                         List       *clauses = xmergeinfo->jmethod.clauses;
313
314                         matchedJoinKeys = match_pathkeys_joinkeys(outerpath->pathkeys,
315                                                                                                                 jmkeys,
316                                                                                                                 clauses,
317                                                                                                                 OUTER,
318                                                                                                                 &matchedJoinClauses);
319                         merge_pathkeys = new_join_pathkeys(outerpath->pathkeys,
320                                                                                           joinrel->targetlist, clauses);
321                 }
322                 else
323                         merge_pathkeys = outerpath->pathkeys;
324
325                 if (best_innerjoin &&
326                         path_is_cheaper(best_innerjoin, cheapest_inner))
327                         nestinnerpath = best_innerjoin;
328                 else
329                         nestinnerpath = cheapest_inner;
330
331                 paths = lcons(create_nestloop_path(joinrel,
332                                                                                    outerrel,
333                                                                                    outerpath,
334                                                                                    nestinnerpath,
335                                                                                    merge_pathkeys),
336                                           NIL);
337
338                 if (clauses && matchedJoinKeys)
339                 {
340                         bool            path_is_cheaper_than_sort;
341                         List       *varkeys = NIL;
342                         Path       *mergeinnerpath = match_paths_joinkeys(matchedJoinKeys,
343                                                                  outerpath_ordering,
344                                                                  innerrel->pathlist,
345                                                                  INNER);
346
347                         /* Should we use the mergeinner, or sort the cheapest inner? */
348                         path_is_cheaper_than_sort = (bool) (mergeinnerpath &&
349                                                 (mergeinnerpath->path_cost <
350                                                  (cheapest_inner->path_cost +
351                                                   cost_sort(matchedJoinKeys,
352                                                                         innerrel->size,
353                                                                         innerrel->width,
354                                                                         false))));
355                         if (!path_is_cheaper_than_sort)
356                         {
357                                 varkeys = extract_path_keys(matchedJoinKeys,
358                                                                           innerrel->targetlist,
359                                                                           INNER);
360                         }
361
362
363                         /*
364                          * Keep track of the cost of the outer path used with this
365                          * ordered inner path for later processing in
366                          * (match_unsorted_inner), since it isn't a sort and thus
367                          * wouldn't otherwise be considered.
368                          */
369                         if (path_is_cheaper_than_sort)
370                                 mergeinnerpath->outerjoincost = outerpath->path_cost;
371                         else
372                                 mergeinnerpath = cheapest_inner;
373
374                         temp_node = lcons(create_mergejoin_path(joinrel,
375                                                                                         outerrel->size,
376                                                                                         innerrel->size,
377                                                                                         outerrel->width,
378                                                                                         innerrel->width,
379                                                                                         outerpath,
380                                                                                         mergeinnerpath,
381                                                                                         merge_pathkeys,
382                                                                                         xmergeinfo->m_ordering,
383                                                                                         matchedJoinClauses,
384                                                                                         NIL,
385                                                                                         varkeys),
386                                                                           paths);
387                 }
388                 else
389                         temp_node = paths;
390                 jp_list = nconc(jp_list, temp_node);
391         }
392         return jp_list;
393 }
394
395 /*
396  * match_unsorted_inner 
397  *        Find the cheapest ordered join path for a given(ordered, unsorted)
398  *        inner join path.
399  *
400  *        Scans through each path available on an inner join relation and tries
401  *        matching its ordering keys against those of mergejoin clauses.
402  *        If 1. an appropriately_ordered inner path and matching mergeclause are
403  *                      found, and
404  *               2. sorting the cheapest outer path is cheaper than using an ordered
405  *                        but unsorted outer path(as was considered in
406  *                      (match_unsorted_outer)), then this merge path is considered.
407  *
408  * 'joinrel' is the join result relation
409  * 'outerrel' is the outer join relation
410  * 'innerrel' is the inner join relation
411  * 'innerpath_list' is the list of possible inner join paths
412  * 'mergeinfo_list' is a list of nodes containing info on mergejoinable
413  *                              clauses
414  *
415  * Returns a list of possible merge paths.
416  */
417 static List *
418 match_unsorted_inner(RelOptInfo *joinrel,
419                                          RelOptInfo *outerrel,
420                                          RelOptInfo *innerrel,
421                                          List *innerpath_list,
422                                          List *mergeinfo_list)
423 {
424         Path       *innerpath = (Path *) NULL;
425         List       *mp_list = NIL;
426         PathOrder  *innerpath_ordering = NULL;
427         Cost            temp1 = 0.0;
428         bool            temp2 = false;
429         List       *i = NIL;
430
431         foreach(i, innerpath_list)
432         {
433                 MergeInfo  *xmergeinfo = (MergeInfo *) NULL;
434                 List       *clauses = NIL;
435                 List       *matchedJoinKeys = NIL;
436                 List       *matchedJoinClauses = NIL;
437
438                 innerpath = (Path *) lfirst(i);
439
440                 innerpath_ordering = innerpath->pathorder;
441
442                 if (innerpath_ordering)
443                 {
444                         xmergeinfo = match_order_mergeinfo(innerpath_ordering,
445                                                                           mergeinfo_list);
446                 }
447
448                 if (xmergeinfo)
449                         clauses = ((JoinMethod *) xmergeinfo)->clauses;
450
451                 if (clauses)
452                 {
453                         List       *jmkeys = xmergeinfo->jmethod.jmkeys;
454                         List       *cls = xmergeinfo->jmethod.clauses;
455
456                         matchedJoinKeys = match_pathkeys_joinkeys(innerpath->pathkeys,
457                                                                                 jmkeys,
458                                                                                 cls,
459                                                                                 INNER,
460                                                                                 &matchedJoinClauses);
461                 }
462
463                 /*
464                  * (match_unsorted_outer) if it is applicable. 'OuterJoinCost was
465                  * set above in
466                  */
467                 if (clauses && matchedJoinKeys)
468                 {
469                         temp1 = outerrel->cheapestpath->path_cost +
470                                 cost_sort(matchedJoinKeys, outerrel->size, outerrel->width,
471                                                   false);
472
473                         temp2 = (bool) (FLOAT_IS_ZERO(innerpath->outerjoincost)
474                                                         || (innerpath->outerjoincost > temp1));
475
476                         if (temp2)
477                         {
478                                 List       *outerkeys = extract_path_keys(matchedJoinKeys,
479                                                                   outerrel->targetlist,
480                                                                   OUTER);
481                                 List       *merge_pathkeys = new_join_pathkeys(outerkeys,
482                                                                   joinrel->targetlist,
483                                                                   clauses);
484
485                                 mp_list = lappend(mp_list,
486                                                                   create_mergejoin_path(joinrel,
487                                                                                                 outerrel->size,
488                                                                                                 innerrel->size,
489                                                                                                 outerrel->width,
490                                                                                                 innerrel->width,
491                                                                                  (Path *) outerrel->cheapestpath,
492                                                                                                 innerpath,
493                                                                                                 merge_pathkeys,
494                                                                                                 xmergeinfo->m_ordering,
495                                                                                                 matchedJoinClauses,
496                                                                                                 outerkeys,
497                                                                                                 NIL));
498                         }
499                 }
500         }
501         return mp_list;
502
503 }
504
505 static bool
506 EnoughMemoryForHashjoin(RelOptInfo *hashrel)
507 {
508         int                     ntuples;
509         int                     tupsize;
510         int                     pages;
511
512         ntuples = hashrel->size;
513         if (ntuples == 0)
514                 ntuples = 1000;
515         tupsize = hashrel->width + sizeof(HeapTupleData);
516         pages = page_size(ntuples, tupsize);
517
518         /*
519          * if amount of buffer space below hashjoin threshold, return false
520          */
521         if (ceil(sqrt((double) pages)) > NBuffers)
522                 return false;
523         return true;
524 }
525
526 /*
527  * hash_inner_and_outer--                               XXX HASH
528  *        Create hashjoin join paths by explicitly hashing both the outer and
529  *        inner join relations on each available hash op.
530  *
531  * 'joinrel' is the join relation
532  * 'outerrel' is the outer join relation
533  * 'innerrel' is the inner join relation
534  * 'hashinfo_list' is a list of nodes containing info on(hashjoinable)
535  *              clauses for joining the relations
536  *
537  * Returns a list of hashjoin paths.
538  */
539 static List *
540 hash_inner_and_outer(RelOptInfo *joinrel,
541                                          RelOptInfo *outerrel,
542                                          RelOptInfo *innerrel,
543                                          List *hashinfo_list)
544 {
545         HashInfo   *xhashinfo = (HashInfo *) NULL;
546         List       *hjoin_list = NIL;
547         HashPath   *temp_node = (HashPath *) NULL;
548         List       *i = NIL;
549         List       *outerkeys = NIL;
550         List       *innerkeys = NIL;
551         List       *hash_pathkeys = NIL;
552
553         foreach(i, hashinfo_list)
554         {
555                 xhashinfo = (HashInfo *) lfirst(i);
556                 outerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
557                                                           outerrel->targetlist,
558                                                           OUTER);
559                 innerkeys = extract_path_keys(((JoinMethod *) xhashinfo)->jmkeys,
560                                                           innerrel->targetlist,
561                                                           INNER);
562                 hash_pathkeys = new_join_pathkeys(outerkeys,
563                                                           joinrel->targetlist,
564                                                           ((JoinMethod *) xhashinfo)->clauses);
565
566                 if (EnoughMemoryForHashjoin(innerrel))
567                 {
568                         temp_node = create_hashjoin_path(joinrel,
569                                                                                          outerrel->size,
570                                                                                          innerrel->size,
571                                                                                          outerrel->width,
572                                                                                          innerrel->width,
573                                                                                         (Path *) outerrel->cheapestpath,
574                                                                                         (Path *) innerrel->cheapestpath,
575                                                                                          hash_pathkeys,
576                                                                                          xhashinfo->hashop,
577                                                                                 ((JoinMethod *) xhashinfo)->clauses,
578                                                                                          outerkeys,
579                                                                                          innerkeys);
580                         hjoin_list = lappend(hjoin_list, temp_node);
581                 }
582         }
583         return hjoin_list;
584 }