1 /*-------------------------------------------------------------------------
4 * Utilities for matching and building join and path keys
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.6 1999/02/21 01:55:02 momjian Exp $
12 *-------------------------------------------------------------------------
16 #include "nodes/pg_list.h"
17 #include "nodes/relation.h"
18 #include "nodes/plannodes.h"
20 #include "optimizer/internal.h"
21 #include "optimizer/paths.h"
22 #include "optimizer/var.h"
23 #include "optimizer/keys.h"
24 #include "optimizer/tlist.h"
25 #include "optimizer/joininfo.h"
26 #include "optimizer/ordering.h"
28 static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
30 static List *new_join_pathkey(List *subkeys, List *considered_subkeys,
31 List *join_rel_tlist, List *joinclauses);
32 static List *new_matching_subkeys(Var *subkey, List *considered_subkeys,
33 List *join_rel_tlist, List *joinclauses);
37 * Explanation of Path.pathkeys
39 * Path.pathkeys is a List of List of Var nodes that represent the sort
40 * order of the result generated by the Path.
42 * In single/base relation RelOptInfo's, the Path's represent various ways
43 * of generate the relation. Sequential scan Paths have a NIL pathkeys.
44 * Index scans have Path.pathkeys that represent the chosen index. A
45 * single-key index pathkeys would be { {tab1_indexkey1} }. The pathkeys
46 * entry for a multi-key index would be { {tab1_indexkey1}, {tab1_indexkey2} }.
48 * Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
49 * only performed with equajoins("="). Because of this, the multi-relation
50 * path actually has more than one primary Var key. For example, a
51 * mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
52 * { {tab1.col1, tab2.col1} }. This allows future joins to use either Var
53 * as a pre-sorted key to prevent Mergejoins from having to re-sort the Path.
54 * They are equal, so they are both primary sort keys. This is why pathkeys
57 * For multi-key sorts, if the outer is sorted by a multi-key index, the
58 * multi-key index remains after the join. If the inner has a multi-key
59 * sort, only the primary key of the inner is added to the result.
60 * Mergejoins only join on the primary key. Currently, non-primary keys
61 * in the pathkeys List are of limited value.
63 * HashJoin has similar functionality. NestJoin does not perform sorting,
64 * and allows non-equajoins, so it does not allow useful pathkeys.
70 /****************************************************************************
72 ****************************************************************************/
75 * order_joinkeys_by_pathkeys
76 * Attempts to match the keys of a path against the keys of join clauses.
77 * This is done by looking for a matching join key in 'joinkeys' for
78 * every path key in the list 'path.keys'. If there is a matching join key
79 * (not necessarily unique) for every path key, then the list of
80 * corresponding join keys and join clauses are returned in the order in
81 * which the keys matched the path keys.
83 * 'pathkeys' is a list of path keys:
84 * ( ( (var) (var) ... ) ( (var) ... ) )
85 * 'joinkeys' is a list of join keys:
86 * ( (outer inner) (outer inner) ... )
87 * 'joinclauses' is a list of clauses corresponding to the join keys in
89 * 'outer_or_inner' is a flag that selects the desired subkey of a join key
92 * Returns the join keys and corresponding join clauses in a list if all
93 * of the path keys were matched:
95 * ( (outerkey0 innerkey0) ... (outerkeyN or innerkeyN) )
96 * ( clause0 ... clauseN )
100 * Returns a list of matched join keys and a list of matched join clauses
101 * in pointers if valid order can be found.
104 order_joinkeys_by_pathkeys(List *pathkeys,
108 List **matchedJoinKeysPtr,
109 List **matchedJoinClausesPtr)
111 List *matched_joinkeys = NIL;
112 List *matched_joinclauses = NIL;
115 int matched_joinkey_index = -1;
116 int matched_keys = 0;
118 * Reorder the joinkeys by picking out one that matches each pathkey,
119 * and create a new joinkey/joinclause list in pathkey order
124 matched_joinkey_index = match_pathkey_joinkeys(pathkey, joinkeys,
127 if (matched_joinkey_index != -1)
130 if (matchedJoinKeysPtr)
132 JoinKey *joinkey = nth(matched_joinkey_index, joinkeys);
133 matched_joinkeys = lappend(matched_joinkeys, joinkey);
136 if (matchedJoinClausesPtr && joinclauses)
138 Expr *joinclause = nth(matched_joinkey_index,
140 matched_joinclauses = lappend(matched_joinclauses, joinclause);
144 /* A pathkey could not be matched. */
149 * Did we fail to match all the joinkeys?
150 * Extra pathkeys are no problem.
152 if (matched_keys != length(joinkeys))
154 if (matchedJoinKeysPtr)
155 *matchedJoinKeysPtr = NIL;
156 if (matchedJoinClausesPtr)
157 *matchedJoinClausesPtr = NIL;
161 if (matchedJoinKeysPtr)
162 *matchedJoinKeysPtr = matched_joinkeys;
163 if (matchedJoinClausesPtr)
164 *matchedJoinClausesPtr = matched_joinclauses;
170 * match_pathkey_joinkeys
171 * Returns the 0-based index into 'joinkeys' of the first joinkey whose
172 * outer or inner subkey matches any subkey of 'pathkey'.
174 * All these keys are equivalent, so any of them can match. See above.
177 match_pathkey_joinkeys(List *pathkey,
188 path_subkey = (Var *) lfirst(i);
192 jk = (JoinKey *) lfirst(x);
193 if (var_equal(path_subkey, extract_join_key(jk, outer_or_inner)))
198 return -1; /* no index found */
203 * get_cheapest_path_for_joinkeys
204 * Attempts to find a path in 'paths' whose keys match a set of join
205 * keys 'joinkeys'. To match,
206 * 1. the path node ordering must equal 'ordering'.
207 * 2. each subkey of a given path must match(i.e., be(var_equal) to) the
208 * appropriate subkey of the corresponding join key in 'joinkeys',
209 * i.e., the Nth path key must match its subkeys against the subkey of
210 * the Nth join key in 'joinkeys'.
212 * 'joinkeys' is the list of key pairs to which the path keys must be
214 * 'ordering' is the ordering of the(outer) path to which 'joinkeys'
216 * 'paths' is a list of(inner) paths which are to be matched against
217 * each join key in 'joinkeys'
218 * 'outer_or_inner' is a flag that selects the desired subkey of a join key
221 * Find the cheapest path that matches the join keys
224 get_cheapest_path_for_joinkeys(List *joinkeys,
229 Path *matched_path = NULL;
234 Path *path = (Path *) lfirst(i);
235 int better_sort, better_key;
237 if (order_joinkeys_by_pathkeys(path->pathkeys, joinkeys, NIL,
238 outer_or_inner, NULL, NULL) &&
239 pathorder_match(ordering, path->pathorder, &better_sort) &&
243 if (path->path_cost < matched_path->path_cost)
255 * Builds a subkey list for a path by pulling one of the subkeys from
256 * a list of join keys 'joinkeys' and then finding the var node in the
257 * target list 'tlist' that corresponds to that subkey.
259 * 'joinkeys' is a list of join key pairs
260 * 'tlist' is a relation target list
261 * 'outer_or_inner' is a flag that selects the desired subkey of a join key
264 * Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)).
265 * It is a list of lists because of multi-key indexes.
268 extract_path_keys(List *joinkeys,
272 List *pathkeys = NIL;
275 foreach(jk, joinkeys)
277 JoinKey *jkey = (JoinKey *) lfirst(jk);
283 * find the right Var in the target list for this key
285 var = (Var *) extract_join_key(jkey, outer_or_inner);
286 key = (Var *) matching_tlist_var(var, tlist);
289 * Include it in the pathkeys list if we haven't already done so
293 Var *pkey = lfirst((List *) lfirst(p)); /* XXX fix me */
299 continue; /* key already in pathkeys */
301 pathkeys = lappend(pathkeys, lcons(key, NIL));
307 /****************************************************************************
308 * NEW PATHKEY FORMATION
309 ****************************************************************************/
313 * Find the path keys for a join relation by finding all vars in the list
314 * of join clauses 'joinclauses' such that:
315 * (1) the var corresponding to the outer join relation is a
316 * key on the outer path
317 * (2) the var appears in the target list of the join relation
318 * In other words, add to each outer path key the inner path keys that
319 * are required for qualification.
321 * 'outer_pathkeys' is the list of the outer path's path keys
322 * 'join_rel_tlist' is the target list of the join relation
323 * 'joinclauses' is the list of restricting join clauses
325 * Returns the list of new path keys.
329 new_join_pathkeys(List *outer_pathkeys,
330 List *join_rel_tlist,
333 List *outer_pathkey = NIL;
338 foreach(i, outer_pathkeys)
340 outer_pathkey = lfirst(i);
341 x = new_join_pathkey(outer_pathkey, NIL, join_rel_tlist, joinclauses);
343 t_list = lappend(t_list, x);
350 * Finds new vars that become subkeys due to qualification clauses that
351 * contain any previously considered subkeys. These new subkeys plus the
352 * subkeys from 'subkeys' form a new pathkey for the join relation.
354 * Note that each returned subkey is the var node found in
355 * 'join_rel_tlist' rather than the joinclause var node.
357 * 'subkeys' is a list of subkeys for which matching subkeys are to be
359 * 'considered_subkeys' is the current list of all subkeys corresponding
362 * Returns a new pathkey(list of subkeys).
366 new_join_pathkey(List *subkeys,
367 List *considered_subkeys,
368 List *join_rel_tlist,
374 List *matched_subkeys = NIL;
375 Expr *tlist_key = (Expr *) NULL;
376 List *newly_considered_subkeys = NIL;
380 subkey = (Var *) lfirst(i);
382 break; /* XXX something is wrong */
383 matched_subkeys = new_matching_subkeys(subkey, considered_subkeys,
384 join_rel_tlist, joinclauses);
385 tlist_key = matching_tlist_var(subkey, join_rel_tlist);
386 newly_considered_subkeys = NIL;
390 if (!member(tlist_key, matched_subkeys))
391 newly_considered_subkeys = lcons(tlist_key, matched_subkeys);
394 newly_considered_subkeys = matched_subkeys;
396 considered_subkeys = append(considered_subkeys, newly_considered_subkeys);
398 t_list = nconc(t_list, newly_considered_subkeys);
404 * new_matching_subkeys
405 * Returns a list of new subkeys:
406 * (1) which are not listed in 'considered_subkeys'
407 * (2) for which the "other" variable in some clause in 'joinclauses' is
409 * (3) which are mentioned in 'join_rel_tlist'
411 * Note that each returned subkey is the var node found in
412 * 'join_rel_tlist' rather than the joinclause var node.
414 * 'subkey' is the var node for which we are trying to find matching
417 * Returns a list of new subkeys.
421 new_matching_subkeys(Var *subkey,
422 List *considered_subkeys,
423 List *join_rel_tlist,
429 Expr *tlist_other_var;
431 foreach(i, joinclauses)
433 joinclause = lfirst(i);
434 tlist_other_var = matching_tlist_var(
435 other_join_clause_var(subkey, joinclause),
438 if (tlist_other_var &&
439 !(member(tlist_other_var, considered_subkeys)))
442 /* XXX was "push" function */
443 considered_subkeys = lappend(considered_subkeys,
447 * considered_subkeys = nreverse(considered_subkeys); XXX -- I
448 * am not sure of this.
451 t_list = lappend(t_list, tlist_other_var);