1 /*-------------------------------------------------------------------------
4 * Utilities for matching and building path keys
6 * See src/backend/optimizer/README for a great deal of information about
7 * the nature and use of path keys.
10 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
14 * src/backend/optimizer/path/pathkeys.c
16 *-------------------------------------------------------------------------
20 #include "access/skey.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/plannodes.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/tlist.h"
28 #include "utils/lsyscache.h"
31 static PathKey *make_canonical_pathkey(PlannerInfo *root,
32 EquivalenceClass *eclass, Oid opfamily,
33 int strategy, bool nulls_first);
34 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
35 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
38 /****************************************************************************
39 * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
40 ****************************************************************************/
43 * make_canonical_pathkey
44 * Given the parameters for a PathKey, find any pre-existing matching
45 * pathkey in the query's list of "canonical" pathkeys. Make a new
46 * entry if there's not one already.
48 * Note that this function must not be used until after we have completed
49 * merging EquivalenceClasses. (We don't try to enforce that here; instead,
50 * equivclass.c will complain if a merge occurs after root->canon_pathkeys
51 * has become nonempty.)
54 make_canonical_pathkey(PlannerInfo *root,
55 EquivalenceClass *eclass, Oid opfamily,
56 int strategy, bool nulls_first)
60 MemoryContext oldcontext;
62 /* The passed eclass might be non-canonical, so chase up to the top */
63 while (eclass->ec_merged)
64 eclass = eclass->ec_merged;
66 foreach(lc, root->canon_pathkeys)
68 pk = (PathKey *) lfirst(lc);
69 if (eclass == pk->pk_eclass &&
70 opfamily == pk->pk_opfamily &&
71 strategy == pk->pk_strategy &&
72 nulls_first == pk->pk_nulls_first)
77 * Be sure canonical pathkeys are allocated in the main planning context.
78 * Not an issue in normal planning, but it is for GEQO.
80 oldcontext = MemoryContextSwitchTo(root->planner_cxt);
82 pk = makeNode(PathKey);
83 pk->pk_eclass = eclass;
84 pk->pk_opfamily = opfamily;
85 pk->pk_strategy = strategy;
86 pk->pk_nulls_first = nulls_first;
88 root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
90 MemoryContextSwitchTo(oldcontext);
96 * pathkey_is_redundant
97 * Is a pathkey redundant with one already in the given list?
99 * We detect two cases:
101 * 1. If the new pathkey's equivalence class contains a constant, and isn't
102 * below an outer join, then we can disregard it as a sort key. An example:
103 * SELECT ... WHERE x = 42 ORDER BY x, y;
104 * We may as well just sort by y. Note that because of opfamily matching,
105 * this is semantically correct: we know that the equality constraint is one
106 * that actually binds the variable to a single value in the terms of any
107 * ordering operator that might go with the eclass. This rule not only lets
108 * us simplify (or even skip) explicit sorts, but also allows matching index
109 * sort orders to a query when there are don't-care index columns.
111 * 2. If the new pathkey's equivalence class is the same as that of any
112 * existing member of the pathkey list, then it is redundant. Some examples:
113 * SELECT ... ORDER BY x, x;
114 * SELECT ... ORDER BY x, x DESC;
115 * SELECT ... WHERE x = y ORDER BY x, y;
116 * In all these cases the second sort key cannot distinguish values that are
117 * considered equal by the first, and so there's no point in using it.
118 * Note in particular that we need not compare opfamily (all the opfamilies
119 * of the EC have the same notion of equality) nor sort direction.
121 * Both the given pathkey and the list members must be canonical for this
122 * to work properly, but that's okay since we no longer ever construct any
123 * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
124 * canonical includes the additional requirement of no redundant entries,
125 * which is exactly what we are checking for here.)
127 * Because the equivclass.c machinery forms only one copy of any EC per query,
128 * pointer comparison is enough to decide whether canonical ECs are the same.
131 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
133 EquivalenceClass *new_ec = new_pathkey->pk_eclass;
136 /* Check for EC containing a constant --- unconditionally redundant */
137 if (EC_MUST_BE_REDUNDANT(new_ec))
140 /* If same EC already used in list, then redundant */
141 foreach(lc, pathkeys)
143 PathKey *old_pathkey = (PathKey *) lfirst(lc);
145 if (new_ec == old_pathkey->pk_eclass)
153 * make_pathkey_from_sortinfo
154 * Given an expression and sort-order information, create a PathKey.
155 * The result is always a "canonical" PathKey, but it might be redundant.
157 * expr is the expression, and nullable_relids is the set of base relids
158 * that are potentially nullable below it.
160 * If the PathKey is being generated from a SortGroupClause, sortref should be
161 * the SortGroupClause's SortGroupRef; otherwise zero.
163 * If rel is not NULL, it identifies a specific relation we're considering
164 * a path for, and indicates that child EC members for that relation can be
165 * considered. Otherwise child members are ignored. (See the comments for
166 * get_eclass_for_sort_expr.)
168 * create_it is TRUE if we should create any missing EquivalenceClass
169 * needed to represent the sort key. If it's FALSE, we return NULL if the
170 * sort key isn't already present in any EquivalenceClass.
173 make_pathkey_from_sortinfo(PlannerInfo *root,
175 Relids nullable_relids,
188 EquivalenceClass *eclass;
190 strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
193 * EquivalenceClasses need to contain opfamily lists based on the family
194 * membership of mergejoinable equality operators, which could belong to
195 * more than one opfamily. So we have to look up the opfamily's equality
196 * operator and get its membership.
198 equality_op = get_opfamily_member(opfamily,
201 BTEqualStrategyNumber);
202 if (!OidIsValid(equality_op)) /* shouldn't happen */
203 elog(ERROR, "could not find equality operator for opfamily %u",
205 opfamilies = get_mergejoin_opfamilies(equality_op);
206 if (!opfamilies) /* certainly should find some */
207 elog(ERROR, "could not find opfamilies for equality operator %u",
210 /* Now find or (optionally) create a matching EquivalenceClass */
211 eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
212 opfamilies, opcintype, collation,
213 sortref, rel, create_it);
215 /* Fail if no EC and !create_it */
219 /* And finally we can find or create a PathKey node */
220 return make_canonical_pathkey(root, eclass, opfamily,
221 strategy, nulls_first);
225 * make_pathkey_from_sortop
226 * Like make_pathkey_from_sortinfo, but work from a sort operator.
228 * This should eventually go away, but we need to restructure SortGroupClause
232 make_pathkey_from_sortop(PlannerInfo *root,
234 Relids nullable_relids,
245 /* Find the operator in pg_amop --- failure shouldn't happen */
246 if (!get_ordering_op_properties(ordering_op,
247 &opfamily, &opcintype, &strategy))
248 elog(ERROR, "operator %u is not a valid ordering operator",
251 /* Because SortGroupClause doesn't carry collation, consult the expr */
252 collation = exprCollation((Node *) expr);
254 return make_pathkey_from_sortinfo(root,
260 (strategy == BTGreaterStrategyNumber),
268 /****************************************************************************
269 * PATHKEY COMPARISONS
270 ****************************************************************************/
274 * Compare two pathkeys to see if they are equivalent, and if not whether
275 * one is "better" than the other.
277 * We assume the pathkeys are canonical, and so they can be checked for
278 * equality by simple pointer comparison.
281 compare_pathkeys(List *keys1, List *keys2)
287 * Fall out quickly if we are passed two identical lists. This mostly
288 * catches the case where both are NIL, but that's common enough to
292 return PATHKEYS_EQUAL;
294 forboth(key1, keys1, key2, keys2)
296 PathKey *pathkey1 = (PathKey *) lfirst(key1);
297 PathKey *pathkey2 = (PathKey *) lfirst(key2);
299 if (pathkey1 != pathkey2)
300 return PATHKEYS_DIFFERENT; /* no need to keep looking */
304 * If we reached the end of only one list, the other is longer and
305 * therefore not a subset.
308 return PATHKEYS_BETTER1; /* key1 is longer */
310 return PATHKEYS_BETTER2; /* key2 is longer */
311 return PATHKEYS_EQUAL;
315 * pathkeys_contained_in
316 * Common special case of compare_pathkeys: we just want to know
317 * if keys2 are at least as well sorted as keys1.
320 pathkeys_contained_in(List *keys1, List *keys2)
322 switch (compare_pathkeys(keys1, keys2))
325 case PATHKEYS_BETTER2:
334 * get_cheapest_path_for_pathkeys
335 * Find the cheapest path (according to the specified criterion) that
336 * satisfies the given pathkeys and parameterization.
337 * Return NULL if no such path.
339 * 'paths' is a list of possible paths that all generate the same relation
340 * 'pathkeys' represents a required ordering (in canonical form!)
341 * 'required_outer' denotes allowable outer relations for parameterized paths
342 * 'cost_criterion' is STARTUP_COST or TOTAL_COST
345 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
346 Relids required_outer,
347 CostSelector cost_criterion)
349 Path *matched_path = NULL;
354 Path *path = (Path *) lfirst(l);
357 * Since cost comparison is a lot cheaper than pathkey comparison, do
358 * that first. (XXX is that still true?)
360 if (matched_path != NULL &&
361 compare_path_costs(matched_path, path, cost_criterion) <= 0)
364 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
365 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
372 * get_cheapest_fractional_path_for_pathkeys
373 * Find the cheapest path (for retrieving a specified fraction of all
374 * the tuples) that satisfies the given pathkeys and parameterization.
375 * Return NULL if no such path.
377 * See compare_fractional_path_costs() for the interpretation of the fraction
380 * 'paths' is a list of possible paths that all generate the same relation
381 * 'pathkeys' represents a required ordering (in canonical form!)
382 * 'required_outer' denotes allowable outer relations for parameterized paths
383 * 'fraction' is the fraction of the total tuples expected to be retrieved
386 get_cheapest_fractional_path_for_pathkeys(List *paths,
388 Relids required_outer,
391 Path *matched_path = NULL;
396 Path *path = (Path *) lfirst(l);
399 * Since cost comparison is a lot cheaper than pathkey comparison, do
400 * that first. (XXX is that still true?)
402 if (matched_path != NULL &&
403 compare_fractional_path_costs(matched_path, path, fraction) <= 0)
406 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
407 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
413 /****************************************************************************
414 * NEW PATHKEY FORMATION
415 ****************************************************************************/
418 * build_index_pathkeys
419 * Build a pathkeys list that describes the ordering induced by an index
420 * scan using the given index. (Note that an unordered index doesn't
421 * induce any ordering, so we return NIL.)
423 * If 'scandir' is BackwardScanDirection, build pathkeys representing a
424 * backwards scan of the index.
426 * The result is canonical, meaning that redundant pathkeys are removed;
427 * it may therefore have fewer entries than there are index columns.
429 * Another reason for stopping early is that we may be able to tell that
430 * an index column's sort order is uninteresting for this query. However,
431 * that test is just based on the existence of an EquivalenceClass and not
432 * on position in pathkey lists, so it's not complete. Caller should call
433 * truncate_useless_pathkeys() to possibly remove more pathkeys.
436 build_index_pathkeys(PlannerInfo *root,
438 ScanDirection scandir)
444 if (index->sortopfamily == NULL)
445 return NIL; /* non-orderable index */
448 foreach(lc, index->indextlist)
450 TargetEntry *indextle = (TargetEntry *) lfirst(lc);
456 /* We assume we don't need to make a copy of the tlist item */
457 indexkey = indextle->expr;
459 if (ScanDirectionIsBackward(scandir))
461 reverse_sort = !index->reverse_sort[i];
462 nulls_first = !index->nulls_first[i];
466 reverse_sort = index->reverse_sort[i];
467 nulls_first = index->nulls_first[i];
471 * OK, try to make a canonical pathkey for this sort key. Note we're
472 * underneath any outer joins, so nullable_relids should be NULL.
474 cpathkey = make_pathkey_from_sortinfo(root,
477 index->sortopfamily[i],
479 index->indexcollations[i],
487 * If the sort key isn't already present in any EquivalenceClass, then
488 * it's not an interesting sort order for this query. So we can stop
489 * now --- lower-order sort keys aren't useful either.
494 /* Add to list unless redundant */
495 if (!pathkey_is_redundant(cpathkey, retval))
496 retval = lappend(retval, cpathkey);
505 * build_expression_pathkey
506 * Build a pathkeys list that describes an ordering by a single expression
507 * using the given sort operator.
509 * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
510 * We induce the other arguments assuming default sort order for the operator.
512 * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
513 * is false and the expression isn't already in some EquivalenceClass.
516 build_expression_pathkey(PlannerInfo *root,
518 Relids nullable_relids,
529 /* Find the operator in pg_amop --- failure shouldn't happen */
530 if (!get_ordering_op_properties(opno,
531 &opfamily, &opcintype, &strategy))
532 elog(ERROR, "operator %u is not a valid ordering operator",
535 cpathkey = make_pathkey_from_sortinfo(root,
540 exprCollation((Node *) expr),
541 (strategy == BTGreaterStrategyNumber),
542 (strategy == BTGreaterStrategyNumber),
548 pathkeys = list_make1(cpathkey);
556 * convert_subquery_pathkeys
557 * Build a pathkeys list that describes the ordering of a subquery's
558 * result, in the terms of the outer query. This is essentially a
559 * task of conversion.
561 * 'rel': outer query's RelOptInfo for the subquery relation.
562 * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
564 * It is not necessary for caller to do truncate_useless_pathkeys(),
565 * because we select keys in a way that takes usefulness of the keys into
569 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
570 List *subquery_pathkeys)
574 int outer_query_keys = list_length(root->query_pathkeys);
575 List *sub_tlist = rel->subplan->targetlist;
578 foreach(i, subquery_pathkeys)
580 PathKey *sub_pathkey = (PathKey *) lfirst(i);
581 EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
582 PathKey *best_pathkey = NULL;
584 if (sub_eclass->ec_has_volatile)
587 * If the sub_pathkey's EquivalenceClass is volatile, then it must
588 * have come from an ORDER BY clause, and we have to match it to
589 * that same targetlist entry.
593 if (sub_eclass->ec_sortref == 0) /* can't happen */
594 elog(ERROR, "volatile EquivalenceClass has no sortref");
595 tle = get_sortgroupref_tle(sub_eclass->ec_sortref, sub_tlist);
597 /* resjunk items aren't visible to outer query */
600 /* We can represent this sub_pathkey */
601 EquivalenceMember *sub_member;
603 EquivalenceClass *outer_ec;
605 Assert(list_length(sub_eclass->ec_members) == 1);
606 sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
607 outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
610 * Note: it might look funny to be setting sortref = 0 for a
611 * reference to a volatile sub_eclass. However, the
612 * expression is *not* volatile in the outer query: it's just
613 * a Var referencing whatever the subquery emitted. (IOW, the
614 * outer query isn't going to re-execute the volatile
615 * expression itself.) So this is okay. Likewise, it's
616 * correct to pass nullable_relids = NULL, because we're
617 * underneath any outer joins appearing in the outer query.
620 get_eclass_for_sort_expr(root,
623 sub_eclass->ec_opfamilies,
624 sub_member->em_datatype,
625 sub_eclass->ec_collation,
631 * If we don't find a matching EC, sub-pathkey isn't
632 * interesting to the outer query
636 make_canonical_pathkey(root,
638 sub_pathkey->pk_opfamily,
639 sub_pathkey->pk_strategy,
640 sub_pathkey->pk_nulls_first);
646 * Otherwise, the sub_pathkey's EquivalenceClass could contain
647 * multiple elements (representing knowledge that multiple items
648 * are effectively equal). Each element might match none, one, or
649 * more of the output columns that are visible to the outer query.
650 * This means we may have multiple possible representations of the
651 * sub_pathkey in the context of the outer query. Ideally we
652 * would generate them all and put them all into an EC of the
653 * outer query, thereby propagating equality knowledge up to the
654 * outer query. Right now we cannot do so, because the outer
655 * query's EquivalenceClasses are already frozen when this is
656 * called. Instead we prefer the one that has the highest "score"
657 * (number of EC peers, plus one if it matches the outer
658 * query_pathkeys). This is the most likely to be useful in the
664 foreach(j, sub_eclass->ec_members)
666 EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
667 Expr *sub_expr = sub_member->em_expr;
668 Oid sub_expr_type = sub_member->em_datatype;
669 Oid sub_expr_coll = sub_eclass->ec_collation;
672 if (sub_member->em_is_child)
673 continue; /* ignore children here */
675 foreach(k, sub_tlist)
677 TargetEntry *tle = (TargetEntry *) lfirst(k);
680 EquivalenceClass *outer_ec;
684 /* resjunk items aren't visible to outer query */
689 * The targetlist entry is considered to match if it
690 * matches after sort-key canonicalization. That is
691 * needed since the sub_expr has been through the same
694 tle_expr = canonicalize_ec_expression(tle->expr,
697 if (!equal(tle_expr, sub_expr))
701 * Build a representation of this targetlist entry as an
704 outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
707 /* See if we have a matching EC for that */
708 outer_ec = get_eclass_for_sort_expr(root,
711 sub_eclass->ec_opfamilies,
719 * If we don't find a matching EC, this sub-pathkey isn't
720 * interesting to the outer query
725 outer_pk = make_canonical_pathkey(root,
727 sub_pathkey->pk_opfamily,
728 sub_pathkey->pk_strategy,
729 sub_pathkey->pk_nulls_first);
730 /* score = # of equivalence peers */
731 score = list_length(outer_ec->ec_members) - 1;
732 /* +1 if it matches the proper query_pathkeys item */
733 if (retvallen < outer_query_keys &&
734 list_nth(root->query_pathkeys, retvallen) == outer_pk)
736 if (score > best_score)
738 best_pathkey = outer_pk;
746 * If we couldn't find a representation of this sub_pathkey, we're
747 * done (we can't use the ones to its right, either).
753 * Eliminate redundant ordering info; could happen if outer query
754 * equivalences subquery keys...
756 if (!pathkey_is_redundant(best_pathkey, retval))
758 retval = lappend(retval, best_pathkey);
767 * build_join_pathkeys
768 * Build the path keys for a join relation constructed by mergejoin or
769 * nestloop join. This is normally the same as the outer path's keys.
771 * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
772 * having the outer path's path keys, because null lefthand rows may be
773 * inserted at random points. It must be treated as unsorted.
775 * We truncate away any pathkeys that are uninteresting for higher joins.
777 * 'joinrel' is the join relation that paths are being formed for
778 * 'jointype' is the join type (inner, left, full, etc)
779 * 'outer_pathkeys' is the list of the current outer path's path keys
781 * Returns the list of new path keys.
784 build_join_pathkeys(PlannerInfo *root,
787 List *outer_pathkeys)
789 if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
793 * This used to be quite a complex bit of code, but now that all pathkey
794 * sublists start out life canonicalized, we don't have to do a darn thing
797 * We do, however, need to truncate the pathkeys list, since it may
798 * contain pathkeys that were useful for forming this joinrel but are
799 * uninteresting to higher levels.
801 return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
804 /****************************************************************************
805 * PATHKEYS AND SORT CLAUSES
806 ****************************************************************************/
809 * make_pathkeys_for_sortclauses
810 * Generate a pathkeys list that represents the sort order specified
811 * by a list of SortGroupClauses
813 * The resulting PathKeys are always in canonical form. (Actually, there
814 * is no longer any code anywhere that creates non-canonical PathKeys.)
816 * We assume that root->nullable_baserels is the set of base relids that could
817 * have gone to NULL below the SortGroupClause expressions. This is okay if
818 * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
819 * and if this function is only invoked after deconstruct_jointree. In the
820 * future we might have to make callers pass in the appropriate
821 * nullable-relids set, but for now it seems unnecessary.
823 * 'sortclauses' is a list of SortGroupClause nodes
824 * 'tlist' is the targetlist to find the referenced tlist entries in
827 make_pathkeys_for_sortclauses(PlannerInfo *root,
831 List *pathkeys = NIL;
834 foreach(l, sortclauses)
836 SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
840 sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
841 Assert(OidIsValid(sortcl->sortop));
842 pathkey = make_pathkey_from_sortop(root,
844 root->nullable_baserels,
847 sortcl->tleSortGroupRef,
850 /* Canonical form eliminates redundant ordering keys */
851 if (!pathkey_is_redundant(pathkey, pathkeys))
852 pathkeys = lappend(pathkeys, pathkey);
857 /****************************************************************************
858 * PATHKEYS AND MERGECLAUSES
859 ****************************************************************************/
862 * initialize_mergeclause_eclasses
863 * Set the EquivalenceClass links in a mergeclause restrictinfo.
865 * RestrictInfo contains fields in which we may cache pointers to
866 * EquivalenceClasses for the left and right inputs of the mergeclause.
867 * (If the mergeclause is a true equivalence clause these will be the
868 * same EquivalenceClass, otherwise not.) If the mergeclause is either
869 * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
870 * then it's easy to set up the left_ec and right_ec members --- otherwise,
871 * this function should be called to set them up. We will generate new
872 * EquivalenceClauses if necessary to represent the mergeclause's left and
875 * Note this is called before EC merging is complete, so the links won't
876 * necessarily point to canonical ECs. Before they are actually used for
877 * anything, update_mergeclause_eclasses must be called to ensure that
878 * they've been updated to point to canonical ECs.
881 initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
883 Expr *clause = restrictinfo->clause;
887 /* Should be a mergeclause ... */
888 Assert(restrictinfo->mergeopfamilies != NIL);
889 /* ... with links not yet set */
890 Assert(restrictinfo->left_ec == NULL);
891 Assert(restrictinfo->right_ec == NULL);
893 /* Need the declared input types of the operator */
894 op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
896 /* Find or create a matching EquivalenceClass for each side */
897 restrictinfo->left_ec =
898 get_eclass_for_sort_expr(root,
899 (Expr *) get_leftop(clause),
900 restrictinfo->nullable_relids,
901 restrictinfo->mergeopfamilies,
903 ((OpExpr *) clause)->inputcollid,
907 restrictinfo->right_ec =
908 get_eclass_for_sort_expr(root,
909 (Expr *) get_rightop(clause),
910 restrictinfo->nullable_relids,
911 restrictinfo->mergeopfamilies,
913 ((OpExpr *) clause)->inputcollid,
920 * update_mergeclause_eclasses
921 * Make the cached EquivalenceClass links valid in a mergeclause
924 * These pointers should have been set by process_equivalence or
925 * initialize_mergeclause_eclasses, but they might have been set to
926 * non-canonical ECs that got merged later. Chase up to the canonical
927 * merged parent if so.
930 update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
932 /* Should be a merge clause ... */
933 Assert(restrictinfo->mergeopfamilies != NIL);
934 /* ... with pointers already set */
935 Assert(restrictinfo->left_ec != NULL);
936 Assert(restrictinfo->right_ec != NULL);
938 /* Chase up to the top as needed */
939 while (restrictinfo->left_ec->ec_merged)
940 restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
941 while (restrictinfo->right_ec->ec_merged)
942 restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
946 * find_mergeclauses_for_pathkeys
947 * This routine attempts to find a set of mergeclauses that can be
948 * used with a specified ordering for one of the input relations.
949 * If successful, it returns a list of mergeclauses.
951 * 'pathkeys' is a pathkeys list showing the ordering of an input path.
952 * 'outer_keys' is TRUE if these keys are for the outer input path,
953 * FALSE if for inner.
954 * 'restrictinfos' is a list of mergejoinable restriction clauses for the
955 * join relation being formed.
957 * The restrictinfos must be marked (via outer_is_left) to show which side
958 * of each clause is associated with the current outer path. (See
959 * select_mergejoin_clauses())
961 * The result is NIL if no merge can be done, else a maximal list of
962 * usable mergeclauses (represented as a list of their restrictinfo nodes).
965 find_mergeclauses_for_pathkeys(PlannerInfo *root,
970 List *mergeclauses = NIL;
973 /* make sure we have eclasses cached in the clauses */
974 foreach(i, restrictinfos)
976 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
978 update_mergeclause_eclasses(root, rinfo);
983 PathKey *pathkey = (PathKey *) lfirst(i);
984 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
985 List *matched_restrictinfos = NIL;
989 * A mergejoin clause matches a pathkey if it has the same EC.
990 * If there are multiple matching clauses, take them all. In plain
991 * inner-join scenarios we expect only one match, because
992 * equivalence-class processing will have removed any redundant
993 * mergeclauses. However, in outer-join scenarios there might be
994 * multiple matches. An example is
996 * select * from a full join b
997 * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
999 * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
1000 * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
1001 * we *must* do so or we will be unable to form a valid plan.
1003 * We expect that the given pathkeys list is canonical, which means
1004 * no two members have the same EC, so it's not possible for this
1005 * code to enter the same mergeclause into the result list twice.
1007 * It's possible that multiple matching clauses might have different
1008 * ECs on the other side, in which case the order we put them into our
1009 * result makes a difference in the pathkeys required for the other
1010 * input path. However this routine hasn't got any info about which
1011 * order would be best, so we don't worry about that.
1013 * It's also possible that the selected mergejoin clauses produce
1014 * a noncanonical ordering of pathkeys for the other side, ie, we
1015 * might select clauses that reference b.v1, b.v2, b.v1 in that
1016 * order. This is not harmful in itself, though it suggests that
1017 * the clauses are partially redundant. Since it happens only with
1018 * redundant query conditions, we don't bother to eliminate it.
1019 * make_inner_pathkeys_for_merge() has to delete duplicates when
1020 * it constructs the canonical pathkeys list, and we also have to
1021 * deal with the case in create_mergejoin_plan().
1024 foreach(j, restrictinfos)
1026 RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1027 EquivalenceClass *clause_ec;
1030 clause_ec = rinfo->outer_is_left ?
1031 rinfo->left_ec : rinfo->right_ec;
1033 clause_ec = rinfo->outer_is_left ?
1034 rinfo->right_ec : rinfo->left_ec;
1035 if (clause_ec == pathkey_ec)
1036 matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1040 * If we didn't find a mergeclause, we're done --- any additional
1041 * sort-key positions in the pathkeys are useless. (But we can still
1042 * mergejoin if we found at least one mergeclause.)
1044 if (matched_restrictinfos == NIL)
1048 * If we did find usable mergeclause(s) for this sort-key position,
1049 * add them to result list.
1051 mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1054 return mergeclauses;
1058 * select_outer_pathkeys_for_merge
1059 * Builds a pathkey list representing a possible sort ordering
1060 * that can be used with the given mergeclauses.
1062 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1063 * that will be used in a merge join.
1064 * 'joinrel' is the join relation we are trying to construct.
1066 * The restrictinfos must be marked (via outer_is_left) to show which side
1067 * of each clause is associated with the current outer path. (See
1068 * select_mergejoin_clauses())
1070 * Returns a pathkeys list that can be applied to the outer relation.
1072 * Since we assume here that a sort is required, there is no particular use
1073 * in matching any available ordering of the outerrel. (joinpath.c has an
1074 * entirely separate code path for considering sort-free mergejoins.) Rather,
1075 * it's interesting to try to match the requested query_pathkeys so that a
1076 * second output sort may be avoided; and failing that, we try to list "more
1077 * popular" keys (those with the most unmatched EquivalenceClass peers)
1078 * earlier, in hopes of making the resulting ordering useful for as many
1079 * higher-level mergejoins as possible.
1082 select_outer_pathkeys_for_merge(PlannerInfo *root,
1084 RelOptInfo *joinrel)
1086 List *pathkeys = NIL;
1087 int nClauses = list_length(mergeclauses);
1088 EquivalenceClass **ecs;
1094 /* Might have no mergeclauses */
1099 * Make arrays of the ECs used by the mergeclauses (dropping any
1100 * duplicates) and their "popularity" scores.
1102 ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1103 scores = (int *) palloc(nClauses * sizeof(int));
1106 foreach(lc, mergeclauses)
1108 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1109 EquivalenceClass *oeclass;
1113 /* get the outer eclass */
1114 update_mergeclause_eclasses(root, rinfo);
1116 if (rinfo->outer_is_left)
1117 oeclass = rinfo->left_ec;
1119 oeclass = rinfo->right_ec;
1121 /* reject duplicates */
1122 for (j = 0; j < necs; j++)
1124 if (ecs[j] == oeclass)
1132 foreach(lc2, oeclass->ec_members)
1134 EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
1136 /* Potential future join partner? */
1137 if (!em->em_is_const && !em->em_is_child &&
1138 !bms_overlap(em->em_relids, joinrel->relids))
1142 ecs[necs] = oeclass;
1143 scores[necs] = score;
1148 * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1149 * can generate a sort order that's also useful for final output. There is
1150 * no percentage in a partial match, though, so we have to have 'em all.
1152 if (root->query_pathkeys)
1154 foreach(lc, root->query_pathkeys)
1156 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1157 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1159 for (j = 0; j < necs; j++)
1161 if (ecs[j] == query_ec)
1162 break; /* found match */
1165 break; /* didn't find match */
1167 /* if we got to the end of the list, we have them all */
1170 /* copy query_pathkeys as starting point for our output */
1171 pathkeys = list_copy(root->query_pathkeys);
1172 /* mark their ECs as already-emitted */
1173 foreach(lc, root->query_pathkeys)
1175 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1176 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1178 for (j = 0; j < necs; j++)
1180 if (ecs[j] == query_ec)
1191 * Add remaining ECs to the list in popularity order, using a default sort
1192 * ordering. (We could use qsort() here, but the list length is usually
1193 * so small it's not worth it.)
1199 EquivalenceClass *ec;
1203 best_score = scores[0];
1204 for (j = 1; j < necs; j++)
1206 if (scores[j] > best_score)
1209 best_score = scores[j];
1213 break; /* all done */
1215 scores[best_j] = -1;
1216 pathkey = make_canonical_pathkey(root,
1218 linitial_oid(ec->ec_opfamilies),
1219 BTLessStrategyNumber,
1221 /* can't be redundant because no duplicate ECs */
1222 Assert(!pathkey_is_redundant(pathkey, pathkeys));
1223 pathkeys = lappend(pathkeys, pathkey);
1233 * make_inner_pathkeys_for_merge
1234 * Builds a pathkey list representing the explicit sort order that
1235 * must be applied to an inner path to make it usable with the
1236 * given mergeclauses.
1238 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1239 * that will be used in a merge join.
1240 * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1243 * The restrictinfos must be marked (via outer_is_left) to show which side
1244 * of each clause is associated with the current outer path. (See
1245 * select_mergejoin_clauses())
1247 * Returns a pathkeys list that can be applied to the inner relation.
1249 * Note that it is not this routine's job to decide whether sorting is
1250 * actually needed for a particular input path. Assume a sort is necessary;
1251 * just make the keys, eh?
1254 make_inner_pathkeys_for_merge(PlannerInfo *root,
1256 List *outer_pathkeys)
1258 List *pathkeys = NIL;
1259 EquivalenceClass *lastoeclass;
1266 lop = list_head(outer_pathkeys);
1268 foreach(lc, mergeclauses)
1270 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1271 EquivalenceClass *oeclass;
1272 EquivalenceClass *ieclass;
1275 update_mergeclause_eclasses(root, rinfo);
1277 if (rinfo->outer_is_left)
1279 oeclass = rinfo->left_ec;
1280 ieclass = rinfo->right_ec;
1284 oeclass = rinfo->right_ec;
1285 ieclass = rinfo->left_ec;
1288 /* outer eclass should match current or next pathkeys */
1289 /* we check this carefully for debugging reasons */
1290 if (oeclass != lastoeclass)
1293 elog(ERROR, "too few pathkeys for mergeclauses");
1294 opathkey = (PathKey *) lfirst(lop);
1296 lastoeclass = opathkey->pk_eclass;
1297 if (oeclass != lastoeclass)
1298 elog(ERROR, "outer pathkeys do not match mergeclause");
1302 * Often, we'll have same EC on both sides, in which case the outer
1303 * pathkey is also canonical for the inner side, and we can skip a
1306 if (ieclass == oeclass)
1309 pathkey = make_canonical_pathkey(root,
1311 opathkey->pk_opfamily,
1312 opathkey->pk_strategy,
1313 opathkey->pk_nulls_first);
1316 * Don't generate redundant pathkeys (can happen if multiple
1317 * mergeclauses refer to same EC).
1319 if (!pathkey_is_redundant(pathkey, pathkeys))
1320 pathkeys = lappend(pathkeys, pathkey);
1326 /****************************************************************************
1327 * PATHKEY USEFULNESS CHECKS
1329 * We only want to remember as many of the pathkeys of a path as have some
1330 * potential use, either for subsequent mergejoins or for meeting the query's
1331 * requested output ordering. This ensures that add_path() won't consider
1332 * a path to have a usefully different ordering unless it really is useful.
1333 * These routines check for usefulness of given pathkeys.
1334 ****************************************************************************/
1337 * pathkeys_useful_for_merging
1338 * Count the number of pathkeys that may be useful for mergejoins
1339 * above the given relation.
1341 * We consider a pathkey potentially useful if it corresponds to the merge
1342 * ordering of either side of any joinclause for the rel. This might be
1343 * overoptimistic, since joinclauses that require different other relations
1344 * might never be usable at the same time, but trying to be exact is likely
1345 * to be more trouble than it's worth.
1347 * To avoid doubling the number of mergejoin paths considered, we would like
1348 * to consider only one of the two scan directions (ASC or DESC) as useful
1349 * for merging for any given target column. The choice is arbitrary unless
1350 * one of the directions happens to match an ORDER BY key, in which case
1351 * that direction should be preferred, in hopes of avoiding a final sort step.
1352 * right_merge_direction() implements this heuristic.
1355 pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
1360 foreach(i, pathkeys)
1362 PathKey *pathkey = (PathKey *) lfirst(i);
1363 bool matched = false;
1366 /* If "wrong" direction, not useful for merging */
1367 if (!right_merge_direction(root, pathkey))
1371 * First look into the EquivalenceClass of the pathkey, to see if
1372 * there are any members not yet joined to the rel. If so, it's
1373 * surely possible to generate a mergejoin clause using them.
1375 if (rel->has_eclass_joins &&
1376 eclass_useful_for_merging(pathkey->pk_eclass, rel))
1381 * Otherwise search the rel's joininfo list, which contains
1382 * non-EquivalenceClass-derivable join clauses that might
1383 * nonetheless be mergejoinable.
1385 foreach(j, rel->joininfo)
1387 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1389 if (restrictinfo->mergeopfamilies == NIL)
1391 update_mergeclause_eclasses(root, restrictinfo);
1393 if (pathkey->pk_eclass == restrictinfo->left_ec ||
1394 pathkey->pk_eclass == restrictinfo->right_ec)
1403 * If we didn't find a mergeclause, we're done --- any additional
1404 * sort-key positions in the pathkeys are useless. (But we can still
1405 * mergejoin if we found at least one mergeclause.)
1417 * right_merge_direction
1418 * Check whether the pathkey embodies the preferred sort direction
1419 * for merging its target column.
1422 right_merge_direction(PlannerInfo *root, PathKey *pathkey)
1426 foreach(l, root->query_pathkeys)
1428 PathKey *query_pathkey = (PathKey *) lfirst(l);
1430 if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1431 pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1434 * Found a matching query sort column. Prefer this pathkey's
1435 * direction iff it matches. Note that we ignore pk_nulls_first,
1436 * which means that a sort might be needed anyway ... but we still
1437 * want to prefer only one of the two possible directions, and we
1438 * might as well use this one.
1440 return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1444 /* If no matching ORDER BY request, prefer the ASC direction */
1445 return (pathkey->pk_strategy == BTLessStrategyNumber);
1449 * pathkeys_useful_for_ordering
1450 * Count the number of pathkeys that are useful for meeting the
1451 * query's requested output ordering.
1453 * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
1454 * no good to order by just the first key(s) of the requested ordering.
1455 * So the result is always either 0 or list_length(root->query_pathkeys).
1458 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1460 if (root->query_pathkeys == NIL)
1461 return 0; /* no special ordering requested */
1463 if (pathkeys == NIL)
1464 return 0; /* unordered path */
1466 if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
1468 /* It's useful ... or at least the first N keys are */
1469 return list_length(root->query_pathkeys);
1472 return 0; /* path ordering not useful */
1476 * truncate_useless_pathkeys
1477 * Shorten the given pathkey list to just the useful pathkeys.
1480 truncate_useless_pathkeys(PlannerInfo *root,
1487 nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1488 nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1489 if (nuseful2 > nuseful)
1493 * Note: not safe to modify input list destructively, but we can avoid
1494 * copying the list if we're not actually going to change it
1498 else if (nuseful == list_length(pathkeys))
1501 return list_truncate(list_copy(pathkeys), nuseful);
1505 * has_useful_pathkeys
1506 * Detect whether the specified rel could have any pathkeys that are
1507 * useful according to truncate_useless_pathkeys().
1509 * This is a cheap test that lets us skip building pathkeys at all in very
1510 * simple queries. It's OK to err in the direction of returning "true" when
1511 * there really aren't any usable pathkeys, but erring in the other direction
1512 * is bad --- so keep this in sync with the routines above!
1514 * We could make the test more complex, for example checking to see if any of
1515 * the joinclauses are really mergejoinable, but that likely wouldn't win
1516 * often enough to repay the extra cycles. Queries with neither a join nor
1517 * a sort are reasonably common, though, so this much work seems worthwhile.
1520 has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1522 if (rel->joininfo != NIL || rel->has_eclass_joins)
1523 return true; /* might be able to use pathkeys for merging */
1524 if (root->query_pathkeys != NIL)
1525 return true; /* might be able to use them for ordering */
1526 return false; /* definitely useless */