1 /*-------------------------------------------------------------------------
4 * Utilities for matching and building path keys
6 * See src/backend/optimizer/README for a great deal of information about
7 * the nature and use of path keys.
10 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
14 * src/backend/optimizer/path/pathkeys.c
16 *-------------------------------------------------------------------------
20 #include "access/stratnum.h"
21 #include "nodes/makefuncs.h"
22 #include "nodes/nodeFuncs.h"
23 #include "nodes/plannodes.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/pathnode.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/tlist.h"
28 #include "utils/lsyscache.h"
31 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
32 static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
35 /****************************************************************************
36 * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
37 ****************************************************************************/
40 * make_canonical_pathkey
41 * Given the parameters for a PathKey, find any pre-existing matching
42 * pathkey in the query's list of "canonical" pathkeys. Make a new
43 * entry if there's not one already.
45 * Note that this function must not be used until after we have completed
46 * merging EquivalenceClasses. (We don't try to enforce that here; instead,
47 * equivclass.c will complain if a merge occurs after root->canon_pathkeys
48 * has become nonempty.)
51 make_canonical_pathkey(PlannerInfo *root,
52 EquivalenceClass *eclass, Oid opfamily,
53 int strategy, bool nulls_first)
57 MemoryContext oldcontext;
59 /* The passed eclass might be non-canonical, so chase up to the top */
60 while (eclass->ec_merged)
61 eclass = eclass->ec_merged;
63 foreach(lc, root->canon_pathkeys)
65 pk = (PathKey *) lfirst(lc);
66 if (eclass == pk->pk_eclass &&
67 opfamily == pk->pk_opfamily &&
68 strategy == pk->pk_strategy &&
69 nulls_first == pk->pk_nulls_first)
74 * Be sure canonical pathkeys are allocated in the main planning context.
75 * Not an issue in normal planning, but it is for GEQO.
77 oldcontext = MemoryContextSwitchTo(root->planner_cxt);
79 pk = makeNode(PathKey);
80 pk->pk_eclass = eclass;
81 pk->pk_opfamily = opfamily;
82 pk->pk_strategy = strategy;
83 pk->pk_nulls_first = nulls_first;
85 root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
87 MemoryContextSwitchTo(oldcontext);
93 * pathkey_is_redundant
94 * Is a pathkey redundant with one already in the given list?
96 * We detect two cases:
98 * 1. If the new pathkey's equivalence class contains a constant, and isn't
99 * below an outer join, then we can disregard it as a sort key. An example:
100 * SELECT ... WHERE x = 42 ORDER BY x, y;
101 * We may as well just sort by y. Note that because of opfamily matching,
102 * this is semantically correct: we know that the equality constraint is one
103 * that actually binds the variable to a single value in the terms of any
104 * ordering operator that might go with the eclass. This rule not only lets
105 * us simplify (or even skip) explicit sorts, but also allows matching index
106 * sort orders to a query when there are don't-care index columns.
108 * 2. If the new pathkey's equivalence class is the same as that of any
109 * existing member of the pathkey list, then it is redundant. Some examples:
110 * SELECT ... ORDER BY x, x;
111 * SELECT ... ORDER BY x, x DESC;
112 * SELECT ... WHERE x = y ORDER BY x, y;
113 * In all these cases the second sort key cannot distinguish values that are
114 * considered equal by the first, and so there's no point in using it.
115 * Note in particular that we need not compare opfamily (all the opfamilies
116 * of the EC have the same notion of equality) nor sort direction.
118 * Both the given pathkey and the list members must be canonical for this
119 * to work properly, but that's okay since we no longer ever construct any
120 * non-canonical pathkeys. (Note: the notion of a pathkey *list* being
121 * canonical includes the additional requirement of no redundant entries,
122 * which is exactly what we are checking for here.)
124 * Because the equivclass.c machinery forms only one copy of any EC per query,
125 * pointer comparison is enough to decide whether canonical ECs are the same.
128 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
130 EquivalenceClass *new_ec = new_pathkey->pk_eclass;
133 /* Check for EC containing a constant --- unconditionally redundant */
134 if (EC_MUST_BE_REDUNDANT(new_ec))
137 /* If same EC already used in list, then redundant */
138 foreach(lc, pathkeys)
140 PathKey *old_pathkey = (PathKey *) lfirst(lc);
142 if (new_ec == old_pathkey->pk_eclass)
150 * make_pathkey_from_sortinfo
151 * Given an expression and sort-order information, create a PathKey.
152 * The result is always a "canonical" PathKey, but it might be redundant.
154 * expr is the expression, and nullable_relids is the set of base relids
155 * that are potentially nullable below it.
157 * If the PathKey is being generated from a SortGroupClause, sortref should be
158 * the SortGroupClause's SortGroupRef; otherwise zero.
160 * If rel is not NULL, it identifies a specific relation we're considering
161 * a path for, and indicates that child EC members for that relation can be
162 * considered. Otherwise child members are ignored. (See the comments for
163 * get_eclass_for_sort_expr.)
165 * create_it is TRUE if we should create any missing EquivalenceClass
166 * needed to represent the sort key. If it's FALSE, we return NULL if the
167 * sort key isn't already present in any EquivalenceClass.
170 make_pathkey_from_sortinfo(PlannerInfo *root,
172 Relids nullable_relids,
185 EquivalenceClass *eclass;
187 strategy = reverse_sort ? BTGreaterStrategyNumber : BTLessStrategyNumber;
190 * EquivalenceClasses need to contain opfamily lists based on the family
191 * membership of mergejoinable equality operators, which could belong to
192 * more than one opfamily. So we have to look up the opfamily's equality
193 * operator and get its membership.
195 equality_op = get_opfamily_member(opfamily,
198 BTEqualStrategyNumber);
199 if (!OidIsValid(equality_op)) /* shouldn't happen */
200 elog(ERROR, "could not find equality operator for opfamily %u",
202 opfamilies = get_mergejoin_opfamilies(equality_op);
203 if (!opfamilies) /* certainly should find some */
204 elog(ERROR, "could not find opfamilies for equality operator %u",
207 /* Now find or (optionally) create a matching EquivalenceClass */
208 eclass = get_eclass_for_sort_expr(root, expr, nullable_relids,
209 opfamilies, opcintype, collation,
210 sortref, rel, create_it);
212 /* Fail if no EC and !create_it */
216 /* And finally we can find or create a PathKey node */
217 return make_canonical_pathkey(root, eclass, opfamily,
218 strategy, nulls_first);
222 * make_pathkey_from_sortop
223 * Like make_pathkey_from_sortinfo, but work from a sort operator.
225 * This should eventually go away, but we need to restructure SortGroupClause
229 make_pathkey_from_sortop(PlannerInfo *root,
231 Relids nullable_relids,
242 /* Find the operator in pg_amop --- failure shouldn't happen */
243 if (!get_ordering_op_properties(ordering_op,
244 &opfamily, &opcintype, &strategy))
245 elog(ERROR, "operator %u is not a valid ordering operator",
248 /* Because SortGroupClause doesn't carry collation, consult the expr */
249 collation = exprCollation((Node *) expr);
251 return make_pathkey_from_sortinfo(root,
257 (strategy == BTGreaterStrategyNumber),
265 /****************************************************************************
266 * PATHKEY COMPARISONS
267 ****************************************************************************/
271 * Compare two pathkeys to see if they are equivalent, and if not whether
272 * one is "better" than the other.
274 * We assume the pathkeys are canonical, and so they can be checked for
275 * equality by simple pointer comparison.
278 compare_pathkeys(List *keys1, List *keys2)
284 * Fall out quickly if we are passed two identical lists. This mostly
285 * catches the case where both are NIL, but that's common enough to
289 return PATHKEYS_EQUAL;
291 forboth(key1, keys1, key2, keys2)
293 PathKey *pathkey1 = (PathKey *) lfirst(key1);
294 PathKey *pathkey2 = (PathKey *) lfirst(key2);
296 if (pathkey1 != pathkey2)
297 return PATHKEYS_DIFFERENT; /* no need to keep looking */
301 * If we reached the end of only one list, the other is longer and
302 * therefore not a subset.
305 return PATHKEYS_BETTER1; /* key1 is longer */
307 return PATHKEYS_BETTER2; /* key2 is longer */
308 return PATHKEYS_EQUAL;
312 * pathkeys_contained_in
313 * Common special case of compare_pathkeys: we just want to know
314 * if keys2 are at least as well sorted as keys1.
317 pathkeys_contained_in(List *keys1, List *keys2)
319 switch (compare_pathkeys(keys1, keys2))
322 case PATHKEYS_BETTER2:
331 * get_cheapest_path_for_pathkeys
332 * Find the cheapest path (according to the specified criterion) that
333 * satisfies the given pathkeys and parameterization.
334 * Return NULL if no such path.
336 * 'paths' is a list of possible paths that all generate the same relation
337 * 'pathkeys' represents a required ordering (in canonical form!)
338 * 'required_outer' denotes allowable outer relations for parameterized paths
339 * 'cost_criterion' is STARTUP_COST or TOTAL_COST
342 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
343 Relids required_outer,
344 CostSelector cost_criterion)
346 Path *matched_path = NULL;
351 Path *path = (Path *) lfirst(l);
354 * Since cost comparison is a lot cheaper than pathkey comparison, do
355 * that first. (XXX is that still true?)
357 if (matched_path != NULL &&
358 compare_path_costs(matched_path, path, cost_criterion) <= 0)
361 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
362 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
369 * get_cheapest_fractional_path_for_pathkeys
370 * Find the cheapest path (for retrieving a specified fraction of all
371 * the tuples) that satisfies the given pathkeys and parameterization.
372 * Return NULL if no such path.
374 * See compare_fractional_path_costs() for the interpretation of the fraction
377 * 'paths' is a list of possible paths that all generate the same relation
378 * 'pathkeys' represents a required ordering (in canonical form!)
379 * 'required_outer' denotes allowable outer relations for parameterized paths
380 * 'fraction' is the fraction of the total tuples expected to be retrieved
383 get_cheapest_fractional_path_for_pathkeys(List *paths,
385 Relids required_outer,
388 Path *matched_path = NULL;
393 Path *path = (Path *) lfirst(l);
396 * Since cost comparison is a lot cheaper than pathkey comparison, do
397 * that first. (XXX is that still true?)
399 if (matched_path != NULL &&
400 compare_fractional_path_costs(matched_path, path, fraction) <= 0)
403 if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
404 bms_is_subset(PATH_REQ_OUTER(path), required_outer))
410 /****************************************************************************
411 * NEW PATHKEY FORMATION
412 ****************************************************************************/
415 * build_index_pathkeys
416 * Build a pathkeys list that describes the ordering induced by an index
417 * scan using the given index. (Note that an unordered index doesn't
418 * induce any ordering, so we return NIL.)
420 * If 'scandir' is BackwardScanDirection, build pathkeys representing a
421 * backwards scan of the index.
423 * The result is canonical, meaning that redundant pathkeys are removed;
424 * it may therefore have fewer entries than there are index columns.
426 * Another reason for stopping early is that we may be able to tell that
427 * an index column's sort order is uninteresting for this query. However,
428 * that test is just based on the existence of an EquivalenceClass and not
429 * on position in pathkey lists, so it's not complete. Caller should call
430 * truncate_useless_pathkeys() to possibly remove more pathkeys.
433 build_index_pathkeys(PlannerInfo *root,
435 ScanDirection scandir)
441 if (index->sortopfamily == NULL)
442 return NIL; /* non-orderable index */
445 foreach(lc, index->indextlist)
447 TargetEntry *indextle = (TargetEntry *) lfirst(lc);
453 /* We assume we don't need to make a copy of the tlist item */
454 indexkey = indextle->expr;
456 if (ScanDirectionIsBackward(scandir))
458 reverse_sort = !index->reverse_sort[i];
459 nulls_first = !index->nulls_first[i];
463 reverse_sort = index->reverse_sort[i];
464 nulls_first = index->nulls_first[i];
468 * OK, try to make a canonical pathkey for this sort key. Note we're
469 * underneath any outer joins, so nullable_relids should be NULL.
471 cpathkey = make_pathkey_from_sortinfo(root,
474 index->sortopfamily[i],
476 index->indexcollations[i],
484 * If the sort key isn't already present in any EquivalenceClass, then
485 * it's not an interesting sort order for this query. So we can stop
486 * now --- lower-order sort keys aren't useful either.
491 /* Add to list unless redundant */
492 if (!pathkey_is_redundant(cpathkey, retval))
493 retval = lappend(retval, cpathkey);
502 * build_expression_pathkey
503 * Build a pathkeys list that describes an ordering by a single expression
504 * using the given sort operator.
506 * expr, nullable_relids, and rel are as for make_pathkey_from_sortinfo.
507 * We induce the other arguments assuming default sort order for the operator.
509 * Similarly to make_pathkey_from_sortinfo, the result is NIL if create_it
510 * is false and the expression isn't already in some EquivalenceClass.
513 build_expression_pathkey(PlannerInfo *root,
515 Relids nullable_relids,
526 /* Find the operator in pg_amop --- failure shouldn't happen */
527 if (!get_ordering_op_properties(opno,
528 &opfamily, &opcintype, &strategy))
529 elog(ERROR, "operator %u is not a valid ordering operator",
532 cpathkey = make_pathkey_from_sortinfo(root,
537 exprCollation((Node *) expr),
538 (strategy == BTGreaterStrategyNumber),
539 (strategy == BTGreaterStrategyNumber),
545 pathkeys = list_make1(cpathkey);
553 * convert_subquery_pathkeys
554 * Build a pathkeys list that describes the ordering of a subquery's
555 * result, in the terms of the outer query. This is essentially a
556 * task of conversion.
558 * 'rel': outer query's RelOptInfo for the subquery relation.
559 * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
560 * 'subquery_tlist': the subquery's output targetlist, in its terms.
562 * It is not necessary for caller to do truncate_useless_pathkeys(),
563 * because we select keys in a way that takes usefulness of the keys into
567 convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
568 List *subquery_pathkeys,
569 List *subquery_tlist)
573 int outer_query_keys = list_length(root->query_pathkeys);
576 foreach(i, subquery_pathkeys)
578 PathKey *sub_pathkey = (PathKey *) lfirst(i);
579 EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
580 PathKey *best_pathkey = NULL;
582 if (sub_eclass->ec_has_volatile)
585 * If the sub_pathkey's EquivalenceClass is volatile, then it must
586 * have come from an ORDER BY clause, and we have to match it to
587 * that same targetlist entry.
591 if (sub_eclass->ec_sortref == 0) /* can't happen */
592 elog(ERROR, "volatile EquivalenceClass has no sortref");
593 tle = get_sortgroupref_tle(sub_eclass->ec_sortref, subquery_tlist);
595 /* resjunk items aren't visible to outer query */
598 /* We can represent this sub_pathkey */
599 EquivalenceMember *sub_member;
601 EquivalenceClass *outer_ec;
603 Assert(list_length(sub_eclass->ec_members) == 1);
604 sub_member = (EquivalenceMember *) linitial(sub_eclass->ec_members);
605 outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid, tle);
608 * Note: it might look funny to be setting sortref = 0 for a
609 * reference to a volatile sub_eclass. However, the
610 * expression is *not* volatile in the outer query: it's just
611 * a Var referencing whatever the subquery emitted. (IOW, the
612 * outer query isn't going to re-execute the volatile
613 * expression itself.) So this is okay. Likewise, it's
614 * correct to pass nullable_relids = NULL, because we're
615 * underneath any outer joins appearing in the outer query.
618 get_eclass_for_sort_expr(root,
621 sub_eclass->ec_opfamilies,
622 sub_member->em_datatype,
623 sub_eclass->ec_collation,
629 * If we don't find a matching EC, sub-pathkey isn't
630 * interesting to the outer query
634 make_canonical_pathkey(root,
636 sub_pathkey->pk_opfamily,
637 sub_pathkey->pk_strategy,
638 sub_pathkey->pk_nulls_first);
644 * Otherwise, the sub_pathkey's EquivalenceClass could contain
645 * multiple elements (representing knowledge that multiple items
646 * are effectively equal). Each element might match none, one, or
647 * more of the output columns that are visible to the outer query.
648 * This means we may have multiple possible representations of the
649 * sub_pathkey in the context of the outer query. Ideally we
650 * would generate them all and put them all into an EC of the
651 * outer query, thereby propagating equality knowledge up to the
652 * outer query. Right now we cannot do so, because the outer
653 * query's EquivalenceClasses are already frozen when this is
654 * called. Instead we prefer the one that has the highest "score"
655 * (number of EC peers, plus one if it matches the outer
656 * query_pathkeys). This is the most likely to be useful in the
662 foreach(j, sub_eclass->ec_members)
664 EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
665 Expr *sub_expr = sub_member->em_expr;
666 Oid sub_expr_type = sub_member->em_datatype;
667 Oid sub_expr_coll = sub_eclass->ec_collation;
670 if (sub_member->em_is_child)
671 continue; /* ignore children here */
673 foreach(k, subquery_tlist)
675 TargetEntry *tle = (TargetEntry *) lfirst(k);
678 EquivalenceClass *outer_ec;
682 /* resjunk items aren't visible to outer query */
687 * The targetlist entry is considered to match if it
688 * matches after sort-key canonicalization. That is
689 * needed since the sub_expr has been through the same
692 tle_expr = canonicalize_ec_expression(tle->expr,
695 if (!equal(tle_expr, sub_expr))
699 * Build a representation of this targetlist entry as an
702 outer_expr = (Expr *) makeVarFromTargetEntry(rel->relid,
705 /* See if we have a matching EC for that */
706 outer_ec = get_eclass_for_sort_expr(root,
709 sub_eclass->ec_opfamilies,
717 * If we don't find a matching EC, this sub-pathkey isn't
718 * interesting to the outer query
723 outer_pk = make_canonical_pathkey(root,
725 sub_pathkey->pk_opfamily,
726 sub_pathkey->pk_strategy,
727 sub_pathkey->pk_nulls_first);
728 /* score = # of equivalence peers */
729 score = list_length(outer_ec->ec_members) - 1;
730 /* +1 if it matches the proper query_pathkeys item */
731 if (retvallen < outer_query_keys &&
732 list_nth(root->query_pathkeys, retvallen) == outer_pk)
734 if (score > best_score)
736 best_pathkey = outer_pk;
744 * If we couldn't find a representation of this sub_pathkey, we're
745 * done (we can't use the ones to its right, either).
751 * Eliminate redundant ordering info; could happen if outer query
752 * equivalences subquery keys...
754 if (!pathkey_is_redundant(best_pathkey, retval))
756 retval = lappend(retval, best_pathkey);
765 * build_join_pathkeys
766 * Build the path keys for a join relation constructed by mergejoin or
767 * nestloop join. This is normally the same as the outer path's keys.
769 * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
770 * having the outer path's path keys, because null lefthand rows may be
771 * inserted at random points. It must be treated as unsorted.
773 * We truncate away any pathkeys that are uninteresting for higher joins.
775 * 'joinrel' is the join relation that paths are being formed for
776 * 'jointype' is the join type (inner, left, full, etc)
777 * 'outer_pathkeys' is the list of the current outer path's path keys
779 * Returns the list of new path keys.
782 build_join_pathkeys(PlannerInfo *root,
785 List *outer_pathkeys)
787 if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
791 * This used to be quite a complex bit of code, but now that all pathkey
792 * sublists start out life canonicalized, we don't have to do a darn thing
795 * We do, however, need to truncate the pathkeys list, since it may
796 * contain pathkeys that were useful for forming this joinrel but are
797 * uninteresting to higher levels.
799 return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
802 /****************************************************************************
803 * PATHKEYS AND SORT CLAUSES
804 ****************************************************************************/
807 * make_pathkeys_for_sortclauses
808 * Generate a pathkeys list that represents the sort order specified
809 * by a list of SortGroupClauses
811 * The resulting PathKeys are always in canonical form. (Actually, there
812 * is no longer any code anywhere that creates non-canonical PathKeys.)
814 * We assume that root->nullable_baserels is the set of base relids that could
815 * have gone to NULL below the SortGroupClause expressions. This is okay if
816 * the expressions came from the query's top level (ORDER BY, DISTINCT, etc)
817 * and if this function is only invoked after deconstruct_jointree. In the
818 * future we might have to make callers pass in the appropriate
819 * nullable-relids set, but for now it seems unnecessary.
821 * 'sortclauses' is a list of SortGroupClause nodes
822 * 'tlist' is the targetlist to find the referenced tlist entries in
825 make_pathkeys_for_sortclauses(PlannerInfo *root,
829 List *pathkeys = NIL;
832 foreach(l, sortclauses)
834 SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
838 sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
839 Assert(OidIsValid(sortcl->sortop));
840 pathkey = make_pathkey_from_sortop(root,
842 root->nullable_baserels,
845 sortcl->tleSortGroupRef,
848 /* Canonical form eliminates redundant ordering keys */
849 if (!pathkey_is_redundant(pathkey, pathkeys))
850 pathkeys = lappend(pathkeys, pathkey);
855 /****************************************************************************
856 * PATHKEYS AND MERGECLAUSES
857 ****************************************************************************/
860 * initialize_mergeclause_eclasses
861 * Set the EquivalenceClass links in a mergeclause restrictinfo.
863 * RestrictInfo contains fields in which we may cache pointers to
864 * EquivalenceClasses for the left and right inputs of the mergeclause.
865 * (If the mergeclause is a true equivalence clause these will be the
866 * same EquivalenceClass, otherwise not.) If the mergeclause is either
867 * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
868 * then it's easy to set up the left_ec and right_ec members --- otherwise,
869 * this function should be called to set them up. We will generate new
870 * EquivalenceClauses if necessary to represent the mergeclause's left and
873 * Note this is called before EC merging is complete, so the links won't
874 * necessarily point to canonical ECs. Before they are actually used for
875 * anything, update_mergeclause_eclasses must be called to ensure that
876 * they've been updated to point to canonical ECs.
879 initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
881 Expr *clause = restrictinfo->clause;
885 /* Should be a mergeclause ... */
886 Assert(restrictinfo->mergeopfamilies != NIL);
887 /* ... with links not yet set */
888 Assert(restrictinfo->left_ec == NULL);
889 Assert(restrictinfo->right_ec == NULL);
891 /* Need the declared input types of the operator */
892 op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
894 /* Find or create a matching EquivalenceClass for each side */
895 restrictinfo->left_ec =
896 get_eclass_for_sort_expr(root,
897 (Expr *) get_leftop(clause),
898 restrictinfo->nullable_relids,
899 restrictinfo->mergeopfamilies,
901 ((OpExpr *) clause)->inputcollid,
905 restrictinfo->right_ec =
906 get_eclass_for_sort_expr(root,
907 (Expr *) get_rightop(clause),
908 restrictinfo->nullable_relids,
909 restrictinfo->mergeopfamilies,
911 ((OpExpr *) clause)->inputcollid,
918 * update_mergeclause_eclasses
919 * Make the cached EquivalenceClass links valid in a mergeclause
922 * These pointers should have been set by process_equivalence or
923 * initialize_mergeclause_eclasses, but they might have been set to
924 * non-canonical ECs that got merged later. Chase up to the canonical
925 * merged parent if so.
928 update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
930 /* Should be a merge clause ... */
931 Assert(restrictinfo->mergeopfamilies != NIL);
932 /* ... with pointers already set */
933 Assert(restrictinfo->left_ec != NULL);
934 Assert(restrictinfo->right_ec != NULL);
936 /* Chase up to the top as needed */
937 while (restrictinfo->left_ec->ec_merged)
938 restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
939 while (restrictinfo->right_ec->ec_merged)
940 restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
944 * find_mergeclauses_for_pathkeys
945 * This routine attempts to find a set of mergeclauses that can be
946 * used with a specified ordering for one of the input relations.
947 * If successful, it returns a list of mergeclauses.
949 * 'pathkeys' is a pathkeys list showing the ordering of an input path.
950 * 'outer_keys' is TRUE if these keys are for the outer input path,
951 * FALSE if for inner.
952 * 'restrictinfos' is a list of mergejoinable restriction clauses for the
953 * join relation being formed.
955 * The restrictinfos must be marked (via outer_is_left) to show which side
956 * of each clause is associated with the current outer path. (See
957 * select_mergejoin_clauses())
959 * The result is NIL if no merge can be done, else a maximal list of
960 * usable mergeclauses (represented as a list of their restrictinfo nodes).
963 find_mergeclauses_for_pathkeys(PlannerInfo *root,
968 List *mergeclauses = NIL;
971 /* make sure we have eclasses cached in the clauses */
972 foreach(i, restrictinfos)
974 RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
976 update_mergeclause_eclasses(root, rinfo);
981 PathKey *pathkey = (PathKey *) lfirst(i);
982 EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
983 List *matched_restrictinfos = NIL;
987 * A mergejoin clause matches a pathkey if it has the same EC.
988 * If there are multiple matching clauses, take them all. In plain
989 * inner-join scenarios we expect only one match, because
990 * equivalence-class processing will have removed any redundant
991 * mergeclauses. However, in outer-join scenarios there might be
992 * multiple matches. An example is
994 * select * from a full join b
995 * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
997 * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
998 * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
999 * we *must* do so or we will be unable to form a valid plan.
1001 * We expect that the given pathkeys list is canonical, which means
1002 * no two members have the same EC, so it's not possible for this
1003 * code to enter the same mergeclause into the result list twice.
1005 * It's possible that multiple matching clauses might have different
1006 * ECs on the other side, in which case the order we put them into our
1007 * result makes a difference in the pathkeys required for the other
1008 * input path. However this routine hasn't got any info about which
1009 * order would be best, so we don't worry about that.
1011 * It's also possible that the selected mergejoin clauses produce
1012 * a noncanonical ordering of pathkeys for the other side, ie, we
1013 * might select clauses that reference b.v1, b.v2, b.v1 in that
1014 * order. This is not harmful in itself, though it suggests that
1015 * the clauses are partially redundant. Since it happens only with
1016 * redundant query conditions, we don't bother to eliminate it.
1017 * make_inner_pathkeys_for_merge() has to delete duplicates when
1018 * it constructs the canonical pathkeys list, and we also have to
1019 * deal with the case in create_mergejoin_plan().
1022 foreach(j, restrictinfos)
1024 RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
1025 EquivalenceClass *clause_ec;
1028 clause_ec = rinfo->outer_is_left ?
1029 rinfo->left_ec : rinfo->right_ec;
1031 clause_ec = rinfo->outer_is_left ?
1032 rinfo->right_ec : rinfo->left_ec;
1033 if (clause_ec == pathkey_ec)
1034 matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
1038 * If we didn't find a mergeclause, we're done --- any additional
1039 * sort-key positions in the pathkeys are useless. (But we can still
1040 * mergejoin if we found at least one mergeclause.)
1042 if (matched_restrictinfos == NIL)
1046 * If we did find usable mergeclause(s) for this sort-key position,
1047 * add them to result list.
1049 mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
1052 return mergeclauses;
1056 * select_outer_pathkeys_for_merge
1057 * Builds a pathkey list representing a possible sort ordering
1058 * that can be used with the given mergeclauses.
1060 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1061 * that will be used in a merge join.
1062 * 'joinrel' is the join relation we are trying to construct.
1064 * The restrictinfos must be marked (via outer_is_left) to show which side
1065 * of each clause is associated with the current outer path. (See
1066 * select_mergejoin_clauses())
1068 * Returns a pathkeys list that can be applied to the outer relation.
1070 * Since we assume here that a sort is required, there is no particular use
1071 * in matching any available ordering of the outerrel. (joinpath.c has an
1072 * entirely separate code path for considering sort-free mergejoins.) Rather,
1073 * it's interesting to try to match the requested query_pathkeys so that a
1074 * second output sort may be avoided; and failing that, we try to list "more
1075 * popular" keys (those with the most unmatched EquivalenceClass peers)
1076 * earlier, in hopes of making the resulting ordering useful for as many
1077 * higher-level mergejoins as possible.
1080 select_outer_pathkeys_for_merge(PlannerInfo *root,
1082 RelOptInfo *joinrel)
1084 List *pathkeys = NIL;
1085 int nClauses = list_length(mergeclauses);
1086 EquivalenceClass **ecs;
1092 /* Might have no mergeclauses */
1097 * Make arrays of the ECs used by the mergeclauses (dropping any
1098 * duplicates) and their "popularity" scores.
1100 ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
1101 scores = (int *) palloc(nClauses * sizeof(int));
1104 foreach(lc, mergeclauses)
1106 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1107 EquivalenceClass *oeclass;
1111 /* get the outer eclass */
1112 update_mergeclause_eclasses(root, rinfo);
1114 if (rinfo->outer_is_left)
1115 oeclass = rinfo->left_ec;
1117 oeclass = rinfo->right_ec;
1119 /* reject duplicates */
1120 for (j = 0; j < necs; j++)
1122 if (ecs[j] == oeclass)
1130 foreach(lc2, oeclass->ec_members)
1132 EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
1134 /* Potential future join partner? */
1135 if (!em->em_is_const && !em->em_is_child &&
1136 !bms_overlap(em->em_relids, joinrel->relids))
1140 ecs[necs] = oeclass;
1141 scores[necs] = score;
1146 * Find out if we have all the ECs mentioned in query_pathkeys; if so we
1147 * can generate a sort order that's also useful for final output. There is
1148 * no percentage in a partial match, though, so we have to have 'em all.
1150 if (root->query_pathkeys)
1152 foreach(lc, root->query_pathkeys)
1154 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1155 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1157 for (j = 0; j < necs; j++)
1159 if (ecs[j] == query_ec)
1160 break; /* found match */
1163 break; /* didn't find match */
1165 /* if we got to the end of the list, we have them all */
1168 /* copy query_pathkeys as starting point for our output */
1169 pathkeys = list_copy(root->query_pathkeys);
1170 /* mark their ECs as already-emitted */
1171 foreach(lc, root->query_pathkeys)
1173 PathKey *query_pathkey = (PathKey *) lfirst(lc);
1174 EquivalenceClass *query_ec = query_pathkey->pk_eclass;
1176 for (j = 0; j < necs; j++)
1178 if (ecs[j] == query_ec)
1189 * Add remaining ECs to the list in popularity order, using a default sort
1190 * ordering. (We could use qsort() here, but the list length is usually
1191 * so small it's not worth it.)
1197 EquivalenceClass *ec;
1201 best_score = scores[0];
1202 for (j = 1; j < necs; j++)
1204 if (scores[j] > best_score)
1207 best_score = scores[j];
1211 break; /* all done */
1213 scores[best_j] = -1;
1214 pathkey = make_canonical_pathkey(root,
1216 linitial_oid(ec->ec_opfamilies),
1217 BTLessStrategyNumber,
1219 /* can't be redundant because no duplicate ECs */
1220 Assert(!pathkey_is_redundant(pathkey, pathkeys));
1221 pathkeys = lappend(pathkeys, pathkey);
1231 * make_inner_pathkeys_for_merge
1232 * Builds a pathkey list representing the explicit sort order that
1233 * must be applied to an inner path to make it usable with the
1234 * given mergeclauses.
1236 * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
1237 * that will be used in a merge join.
1238 * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
1241 * The restrictinfos must be marked (via outer_is_left) to show which side
1242 * of each clause is associated with the current outer path. (See
1243 * select_mergejoin_clauses())
1245 * Returns a pathkeys list that can be applied to the inner relation.
1247 * Note that it is not this routine's job to decide whether sorting is
1248 * actually needed for a particular input path. Assume a sort is necessary;
1249 * just make the keys, eh?
1252 make_inner_pathkeys_for_merge(PlannerInfo *root,
1254 List *outer_pathkeys)
1256 List *pathkeys = NIL;
1257 EquivalenceClass *lastoeclass;
1264 lop = list_head(outer_pathkeys);
1266 foreach(lc, mergeclauses)
1268 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
1269 EquivalenceClass *oeclass;
1270 EquivalenceClass *ieclass;
1273 update_mergeclause_eclasses(root, rinfo);
1275 if (rinfo->outer_is_left)
1277 oeclass = rinfo->left_ec;
1278 ieclass = rinfo->right_ec;
1282 oeclass = rinfo->right_ec;
1283 ieclass = rinfo->left_ec;
1286 /* outer eclass should match current or next pathkeys */
1287 /* we check this carefully for debugging reasons */
1288 if (oeclass != lastoeclass)
1291 elog(ERROR, "too few pathkeys for mergeclauses");
1292 opathkey = (PathKey *) lfirst(lop);
1294 lastoeclass = opathkey->pk_eclass;
1295 if (oeclass != lastoeclass)
1296 elog(ERROR, "outer pathkeys do not match mergeclause");
1300 * Often, we'll have same EC on both sides, in which case the outer
1301 * pathkey is also canonical for the inner side, and we can skip a
1304 if (ieclass == oeclass)
1307 pathkey = make_canonical_pathkey(root,
1309 opathkey->pk_opfamily,
1310 opathkey->pk_strategy,
1311 opathkey->pk_nulls_first);
1314 * Don't generate redundant pathkeys (can happen if multiple
1315 * mergeclauses refer to same EC).
1317 if (!pathkey_is_redundant(pathkey, pathkeys))
1318 pathkeys = lappend(pathkeys, pathkey);
1324 /****************************************************************************
1325 * PATHKEY USEFULNESS CHECKS
1327 * We only want to remember as many of the pathkeys of a path as have some
1328 * potential use, either for subsequent mergejoins or for meeting the query's
1329 * requested output ordering. This ensures that add_path() won't consider
1330 * a path to have a usefully different ordering unless it really is useful.
1331 * These routines check for usefulness of given pathkeys.
1332 ****************************************************************************/
1335 * pathkeys_useful_for_merging
1336 * Count the number of pathkeys that may be useful for mergejoins
1337 * above the given relation.
1339 * We consider a pathkey potentially useful if it corresponds to the merge
1340 * ordering of either side of any joinclause for the rel. This might be
1341 * overoptimistic, since joinclauses that require different other relations
1342 * might never be usable at the same time, but trying to be exact is likely
1343 * to be more trouble than it's worth.
1345 * To avoid doubling the number of mergejoin paths considered, we would like
1346 * to consider only one of the two scan directions (ASC or DESC) as useful
1347 * for merging for any given target column. The choice is arbitrary unless
1348 * one of the directions happens to match an ORDER BY key, in which case
1349 * that direction should be preferred, in hopes of avoiding a final sort step.
1350 * right_merge_direction() implements this heuristic.
1353 pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
1358 foreach(i, pathkeys)
1360 PathKey *pathkey = (PathKey *) lfirst(i);
1361 bool matched = false;
1364 /* If "wrong" direction, not useful for merging */
1365 if (!right_merge_direction(root, pathkey))
1369 * First look into the EquivalenceClass of the pathkey, to see if
1370 * there are any members not yet joined to the rel. If so, it's
1371 * surely possible to generate a mergejoin clause using them.
1373 if (rel->has_eclass_joins &&
1374 eclass_useful_for_merging(root, pathkey->pk_eclass, rel))
1379 * Otherwise search the rel's joininfo list, which contains
1380 * non-EquivalenceClass-derivable join clauses that might
1381 * nonetheless be mergejoinable.
1383 foreach(j, rel->joininfo)
1385 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
1387 if (restrictinfo->mergeopfamilies == NIL)
1389 update_mergeclause_eclasses(root, restrictinfo);
1391 if (pathkey->pk_eclass == restrictinfo->left_ec ||
1392 pathkey->pk_eclass == restrictinfo->right_ec)
1401 * If we didn't find a mergeclause, we're done --- any additional
1402 * sort-key positions in the pathkeys are useless. (But we can still
1403 * mergejoin if we found at least one mergeclause.)
1415 * right_merge_direction
1416 * Check whether the pathkey embodies the preferred sort direction
1417 * for merging its target column.
1420 right_merge_direction(PlannerInfo *root, PathKey *pathkey)
1424 foreach(l, root->query_pathkeys)
1426 PathKey *query_pathkey = (PathKey *) lfirst(l);
1428 if (pathkey->pk_eclass == query_pathkey->pk_eclass &&
1429 pathkey->pk_opfamily == query_pathkey->pk_opfamily)
1432 * Found a matching query sort column. Prefer this pathkey's
1433 * direction iff it matches. Note that we ignore pk_nulls_first,
1434 * which means that a sort might be needed anyway ... but we still
1435 * want to prefer only one of the two possible directions, and we
1436 * might as well use this one.
1438 return (pathkey->pk_strategy == query_pathkey->pk_strategy);
1442 /* If no matching ORDER BY request, prefer the ASC direction */
1443 return (pathkey->pk_strategy == BTLessStrategyNumber);
1447 * pathkeys_useful_for_ordering
1448 * Count the number of pathkeys that are useful for meeting the
1449 * query's requested output ordering.
1451 * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
1452 * no good to order by just the first key(s) of the requested ordering.
1453 * So the result is always either 0 or list_length(root->query_pathkeys).
1456 pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
1458 if (root->query_pathkeys == NIL)
1459 return 0; /* no special ordering requested */
1461 if (pathkeys == NIL)
1462 return 0; /* unordered path */
1464 if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
1466 /* It's useful ... or at least the first N keys are */
1467 return list_length(root->query_pathkeys);
1470 return 0; /* path ordering not useful */
1474 * truncate_useless_pathkeys
1475 * Shorten the given pathkey list to just the useful pathkeys.
1478 truncate_useless_pathkeys(PlannerInfo *root,
1485 nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
1486 nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
1487 if (nuseful2 > nuseful)
1491 * Note: not safe to modify input list destructively, but we can avoid
1492 * copying the list if we're not actually going to change it
1496 else if (nuseful == list_length(pathkeys))
1499 return list_truncate(list_copy(pathkeys), nuseful);
1503 * has_useful_pathkeys
1504 * Detect whether the specified rel could have any pathkeys that are
1505 * useful according to truncate_useless_pathkeys().
1507 * This is a cheap test that lets us skip building pathkeys at all in very
1508 * simple queries. It's OK to err in the direction of returning "true" when
1509 * there really aren't any usable pathkeys, but erring in the other direction
1510 * is bad --- so keep this in sync with the routines above!
1512 * We could make the test more complex, for example checking to see if any of
1513 * the joinclauses are really mergejoinable, but that likely wouldn't win
1514 * often enough to repay the extra cycles. Queries with neither a join nor
1515 * a sort are reasonably common, though, so this much work seems worthwhile.
1518 has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel)
1520 if (rel->joininfo != NIL || rel->has_eclass_joins)
1521 return true; /* might be able to use pathkeys for merging */
1522 if (root->query_pathkeys != NIL)
1523 return true; /* might be able to use them for ordering */
1524 return false; /* definitely useless */