Refactor planner's pathkeys data structure to create a separate, explicit

[postgresql] / src / backend / optimizer / path / pathkeys.c
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c

index 09a5b8a51f17aeef65d63250eee0f1c4c0e47d18..15793b4ef99c779e9e5ea48e1705fea9c633a843 100644 (file)
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1,415 +1,1351 @@
  /*-------------------------------------------------------------------------
   *
- * joinutils.c
- *       Utilities for matching and building join and path keys
+ * pathkeys.c
+ *       Utilities for matching and building path keys
   *
- * Copyright (c) 1994, Regents of the University of California
+ * See src/backend/optimizer/README for a great deal of information about
+ * the nature and use of path keys.
   *
   *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.9 1999/05/17 00:26:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.82 2007/01/20 20:45:39 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
  #include "postgres.h"
  
-#include "nodes/pg_list.h"
-#include "nodes/relation.h"
+#include "access/skey.h"
+#include "nodes/makefuncs.h"
  #include "nodes/plannodes.h"
-
-#include "optimizer/internal.h"
+#include "optimizer/clauses.h"
+#include "optimizer/pathnode.h"
  #include "optimizer/paths.h"
-#include "optimizer/var.h"
-#include "optimizer/keys.h"
  #include "optimizer/tlist.h"
-#include "optimizer/joininfo.h"
-#include "optimizer/ordering.h"
-
-static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
-                                               int outer_or_inner);
-static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
-                                                         List *joinclauses);
-
-
-/*--------------------
- *     Explanation of Path.pathkeys
- *
- *     Path.pathkeys is a List of List of Var nodes that represent the sort
- *     order of the result generated by the Path.
- *
- *     In single/base relation RelOptInfo's, the Path's represent various ways
- *     of generating the relation and the resulting ordering of the tuples.
- *     Sequential scan Paths have NIL pathkeys, indicating no known ordering.
- *     Index scans have Path.pathkeys that represent the chosen index.
- *     A single-key index pathkeys would be { {tab1_indexkey1} }.  For a
- *     multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} },
- *     indicating major sort by indexkey1 and minor sort by indexkey2.
- *
- *     Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
- *     only performed with equijoins ("=").  Because of this, the multi-relation
- *     path actually has more than one primary Var key.  For example, a
- *     mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
- *     { {tab1.col1, tab2.col1} }, indicating that the major sort order of the
- *     Path can be taken to be *either* tab1.col1 or tab2.col1.
- *     They are equal, so they are both primary sort keys.  This allows future
- *     joins to use either Var as a pre-sorted key to prevent upper Mergejoins
- *     from having to re-sort the Path.  This is why pathkeys is a List of Lists.
- *
- *     Note that while the order of the top list is meaningful (primary vs.
- *     secondary sort key), the order of each sublist is arbitrary.
- *
- *     For multi-key sorts, if the outer is sorted by a multi-key index, the
- *     multi-key index remains after the join.  If the inner has a multi-key
- *     sort, only the primary key of the inner is added to the result.
- *     Mergejoins only join on the primary key.  Currently, non-primary keys
- *     in the pathkeys List are of limited value.
- *
- *     Although Hashjoins also work only with equijoin operators, it is *not*
- *     safe to consider the output of a Hashjoin to be sorted in any particular
- *     order --- not even the outer path's order.  This is true because the
- *     executor might have to split the join into multiple batches.
- *
- *     NestJoin does not perform sorting, and allows non-equijoins, so it does
- *     not allow useful pathkeys.  (But couldn't we use the outer path's order?)
- *
- *     -- bjm
- *--------------------
+#include "parser/parsetree.h"
+#include "parser/parse_expr.h"
+#include "utils/lsyscache.h"
+
+
+/*
+ * If an EC contains a const and isn't below-outer-join, any PathKey depending
+ * on it must be redundant, since there's only one possible value of the key.
   */
- 
+#define MUST_BE_REDUNDANT(eclass)  \
+       ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join)
+
+static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
+                                                       int strategy, bool nulls_first);
+static PathKey *make_canonical_pathkey(PlannerInfo *root,
+                                          EquivalenceClass *eclass, Oid opfamily,
+                                          int strategy, bool nulls_first);
+static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
+static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root,
+                                                  Expr *expr, Oid ordering_op,
+                                                  bool nulls_first,
+                                                  bool canonicalize);
+static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel,
+                                 AttrNumber varattno);
+
+
  /****************************************************************************
- *             KEY COMPARISONS
+ *             PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
   ****************************************************************************/
  
  /*
- * order_joinkeys_by_pathkeys
- *       Attempts to match the keys of a path against the keys of join clauses.
- *       This is done by looking for a matching join key in 'joinkeys' for
- *       every path key in the list 'path.keys'. If there is a matching join key
- *       (not necessarily unique) for every path key, then the list of
- *       corresponding join keys and join clauses are returned in the order in
- *       which the keys matched the path keys.
- *
- * 'pathkeys' is a list of path keys:
- *             ( ( (var) (var) ... ) ( (var) ... ) )
- * 'joinkeys' is a list of join keys:
- *             ( (outer inner) (outer inner) ... )
- * 'joinclauses' is a list of clauses corresponding to the join keys in
- *             'joinkeys'
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- *             in 'joinkeys'
- *
- * Returns the join keys and corresponding join clauses in a list if all
- * of the path keys were matched:
- *             (
- *              ( (outerkey0 innerkey0) ... (outerkeyN or innerkeyN) )
- *              ( clause0 ... clauseN )
- *             )
- * and nil otherwise.
- *
- * Returns a list of matched join keys and a list of matched join clauses
- * in pointers if valid order can be found.
+ * makePathKey
+ *             create a PathKey node
+ *
+ * This does not promise to create a canonical PathKey, it's merely a
+ * convenience routine to build the specified node.
   */
-bool
-order_joinkeys_by_pathkeys(List *pathkeys,
-                                                       List *joinkeys,
-                                                       List *joinclauses,
-                                                       int outer_or_inner,
-                                                       List **matchedJoinKeysPtr,
-                                                       List **matchedJoinClausesPtr)
+static PathKey *
+makePathKey(EquivalenceClass *eclass, Oid opfamily,
+                       int strategy, bool nulls_first)
  {
-       List       *matched_joinkeys = NIL;
-       List       *matched_joinclauses = NIL;
-       List       *pathkey = NIL;
-       List       *i = NIL;
-       int                     matched_joinkey_index = -1;
-       int                     matched_keys = 0;
+       PathKey    *pk = makeNode(PathKey);
+
+       pk->pk_eclass = eclass;
+       pk->pk_opfamily = opfamily;
+       pk->pk_strategy = strategy;
+       pk->pk_nulls_first = nulls_first;
+
+       return pk;
+}
+
+/*
+ * make_canonical_pathkey
+ *       Given the parameters for a PathKey, find any pre-existing matching
+ *       pathkey in the query's list of "canonical" pathkeys.  Make a new
+ *       entry if there's not one already.
+ *
+ * Note that this function must not be used until after we have completed
+ * merging EquivalenceClasses.
+ */
+static PathKey *
+make_canonical_pathkey(PlannerInfo *root,
+                                          EquivalenceClass *eclass, Oid opfamily,
+                                          int strategy, bool nulls_first)
+{
+       PathKey    *pk;
+       ListCell   *lc;
+       MemoryContext oldcontext;
+
+       /* The passed eclass might be non-canonical, so chase up to the top */
+       while (eclass->ec_merged)
+               eclass = eclass->ec_merged;
+
+       foreach(lc, root->canon_pathkeys)
+       {
+               pk = (PathKey *) lfirst(lc);
+               if (eclass == pk->pk_eclass &&
+                       opfamily == pk->pk_opfamily &&
+                       strategy == pk->pk_strategy &&
+                       nulls_first == pk->pk_nulls_first)
+                       return pk;
+       }
+
         /*
-        *      Reorder the joinkeys by picking out one that matches each pathkey,
-        *      and create a new joinkey/joinclause list in pathkey order
+        * Be sure canonical pathkeys are allocated in the main planning context.
+        * Not an issue in normal planning, but it is for GEQO.
          */
-       foreach(i, pathkeys)
+       oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+       pk = makePathKey(eclass, opfamily, strategy, nulls_first);
+       root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+
+       MemoryContextSwitchTo(oldcontext);
+
+       return pk;
+}
+
+/*
+ * pathkey_is_redundant
+ *        Is a pathkey redundant with one already in the given list?
+ *
+ * Both the given pathkey and the list members must be canonical for this
+ * to work properly.  We detect two cases:
+ *
+ * 1. If the new pathkey's equivalence class contains a constant, and isn't
+ * below an outer join, then we can disregard it as a sort key.  An example:
+ *                     SELECT ... WHERE x = 42 ORDER BY x, y;
+ * We may as well just sort by y.  Note that because of opfamily matching,
+ * this is semantically correct: we know that the equality constraint is one
+ * that actually binds the variable to a single value in the terms of any
+ * ordering operator that might go with the eclass.  This rule not only lets
+ * us simplify (or even skip) explicit sorts, but also allows matching index
+ * sort orders to a query when there are don't-care index columns.
+ *
+ * 2. If the new pathkey's equivalence class is the same as that of any
+ * existing member of the pathkey list, then it is redundant.  Some examples:
+ *                     SELECT ... ORDER BY x, x;
+ *                     SELECT ... ORDER BY x, x DESC;
+ *                     SELECT ... WHERE x = y ORDER BY x, y;
+ * In all these cases the second sort key cannot distinguish values that are
+ * considered equal by the first, and so there's no point in using it.
+ * Note in particular that we need not compare opfamily (all the opfamilies
+ * of the EC have the same notion of equality) nor sort direction.
+ *
+ * Because the equivclass.c machinery forms only one copy of any EC per query,
+ * pointer comparison is enough to decide whether canonical ECs are the same.
+ */
+static bool
+pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
+{
+       EquivalenceClass *new_ec = new_pathkey->pk_eclass;
+       ListCell   *lc;
+
+       /* Assert we've been given canonical pathkeys */
+       Assert(!new_ec->ec_merged);
+
+       /* Check for EC containing a constant --- unconditionally redundant */
+       if (MUST_BE_REDUNDANT(new_ec))
+               return true;
+
+       /* If same EC already used in list, then redundant */
+       foreach(lc, pathkeys)
         {
-               pathkey = lfirst(i);
-               matched_joinkey_index = match_pathkey_joinkeys(pathkey, joinkeys,
-                                                                                                          outer_or_inner);
+               PathKey    *old_pathkey = (PathKey *) lfirst(lc);
  
-               if (matched_joinkey_index != -1)
-               {
-                       matched_keys++;
-                       if (matchedJoinKeysPtr)
-                       {
-                               JoinKey    *joinkey = nth(matched_joinkey_index, joinkeys);
-                               matched_joinkeys = lappend(matched_joinkeys, joinkey);
-                       }
-                       
-                       if (matchedJoinClausesPtr)
-                       {
-                               Expr       *joinclause = nth(matched_joinkey_index,
-                                                                                        joinclauses);
-                               Assert(joinclauses);
-                               matched_joinclauses = lappend(matched_joinclauses, joinclause);
-                       }
-               }
-               else
-                       /*      A pathkey could not be matched. */
-                       break;
+               /* Assert we've been given canonical pathkeys */
+               Assert(!old_pathkey->pk_eclass->ec_merged);
+
+               if (new_ec == old_pathkey->pk_eclass)
+                       return true;
         }
  
+       return false;
+}
+
+/*
+ * canonicalize_pathkeys
+ *        Convert a not-necessarily-canonical pathkeys list to canonical form.
+ *
+ * Note that this function must not be used until after we have completed
+ * merging EquivalenceClasses.
+ */
+List *
+canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
+{
+       List       *new_pathkeys = NIL;
+       ListCell   *l;
+
+       foreach(l, pathkeys)
+       {
+               PathKey    *pathkey = (PathKey *) lfirst(l);
+               EquivalenceClass *eclass;
+               PathKey    *cpathkey;
+
+               /* Find the canonical (merged) EquivalenceClass */
+               eclass = pathkey->pk_eclass;
+               while (eclass->ec_merged)
+                       eclass = eclass->ec_merged;
+
+               /*
+                * If we can tell it's redundant just from the EC, skip.
+                * pathkey_is_redundant would notice that, but we needn't even bother
+                * constructing the node...
+                */
+               if (MUST_BE_REDUNDANT(eclass))
+                       continue;
+
+               /* OK, build a canonicalized PathKey struct */
+               cpathkey = make_canonical_pathkey(root,
+                                                                                 eclass,
+                                                                                 pathkey->pk_opfamily,
+                                                                                 pathkey->pk_strategy,
+                                                                                 pathkey->pk_nulls_first);
+
+               /* Add to list unless redundant */
+               if (!pathkey_is_redundant(cpathkey, new_pathkeys))
+                       new_pathkeys = lappend(new_pathkeys, cpathkey);
+       }
+       return new_pathkeys;
+}
+
+/*
+ * make_pathkey_from_sortinfo
+ *       Given an expression, a sortop, and a nulls-first flag, create
+ *       a PathKey.  If canonicalize = true, the result is a "canonical"
+ *       PathKey, otherwise not.  (But note it might be redundant anyway.)
+ *
+ * canonicalize should always be TRUE after EquivalenceClass merging has
+ * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
+ */
+static PathKey *
+make_pathkey_from_sortinfo(PlannerInfo *root,
+                                                  Expr *expr, Oid ordering_op,
+                                                  bool nulls_first,
+                                                  bool canonicalize)
+{
+       Oid                     equality_op;
+       List       *opfamilies;
+       Oid                     opfamily,
+                               lefttype,
+                               righttype;
+       int                     strategy;
+       ListCell   *lc;
+       EquivalenceClass *eclass;
+
         /*
-        *      Did we fail to match all the joinkeys?
-        *      Extra pathkeys are no problem.
+        * An ordering operator fully determines the behavior of its opfamily,
+        * so could only meaningfully appear in one family --- or perhaps two
+        * if one builds a reverse-sort opfamily, but there's not much point in
+        * that anymore.  But EquivalenceClasses need to contain opfamily lists
+        * based on the family membership of equality operators, which could
+        * easily be bigger.  So, look up the equality operator that goes with
+        * the ordering operator (this should be unique) and get its membership.
          */
-       if (matched_keys != length(joinkeys))
+       equality_op = get_equality_op_for_ordering_op(ordering_op);
+       if (!OidIsValid(equality_op))           /* shouldn't happen */
+               elog(ERROR, "could not find equality operator for ordering operator %u",
+                        ordering_op);
+       opfamilies = get_mergejoin_opfamilies(equality_op);
+       if (!opfamilies)                        /* certainly should find some */
+               elog(ERROR, "could not find opfamilies for ordering operator %u",
+                        ordering_op);
+
+       /*
+        * Next we have to determine the strategy number to put into the pathkey.
+        * In the presence of reverse-sort opclasses there might be two answers.
+        * We prefer the one associated with the first opfamilies member that
+        * this ordering_op appears in (this will be consistently defined in
+        * normal system operation; see comments for get_mergejoin_opfamilies()).
+        */
+       opfamily = InvalidOid;
+       strategy = 0;
+       foreach(lc, opfamilies)
         {
-                       if (matchedJoinKeysPtr)
-                               *matchedJoinKeysPtr = NIL;
-                       if (matchedJoinClausesPtr)
-                               *matchedJoinClausesPtr = NIL;
-                       return false;
+               opfamily = lfirst_oid(lc);
+               strategy = get_op_opfamily_strategy(ordering_op, opfamily);
+               if (strategy)
+                       break;
         }
+       if (!(strategy == BTLessStrategyNumber ||
+                 strategy == BTGreaterStrategyNumber))
+               elog(ERROR, "ordering operator %u is has wrong strategy number %d",
+                        ordering_op, strategy);
+
+       /* Need the declared input type of the operator, too */
+       op_input_types(ordering_op, &lefttype, &righttype);
+       Assert(lefttype == righttype);
+
+       /* Now find or create a matching EquivalenceClass */
+       eclass = get_eclass_for_sort_expr(root, expr, lefttype, opfamilies);
  
-       if (matchedJoinKeysPtr)
-               *matchedJoinKeysPtr = matched_joinkeys;
-       if (matchedJoinClausesPtr)
-               *matchedJoinClausesPtr = matched_joinclauses;
-       return true;
+       /* And finally we can find or create a PathKey node */
+       if (canonicalize)
+               return make_canonical_pathkey(root, eclass, opfamily,
+                                                                         strategy, nulls_first);
+       else
+               return makePathKey(eclass, opfamily, strategy, nulls_first);
  }
  
  
+/****************************************************************************
+ *             PATHKEY COMPARISONS
+ ****************************************************************************/
+
  /*
- * match_pathkey_joinkeys
- *       Returns the 0-based index into 'joinkeys' of the first joinkey whose
- *       outer or inner pathkey matches any subkey of 'pathkey'.
+ * compare_pathkeys
+ *       Compare two pathkeys to see if they are equivalent, and if not whether
+ *       one is "better" than the other.
   *
- *     All these keys are equivalent, so any of them can match.  See above.
+ *       This function may only be applied to canonicalized pathkey lists.
+ *       In the canonical representation, pathkeys can be checked for equality
+ *       by simple pointer comparison.
   */
-static int
-match_pathkey_joinkeys(List *pathkey,
-                                          List *joinkeys,
-                                          int outer_or_inner)
+PathKeysComparison
+compare_pathkeys(List *keys1, List *keys2)
  {
-       Var                *key;
-       int                     pos;
-       List       *i, *x;
-       JoinKey    *jk;
+       ListCell   *key1,
+                          *key2;
  
-       foreach(i, pathkey)
+       forboth(key1, keys1, key2, keys2)
         {
-               key = (Var *) lfirst(i);
-               pos = 0;
-               foreach(x, joinkeys)
-               {
-                       jk = (JoinKey *) lfirst(x);
-                       if (equal(key, extract_join_key(jk, outer_or_inner)))
-                               return pos;
-                       pos++;
-               }
+               PathKey    *pathkey1 = (PathKey *) lfirst(key1);
+               PathKey    *pathkey2 = (PathKey *) lfirst(key2);
+
+               /*
+                * XXX would like to check that we've been given canonicalized input,
+                * but PlannerInfo not accessible here...
+                */
+#ifdef NOT_USED
+               Assert(list_member_ptr(root->canon_pathkeys, pathkey1));
+               Assert(list_member_ptr(root->canon_pathkeys, pathkey2));
+#endif
+
+               if (pathkey1 != pathkey2)
+                       return PATHKEYS_DIFFERENT;      /* no need to keep looking */
+       }
+
+       /*
+        * If we reached the end of only one list, the other is longer and
+        * therefore not a subset.
+        */
+       if (key1 == NULL && key2 == NULL)
+               return PATHKEYS_EQUAL;
+       if (key1 != NULL)
+               return PATHKEYS_BETTER1;        /* key1 is longer */
+       return PATHKEYS_BETTER2;        /* key2 is longer */
+}
+
+/*
+ * pathkeys_contained_in
+ *       Common special case of compare_pathkeys: we just want to know
+ *       if keys2 are at least as well sorted as keys1.
+ */
+bool
+pathkeys_contained_in(List *keys1, List *keys2)
+{
+       switch (compare_pathkeys(keys1, keys2))
+       {
+               case PATHKEYS_EQUAL:
+               case PATHKEYS_BETTER2:
+                       return true;
+               default:
+                       break;
         }
-       return -1;                                      /* no index found       */
+       return false;
  }
  
+/*
+ * get_cheapest_path_for_pathkeys
+ *       Find the cheapest path (according to the specified criterion) that
+ *       satisfies the given pathkeys.  Return NULL if no such path.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'cost_criterion' is STARTUP_COST or TOTAL_COST
+ */
+Path *
+get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+                                                          CostSelector cost_criterion)
+{
+       Path       *matched_path = NULL;
+       ListCell   *l;
+
+       foreach(l, paths)
+       {
+               Path       *path = (Path *) lfirst(l);
+
+               /*
+                * Since cost comparison is a lot cheaper than pathkey comparison, do
+                * that first.  (XXX is that still true?)
+                */
+               if (matched_path != NULL &&
+                       compare_path_costs(matched_path, path, cost_criterion) <= 0)
+                       continue;
+
+               if (pathkeys_contained_in(pathkeys, path->pathkeys))
+                       matched_path = path;
+       }
+       return matched_path;
+}
  
  /*
- * get_cheapest_path_for_joinkeys
- *       Attempts to find a path in 'paths' whose keys match a set of join
- *       keys 'joinkeys'.      To match,
- *       1. the path node ordering must equal 'ordering'.
- *       2. each pathkey of a given path must match(i.e., be(equal) to) the
- *              appropriate pathkey of the corresponding join key in 'joinkeys',
- *              i.e., the Nth path key must match its pathkeys against the pathkey of
- *              the Nth join key in 'joinkeys'.
- *
- * 'joinkeys' is the list of key pairs to which the path keys must be
- *             matched
- * 'ordering' is the ordering of the(outer) path to which 'joinkeys'
- *             must correspond
- * 'paths' is a list of(inner) paths which are to be matched against
- *             each join key in 'joinkeys'
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- *             in 'joinkeys'
- *
- *     Find the cheapest path that matches the join keys
+ * get_cheapest_fractional_path_for_pathkeys
+ *       Find the cheapest path (for retrieving a specified fraction of all
+ *       the tuples) that satisfies the given pathkeys.
+ *       Return NULL if no such path.
+ *
+ * See compare_fractional_path_costs() for the interpretation of the fraction
+ * parameter.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'fraction' is the fraction of the total tuples expected to be retrieved
   */
  Path *
-get_cheapest_path_for_joinkeys(List *joinkeys,
-                                                                PathOrder *ordering,
-                                                                List *paths,
-                                                                int outer_or_inner)
+get_cheapest_fractional_path_for_pathkeys(List *paths,
+                                                                                 List *pathkeys,
+                                                                                 double fraction)
  {
         Path       *matched_path = NULL;
-       List       *i;
+       ListCell   *l;
  
-       foreach(i, paths)
+       foreach(l, paths)
         {
-               Path       *path = (Path *) lfirst(i);
-               int                     better_sort;
-               
-               if (order_joinkeys_by_pathkeys(path->pathkeys, joinkeys, NIL,
-                                                                          outer_or_inner, NULL, NULL) &&
-                       pathorder_match(ordering, path->pathorder, &better_sort) &&
-                       better_sort == 0)
-               {
-                       if (matched_path == NULL ||
-                               path->path_cost < matched_path->path_cost)
-                               matched_path = path;
-               }
+               Path       *path = (Path *) lfirst(l);
+
+               /*
+                * Since cost comparison is a lot cheaper than pathkey comparison, do
+                * that first.
+                */
+               if (matched_path != NULL &&
+                       compare_fractional_path_costs(matched_path, path, fraction) <= 0)
+                       continue;
+
+               if (pathkeys_contained_in(pathkeys, path->pathkeys))
+                       matched_path = path;
         }
         return matched_path;
  }
  
+/****************************************************************************
+ *             NEW PATHKEY FORMATION
+ ****************************************************************************/
  
  /*
- * make_pathkeys_from_joinkeys
- *       Builds a pathkey list for a path by pulling one of the pathkeys from
- *       a list of join keys 'joinkeys' and then finding the var node in the
- *       target list 'tlist' that corresponds to that pathkey.
- *
- * 'joinkeys' is a list of join key pairs
- * 'tlist' is a relation target list
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- *     in 'joinkeys'
- *
- * Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)).
- * It is a list of lists because of multi-key indexes.
+ * build_index_pathkeys
+ *       Build a pathkeys list that describes the ordering induced by an index
+ *       scan using the given index.  (Note that an unordered index doesn't
+ *       induce any ordering; such an index will have no sortop OIDS in
+ *       its sortops arrays, and we will return NIL.)
+ *
+ * If 'scandir' is BackwardScanDirection, attempt to build pathkeys
+ * representing a backwards scan of the index. Return NIL if can't do it.
+ *
+ * The result is canonical, meaning that redundant pathkeys are removed;
+ * it may therefore have fewer entries than there are index columns.
+ *
+ * We generate the full pathkeys list whether or not all are useful for the
+ * current query.  Caller should do truncate_useless_pathkeys().
   */
  List *
-make_pathkeys_from_joinkeys(List *joinkeys,
-                                                         List *tlist,
-                                                         int outer_or_inner)
+build_index_pathkeys(PlannerInfo *root,
+                                        IndexOptInfo *index,
+                                        ScanDirection scandir)
  {
-       List       *pathkeys = NIL;
-       List       *jk;
+       List       *retval = NIL;
+       ListCell   *indexprs_item = list_head(index->indexprs);
+       int                     i;
+
+       for (i = 0; i < index->ncolumns; i++)
+       {
+               Oid                     sortop;
+               bool            nulls_first;
+               int                     ikey;
+               Expr       *indexkey;
+               PathKey    *cpathkey;
+
+               if (ScanDirectionIsBackward(scandir))
+               {
+                       sortop = index->revsortop[i];
+                       nulls_first = !index->nulls_first[i];
+               }
+               else
+               {
+                       sortop = index->fwdsortop[i];
+                       nulls_first = index->nulls_first[i];
+               }
+
+               if (!OidIsValid(sortop))
+                       break;                          /* no more orderable columns */
+
+               ikey = index->indexkeys[i];
+               if (ikey != 0)
+               {
+                       /* simple index column */
+                       indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey);
+               }
+               else
+               {
+                       /* expression --- assume we need not copy it */
+                       if (indexprs_item == NULL)
+                               elog(ERROR, "wrong number of index expressions");
+                       indexkey = (Expr *) lfirst(indexprs_item);
+                       indexprs_item = lnext(indexprs_item);
+               }
+
+               /* OK, make a canonical pathkey for this sort key */
+               cpathkey = make_pathkey_from_sortinfo(root,
+                                                                                         indexkey,
+                                                                                         sortop,
+                                                                                         nulls_first,
+                                                                                         true);
+
+               /* Add to list unless redundant */
+               if (!pathkey_is_redundant(cpathkey, retval))
+                       retval = lappend(retval, cpathkey);
+       }
+
+       return retval;
+}
+
+/*
+ * Find or make a Var node for the specified attribute of the rel.
+ *
+ * We first look for the var in the rel's target list, because that's
+ * easy and fast.  But the var might not be there (this should normally
+ * only happen for vars that are used in WHERE restriction clauses,
+ * but not in join clauses or in the SELECT target list).  In that case,
+ * gin up a Var node the hard way.
+ */
+static Var *
+find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, AttrNumber varattno)
+{
+       ListCell   *temp;
+       Index           relid;
+       Oid                     reloid,
+                               vartypeid;
+       int32           type_mod;
  
-       foreach(jk, joinkeys)
+       foreach(temp, rel->reltargetlist)
         {
-               JoinKey    *jkey = (JoinKey *) lfirst(jk);
-               Var                *key;
-               List       *p, *p2;
-               bool            found = false;
+               Var                *var = (Var *) lfirst(temp);
+
+               if (IsA(var, Var) &&
+                       var->varattno == varattno)
+                       return var;
+       }
+
+       relid = rel->relid;
+       reloid = getrelid(relid, root->parse->rtable);
+       get_atttypetypmod(reloid, varattno, &vartypeid, &type_mod);
  
-               key = (Var *) extract_join_key(jkey, outer_or_inner);
+       return makeVar(relid, varattno, vartypeid, type_mod, 0);
+}
  
-               /* check to see if it is in the target list */
-               if (matching_tlist_var(key, tlist))
+/*
+ * convert_subquery_pathkeys
+ *       Build a pathkeys list that describes the ordering of a subquery's
+ *       result, in the terms of the outer query.      This is essentially a
+ *       task of conversion.
+ *
+ * 'rel': outer query's RelOptInfo for the subquery relation.
+ * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
+ *
+ * It is not necessary for caller to do truncate_useless_pathkeys(),
+ * because we select keys in a way that takes usefulness of the keys into
+ * account.
+ */
+List *
+convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
+                                                 List *subquery_pathkeys)
+{
+       List       *retval = NIL;
+       int                     retvallen = 0;
+       int                     outer_query_keys = list_length(root->query_pathkeys);
+       List       *sub_tlist = rel->subplan->targetlist;
+       ListCell   *i;
+
+       foreach(i, subquery_pathkeys)
+       {
+               PathKey    *sub_pathkey = (PathKey *) lfirst(i);
+               EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
+               PathKey    *best_pathkey = NULL;
+               int                     best_score = -1;
+               ListCell   *j;
+
+               /*
+                * The sub_pathkey's EquivalenceClass could contain multiple elements
+                * (representing knowledge that multiple items are effectively equal).
+                * Each element might match none, one, or more of the output columns
+                * that are visible to the outer query. This means we may have
+                * multiple possible representations of the sub_pathkey in the context
+                * of the outer query.  Ideally we would generate them all and put
+                * them all into an EC of the outer query, thereby propagating
+                * equality knowledge up to the outer query.  Right now we cannot do
+                * so, because the outer query's EquivalenceClasses are already frozen
+                * when this is called. Instead we prefer the one that has the highest
+                * "score" (number of EC peers, plus one if it matches the outer
+                * query_pathkeys). This is the most likely to be useful in the outer
+                * query.
+                */
+               foreach(j, sub_eclass->ec_members)
                 {
+                       EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
+                       Expr       *sub_expr = sub_member->em_expr;
+                       Expr       *rtarg;
+                       ListCell   *k;
+
                         /*
-                        * Include it in the pathkeys list if we haven't already done so
+                        * We handle two cases: the sub_pathkey key can be either an exact
+                        * match for a targetlist entry, or a RelabelType of a targetlist
+                        * entry.  (The latter case is worth extra code because it arises
+                        * frequently in connection with varchar fields.)
                          */
-                       foreach(p, pathkeys)
+                       if (IsA(sub_expr, RelabelType))
+                               rtarg = ((RelabelType *) sub_expr)->arg;
+                       else
+                               rtarg = NULL;
+
+                       foreach(k, sub_tlist)
                         {
-                               List       *pathkey = lfirst(p);
-       
-                               foreach(p2, pathkey)
+                               TargetEntry *tle = (TargetEntry *) lfirst(k);
+                               Expr       *outer_expr;
+                               EquivalenceClass *outer_ec;
+                               PathKey    *outer_pk;
+                               int                     score;
+
+                               /* resjunk items aren't visible to outer query */
+                               if (tle->resjunk)
+                                       continue;
+
+                               if (equal(tle->expr, sub_expr))
                                 {
-                                       Var                *pkey = lfirst(p2);
-       
-                                       if (equal(key, pkey))
-                                       {
-                                               found = true;
-                                               break;
-                                       }
+                                       /* Exact match */
+                                       outer_expr = (Expr *)
+                                               makeVar(rel->relid,
+                                                               tle->resno,
+                                                               exprType((Node *) tle->expr),
+                                                               exprTypmod((Node *) tle->expr),
+                                                               0);
+                               }
+                               else if (rtarg && equal(tle->expr, rtarg))
+                               {
+                                       /* Match after discarding RelabelType */
+                                       outer_expr = (Expr *)
+                                               makeVar(rel->relid,
+                                                               tle->resno,
+                                                               exprType((Node *) tle->expr),
+                                                               exprTypmod((Node *) tle->expr),
+                                                               0);
+                                       outer_expr = (Expr *)
+                                               makeRelabelType((Expr *) outer_expr,
+                                                                               ((RelabelType *) sub_expr)->resulttype,
+                                                                        ((RelabelType *) sub_expr)->resulttypmod,
+                                                                  ((RelabelType *) sub_expr)->relabelformat);
+                               }
+                               else
+                                       continue;
+
+                               /* Found a representation for this sub_pathkey */
+                               outer_ec = get_eclass_for_sort_expr(root,
+                                                                                                       outer_expr,
+                                                                                                       sub_member->em_datatype,
+                                                                                                       sub_eclass->ec_opfamilies);
+                               outer_pk = make_canonical_pathkey(root,
+                                                                                                 outer_ec,
+                                                                                                 sub_pathkey->pk_opfamily,
+                                                                                                 sub_pathkey->pk_strategy,
+                                                                                                 sub_pathkey->pk_nulls_first);
+                               /* score = # of equivalence peers */
+                               score = list_length(outer_ec->ec_members) - 1;
+                               /* +1 if it matches the proper query_pathkeys item */
+                               if (retvallen < outer_query_keys &&
+                                       list_nth(root->query_pathkeys, retvallen) == outer_pk)
+                                       score++;
+                               if (score > best_score)
+                               {
+                                       best_pathkey = outer_pk;
+                                       best_score = score;
                                 }
-                               if (found)
-                                       break;
                         }
-                       if (!found)
-                               pathkeys = lappend(pathkeys, lcons(key, NIL));
+               }
+
+               /*
+                * If we couldn't find a representation of this sub_pathkey, we're
+                * done (we can't use the ones to its right, either).
+                */
+               if (!best_pathkey)
+                       break;
+
+               /*
+                * Eliminate redundant ordering info; could happen if outer query
+                * equivalences subquery keys...
+                */
+               if (!pathkey_is_redundant(best_pathkey, retval))
+               {
+                       retval = lappend(retval, best_pathkey);
+                       retvallen++;
                 }
         }
-       return pathkeys;
+
+       return retval;
+}
+
+/*
+ * build_join_pathkeys
+ *       Build the path keys for a join relation constructed by mergejoin or
+ *       nestloop join.  This is normally the same as the outer path's keys.
+ *
+ *       EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
+ *       having the outer path's path keys, because null lefthand rows may be
+ *       inserted at random points.  It must be treated as unsorted.
+ *
+ *       We truncate away any pathkeys that are uninteresting for higher joins.
+ *
+ * 'joinrel' is the join relation that paths are being formed for
+ * 'jointype' is the join type (inner, left, full, etc)
+ * 'outer_pathkeys' is the list of the current outer path's path keys
+ *
+ * Returns the list of new path keys.
+ */
+List *
+build_join_pathkeys(PlannerInfo *root,
+                                       RelOptInfo *joinrel,
+                                       JoinType jointype,
+                                       List *outer_pathkeys)
+{
+       if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
+               return NIL;
+
+       /*
+        * This used to be quite a complex bit of code, but now that all pathkey
+        * sublists start out life canonicalized, we don't have to do a darn thing
+        * here!
+        *
+        * We do, however, need to truncate the pathkeys list, since it may
+        * contain pathkeys that were useful for forming this joinrel but are
+        * uninteresting to higher levels.
+        */
+       return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
  }
  
+/****************************************************************************
+ *             PATHKEYS AND SORT CLAUSES
+ ****************************************************************************/
+
+/*
+ * make_pathkeys_for_sortclauses
+ *             Generate a pathkeys list that represents the sort order specified
+ *             by a list of SortClauses (GroupClauses will work too!)
+ *
+ * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
+ * otherwise not.  canonicalize should always be TRUE after EquivalenceClass
+ * merging has been performed, but FALSE if we haven't done EquivalenceClass
+ * merging yet.  (We provide this option because grouping_planner() needs to
+ * be able to represent requested pathkeys before the equivalence classes have
+ * been created for the query.)
+ *
+ * 'sortclauses' is a list of SortClause or GroupClause nodes
+ * 'tlist' is the targetlist to find the referenced tlist entries in
+ */
+List *
+make_pathkeys_for_sortclauses(PlannerInfo *root,
+                                                         List *sortclauses,
+                                                         List *tlist,
+                                                         bool canonicalize)
+{
+       List       *pathkeys = NIL;
+       ListCell   *l;
+
+       foreach(l, sortclauses)
+       {
+               SortClause *sortcl = (SortClause *) lfirst(l);
+               Expr       *sortkey;
+               PathKey    *pathkey;
+
+               sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+               pathkey = make_pathkey_from_sortinfo(root,
+                                                                                        sortkey,
+                                                                                        sortcl->sortop,
+                                                                                        sortcl->nulls_first,
+                                                                                        canonicalize);
+
+               /* Canonical form eliminates redundant ordering keys */
+               if (canonicalize)
+               {
+                       if (!pathkey_is_redundant(pathkey, pathkeys))
+                               pathkeys = lappend(pathkeys, pathkey);
+               }
+               else
+                       pathkeys = lappend(pathkeys, pathkey);
+       }
+       return pathkeys;
+}
  
  /****************************************************************************
- *             NEW PATHKEY FORMATION
+ *             PATHKEYS AND MERGECLAUSES
   ****************************************************************************/
  
  /*
- * new_join_pathkeys
- *       Find the path keys for a join relation by finding all vars in the list
- *       of join clauses 'joinclauses' such that:
- *             (1) the var corresponding to the outer join relation is a
- *                     key on the outer path
- *             (2) the var appears in the target list of the join relation
- *       In other words, add to each outer path key the inner path keys that
- *       are required for qualification.
- *
- * 'outer_pathkeys' is the list of the outer path's path keys
- * 'join_rel_tlist' is the target list of the join relation
- * 'joinclauses' is the list of restricting join clauses
+ * cache_mergeclause_eclasses
+ *             Make the cached EquivalenceClass links valid in a mergeclause
+ *             restrictinfo.
   *
- * Returns the list of new path keys.
+ * RestrictInfo contains fields in which we may cache pointers to
+ * EquivalenceClasses for the left and right inputs of the mergeclause.
+ * (If the mergeclause is a true equivalence clause these will be the
+ * same EquivalenceClass, otherwise not.)
+ */
+void
+cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
+{
+       Assert(restrictinfo->mergeopfamilies != NIL);
+
+       /* the cached values should be either both set or both not */
+       if (restrictinfo->left_ec == NULL)
+       {
+               Expr       *clause = restrictinfo->clause;
+               Oid                     lefttype,
+                                       righttype;
+
+               /* Need the declared input types of the operator */
+               op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
+
+               /* Find or create a matching EquivalenceClass for each side */
+               restrictinfo->left_ec =
+                       get_eclass_for_sort_expr(root,
+                                                                        (Expr *) get_leftop(clause),
+                                                                        lefttype,
+                                                                        restrictinfo->mergeopfamilies);
+               restrictinfo->right_ec =
+                       get_eclass_for_sort_expr(root,
+                                                                        (Expr *) get_rightop(clause),
+                                                                        righttype,
+                                                                        restrictinfo->mergeopfamilies);
+       }
+       else
+               Assert(restrictinfo->right_ec != NULL);
+}
+
+/*
+ * find_mergeclauses_for_pathkeys
+ *       This routine attempts to find a set of mergeclauses that can be
+ *       used with a specified ordering for one of the input relations.
+ *       If successful, it returns a list of mergeclauses.
+ *
+ * 'pathkeys' is a pathkeys list showing the ordering of an input path.
+ * 'outer_keys' is TRUE if these keys are for the outer input path,
+ *                     FALSE if for inner.
+ * 'restrictinfos' is a list of mergejoinable restriction clauses for the
+ *                     join relation being formed.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path.  (See
+ * select_mergejoin_clauses())
+ *
+ * The result is NIL if no merge can be done, else a maximal list of
+ * usable mergeclauses (represented as a list of their restrictinfo nodes).
+ */
+List *
+find_mergeclauses_for_pathkeys(PlannerInfo *root,
+                                                          List *pathkeys,
+                                                          bool outer_keys,
+                                                          List *restrictinfos)
+{
+       List       *mergeclauses = NIL;
+       ListCell   *i;
+
+       /* make sure we have eclasses cached in the clauses */
+       foreach(i, restrictinfos)
+       {
+               RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
+
+               cache_mergeclause_eclasses(root, rinfo);
+       }
+
+       foreach(i, pathkeys)
+       {
+               PathKey    *pathkey = (PathKey *) lfirst(i);
+               EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+               List       *matched_restrictinfos = NIL;
+               ListCell   *j;
+
+               /*----------
+                * A mergejoin clause matches a pathkey if it has the same EC.
+                * If there are multiple matching clauses, take them all.  In plain
+                * inner-join scenarios we expect only one match, because
+                * equivalence-class processing will have removed any redundant
+                * mergeclauses.  However, in outer-join scenarios there might be
+                * multiple matches.  An example is
+                *
+                *      select * from a full join b
+                *              on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
+                *
+                * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
+                * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
+                * we *must* do so or we will be unable to form a valid plan.
+                *
+                * We expect that the given pathkeys list is canonical, which means
+                * no two members have the same EC, so it's not possible for this
+                * code to enter the same mergeclause into the result list twice.
+                *
+                * XXX it's possible that multiple matching clauses might have
+                * different ECs on the other side, in which case the order we put
+                * them into our result makes a difference in the pathkeys required
+                * for the other input path.  However this routine hasn't got any info
+                * about which order would be best, so for now we disregard that case
+                * (which is probably a corner case anyway).
+                *----------
+                */
+               foreach(j, restrictinfos)
+               {
+                       RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
+                       EquivalenceClass *clause_ec;
+
+                       if (outer_keys)
+                               clause_ec = rinfo->outer_is_left ?
+                                       rinfo->left_ec : rinfo->right_ec;
+                       else
+                               clause_ec = rinfo->outer_is_left ?
+                                       rinfo->right_ec : rinfo->left_ec;
+                       if (clause_ec == pathkey_ec)
+                               matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
+               }
+
+               /*
+                * If we didn't find a mergeclause, we're done --- any additional
+                * sort-key positions in the pathkeys are useless.      (But we can still
+                * mergejoin if we found at least one mergeclause.)
+                */
+               if (matched_restrictinfos == NIL)
+                       break;
+
+               /*
+                * If we did find usable mergeclause(s) for this sort-key position,
+                * add them to result list.
+                */
+               mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
+       }
+
+       return mergeclauses;
+}
+
+/*
+ * select_outer_pathkeys_for_merge
+ *       Builds a pathkey list representing a possible sort ordering
+ *       that can be used with the given mergeclauses.
+ *
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
+ *                     that will be used in a merge join.
+ * 'joinrel' is the join relation we are trying to construct.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path.  (See
+ * select_mergejoin_clauses())
   *
+ * Returns a pathkeys list that can be applied to the outer relation.
+ *
+ * Since we assume here that a sort is required, there is no particular use
+ * in matching any available ordering of the outerrel.  (joinpath.c has an
+ * entirely separate code path for considering sort-free mergejoins.)  Rather,
+ * it's interesting to try to match the requested query_pathkeys so that a
+ * second output sort may be avoided; and failing that, we try to list "more
+ * popular" keys (those with the most unmatched EquivalenceClass peers)
+ * earlier, in hopes of making the resulting ordering useful for as many
+ * higher-level mergejoins as possible.
   */
  List *
-new_join_pathkeys(List *outer_pathkeys,
-                                 List *join_rel_tlist,
-                                 List *joinclauses)
+select_outer_pathkeys_for_merge(PlannerInfo *root,
+                                                               List *mergeclauses,
+                                                               RelOptInfo *joinrel)
  {
-       List       *final_pathkeys = NIL;
-       List       *i;
+       List       *pathkeys = NIL;
+       int                     nClauses = list_length(mergeclauses);
+       EquivalenceClass **ecs;
+       int                *scores;
+       int                     necs;
+       ListCell   *lc;
+       int                     j;
+
+       /* Might have no mergeclauses */
+       if (nClauses == 0)
+               return NIL;
+
+       /*
+        * Make arrays of the ECs used by the mergeclauses (dropping any
+        * duplicates) and their "popularity" scores.
+        */
+       ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
+       scores = (int *) palloc(nClauses * sizeof(int));
+       necs = 0;
+
+       foreach(lc, mergeclauses)
+       {
+               RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+               EquivalenceClass *oeclass;
+               int                     score;
+               ListCell   *lc2;
+
+               /* get the outer eclass */
+               cache_mergeclause_eclasses(root, rinfo);
+
+               if (rinfo->outer_is_left)
+                       oeclass = rinfo->left_ec;
+               else
+                       oeclass = rinfo->right_ec;
+
+               /* reject duplicates */
+               for (j = 0; j < necs; j++)
+               {
+                       if (ecs[j] == oeclass)
+                               break;
+               }
+               if (j < necs)
+                       continue;
+
+               /* compute score */
+               score = 0;
+               foreach(lc2, oeclass->ec_members)
+               {
+                       EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+
+                       /* Potential future join partner? */
+                       if (!em->em_is_const && !em->em_is_child &&
+                               !bms_overlap(em->em_relids, joinrel->relids))
+                               score++;
+               }
+
+               ecs[necs] = oeclass;
+               scores[necs] = score;
+               necs++;
+       }
+
+       /*
+        * Find out if we have all the ECs mentioned in query_pathkeys; if so
+        * we can generate a sort order that's also useful for final output.
+        * There is no percentage in a partial match, though, so we have to
+        * have 'em all.
+        */
+       if (root->query_pathkeys)
+       {
+               foreach(lc, root->query_pathkeys)
+               {
+                       PathKey    *query_pathkey = (PathKey *) lfirst(lc);
+                       EquivalenceClass *query_ec = query_pathkey->pk_eclass;
+
+                       for (j = 0; j < necs; j++)
+                       {
+                               if (ecs[j] == query_ec)
+                                       break;          /* found match */
+                       }
+                       if (j >= necs)
+                               break;                  /* didn't find match */
+               }
+               /* if we got to the end of the list, we have them all */
+               if (lc == NULL)
+               {
+                       /* copy query_pathkeys as starting point for our output */
+                       pathkeys = list_copy(root->query_pathkeys);
+                       /* mark their ECs as already-emitted */
+                       foreach(lc, root->query_pathkeys)
+                       {
+                               PathKey    *query_pathkey = (PathKey *) lfirst(lc);
+                               EquivalenceClass *query_ec = query_pathkey->pk_eclass;
  
-       foreach(i, outer_pathkeys)
+                               for (j = 0; j < necs; j++)
+                               {
+                                       if (ecs[j] == query_ec)
+                                       {
+                                               scores[j] = -1;
+                                               break;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /*
+        * Add remaining ECs to the list in popularity order, using a default
+        * sort ordering.  (We could use qsort() here, but the list length is
+        * usually so small it's not worth it.)
+        */
+       for (;;)
         {
-               List       *outer_pathkey = lfirst(i);
-               List       *new_pathkey;
+               int             best_j;
+               int             best_score;
+               EquivalenceClass *ec;
+               PathKey *pathkey;
  
-               new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
-                                                                          joinclauses);
-               if (new_pathkey != NIL)
-                       final_pathkeys = lappend(final_pathkeys, new_pathkey);
+               best_j = 0;
+               best_score = scores[0];
+               for (j = 1; j < necs; j++)
+               {
+                       if (scores[j] > best_score)
+                       {
+                               best_j = j;
+                               best_score = scores[j];
+                       }
+               }
+               if (best_score < 0)
+                       break;                          /* all done */
+               ec = ecs[best_j];
+               scores[best_j] = -1;
+               pathkey = make_canonical_pathkey(root,
+                                                                                ec,
+                                                                                linitial_oid(ec->ec_opfamilies),
+                                                                                BTLessStrategyNumber,
+                                                                                false);
+               /* can't be redundant because no duplicate ECs */
+               Assert(!pathkey_is_redundant(pathkey, pathkeys));
+               pathkeys = lappend(pathkeys, pathkey);
         }
-       return final_pathkeys;
+
+       pfree(ecs);
+       pfree(scores);
+
+       return pathkeys;
  }
  
  /*
- * new_join_pathkey
- *       Generate an individual pathkey sublist, consisting of the outer vars
- *       already mentioned in 'pathkey' plus any inner vars that are joined to
- *       them (and thus can now also be considered path keys, per discussion
- *       at the top of this file).
+ * make_inner_pathkeys_for_merge
+ *       Builds a pathkey list representing the explicit sort order that
+ *       must be applied to an inner path to make it usable with the
+ *       given mergeclauses.
   *
- *       Note that each returned pathkey is the var node found in
- *       'join_rel_tlist' rather than the joinclause var node.
- *       (Is this important?)  Also, we return a fully copied list
- *       that does not share any subnodes with existing data structures.
- *       (Is that important, either?)
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
+ *                     that will be used in a merge join.
+ * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
+ *                     side of the join.
   *
- * Returns a new pathkey (list of pathkey variables).
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path.  (See
+ * select_mergejoin_clauses())
   *
+ * Returns a pathkeys list that can be applied to the inner relation.
+ *
+ * Note that it is not this routine's job to decide whether sorting is
+ * actually needed for a particular input path.  Assume a sort is necessary;
+ * just make the keys, eh?
   */
-static List *
-new_join_pathkey(List *pathkey,
-                                List *join_rel_tlist,
-                                List *joinclauses)
+List *
+make_inner_pathkeys_for_merge(PlannerInfo *root,
+                                                         List *mergeclauses,
+                                                         List *outer_pathkeys)
  {
-       List       *new_pathkey = NIL;
-       List       *i,
-                          *j;
+       List       *pathkeys = NIL;
+       EquivalenceClass *lastoeclass;
+       PathKey    *opathkey;
+       ListCell   *lc;
+       ListCell   *lop;
  
-       foreach(i, pathkey)
+       lastoeclass = NULL;
+       opathkey = NULL;
+       lop = list_head(outer_pathkeys);
+
+       foreach(lc, mergeclauses)
         {
-               Var                *key = (Var *) lfirst(i);
-               Expr       *tlist_key;
+               RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+               EquivalenceClass *oeclass;
+               EquivalenceClass *ieclass;
+               PathKey    *pathkey;
+
+               cache_mergeclause_eclasses(root, rinfo);
+
+               if (rinfo->outer_is_left)
+               {
+                       oeclass = rinfo->left_ec;
+                       ieclass = rinfo->right_ec;
+               }
+               else
+               {
+                       oeclass = rinfo->right_ec;
+                       ieclass = rinfo->left_ec;
+               }
+
+               /* outer eclass should match current or next pathkeys */
+               /* we check this carefully for debugging reasons */
+               if (oeclass != lastoeclass)
+               {
+                       if (!lop)
+                               elog(ERROR, "too few pathkeys for mergeclauses");
+                       opathkey = (PathKey *) lfirst(lop);
+                       lop = lnext(lop);
+                       lastoeclass = opathkey->pk_eclass;
+                       if (oeclass != lastoeclass)
+                               elog(ERROR, "outer pathkeys do not match mergeclause");
+               }
+
+               /*
+                * Often, we'll have same EC on both sides, in which case the outer
+                * pathkey is also canonical for the inner side, and we can skip a
+                * useless search.
+                */
+               if (ieclass == oeclass)
+                       pathkey = opathkey;
+               else
+                       pathkey = make_canonical_pathkey(root,
+                                                                                        ieclass,
+                                                                                        opathkey->pk_opfamily,
+                                                                                        opathkey->pk_strategy,
+                                                                                        opathkey->pk_nulls_first);
+
+               /*
+                * Don't generate redundant pathkeys (can happen if multiple
+                * mergeclauses refer to same EC).
+                */
+               if (!pathkey_is_redundant(pathkey, pathkeys))
+                       pathkeys = lappend(pathkeys, pathkey);
+       }
+
+       return pathkeys;
+}
+
+/****************************************************************************
+ *             PATHKEY USEFULNESS CHECKS
+ *
+ * We only want to remember as many of the pathkeys of a path as have some
+ * potential use, either for subsequent mergejoins or for meeting the query's
+ * requested output ordering.  This ensures that add_path() won't consider
+ * a path to have a usefully different ordering unless it really is useful.
+ * These routines check for usefulness of given pathkeys.
+ ****************************************************************************/
+
+/*
+ * pathkeys_useful_for_merging
+ *             Count the number of pathkeys that may be useful for mergejoins
+ *             above the given relation.
+ *
+ * We consider a pathkey potentially useful if it corresponds to the merge
+ * ordering of either side of any joinclause for the rel.  This might be
+ * overoptimistic, since joinclauses that require different other relations
+ * might never be usable at the same time, but trying to be exact is likely
+ * to be more trouble than it's worth.
+ */
+int
+pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
+{
+       int                     useful = 0;
+       ListCell   *i;
  
-               Assert(key);
-                                                                       
-               tlist_key = matching_tlist_var(key, join_rel_tlist);
-               if (tlist_key && !member(tlist_key, new_pathkey))
-                       new_pathkey = lcons(copyObject(tlist_key), new_pathkey);
+       foreach(i, pathkeys)
+       {
+               PathKey    *pathkey = (PathKey *) lfirst(i);
+               bool            matched = false;
+               ListCell   *j;
  
-               foreach(j, joinclauses)
+               /*
+                * First look into the EquivalenceClass of the pathkey, to see if
+                * there are any members not yet joined to the rel.  If so, it's
+                * surely possible to generate a mergejoin clause using them.
+                */
+               if (rel->has_eclass_joins &&
+                       eclass_useful_for_merging(pathkey->pk_eclass, rel))
+                       matched = true;
+               else
                 {
-                       Expr       *joinclause = lfirst(j);
-                       Expr       *tlist_other_var;
-
-                       tlist_other_var = matching_tlist_var(
-                                                               other_join_clause_var(key, joinclause),
-                                                               join_rel_tlist);
-                       if (tlist_other_var && !member(tlist_other_var, new_pathkey))
-                               new_pathkey = lcons(copyObject(tlist_other_var), new_pathkey);
+                       /*
+                        * Otherwise search the rel's joininfo list, which contains
+                        * non-EquivalenceClass-derivable join clauses that might
+                        * nonetheless be mergejoinable.
+                        */
+                       foreach(j, rel->joininfo)
+                       {
+                               RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+
+                               if (restrictinfo->mergeopfamilies == NIL)
+                                       continue;
+                               cache_mergeclause_eclasses(root, restrictinfo);
+
+                               if (pathkey->pk_eclass == restrictinfo->left_ec ||
+                                       pathkey->pk_eclass == restrictinfo->right_ec)
+                               {
+                                       matched = true;
+                                       break;
+                               }
+                       }
                 }
+
+               /*
+                * If we didn't find a mergeclause, we're done --- any additional
+                * sort-key positions in the pathkeys are useless.      (But we can still
+                * mergejoin if we found at least one mergeclause.)
+                */
+               if (matched)
+                       useful++;
+               else
+                       break;
         }
  
-       return new_pathkey;
+       return useful;
+}
+
+/*
+ * pathkeys_useful_for_ordering
+ *             Count the number of pathkeys that are useful for meeting the
+ *             query's requested output ordering.
+ *
+ * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
+ * no good to order by just the first key(s) of the requested ordering.
+ * So the result is always either 0 or list_length(root->query_pathkeys).
+ */
+int
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+{
+       if (root->query_pathkeys == NIL)
+               return 0;                               /* no special ordering requested */
+
+       if (pathkeys == NIL)
+               return 0;                               /* unordered path */
+
+       if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
+       {
+               /* It's useful ... or at least the first N keys are */
+               return list_length(root->query_pathkeys);
+       }
+
+       return 0;                                       /* path ordering not useful */
+}
+
+/*
+ * truncate_useless_pathkeys
+ *             Shorten the given pathkey list to just the useful pathkeys.
+ */
+List *
+truncate_useless_pathkeys(PlannerInfo *root,
+                                                 RelOptInfo *rel,
+                                                 List *pathkeys)
+{
+       int                     nuseful;
+       int                     nuseful2;
+
+       nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
+       nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+       if (nuseful2 > nuseful)
+               nuseful = nuseful2;
+
+       /*
+        * Note: not safe to modify input list destructively, but we can avoid
+        * copying the list if we're not actually going to change it
+        */
+       if (nuseful == list_length(pathkeys))
+               return pathkeys;
+       else
+               return list_truncate(list_copy(pathkeys), nuseful);
  }