/*-------------------------------------------------------------------------
*
- * joinutils.c
- * Utilities for matching and building join and path keys
+ * pathkeys.c
+ * Utilities for matching and building path keys
*
- * Copyright (c) 1994, Regents of the University of California
+ * See src/backend/optimizer/README for a great deal of information about
+ * the nature and use of path keys.
*
*
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.9 1999/05/17 00:26:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/pathkeys.c,v 1.82 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "nodes/pg_list.h"
-#include "nodes/relation.h"
+#include "access/skey.h"
+#include "nodes/makefuncs.h"
#include "nodes/plannodes.h"
-
-#include "optimizer/internal.h"
+#include "optimizer/clauses.h"
+#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
-#include "optimizer/var.h"
-#include "optimizer/keys.h"
#include "optimizer/tlist.h"
-#include "optimizer/joininfo.h"
-#include "optimizer/ordering.h"
-
-static int match_pathkey_joinkeys(List *pathkey, List *joinkeys,
- int outer_or_inner);
-static List *new_join_pathkey(List *pathkeys, List *join_rel_tlist,
- List *joinclauses);
-
-
-/*--------------------
- * Explanation of Path.pathkeys
- *
- * Path.pathkeys is a List of List of Var nodes that represent the sort
- * order of the result generated by the Path.
- *
- * In single/base relation RelOptInfo's, the Path's represent various ways
- * of generating the relation and the resulting ordering of the tuples.
- * Sequential scan Paths have NIL pathkeys, indicating no known ordering.
- * Index scans have Path.pathkeys that represent the chosen index.
- * A single-key index pathkeys would be { {tab1_indexkey1} }. For a
- * multi-key index pathkeys would be { {tab1_indexkey1}, {tab1_indexkey2} },
- * indicating major sort by indexkey1 and minor sort by indexkey2.
- *
- * Multi-relation RelOptInfo Path's are more complicated. Mergejoins are
- * only performed with equijoins ("="). Because of this, the multi-relation
- * path actually has more than one primary Var key. For example, a
- * mergejoin Path of "tab1.col1 = tab2.col1" would generate a pathkeys of
- * { {tab1.col1, tab2.col1} }, indicating that the major sort order of the
- * Path can be taken to be *either* tab1.col1 or tab2.col1.
- * They are equal, so they are both primary sort keys. This allows future
- * joins to use either Var as a pre-sorted key to prevent upper Mergejoins
- * from having to re-sort the Path. This is why pathkeys is a List of Lists.
- *
- * Note that while the order of the top list is meaningful (primary vs.
- * secondary sort key), the order of each sublist is arbitrary.
- *
- * For multi-key sorts, if the outer is sorted by a multi-key index, the
- * multi-key index remains after the join. If the inner has a multi-key
- * sort, only the primary key of the inner is added to the result.
- * Mergejoins only join on the primary key. Currently, non-primary keys
- * in the pathkeys List are of limited value.
- *
- * Although Hashjoins also work only with equijoin operators, it is *not*
- * safe to consider the output of a Hashjoin to be sorted in any particular
- * order --- not even the outer path's order. This is true because the
- * executor might have to split the join into multiple batches.
- *
- * NestJoin does not perform sorting, and allows non-equijoins, so it does
- * not allow useful pathkeys. (But couldn't we use the outer path's order?)
- *
- * -- bjm
- *--------------------
+#include "parser/parsetree.h"
+#include "parser/parse_expr.h"
+#include "utils/lsyscache.h"
+
+
+/*
+ * If an EC contains a const and isn't below-outer-join, any PathKey depending
+ * on it must be redundant, since there's only one possible value of the key.
*/
-
+#define MUST_BE_REDUNDANT(eclass) \
+ ((eclass)->ec_has_const && !(eclass)->ec_below_outer_join)
+
+static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first);
+static PathKey *make_canonical_pathkey(PlannerInfo *root,
+ EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first);
+static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
+static PathKey *make_pathkey_from_sortinfo(PlannerInfo *root,
+ Expr *expr, Oid ordering_op,
+ bool nulls_first,
+ bool canonicalize);
+static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel,
+ AttrNumber varattno);
+
+
/****************************************************************************
- * KEY COMPARISONS
+ * PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
****************************************************************************/
/*
- * order_joinkeys_by_pathkeys
- * Attempts to match the keys of a path against the keys of join clauses.
- * This is done by looking for a matching join key in 'joinkeys' for
- * every path key in the list 'path.keys'. If there is a matching join key
- * (not necessarily unique) for every path key, then the list of
- * corresponding join keys and join clauses are returned in the order in
- * which the keys matched the path keys.
- *
- * 'pathkeys' is a list of path keys:
- * ( ( (var) (var) ... ) ( (var) ... ) )
- * 'joinkeys' is a list of join keys:
- * ( (outer inner) (outer inner) ... )
- * 'joinclauses' is a list of clauses corresponding to the join keys in
- * 'joinkeys'
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- * in 'joinkeys'
- *
- * Returns the join keys and corresponding join clauses in a list if all
- * of the path keys were matched:
- * (
- * ( (outerkey0 innerkey0) ... (outerkeyN or innerkeyN) )
- * ( clause0 ... clauseN )
- * )
- * and nil otherwise.
- *
- * Returns a list of matched join keys and a list of matched join clauses
- * in pointers if valid order can be found.
+ * makePathKey
+ * create a PathKey node
+ *
+ * This does not promise to create a canonical PathKey, it's merely a
+ * convenience routine to build the specified node.
*/
-bool
-order_joinkeys_by_pathkeys(List *pathkeys,
- List *joinkeys,
- List *joinclauses,
- int outer_or_inner,
- List **matchedJoinKeysPtr,
- List **matchedJoinClausesPtr)
+static PathKey *
+makePathKey(EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first)
{
- List *matched_joinkeys = NIL;
- List *matched_joinclauses = NIL;
- List *pathkey = NIL;
- List *i = NIL;
- int matched_joinkey_index = -1;
- int matched_keys = 0;
+ PathKey *pk = makeNode(PathKey);
+
+ pk->pk_eclass = eclass;
+ pk->pk_opfamily = opfamily;
+ pk->pk_strategy = strategy;
+ pk->pk_nulls_first = nulls_first;
+
+ return pk;
+}
+
+/*
+ * make_canonical_pathkey
+ * Given the parameters for a PathKey, find any pre-existing matching
+ * pathkey in the query's list of "canonical" pathkeys. Make a new
+ * entry if there's not one already.
+ *
+ * Note that this function must not be used until after we have completed
+ * merging EquivalenceClasses.
+ */
+static PathKey *
+make_canonical_pathkey(PlannerInfo *root,
+ EquivalenceClass *eclass, Oid opfamily,
+ int strategy, bool nulls_first)
+{
+ PathKey *pk;
+ ListCell *lc;
+ MemoryContext oldcontext;
+
+ /* The passed eclass might be non-canonical, so chase up to the top */
+ while (eclass->ec_merged)
+ eclass = eclass->ec_merged;
+
+ foreach(lc, root->canon_pathkeys)
+ {
+ pk = (PathKey *) lfirst(lc);
+ if (eclass == pk->pk_eclass &&
+ opfamily == pk->pk_opfamily &&
+ strategy == pk->pk_strategy &&
+ nulls_first == pk->pk_nulls_first)
+ return pk;
+ }
+
/*
- * Reorder the joinkeys by picking out one that matches each pathkey,
- * and create a new joinkey/joinclause list in pathkey order
+ * Be sure canonical pathkeys are allocated in the main planning context.
+ * Not an issue in normal planning, but it is for GEQO.
*/
- foreach(i, pathkeys)
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ pk = makePathKey(eclass, opfamily, strategy, nulls_first);
+ root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return pk;
+}
+
+/*
+ * pathkey_is_redundant
+ * Is a pathkey redundant with one already in the given list?
+ *
+ * Both the given pathkey and the list members must be canonical for this
+ * to work properly. We detect two cases:
+ *
+ * 1. If the new pathkey's equivalence class contains a constant, and isn't
+ * below an outer join, then we can disregard it as a sort key. An example:
+ * SELECT ... WHERE x = 42 ORDER BY x, y;
+ * We may as well just sort by y. Note that because of opfamily matching,
+ * this is semantically correct: we know that the equality constraint is one
+ * that actually binds the variable to a single value in the terms of any
+ * ordering operator that might go with the eclass. This rule not only lets
+ * us simplify (or even skip) explicit sorts, but also allows matching index
+ * sort orders to a query when there are don't-care index columns.
+ *
+ * 2. If the new pathkey's equivalence class is the same as that of any
+ * existing member of the pathkey list, then it is redundant. Some examples:
+ * SELECT ... ORDER BY x, x;
+ * SELECT ... ORDER BY x, x DESC;
+ * SELECT ... WHERE x = y ORDER BY x, y;
+ * In all these cases the second sort key cannot distinguish values that are
+ * considered equal by the first, and so there's no point in using it.
+ * Note in particular that we need not compare opfamily (all the opfamilies
+ * of the EC have the same notion of equality) nor sort direction.
+ *
+ * Because the equivclass.c machinery forms only one copy of any EC per query,
+ * pointer comparison is enough to decide whether canonical ECs are the same.
+ */
+static bool
+pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
+{
+ EquivalenceClass *new_ec = new_pathkey->pk_eclass;
+ ListCell *lc;
+
+ /* Assert we've been given canonical pathkeys */
+ Assert(!new_ec->ec_merged);
+
+ /* Check for EC containing a constant --- unconditionally redundant */
+ if (MUST_BE_REDUNDANT(new_ec))
+ return true;
+
+ /* If same EC already used in list, then redundant */
+ foreach(lc, pathkeys)
{
- pathkey = lfirst(i);
- matched_joinkey_index = match_pathkey_joinkeys(pathkey, joinkeys,
- outer_or_inner);
+ PathKey *old_pathkey = (PathKey *) lfirst(lc);
- if (matched_joinkey_index != -1)
- {
- matched_keys++;
- if (matchedJoinKeysPtr)
- {
- JoinKey *joinkey = nth(matched_joinkey_index, joinkeys);
- matched_joinkeys = lappend(matched_joinkeys, joinkey);
- }
-
- if (matchedJoinClausesPtr)
- {
- Expr *joinclause = nth(matched_joinkey_index,
- joinclauses);
- Assert(joinclauses);
- matched_joinclauses = lappend(matched_joinclauses, joinclause);
- }
- }
- else
- /* A pathkey could not be matched. */
- break;
+ /* Assert we've been given canonical pathkeys */
+ Assert(!old_pathkey->pk_eclass->ec_merged);
+
+ if (new_ec == old_pathkey->pk_eclass)
+ return true;
}
+ return false;
+}
+
+/*
+ * canonicalize_pathkeys
+ * Convert a not-necessarily-canonical pathkeys list to canonical form.
+ *
+ * Note that this function must not be used until after we have completed
+ * merging EquivalenceClasses.
+ */
+List *
+canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
+{
+ List *new_pathkeys = NIL;
+ ListCell *l;
+
+ foreach(l, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(l);
+ EquivalenceClass *eclass;
+ PathKey *cpathkey;
+
+ /* Find the canonical (merged) EquivalenceClass */
+ eclass = pathkey->pk_eclass;
+ while (eclass->ec_merged)
+ eclass = eclass->ec_merged;
+
+ /*
+ * If we can tell it's redundant just from the EC, skip.
+ * pathkey_is_redundant would notice that, but we needn't even bother
+ * constructing the node...
+ */
+ if (MUST_BE_REDUNDANT(eclass))
+ continue;
+
+ /* OK, build a canonicalized PathKey struct */
+ cpathkey = make_canonical_pathkey(root,
+ eclass,
+ pathkey->pk_opfamily,
+ pathkey->pk_strategy,
+ pathkey->pk_nulls_first);
+
+ /* Add to list unless redundant */
+ if (!pathkey_is_redundant(cpathkey, new_pathkeys))
+ new_pathkeys = lappend(new_pathkeys, cpathkey);
+ }
+ return new_pathkeys;
+}
+
+/*
+ * make_pathkey_from_sortinfo
+ * Given an expression, a sortop, and a nulls-first flag, create
+ * a PathKey. If canonicalize = true, the result is a "canonical"
+ * PathKey, otherwise not. (But note it might be redundant anyway.)
+ *
+ * canonicalize should always be TRUE after EquivalenceClass merging has
+ * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
+ */
+static PathKey *
+make_pathkey_from_sortinfo(PlannerInfo *root,
+ Expr *expr, Oid ordering_op,
+ bool nulls_first,
+ bool canonicalize)
+{
+ Oid equality_op;
+ List *opfamilies;
+ Oid opfamily,
+ lefttype,
+ righttype;
+ int strategy;
+ ListCell *lc;
+ EquivalenceClass *eclass;
+
/*
- * Did we fail to match all the joinkeys?
- * Extra pathkeys are no problem.
+ * An ordering operator fully determines the behavior of its opfamily,
+ * so could only meaningfully appear in one family --- or perhaps two
+ * if one builds a reverse-sort opfamily, but there's not much point in
+ * that anymore. But EquivalenceClasses need to contain opfamily lists
+ * based on the family membership of equality operators, which could
+ * easily be bigger. So, look up the equality operator that goes with
+ * the ordering operator (this should be unique) and get its membership.
*/
- if (matched_keys != length(joinkeys))
+ equality_op = get_equality_op_for_ordering_op(ordering_op);
+ if (!OidIsValid(equality_op)) /* shouldn't happen */
+ elog(ERROR, "could not find equality operator for ordering operator %u",
+ ordering_op);
+ opfamilies = get_mergejoin_opfamilies(equality_op);
+ if (!opfamilies) /* certainly should find some */
+ elog(ERROR, "could not find opfamilies for ordering operator %u",
+ ordering_op);
+
+ /*
+ * Next we have to determine the strategy number to put into the pathkey.
+ * In the presence of reverse-sort opclasses there might be two answers.
+ * We prefer the one associated with the first opfamilies member that
+ * this ordering_op appears in (this will be consistently defined in
+ * normal system operation; see comments for get_mergejoin_opfamilies()).
+ */
+ opfamily = InvalidOid;
+ strategy = 0;
+ foreach(lc, opfamilies)
{
- if (matchedJoinKeysPtr)
- *matchedJoinKeysPtr = NIL;
- if (matchedJoinClausesPtr)
- *matchedJoinClausesPtr = NIL;
- return false;
+ opfamily = lfirst_oid(lc);
+ strategy = get_op_opfamily_strategy(ordering_op, opfamily);
+ if (strategy)
+ break;
}
+ if (!(strategy == BTLessStrategyNumber ||
+ strategy == BTGreaterStrategyNumber))
+ elog(ERROR, "ordering operator %u is has wrong strategy number %d",
+ ordering_op, strategy);
+
+ /* Need the declared input type of the operator, too */
+ op_input_types(ordering_op, &lefttype, &righttype);
+ Assert(lefttype == righttype);
+
+ /* Now find or create a matching EquivalenceClass */
+ eclass = get_eclass_for_sort_expr(root, expr, lefttype, opfamilies);
- if (matchedJoinKeysPtr)
- *matchedJoinKeysPtr = matched_joinkeys;
- if (matchedJoinClausesPtr)
- *matchedJoinClausesPtr = matched_joinclauses;
- return true;
+ /* And finally we can find or create a PathKey node */
+ if (canonicalize)
+ return make_canonical_pathkey(root, eclass, opfamily,
+ strategy, nulls_first);
+ else
+ return makePathKey(eclass, opfamily, strategy, nulls_first);
}
+/****************************************************************************
+ * PATHKEY COMPARISONS
+ ****************************************************************************/
+
/*
- * match_pathkey_joinkeys
- * Returns the 0-based index into 'joinkeys' of the first joinkey whose
- * outer or inner pathkey matches any subkey of 'pathkey'.
+ * compare_pathkeys
+ * Compare two pathkeys to see if they are equivalent, and if not whether
+ * one is "better" than the other.
*
- * All these keys are equivalent, so any of them can match. See above.
+ * This function may only be applied to canonicalized pathkey lists.
+ * In the canonical representation, pathkeys can be checked for equality
+ * by simple pointer comparison.
*/
-static int
-match_pathkey_joinkeys(List *pathkey,
- List *joinkeys,
- int outer_or_inner)
+PathKeysComparison
+compare_pathkeys(List *keys1, List *keys2)
{
- Var *key;
- int pos;
- List *i, *x;
- JoinKey *jk;
+ ListCell *key1,
+ *key2;
- foreach(i, pathkey)
+ forboth(key1, keys1, key2, keys2)
{
- key = (Var *) lfirst(i);
- pos = 0;
- foreach(x, joinkeys)
- {
- jk = (JoinKey *) lfirst(x);
- if (equal(key, extract_join_key(jk, outer_or_inner)))
- return pos;
- pos++;
- }
+ PathKey *pathkey1 = (PathKey *) lfirst(key1);
+ PathKey *pathkey2 = (PathKey *) lfirst(key2);
+
+ /*
+ * XXX would like to check that we've been given canonicalized input,
+ * but PlannerInfo not accessible here...
+ */
+#ifdef NOT_USED
+ Assert(list_member_ptr(root->canon_pathkeys, pathkey1));
+ Assert(list_member_ptr(root->canon_pathkeys, pathkey2));
+#endif
+
+ if (pathkey1 != pathkey2)
+ return PATHKEYS_DIFFERENT; /* no need to keep looking */
+ }
+
+ /*
+ * If we reached the end of only one list, the other is longer and
+ * therefore not a subset.
+ */
+ if (key1 == NULL && key2 == NULL)
+ return PATHKEYS_EQUAL;
+ if (key1 != NULL)
+ return PATHKEYS_BETTER1; /* key1 is longer */
+ return PATHKEYS_BETTER2; /* key2 is longer */
+}
+
+/*
+ * pathkeys_contained_in
+ * Common special case of compare_pathkeys: we just want to know
+ * if keys2 are at least as well sorted as keys1.
+ */
+bool
+pathkeys_contained_in(List *keys1, List *keys2)
+{
+ switch (compare_pathkeys(keys1, keys2))
+ {
+ case PATHKEYS_EQUAL:
+ case PATHKEYS_BETTER2:
+ return true;
+ default:
+ break;
}
- return -1; /* no index found */
+ return false;
}
+/*
+ * get_cheapest_path_for_pathkeys
+ * Find the cheapest path (according to the specified criterion) that
+ * satisfies the given pathkeys. Return NULL if no such path.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'cost_criterion' is STARTUP_COST or TOTAL_COST
+ */
+Path *
+get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+ CostSelector cost_criterion)
+{
+ Path *matched_path = NULL;
+ ListCell *l;
+
+ foreach(l, paths)
+ {
+ Path *path = (Path *) lfirst(l);
+
+ /*
+ * Since cost comparison is a lot cheaper than pathkey comparison, do
+ * that first. (XXX is that still true?)
+ */
+ if (matched_path != NULL &&
+ compare_path_costs(matched_path, path, cost_criterion) <= 0)
+ continue;
+
+ if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ matched_path = path;
+ }
+ return matched_path;
+}
/*
- * get_cheapest_path_for_joinkeys
- * Attempts to find a path in 'paths' whose keys match a set of join
- * keys 'joinkeys'. To match,
- * 1. the path node ordering must equal 'ordering'.
- * 2. each pathkey of a given path must match(i.e., be(equal) to) the
- * appropriate pathkey of the corresponding join key in 'joinkeys',
- * i.e., the Nth path key must match its pathkeys against the pathkey of
- * the Nth join key in 'joinkeys'.
- *
- * 'joinkeys' is the list of key pairs to which the path keys must be
- * matched
- * 'ordering' is the ordering of the(outer) path to which 'joinkeys'
- * must correspond
- * 'paths' is a list of(inner) paths which are to be matched against
- * each join key in 'joinkeys'
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- * in 'joinkeys'
- *
- * Find the cheapest path that matches the join keys
+ * get_cheapest_fractional_path_for_pathkeys
+ * Find the cheapest path (for retrieving a specified fraction of all
+ * the tuples) that satisfies the given pathkeys.
+ * Return NULL if no such path.
+ *
+ * See compare_fractional_path_costs() for the interpretation of the fraction
+ * parameter.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'fraction' is the fraction of the total tuples expected to be retrieved
*/
Path *
-get_cheapest_path_for_joinkeys(List *joinkeys,
- PathOrder *ordering,
- List *paths,
- int outer_or_inner)
+get_cheapest_fractional_path_for_pathkeys(List *paths,
+ List *pathkeys,
+ double fraction)
{
Path *matched_path = NULL;
- List *i;
+ ListCell *l;
- foreach(i, paths)
+ foreach(l, paths)
{
- Path *path = (Path *) lfirst(i);
- int better_sort;
-
- if (order_joinkeys_by_pathkeys(path->pathkeys, joinkeys, NIL,
- outer_or_inner, NULL, NULL) &&
- pathorder_match(ordering, path->pathorder, &better_sort) &&
- better_sort == 0)
- {
- if (matched_path == NULL ||
- path->path_cost < matched_path->path_cost)
- matched_path = path;
- }
+ Path *path = (Path *) lfirst(l);
+
+ /*
+ * Since cost comparison is a lot cheaper than pathkey comparison, do
+ * that first.
+ */
+ if (matched_path != NULL &&
+ compare_fractional_path_costs(matched_path, path, fraction) <= 0)
+ continue;
+
+ if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ matched_path = path;
}
return matched_path;
}
+/****************************************************************************
+ * NEW PATHKEY FORMATION
+ ****************************************************************************/
/*
- * make_pathkeys_from_joinkeys
- * Builds a pathkey list for a path by pulling one of the pathkeys from
- * a list of join keys 'joinkeys' and then finding the var node in the
- * target list 'tlist' that corresponds to that pathkey.
- *
- * 'joinkeys' is a list of join key pairs
- * 'tlist' is a relation target list
- * 'outer_or_inner' is a flag that selects the desired pathkey of a join key
- * in 'joinkeys'
- *
- * Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)).
- * It is a list of lists because of multi-key indexes.
+ * build_index_pathkeys
+ * Build a pathkeys list that describes the ordering induced by an index
+ * scan using the given index. (Note that an unordered index doesn't
+ * induce any ordering; such an index will have no sortop OIDS in
+ * its sortops arrays, and we will return NIL.)
+ *
+ * If 'scandir' is BackwardScanDirection, attempt to build pathkeys
+ * representing a backwards scan of the index. Return NIL if can't do it.
+ *
+ * The result is canonical, meaning that redundant pathkeys are removed;
+ * it may therefore have fewer entries than there are index columns.
+ *
+ * We generate the full pathkeys list whether or not all are useful for the
+ * current query. Caller should do truncate_useless_pathkeys().
*/
List *
-make_pathkeys_from_joinkeys(List *joinkeys,
- List *tlist,
- int outer_or_inner)
+build_index_pathkeys(PlannerInfo *root,
+ IndexOptInfo *index,
+ ScanDirection scandir)
{
- List *pathkeys = NIL;
- List *jk;
+ List *retval = NIL;
+ ListCell *indexprs_item = list_head(index->indexprs);
+ int i;
+
+ for (i = 0; i < index->ncolumns; i++)
+ {
+ Oid sortop;
+ bool nulls_first;
+ int ikey;
+ Expr *indexkey;
+ PathKey *cpathkey;
+
+ if (ScanDirectionIsBackward(scandir))
+ {
+ sortop = index->revsortop[i];
+ nulls_first = !index->nulls_first[i];
+ }
+ else
+ {
+ sortop = index->fwdsortop[i];
+ nulls_first = index->nulls_first[i];
+ }
+
+ if (!OidIsValid(sortop))
+ break; /* no more orderable columns */
+
+ ikey = index->indexkeys[i];
+ if (ikey != 0)
+ {
+ /* simple index column */
+ indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey);
+ }
+ else
+ {
+ /* expression --- assume we need not copy it */
+ if (indexprs_item == NULL)
+ elog(ERROR, "wrong number of index expressions");
+ indexkey = (Expr *) lfirst(indexprs_item);
+ indexprs_item = lnext(indexprs_item);
+ }
+
+ /* OK, make a canonical pathkey for this sort key */
+ cpathkey = make_pathkey_from_sortinfo(root,
+ indexkey,
+ sortop,
+ nulls_first,
+ true);
+
+ /* Add to list unless redundant */
+ if (!pathkey_is_redundant(cpathkey, retval))
+ retval = lappend(retval, cpathkey);
+ }
+
+ return retval;
+}
+
+/*
+ * Find or make a Var node for the specified attribute of the rel.
+ *
+ * We first look for the var in the rel's target list, because that's
+ * easy and fast. But the var might not be there (this should normally
+ * only happen for vars that are used in WHERE restriction clauses,
+ * but not in join clauses or in the SELECT target list). In that case,
+ * gin up a Var node the hard way.
+ */
+static Var *
+find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, AttrNumber varattno)
+{
+ ListCell *temp;
+ Index relid;
+ Oid reloid,
+ vartypeid;
+ int32 type_mod;
- foreach(jk, joinkeys)
+ foreach(temp, rel->reltargetlist)
{
- JoinKey *jkey = (JoinKey *) lfirst(jk);
- Var *key;
- List *p, *p2;
- bool found = false;
+ Var *var = (Var *) lfirst(temp);
+
+ if (IsA(var, Var) &&
+ var->varattno == varattno)
+ return var;
+ }
+
+ relid = rel->relid;
+ reloid = getrelid(relid, root->parse->rtable);
+ get_atttypetypmod(reloid, varattno, &vartypeid, &type_mod);
- key = (Var *) extract_join_key(jkey, outer_or_inner);
+ return makeVar(relid, varattno, vartypeid, type_mod, 0);
+}
- /* check to see if it is in the target list */
- if (matching_tlist_var(key, tlist))
+/*
+ * convert_subquery_pathkeys
+ * Build a pathkeys list that describes the ordering of a subquery's
+ * result, in the terms of the outer query. This is essentially a
+ * task of conversion.
+ *
+ * 'rel': outer query's RelOptInfo for the subquery relation.
+ * 'subquery_pathkeys': the subquery's output pathkeys, in its terms.
+ *
+ * It is not necessary for caller to do truncate_useless_pathkeys(),
+ * because we select keys in a way that takes usefulness of the keys into
+ * account.
+ */
+List *
+convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
+ List *subquery_pathkeys)
+{
+ List *retval = NIL;
+ int retvallen = 0;
+ int outer_query_keys = list_length(root->query_pathkeys);
+ List *sub_tlist = rel->subplan->targetlist;
+ ListCell *i;
+
+ foreach(i, subquery_pathkeys)
+ {
+ PathKey *sub_pathkey = (PathKey *) lfirst(i);
+ EquivalenceClass *sub_eclass = sub_pathkey->pk_eclass;
+ PathKey *best_pathkey = NULL;
+ int best_score = -1;
+ ListCell *j;
+
+ /*
+ * The sub_pathkey's EquivalenceClass could contain multiple elements
+ * (representing knowledge that multiple items are effectively equal).
+ * Each element might match none, one, or more of the output columns
+ * that are visible to the outer query. This means we may have
+ * multiple possible representations of the sub_pathkey in the context
+ * of the outer query. Ideally we would generate them all and put
+ * them all into an EC of the outer query, thereby propagating
+ * equality knowledge up to the outer query. Right now we cannot do
+ * so, because the outer query's EquivalenceClasses are already frozen
+ * when this is called. Instead we prefer the one that has the highest
+ * "score" (number of EC peers, plus one if it matches the outer
+ * query_pathkeys). This is the most likely to be useful in the outer
+ * query.
+ */
+ foreach(j, sub_eclass->ec_members)
{
+ EquivalenceMember *sub_member = (EquivalenceMember *) lfirst(j);
+ Expr *sub_expr = sub_member->em_expr;
+ Expr *rtarg;
+ ListCell *k;
+
/*
- * Include it in the pathkeys list if we haven't already done so
+ * We handle two cases: the sub_pathkey key can be either an exact
+ * match for a targetlist entry, or a RelabelType of a targetlist
+ * entry. (The latter case is worth extra code because it arises
+ * frequently in connection with varchar fields.)
*/
- foreach(p, pathkeys)
+ if (IsA(sub_expr, RelabelType))
+ rtarg = ((RelabelType *) sub_expr)->arg;
+ else
+ rtarg = NULL;
+
+ foreach(k, sub_tlist)
{
- List *pathkey = lfirst(p);
-
- foreach(p2, pathkey)
+ TargetEntry *tle = (TargetEntry *) lfirst(k);
+ Expr *outer_expr;
+ EquivalenceClass *outer_ec;
+ PathKey *outer_pk;
+ int score;
+
+ /* resjunk items aren't visible to outer query */
+ if (tle->resjunk)
+ continue;
+
+ if (equal(tle->expr, sub_expr))
{
- Var *pkey = lfirst(p2);
-
- if (equal(key, pkey))
- {
- found = true;
- break;
- }
+ /* Exact match */
+ outer_expr = (Expr *)
+ makeVar(rel->relid,
+ tle->resno,
+ exprType((Node *) tle->expr),
+ exprTypmod((Node *) tle->expr),
+ 0);
+ }
+ else if (rtarg && equal(tle->expr, rtarg))
+ {
+ /* Match after discarding RelabelType */
+ outer_expr = (Expr *)
+ makeVar(rel->relid,
+ tle->resno,
+ exprType((Node *) tle->expr),
+ exprTypmod((Node *) tle->expr),
+ 0);
+ outer_expr = (Expr *)
+ makeRelabelType((Expr *) outer_expr,
+ ((RelabelType *) sub_expr)->resulttype,
+ ((RelabelType *) sub_expr)->resulttypmod,
+ ((RelabelType *) sub_expr)->relabelformat);
+ }
+ else
+ continue;
+
+ /* Found a representation for this sub_pathkey */
+ outer_ec = get_eclass_for_sort_expr(root,
+ outer_expr,
+ sub_member->em_datatype,
+ sub_eclass->ec_opfamilies);
+ outer_pk = make_canonical_pathkey(root,
+ outer_ec,
+ sub_pathkey->pk_opfamily,
+ sub_pathkey->pk_strategy,
+ sub_pathkey->pk_nulls_first);
+ /* score = # of equivalence peers */
+ score = list_length(outer_ec->ec_members) - 1;
+ /* +1 if it matches the proper query_pathkeys item */
+ if (retvallen < outer_query_keys &&
+ list_nth(root->query_pathkeys, retvallen) == outer_pk)
+ score++;
+ if (score > best_score)
+ {
+ best_pathkey = outer_pk;
+ best_score = score;
}
- if (found)
- break;
}
- if (!found)
- pathkeys = lappend(pathkeys, lcons(key, NIL));
+ }
+
+ /*
+ * If we couldn't find a representation of this sub_pathkey, we're
+ * done (we can't use the ones to its right, either).
+ */
+ if (!best_pathkey)
+ break;
+
+ /*
+ * Eliminate redundant ordering info; could happen if outer query
+ * equivalences subquery keys...
+ */
+ if (!pathkey_is_redundant(best_pathkey, retval))
+ {
+ retval = lappend(retval, best_pathkey);
+ retvallen++;
}
}
- return pathkeys;
+
+ return retval;
+}
+
+/*
+ * build_join_pathkeys
+ * Build the path keys for a join relation constructed by mergejoin or
+ * nestloop join. This is normally the same as the outer path's keys.
+ *
+ * EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
+ * having the outer path's path keys, because null lefthand rows may be
+ * inserted at random points. It must be treated as unsorted.
+ *
+ * We truncate away any pathkeys that are uninteresting for higher joins.
+ *
+ * 'joinrel' is the join relation that paths are being formed for
+ * 'jointype' is the join type (inner, left, full, etc)
+ * 'outer_pathkeys' is the list of the current outer path's path keys
+ *
+ * Returns the list of new path keys.
+ */
+List *
+build_join_pathkeys(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ List *outer_pathkeys)
+{
+ if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
+ return NIL;
+
+ /*
+ * This used to be quite a complex bit of code, but now that all pathkey
+ * sublists start out life canonicalized, we don't have to do a darn thing
+ * here!
+ *
+ * We do, however, need to truncate the pathkeys list, since it may
+ * contain pathkeys that were useful for forming this joinrel but are
+ * uninteresting to higher levels.
+ */
+ return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
}
+/****************************************************************************
+ * PATHKEYS AND SORT CLAUSES
+ ****************************************************************************/
+
+/*
+ * make_pathkeys_for_sortclauses
+ * Generate a pathkeys list that represents the sort order specified
+ * by a list of SortClauses (GroupClauses will work too!)
+ *
+ * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
+ * otherwise not. canonicalize should always be TRUE after EquivalenceClass
+ * merging has been performed, but FALSE if we haven't done EquivalenceClass
+ * merging yet. (We provide this option because grouping_planner() needs to
+ * be able to represent requested pathkeys before the equivalence classes have
+ * been created for the query.)
+ *
+ * 'sortclauses' is a list of SortClause or GroupClause nodes
+ * 'tlist' is the targetlist to find the referenced tlist entries in
+ */
+List *
+make_pathkeys_for_sortclauses(PlannerInfo *root,
+ List *sortclauses,
+ List *tlist,
+ bool canonicalize)
+{
+ List *pathkeys = NIL;
+ ListCell *l;
+
+ foreach(l, sortclauses)
+ {
+ SortClause *sortcl = (SortClause *) lfirst(l);
+ Expr *sortkey;
+ PathKey *pathkey;
+
+ sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+ pathkey = make_pathkey_from_sortinfo(root,
+ sortkey,
+ sortcl->sortop,
+ sortcl->nulls_first,
+ canonicalize);
+
+ /* Canonical form eliminates redundant ordering keys */
+ if (canonicalize)
+ {
+ if (!pathkey_is_redundant(pathkey, pathkeys))
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+ else
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+ return pathkeys;
+}
/****************************************************************************
- * NEW PATHKEY FORMATION
+ * PATHKEYS AND MERGECLAUSES
****************************************************************************/
/*
- * new_join_pathkeys
- * Find the path keys for a join relation by finding all vars in the list
- * of join clauses 'joinclauses' such that:
- * (1) the var corresponding to the outer join relation is a
- * key on the outer path
- * (2) the var appears in the target list of the join relation
- * In other words, add to each outer path key the inner path keys that
- * are required for qualification.
- *
- * 'outer_pathkeys' is the list of the outer path's path keys
- * 'join_rel_tlist' is the target list of the join relation
- * 'joinclauses' is the list of restricting join clauses
+ * cache_mergeclause_eclasses
+ * Make the cached EquivalenceClass links valid in a mergeclause
+ * restrictinfo.
*
- * Returns the list of new path keys.
+ * RestrictInfo contains fields in which we may cache pointers to
+ * EquivalenceClasses for the left and right inputs of the mergeclause.
+ * (If the mergeclause is a true equivalence clause these will be the
+ * same EquivalenceClass, otherwise not.)
+ */
+void
+cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
+{
+ Assert(restrictinfo->mergeopfamilies != NIL);
+
+ /* the cached values should be either both set or both not */
+ if (restrictinfo->left_ec == NULL)
+ {
+ Expr *clause = restrictinfo->clause;
+ Oid lefttype,
+ righttype;
+
+ /* Need the declared input types of the operator */
+ op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
+
+ /* Find or create a matching EquivalenceClass for each side */
+ restrictinfo->left_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_leftop(clause),
+ lefttype,
+ restrictinfo->mergeopfamilies);
+ restrictinfo->right_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_rightop(clause),
+ righttype,
+ restrictinfo->mergeopfamilies);
+ }
+ else
+ Assert(restrictinfo->right_ec != NULL);
+}
+
+/*
+ * find_mergeclauses_for_pathkeys
+ * This routine attempts to find a set of mergeclauses that can be
+ * used with a specified ordering for one of the input relations.
+ * If successful, it returns a list of mergeclauses.
+ *
+ * 'pathkeys' is a pathkeys list showing the ordering of an input path.
+ * 'outer_keys' is TRUE if these keys are for the outer input path,
+ * FALSE if for inner.
+ * 'restrictinfos' is a list of mergejoinable restriction clauses for the
+ * join relation being formed.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
+ *
+ * The result is NIL if no merge can be done, else a maximal list of
+ * usable mergeclauses (represented as a list of their restrictinfo nodes).
+ */
+List *
+find_mergeclauses_for_pathkeys(PlannerInfo *root,
+ List *pathkeys,
+ bool outer_keys,
+ List *restrictinfos)
+{
+ List *mergeclauses = NIL;
+ ListCell *i;
+
+ /* make sure we have eclasses cached in the clauses */
+ foreach(i, restrictinfos)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
+
+ cache_mergeclause_eclasses(root, rinfo);
+ }
+
+ foreach(i, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(i);
+ EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+ List *matched_restrictinfos = NIL;
+ ListCell *j;
+
+ /*----------
+ * A mergejoin clause matches a pathkey if it has the same EC.
+ * If there are multiple matching clauses, take them all. In plain
+ * inner-join scenarios we expect only one match, because
+ * equivalence-class processing will have removed any redundant
+ * mergeclauses. However, in outer-join scenarios there might be
+ * multiple matches. An example is
+ *
+ * select * from a full join b
+ * on a.v1 = b.v1 and a.v2 = b.v2 and a.v1 = b.v2;
+ *
+ * Given the pathkeys ({a.v1}, {a.v2}) it is okay to return all three
+ * clauses (in the order a.v1=b.v1, a.v1=b.v2, a.v2=b.v2) and indeed
+ * we *must* do so or we will be unable to form a valid plan.
+ *
+ * We expect that the given pathkeys list is canonical, which means
+ * no two members have the same EC, so it's not possible for this
+ * code to enter the same mergeclause into the result list twice.
+ *
+ * XXX it's possible that multiple matching clauses might have
+ * different ECs on the other side, in which case the order we put
+ * them into our result makes a difference in the pathkeys required
+ * for the other input path. However this routine hasn't got any info
+ * about which order would be best, so for now we disregard that case
+ * (which is probably a corner case anyway).
+ *----------
+ */
+ foreach(j, restrictinfos)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(j);
+ EquivalenceClass *clause_ec;
+
+ if (outer_keys)
+ clause_ec = rinfo->outer_is_left ?
+ rinfo->left_ec : rinfo->right_ec;
+ else
+ clause_ec = rinfo->outer_is_left ?
+ rinfo->right_ec : rinfo->left_ec;
+ if (clause_ec == pathkey_ec)
+ matched_restrictinfos = lappend(matched_restrictinfos, rinfo);
+ }
+
+ /*
+ * If we didn't find a mergeclause, we're done --- any additional
+ * sort-key positions in the pathkeys are useless. (But we can still
+ * mergejoin if we found at least one mergeclause.)
+ */
+ if (matched_restrictinfos == NIL)
+ break;
+
+ /*
+ * If we did find usable mergeclause(s) for this sort-key position,
+ * add them to result list.
+ */
+ mergeclauses = list_concat(mergeclauses, matched_restrictinfos);
+ }
+
+ return mergeclauses;
+}
+
+/*
+ * select_outer_pathkeys_for_merge
+ * Builds a pathkey list representing a possible sort ordering
+ * that can be used with the given mergeclauses.
+ *
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
+ * that will be used in a merge join.
+ * 'joinrel' is the join relation we are trying to construct.
+ *
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
*
+ * Returns a pathkeys list that can be applied to the outer relation.
+ *
+ * Since we assume here that a sort is required, there is no particular use
+ * in matching any available ordering of the outerrel. (joinpath.c has an
+ * entirely separate code path for considering sort-free mergejoins.) Rather,
+ * it's interesting to try to match the requested query_pathkeys so that a
+ * second output sort may be avoided; and failing that, we try to list "more
+ * popular" keys (those with the most unmatched EquivalenceClass peers)
+ * earlier, in hopes of making the resulting ordering useful for as many
+ * higher-level mergejoins as possible.
*/
List *
-new_join_pathkeys(List *outer_pathkeys,
- List *join_rel_tlist,
- List *joinclauses)
+select_outer_pathkeys_for_merge(PlannerInfo *root,
+ List *mergeclauses,
+ RelOptInfo *joinrel)
{
- List *final_pathkeys = NIL;
- List *i;
+ List *pathkeys = NIL;
+ int nClauses = list_length(mergeclauses);
+ EquivalenceClass **ecs;
+ int *scores;
+ int necs;
+ ListCell *lc;
+ int j;
+
+ /* Might have no mergeclauses */
+ if (nClauses == 0)
+ return NIL;
+
+ /*
+ * Make arrays of the ECs used by the mergeclauses (dropping any
+ * duplicates) and their "popularity" scores.
+ */
+ ecs = (EquivalenceClass **) palloc(nClauses * sizeof(EquivalenceClass *));
+ scores = (int *) palloc(nClauses * sizeof(int));
+ necs = 0;
+
+ foreach(lc, mergeclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ EquivalenceClass *oeclass;
+ int score;
+ ListCell *lc2;
+
+ /* get the outer eclass */
+ cache_mergeclause_eclasses(root, rinfo);
+
+ if (rinfo->outer_is_left)
+ oeclass = rinfo->left_ec;
+ else
+ oeclass = rinfo->right_ec;
+
+ /* reject duplicates */
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == oeclass)
+ break;
+ }
+ if (j < necs)
+ continue;
+
+ /* compute score */
+ score = 0;
+ foreach(lc2, oeclass->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+
+ /* Potential future join partner? */
+ if (!em->em_is_const && !em->em_is_child &&
+ !bms_overlap(em->em_relids, joinrel->relids))
+ score++;
+ }
+
+ ecs[necs] = oeclass;
+ scores[necs] = score;
+ necs++;
+ }
+
+ /*
+ * Find out if we have all the ECs mentioned in query_pathkeys; if so
+ * we can generate a sort order that's also useful for final output.
+ * There is no percentage in a partial match, though, so we have to
+ * have 'em all.
+ */
+ if (root->query_pathkeys)
+ {
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *query_pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *query_ec = query_pathkey->pk_eclass;
+
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == query_ec)
+ break; /* found match */
+ }
+ if (j >= necs)
+ break; /* didn't find match */
+ }
+ /* if we got to the end of the list, we have them all */
+ if (lc == NULL)
+ {
+ /* copy query_pathkeys as starting point for our output */
+ pathkeys = list_copy(root->query_pathkeys);
+ /* mark their ECs as already-emitted */
+ foreach(lc, root->query_pathkeys)
+ {
+ PathKey *query_pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *query_ec = query_pathkey->pk_eclass;
- foreach(i, outer_pathkeys)
+ for (j = 0; j < necs; j++)
+ {
+ if (ecs[j] == query_ec)
+ {
+ scores[j] = -1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * Add remaining ECs to the list in popularity order, using a default
+ * sort ordering. (We could use qsort() here, but the list length is
+ * usually so small it's not worth it.)
+ */
+ for (;;)
{
- List *outer_pathkey = lfirst(i);
- List *new_pathkey;
+ int best_j;
+ int best_score;
+ EquivalenceClass *ec;
+ PathKey *pathkey;
- new_pathkey = new_join_pathkey(outer_pathkey, join_rel_tlist,
- joinclauses);
- if (new_pathkey != NIL)
- final_pathkeys = lappend(final_pathkeys, new_pathkey);
+ best_j = 0;
+ best_score = scores[0];
+ for (j = 1; j < necs; j++)
+ {
+ if (scores[j] > best_score)
+ {
+ best_j = j;
+ best_score = scores[j];
+ }
+ }
+ if (best_score < 0)
+ break; /* all done */
+ ec = ecs[best_j];
+ scores[best_j] = -1;
+ pathkey = make_canonical_pathkey(root,
+ ec,
+ linitial_oid(ec->ec_opfamilies),
+ BTLessStrategyNumber,
+ false);
+ /* can't be redundant because no duplicate ECs */
+ Assert(!pathkey_is_redundant(pathkey, pathkeys));
+ pathkeys = lappend(pathkeys, pathkey);
}
- return final_pathkeys;
+
+ pfree(ecs);
+ pfree(scores);
+
+ return pathkeys;
}
/*
- * new_join_pathkey
- * Generate an individual pathkey sublist, consisting of the outer vars
- * already mentioned in 'pathkey' plus any inner vars that are joined to
- * them (and thus can now also be considered path keys, per discussion
- * at the top of this file).
+ * make_inner_pathkeys_for_merge
+ * Builds a pathkey list representing the explicit sort order that
+ * must be applied to an inner path to make it usable with the
+ * given mergeclauses.
*
- * Note that each returned pathkey is the var node found in
- * 'join_rel_tlist' rather than the joinclause var node.
- * (Is this important?) Also, we return a fully copied list
- * that does not share any subnodes with existing data structures.
- * (Is that important, either?)
+ * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
+ * that will be used in a merge join.
+ * 'outer_pathkeys' are the already-known canonical pathkeys for the outer
+ * side of the join.
*
- * Returns a new pathkey (list of pathkey variables).
+ * The restrictinfos must be marked (via outer_is_left) to show which side
+ * of each clause is associated with the current outer path. (See
+ * select_mergejoin_clauses())
*
+ * Returns a pathkeys list that can be applied to the inner relation.
+ *
+ * Note that it is not this routine's job to decide whether sorting is
+ * actually needed for a particular input path. Assume a sort is necessary;
+ * just make the keys, eh?
*/
-static List *
-new_join_pathkey(List *pathkey,
- List *join_rel_tlist,
- List *joinclauses)
+List *
+make_inner_pathkeys_for_merge(PlannerInfo *root,
+ List *mergeclauses,
+ List *outer_pathkeys)
{
- List *new_pathkey = NIL;
- List *i,
- *j;
+ List *pathkeys = NIL;
+ EquivalenceClass *lastoeclass;
+ PathKey *opathkey;
+ ListCell *lc;
+ ListCell *lop;
- foreach(i, pathkey)
+ lastoeclass = NULL;
+ opathkey = NULL;
+ lop = list_head(outer_pathkeys);
+
+ foreach(lc, mergeclauses)
{
- Var *key = (Var *) lfirst(i);
- Expr *tlist_key;
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+ EquivalenceClass *oeclass;
+ EquivalenceClass *ieclass;
+ PathKey *pathkey;
+
+ cache_mergeclause_eclasses(root, rinfo);
+
+ if (rinfo->outer_is_left)
+ {
+ oeclass = rinfo->left_ec;
+ ieclass = rinfo->right_ec;
+ }
+ else
+ {
+ oeclass = rinfo->right_ec;
+ ieclass = rinfo->left_ec;
+ }
+
+ /* outer eclass should match current or next pathkeys */
+ /* we check this carefully for debugging reasons */
+ if (oeclass != lastoeclass)
+ {
+ if (!lop)
+ elog(ERROR, "too few pathkeys for mergeclauses");
+ opathkey = (PathKey *) lfirst(lop);
+ lop = lnext(lop);
+ lastoeclass = opathkey->pk_eclass;
+ if (oeclass != lastoeclass)
+ elog(ERROR, "outer pathkeys do not match mergeclause");
+ }
+
+ /*
+ * Often, we'll have same EC on both sides, in which case the outer
+ * pathkey is also canonical for the inner side, and we can skip a
+ * useless search.
+ */
+ if (ieclass == oeclass)
+ pathkey = opathkey;
+ else
+ pathkey = make_canonical_pathkey(root,
+ ieclass,
+ opathkey->pk_opfamily,
+ opathkey->pk_strategy,
+ opathkey->pk_nulls_first);
+
+ /*
+ * Don't generate redundant pathkeys (can happen if multiple
+ * mergeclauses refer to same EC).
+ */
+ if (!pathkey_is_redundant(pathkey, pathkeys))
+ pathkeys = lappend(pathkeys, pathkey);
+ }
+
+ return pathkeys;
+}
+
+/****************************************************************************
+ * PATHKEY USEFULNESS CHECKS
+ *
+ * We only want to remember as many of the pathkeys of a path as have some
+ * potential use, either for subsequent mergejoins or for meeting the query's
+ * requested output ordering. This ensures that add_path() won't consider
+ * a path to have a usefully different ordering unless it really is useful.
+ * These routines check for usefulness of given pathkeys.
+ ****************************************************************************/
+
+/*
+ * pathkeys_useful_for_merging
+ * Count the number of pathkeys that may be useful for mergejoins
+ * above the given relation.
+ *
+ * We consider a pathkey potentially useful if it corresponds to the merge
+ * ordering of either side of any joinclause for the rel. This might be
+ * overoptimistic, since joinclauses that require different other relations
+ * might never be usable at the same time, but trying to be exact is likely
+ * to be more trouble than it's worth.
+ */
+int
+pathkeys_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
+{
+ int useful = 0;
+ ListCell *i;
- Assert(key);
-
- tlist_key = matching_tlist_var(key, join_rel_tlist);
- if (tlist_key && !member(tlist_key, new_pathkey))
- new_pathkey = lcons(copyObject(tlist_key), new_pathkey);
+ foreach(i, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(i);
+ bool matched = false;
+ ListCell *j;
- foreach(j, joinclauses)
+ /*
+ * First look into the EquivalenceClass of the pathkey, to see if
+ * there are any members not yet joined to the rel. If so, it's
+ * surely possible to generate a mergejoin clause using them.
+ */
+ if (rel->has_eclass_joins &&
+ eclass_useful_for_merging(pathkey->pk_eclass, rel))
+ matched = true;
+ else
{
- Expr *joinclause = lfirst(j);
- Expr *tlist_other_var;
-
- tlist_other_var = matching_tlist_var(
- other_join_clause_var(key, joinclause),
- join_rel_tlist);
- if (tlist_other_var && !member(tlist_other_var, new_pathkey))
- new_pathkey = lcons(copyObject(tlist_other_var), new_pathkey);
+ /*
+ * Otherwise search the rel's joininfo list, which contains
+ * non-EquivalenceClass-derivable join clauses that might
+ * nonetheless be mergejoinable.
+ */
+ foreach(j, rel->joininfo)
+ {
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
+
+ if (restrictinfo->mergeopfamilies == NIL)
+ continue;
+ cache_mergeclause_eclasses(root, restrictinfo);
+
+ if (pathkey->pk_eclass == restrictinfo->left_ec ||
+ pathkey->pk_eclass == restrictinfo->right_ec)
+ {
+ matched = true;
+ break;
+ }
+ }
}
+
+ /*
+ * If we didn't find a mergeclause, we're done --- any additional
+ * sort-key positions in the pathkeys are useless. (But we can still
+ * mergejoin if we found at least one mergeclause.)
+ */
+ if (matched)
+ useful++;
+ else
+ break;
}
- return new_pathkey;
+ return useful;
+}
+
+/*
+ * pathkeys_useful_for_ordering
+ * Count the number of pathkeys that are useful for meeting the
+ * query's requested output ordering.
+ *
+ * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
+ * no good to order by just the first key(s) of the requested ordering.
+ * So the result is always either 0 or list_length(root->query_pathkeys).
+ */
+int
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+{
+ if (root->query_pathkeys == NIL)
+ return 0; /* no special ordering requested */
+
+ if (pathkeys == NIL)
+ return 0; /* unordered path */
+
+ if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
+ {
+ /* It's useful ... or at least the first N keys are */
+ return list_length(root->query_pathkeys);
+ }
+
+ return 0; /* path ordering not useful */
+}
+
+/*
+ * truncate_useless_pathkeys
+ * Shorten the given pathkey list to just the useful pathkeys.
+ */
+List *
+truncate_useless_pathkeys(PlannerInfo *root,
+ RelOptInfo *rel,
+ List *pathkeys)
+{
+ int nuseful;
+ int nuseful2;
+
+ nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
+ nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+ if (nuseful2 > nuseful)
+ nuseful = nuseful2;
+
+ /*
+ * Note: not safe to modify input list destructively, but we can avoid
+ * copying the list if we're not actually going to change it
+ */
+ if (nuseful == list_length(pathkeys))
+ return pathkeys;
+ else
+ return list_truncate(list_copy(pathkeys), nuseful);
}