orderings doesn't create any real problem.
+Order of processing for EquivalenceClasses and PathKeys
+-------------------------------------------------------
+
+As alluded to above, there is a specific sequence of phases in the
+processing of EquivalenceClasses and PathKeys during planning. During the
+initial scanning of the query's quals (deconstruct_jointree followed by
+reconsider_outer_join_clauses), we construct EquivalenceClasses based on
+mergejoinable clauses found in the quals. At the end of this process,
+we know all we can know about equivalence of different variables, so
+subsequently there will be no further merging of EquivalenceClasses.
+At that point it is possible to consider the EquivalenceClasses as
+"canonical" and build canonical PathKeys that reference them. Before
+we reach that point (actually, before entering query_planner at all)
+we also ensure that we have constructed EquivalenceClasses for all the
+expressions used in the query's ORDER BY and related clauses. These
+classes might or might not get merged together, depending on what we
+find in the quals.
+
+Because all the EquivalenceClasses are known before we begin path
+generation, we can use them as a guide to which indexes are of interest:
+if an index's column is not mentioned in any EquivalenceClass then that
+index's sort order cannot possibly be helpful for the query. This allows
+short-circuiting of much of the processing of create_index_paths() for
+irrelevant indexes.
+
+There are some cases where planner.c constructs additional
+EquivalenceClasses and PathKeys after query_planner has completed.
+In these cases, the extra ECs/PKs are needed to represent sort orders
+that were not considered during query_planner. Such situations should be
+minimized since it is impossible for query_planner to return a plan
+producing such a sort order, meaning a explicit sort will always be needed.
+Currently this happens only for queries involving multiple window functions
+with different orderings, for which extra sorts are needed anyway.
-Though Bob Devine <bob.devine@worldnet.att.net> was not involved in the
-coding of our optimizer, he is available to field questions about
-optimizer topics.
-- bjm & tgl
* join. (This is the reason why we need a failure return. It's more
* convenient to check this case here than at the call sites...)
*
+ * On success return, we have also initialized the clause's left_ec/right_ec
+ * fields to point to the EquivalenceClass representing it. This saves lookup
+ * effort later.
+ *
* Note: constructing merged EquivalenceClasses is a standard UNION-FIND
* problem, for which there exist better data structures than simple lists.
* If this code ever proves to be a bottleneck then it could be sped up ---
*em2;
ListCell *lc1;
+ /* Should not already be marked as having generated an eclass */
+ Assert(restrictinfo->left_ec == NULL);
+ Assert(restrictinfo->right_ec == NULL);
+
/* Extract info from given clause */
Assert(is_opclause(clause));
opno = ((OpExpr *) clause)->opno;
{
ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
ec1->ec_below_outer_join |= below_outer_join;
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec1;
+ restrictinfo->right_ec = ec1;
/* mark the RI as usable with this pair of EMs */
- /* NB: can't set left_ec/right_ec until merging is finished */
restrictinfo->left_em = em1;
restrictinfo->right_em = em2;
return true;
ec2->ec_relids = NULL;
ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
ec1->ec_below_outer_join |= below_outer_join;
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec1;
+ restrictinfo->right_ec = ec1;
/* mark the RI as usable with this pair of EMs */
restrictinfo->left_em = em1;
restrictinfo->right_em = em2;
em2 = add_eq_member(ec1, item2, item2_relids, false, item2_type);
ec1->ec_sources = lappend(ec1->ec_sources, restrictinfo);
ec1->ec_below_outer_join |= below_outer_join;
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec1;
+ restrictinfo->right_ec = ec1;
/* mark the RI as usable with this pair of EMs */
restrictinfo->left_em = em1;
restrictinfo->right_em = em2;
em1 = add_eq_member(ec2, item1, item1_relids, false, item1_type);
ec2->ec_sources = lappend(ec2->ec_sources, restrictinfo);
ec2->ec_below_outer_join |= below_outer_join;
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec2;
+ restrictinfo->right_ec = ec2;
/* mark the RI as usable with this pair of EMs */
restrictinfo->left_em = em1;
restrictinfo->right_em = em2;
root->eq_classes = lappend(root->eq_classes, ec);
+ /* mark the RI as associated with this eclass */
+ restrictinfo->left_ec = ec;
+ restrictinfo->right_ec = ec;
/* mark the RI as usable with this pair of EMs */
restrictinfo->left_em = em1;
restrictinfo->right_em = em2;
/*
* get_eclass_for_sort_expr
* Given an expression and opfamily info, find an existing equivalence
- * class it is a member of; if none, build a new single-member
+ * class it is a member of; if none, optionally build a new single-member
* EquivalenceClass for it.
*
* sortref is the SortGroupRef of the originating SortGroupClause, if any,
* or zero if not. (It should never be zero if the expression is volatile!)
*
+ * If create_it is TRUE, we'll build a new EquivalenceClass when there is no
+ * match. If create_it is FALSE, we just return NULL when no match.
+ *
* This can be used safely both before and after EquivalenceClass merging;
* since it never causes merging it does not invalidate any existing ECs
- * or PathKeys.
+ * or PathKeys. However, ECs added after path generation has begun are
+ * of limited usefulness, so usually it's best to create them beforehand.
*
* Note: opfamilies must be chosen consistently with the way
* process_equivalence() would do; that is, generated from a mergejoinable
Expr *expr,
Oid expr_datatype,
List *opfamilies,
- Index sortref)
+ Index sortref,
+ bool create_it)
{
EquivalenceClass *newec;
EquivalenceMember *newem;
}
}
+ /* No match; does caller want a NULL result? */
+ if (!create_it)
+ return NULL;
+
/*
- * No match, so build a new single-member EC
+ * OK, build a new single-member EC
*
* Here, we must be sure that we construct the EC in the right context. We
* can assume, however, that the passed expr is long-lived.
rinfo->parent_ec = parent_ec;
/*
- * We can set these now, rather than letting them be looked up later,
- * since this is only used after EC merging is complete.
+ * We know the correct values for left_ec/right_ec, ie this particular EC,
+ * so we can just set them directly instead of forcing another lookup.
*/
rinfo->left_ec = ec;
rinfo->right_ec = ec;
* mergejoin is not really all that big a deal, and so it's not clear
* that improving this is important.
*/
- cache_mergeclause_eclasses(root, restrictinfo);
+ update_mergeclause_eclasses(root, restrictinfo);
if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
Expr *expr, Oid ordering_op,
bool nulls_first,
Index sortref,
+ bool create_it,
bool canonicalize);
static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel,
AttrNumber varattno);
* If the PathKey is being generated from a SortGroupClause, sortref should be
* the SortGroupClause's SortGroupRef; otherwise zero.
*
+ * create_it is TRUE if we should create any missing EquivalenceClass
+ * needed to represent the sort key. If it's FALSE, we return NULL if the
+ * sort key isn't already present in any EquivalenceClass.
+ *
* canonicalize should always be TRUE after EquivalenceClass merging has
* been performed, but FALSE if we haven't done EquivalenceClass merging yet.
*/
Expr *expr, Oid ordering_op,
bool nulls_first,
Index sortref,
+ bool create_it,
bool canonicalize)
{
Oid opfamily,
COERCE_DONTCARE);
}
- /* Now find or create a matching EquivalenceClass */
+ /* Now find or (optionally) create a matching EquivalenceClass */
eclass = get_eclass_for_sort_expr(root, expr, opcintype, opfamilies,
- sortref);
+ sortref, create_it);
+
+ /* Fail if no EC and !create_it */
+ if (!eclass)
+ return NULL;
/* And finally we can find or create a PathKey node */
if (canonicalize)
* The result is canonical, meaning that redundant pathkeys are removed;
* it may therefore have fewer entries than there are index columns.
*
- * We generate the full pathkeys list whether or not all are useful for the
- * current query. Caller should do truncate_useless_pathkeys().
+ * Another reason for stopping early is that we may be able to tell that
+ * an index column's sort order is uninteresting for this query. However,
+ * that test is just based on the existence of an EquivalenceClass and not
+ * on position in pathkey lists, so it's not complete. Caller should call
+ * truncate_useless_pathkeys() to possibly remove more pathkeys.
*/
List *
build_index_pathkeys(PlannerInfo *root,
indexprs_item = lnext(indexprs_item);
}
- /* OK, make a canonical pathkey for this sort key */
+ /* OK, try to make a canonical pathkey for this sort key */
cpathkey = make_pathkey_from_sortinfo(root,
indexkey,
sortop,
nulls_first,
0,
+ false,
true);
+ /*
+ * If the sort key isn't already present in any EquivalenceClass,
+ * then it's not an interesting sort order for this query. So
+ * we can stop now --- lower-order sort keys aren't useful either.
+ */
+ if (!cpathkey)
+ break;
+
/* Add to list unless redundant */
if (!pathkey_is_redundant(cpathkey, retval))
retval = lappend(retval, cpathkey);
outer_expr,
sub_member->em_datatype,
sub_eclass->ec_opfamilies,
- 0);
- best_pathkey =
- make_canonical_pathkey(root,
- outer_ec,
- sub_pathkey->pk_opfamily,
- sub_pathkey->pk_strategy,
- sub_pathkey->pk_nulls_first);
+ 0,
+ false);
+
+ /*
+ * If we don't find a matching EC, sub-pathkey isn't
+ * interesting to the outer query
+ */
+ if (outer_ec)
+ best_pathkey =
+ make_canonical_pathkey(root,
+ outer_ec,
+ sub_pathkey->pk_opfamily,
+ sub_pathkey->pk_strategy,
+ sub_pathkey->pk_nulls_first);
}
}
else
outer_expr,
sub_member->em_datatype,
sub_eclass->ec_opfamilies,
- 0);
+ 0,
+ false);
+
+ /*
+ * If we don't find a matching EC, this sub-pathkey isn't
+ * interesting to the outer query
+ */
+ if (!outer_ec)
+ continue;
+
outer_pk = make_canonical_pathkey(root,
outer_ec,
sub_pathkey->pk_opfamily,
sortcl->sortop,
sortcl->nulls_first,
sortcl->tleSortGroupRef,
+ true,
canonicalize);
/* Canonical form eliminates redundant ordering keys */
****************************************************************************/
/*
- * cache_mergeclause_eclasses
- * Make the cached EquivalenceClass links valid in a mergeclause
- * restrictinfo.
+ * initialize_mergeclause_eclasses
+ * Set the EquivalenceClass links in a mergeclause restrictinfo.
*
* RestrictInfo contains fields in which we may cache pointers to
* EquivalenceClasses for the left and right inputs of the mergeclause.
* (If the mergeclause is a true equivalence clause these will be the
- * same EquivalenceClass, otherwise not.)
+ * same EquivalenceClass, otherwise not.) If the mergeclause is either
+ * used to generate an EquivalenceClass, or derived from an EquivalenceClass,
+ * then it's easy to set up the left_ec and right_ec members --- otherwise,
+ * this function should be called to set them up. We will generate new
+ * EquivalenceClauses if necessary to represent the mergeclause's left and
+ * right sides.
+ *
+ * Note this is called before EC merging is complete, so the links won't
+ * necessarily point to canonical ECs. Before they are actually used for
+ * anything, update_mergeclause_eclasses must be called to ensure that
+ * they've been updated to point to canonical ECs.
*/
void
-cache_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
+initialize_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
{
+ Expr *clause = restrictinfo->clause;
+ Oid lefttype,
+ righttype;
+
+ /* Should be a mergeclause ... */
Assert(restrictinfo->mergeopfamilies != NIL);
+ /* ... with links not yet set */
+ Assert(restrictinfo->left_ec == NULL);
+ Assert(restrictinfo->right_ec == NULL);
+
+ /* Need the declared input types of the operator */
+ op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
+
+ /* Find or create a matching EquivalenceClass for each side */
+ restrictinfo->left_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_leftop(clause),
+ lefttype,
+ restrictinfo->mergeopfamilies,
+ 0,
+ true);
+ restrictinfo->right_ec =
+ get_eclass_for_sort_expr(root,
+ (Expr *) get_rightop(clause),
+ righttype,
+ restrictinfo->mergeopfamilies,
+ 0,
+ true);
+}
- /* the cached values should be either both set or both not */
- if (restrictinfo->left_ec == NULL)
- {
- Expr *clause = restrictinfo->clause;
- Oid lefttype,
- righttype;
-
- /* Need the declared input types of the operator */
- op_input_types(((OpExpr *) clause)->opno, &lefttype, &righttype);
-
- /* Find or create a matching EquivalenceClass for each side */
- restrictinfo->left_ec =
- get_eclass_for_sort_expr(root,
- (Expr *) get_leftop(clause),
- lefttype,
- restrictinfo->mergeopfamilies,
- 0);
- restrictinfo->right_ec =
- get_eclass_for_sort_expr(root,
- (Expr *) get_rightop(clause),
- righttype,
- restrictinfo->mergeopfamilies,
- 0);
- }
- else
- Assert(restrictinfo->right_ec != NULL);
+/*
+ * update_mergeclause_eclasses
+ * Make the cached EquivalenceClass links valid in a mergeclause
+ * restrictinfo.
+ *
+ * These pointers should have been set by process_equivalence or
+ * initialize_mergeclause_eclasses, but they might have been set to
+ * non-canonical ECs that got merged later. Chase up to the canonical
+ * merged parent if so.
+ */
+void
+update_mergeclause_eclasses(PlannerInfo *root, RestrictInfo *restrictinfo)
+{
+ /* Should be a merge clause ... */
+ Assert(restrictinfo->mergeopfamilies != NIL);
+ /* ... with pointers already set */
+ Assert(restrictinfo->left_ec != NULL);
+ Assert(restrictinfo->right_ec != NULL);
+
+ /* Chase up to the top as needed */
+ while (restrictinfo->left_ec->ec_merged)
+ restrictinfo->left_ec = restrictinfo->left_ec->ec_merged;
+ while (restrictinfo->right_ec->ec_merged)
+ restrictinfo->right_ec = restrictinfo->right_ec->ec_merged;
}
/*
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
- cache_mergeclause_eclasses(root, rinfo);
+ update_mergeclause_eclasses(root, rinfo);
}
foreach(i, pathkeys)
ListCell *lc2;
/* get the outer eclass */
- cache_mergeclause_eclasses(root, rinfo);
+ update_mergeclause_eclasses(root, rinfo);
if (rinfo->outer_is_left)
oeclass = rinfo->left_ec;
EquivalenceClass *ieclass;
PathKey *pathkey;
- cache_mergeclause_eclasses(root, rinfo);
+ update_mergeclause_eclasses(root, rinfo);
if (rinfo->outer_is_left)
{
if (restrictinfo->mergeopfamilies == NIL)
continue;
- cache_mergeclause_eclasses(root, restrictinfo);
+ update_mergeclause_eclasses(root, restrictinfo);
if (pathkey->pk_eclass == restrictinfo->left_ec ||
pathkey->pk_eclass == restrictinfo->right_ec)
*
* If none of the above hold, pass it off to
* distribute_restrictinfo_to_rels().
+ *
+ * In all cases, it's important to initialize the left_ec and right_ec
+ * fields of a mergejoinable clause, so that all possibly mergejoinable
+ * expressions have representations in EquivalenceClasses. If
+ * process_equivalence is successful, it will take care of that;
+ * otherwise, we have to call initialize_mergeclause_eclasses to do it.
*/
if (restrictinfo->mergeopfamilies)
{
{
if (process_equivalence(root, restrictinfo, below_outer_join))
return;
- /* EC rejected it, so pass to distribute_restrictinfo_to_rels */
+ /* EC rejected it, so set left_ec/right_ec the hard way ... */
+ initialize_mergeclause_eclasses(root, restrictinfo);
+ /* ... and fall through to distribute_restrictinfo_to_rels */
}
else if (maybe_outer_join && restrictinfo->can_join)
{
+ /* we need to set up left_ec/right_ec the hard way */
+ initialize_mergeclause_eclasses(root, restrictinfo);
+ /* now see if it should go to any outer-join lists */
if (bms_is_subset(restrictinfo->left_relids,
outerjoin_nonnullable) &&
!bms_overlap(restrictinfo->right_relids,
restrictinfo);
return;
}
+ /* nope, so fall through to distribute_restrictinfo_to_rels */
+ }
+ else
+ {
+ /* we still need to set up left_ec/right_ec */
+ initialize_mergeclause_eclasses(root, restrictinfo);
}
}
*
* This overlaps the functionality of process_implied_equality(), but we
* must return the RestrictInfo, not push it into the joininfo tree.
+ *
+ * Note: we do not do initialize_mergeclause_eclasses() here. It is
+ * caller's responsibility that left_ec/right_ec be set as necessary.
*/
RestrictInfo *
build_implied_join_equality(Oid opno,
Expr *expr,
Oid expr_datatype,
List *opfamilies,
- Index sortref);
+ Index sortref,
+ bool create_it);
extern void generate_base_implied_equalities(PlannerInfo *root);
extern List *generate_join_implied_equalities(PlannerInfo *root,
RelOptInfo *joinrel,
List *sortclauses,
List *tlist,
bool canonicalize);
-extern void cache_mergeclause_eclasses(PlannerInfo *root,
+extern void initialize_mergeclause_eclasses(PlannerInfo *root,
+ RestrictInfo *restrictinfo);
+extern void update_mergeclause_eclasses(PlannerInfo *root,
RestrictInfo *restrictinfo);
extern List *find_mergeclauses_for_pathkeys(PlannerInfo *root,
List *pathkeys,