From db9f0e1d9a4a0842c814a464cdc9758c3f20b96c Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Mon, 29 Apr 2013 14:49:01 -0400
Subject: [PATCH] Postpone creation of pathkeys lists to fix bug #8049.

This patch gets rid of the concept of, and infrastructure for,
non-canonical PathKeys; we now only ever create canonical pathkey lists.

The need for non-canonical pathkeys came from the desire to have
grouping_planner initialize query_pathkeys and related pathkey lists before
calling query_planner.  However, since query_planner didn't actually *do*
anything with those lists before they'd been made canonical, we can get rid
of the whole mess by just not creating the lists at all until the point
where we formerly canonicalized them.

There are several ways in which we could implement that without making
query_planner itself deal with grouping/sorting features (which are
supposed to be the province of grouping_planner).  I chose to add a
callback function to query_planner's API; other alternatives would have
required adding more fields to PlannerInfo, which while not bad in itself
would create an ABI break for planner-related plugins in the 9.2 release
series.  This still breaks ABI for anything that calls query_planner
directly, but it seems somewhat unlikely that there are any such plugins.

I had originally conceived of this change as merely a step on the way to
fixing bug #8049 from Teun Hoogendoorn; but it turns out that this fixes
that bug all by itself, as per the added regression test.  The reason is
that get_eclass_for_sort_expr now adds the ORDER BY expression at the
end of EquivalenceClass creation, not the start, and so anything that is in
a multi-member EquivalenceClass has already been created with correct
em_nullable_relids.  I suspect that there are related scenarios in
which we still need to teach get_eclass_for_sort_expr to compute correct
nullable_relids, but am not eager to risk destabilizing either 9.2 or 9.3
to fix bugs that are only hypothetical.  So for the moment, do this and
stop here.

Back-patch to 9.2 but not to earlier branches, since they don't exhibit
this bug for lack of join-clause-movement logic that depends on
em_nullable_relids being correct.  (We might have to revisit that choice
if any related bugs turn up.)  In 9.2, don't change the signature of
make_pathkeys_for_sortclauses nor remove canonicalize_pathkeys, so as
not to risk more plugin breakage than we have to.
---
 src/backend/nodes/equalfuncs.c          |  18 +--
 src/backend/optimizer/README            |  21 +--
 src/backend/optimizer/path/equivclass.c |  18 ++-
 src/backend/optimizer/path/pathkeys.c   | 159 ++++---------------
 src/backend/optimizer/plan/planagg.c    |  35 +++--
 src/backend/optimizer/plan/planmain.c   |  52 ++----
 src/backend/optimizer/plan/planner.c    | 201 +++++++++++++-----------
 src/include/nodes/relation.h            |   2 +-
 src/include/optimizer/paths.h           |   4 +-
 src/include/optimizer/planmain.h        |   4 +
 src/test/regress/expected/join.out      |  29 ++++
 src/test/regress/sql/join.sql           |  17 ++
 12 files changed, 246 insertions(+), 314 deletions(-)

diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 7245fa32a0..3f96595e8e 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -728,22 +728,8 @@ _equalFromExpr(const FromExpr *a, const FromExpr *b)
 static bool
 _equalPathKey(const PathKey *a, const PathKey *b)
 {
-	/*
-	 * This is normally used on non-canonicalized PathKeys, so must chase up
-	 * to the topmost merged EquivalenceClass and see if those are the same
-	 * (by pointer equality).
-	 */
-	EquivalenceClass *a_eclass;
-	EquivalenceClass *b_eclass;
-
-	a_eclass = a->pk_eclass;
-	while (a_eclass->ec_merged)
-		a_eclass = a_eclass->ec_merged;
-	b_eclass = b->pk_eclass;
-	while (b_eclass->ec_merged)
-		b_eclass = b_eclass->ec_merged;
-	if (a_eclass != b_eclass)
-		return false;
+	/* We assume pointer equality is sufficient to compare the eclasses */
+	COMPARE_SCALAR_FIELD(pk_eclass);
 	COMPARE_SCALAR_FIELD(pk_opfamily);
 	COMPARE_SCALAR_FIELD(pk_strategy);
 	COMPARE_SCALAR_FIELD(pk_nulls_first);
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index 438c2ca182..751766fb9d 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -589,16 +589,6 @@ since they are easily compared to the pathkeys of a potential candidate
 path.  So, SortGroupClause lists are turned into pathkeys lists for use
 inside the optimizer.
 
-Because we have to generate pathkeys lists from the sort clauses before
-we've finished EquivalenceClass merging, we cannot use the pointer-equality
-method of comparing PathKeys in the earliest stages of the planning
-process.  Instead, we generate "non canonical" PathKeys that reference
-single-element EquivalenceClasses that might get merged later.  After we
-complete EquivalenceClass merging, we replace these with "canonical"
-PathKeys that reference only fully-merged classes, and after that we make
-sure we don't generate more than one copy of each "canonical" PathKey.
-Then it is safe to use pointer comparison on canonical PathKeys.
-
 An additional refinement we can make is to insist that canonical pathkey
 lists (sort orderings) do not mention the same EquivalenceClass more than
 once.  For example, in all these cases the second sort column is redundant,
@@ -651,12 +641,11 @@ mergejoinable clauses found in the quals.  At the end of this process,
 we know all we can know about equivalence of different variables, so
 subsequently there will be no further merging of EquivalenceClasses.
 At that point it is possible to consider the EquivalenceClasses as
-"canonical" and build canonical PathKeys that reference them.  Before
-we reach that point (actually, before entering query_planner at all)
-we also ensure that we have constructed EquivalenceClasses for all the
-expressions used in the query's ORDER BY and related clauses.  These
-classes might or might not get merged together, depending on what we
-find in the quals.
+"canonical" and build canonical PathKeys that reference them.  At this
+time we construct PathKeys for the query's ORDER BY and related clauses.
+(Any ordering expressions that do not appear elsewhere will result in
+the creation of new EquivalenceClasses, but this cannot result in merging
+existing classes, so canonical-ness is not lost.)
 
 Because all the EquivalenceClasses are known before we begin path
 generation, we can use them as a guide to which indexes are of interest:
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 5c4ac066a5..cbb4f5cd95 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -285,11 +285,19 @@ process_equivalence(PlannerInfo *root, RestrictInfo *restrictinfo,
 		}
 
 		/*
-		 * Case 2: need to merge ec1 and ec2.  We add ec2's items to ec1, then
-		 * set ec2's ec_merged link to point to ec1 and remove ec2 from the
-		 * eq_classes list.  We cannot simply delete ec2 because that could
-		 * leave dangling pointers in existing PathKeys.  We leave it behind
-		 * with a link so that the merged EC can be found.
+		 * Case 2: need to merge ec1 and ec2.  This should never happen after
+		 * we've built any canonical pathkeys; if it did, those pathkeys might
+		 * be rendered non-canonical by the merge.
+		 */
+		if (root->canon_pathkeys != NIL)
+			elog(ERROR, "too late to merge equivalence classes");
+
+		/*
+		 * We add ec2's items to ec1, then set ec2's ec_merged link to point
+		 * to ec1 and remove ec2 from the eq_classes list.  We cannot simply
+		 * delete ec2 because that could leave dangling pointers in existing
+		 * PathKeys.  We leave it behind with a link so that the merged EC can
+		 * be found.
 		 */
 		ec1->ec_members = list_concat(ec1->ec_members, ec2->ec_members);
 		ec1->ec_sources = list_concat(ec1->ec_sources, ec2->ec_sources);
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index c52fd1e7af..6724996919 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -28,8 +28,6 @@
 #include "utils/lsyscache.h"
 
 
-static PathKey *makePathKey(EquivalenceClass *eclass, Oid opfamily,
-			int strategy, bool nulls_first);
 static PathKey *make_canonical_pathkey(PlannerInfo *root,
 					   EquivalenceClass *eclass, Oid opfamily,
 					   int strategy, bool nulls_first);
@@ -41,27 +39,6 @@ static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey);
  *		PATHKEY CONSTRUCTION AND REDUNDANCY TESTING
  ****************************************************************************/
 
-/*
- * makePathKey
- *		create a PathKey node
- *
- * This does not promise to create a canonical PathKey, it's merely a
- * convenience routine to build the specified node.
- */
-static PathKey *
-makePathKey(EquivalenceClass *eclass, Oid opfamily,
-			int strategy, bool nulls_first)
-{
-	PathKey    *pk = makeNode(PathKey);
-
-	pk->pk_eclass = eclass;
-	pk->pk_opfamily = opfamily;
-	pk->pk_strategy = strategy;
-	pk->pk_nulls_first = nulls_first;
-
-	return pk;
-}
-
 /*
  * make_canonical_pathkey
  *	  Given the parameters for a PathKey, find any pre-existing matching
@@ -69,7 +46,9 @@ makePathKey(EquivalenceClass *eclass, Oid opfamily,
  *	  entry if there's not one already.
  *
  * Note that this function must not be used until after we have completed
- * merging EquivalenceClasses.
+ * merging EquivalenceClasses.	(We don't try to enforce that here; instead,
+ * equivclass.c will complain if a merge occurs after root->canon_pathkeys
+ * has become nonempty.)
  */
 static PathKey *
 make_canonical_pathkey(PlannerInfo *root,
@@ -100,7 +79,12 @@ make_canonical_pathkey(PlannerInfo *root,
 	 */
 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 
-	pk = makePathKey(eclass, opfamily, strategy, nulls_first);
+	pk = makeNode(PathKey);
+	pk->pk_eclass = eclass;
+	pk->pk_opfamily = opfamily;
+	pk->pk_strategy = strategy;
+	pk->pk_nulls_first = nulls_first;
+
 	root->canon_pathkeys = lappend(root->canon_pathkeys, pk);
 
 	MemoryContextSwitchTo(oldcontext);
@@ -112,8 +96,7 @@ make_canonical_pathkey(PlannerInfo *root,
  * pathkey_is_redundant
  *	   Is a pathkey redundant with one already in the given list?
  *
- * Both the given pathkey and the list members must be canonical for this
- * to work properly.  We detect two cases:
+ * We detect two cases:
  *
  * 1. If the new pathkey's equivalence class contains a constant, and isn't
  * below an outer join, then we can disregard it as a sort key.  An example:
@@ -135,6 +118,12 @@ make_canonical_pathkey(PlannerInfo *root,
  * Note in particular that we need not compare opfamily (all the opfamilies
  * of the EC have the same notion of equality) nor sort direction.
  *
+ * Both the given pathkey and the list members must be canonical for this
+ * to work properly, but that's okay since we no longer ever construct any
+ * non-canonical pathkeys.	(Note: the notion of a pathkey *list* being
+ * canonical includes the additional requirement of no redundant entries,
+ * which is exactly what we are checking for here.)
+ *
  * Because the equivclass.c machinery forms only one copy of any EC per query,
  * pointer comparison is enough to decide whether canonical ECs are the same.
  */
@@ -144,9 +133,6 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
 	EquivalenceClass *new_ec = new_pathkey->pk_eclass;
 	ListCell   *lc;
 
-	/* Assert we've been given canonical pathkeys */
-	Assert(!new_ec->ec_merged);
-
 	/* Check for EC containing a constant --- unconditionally redundant */
 	if (EC_MUST_BE_REDUNDANT(new_ec))
 		return true;
@@ -156,9 +142,6 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
 	{
 		PathKey    *old_pathkey = (PathKey *) lfirst(lc);
 
-		/* Assert we've been given canonical pathkeys */
-		Assert(!old_pathkey->pk_eclass->ec_merged);
-
 		if (new_ec == old_pathkey->pk_eclass)
 			return true;
 	}
@@ -166,57 +149,10 @@ pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
 	return false;
 }
 
-/*
- * canonicalize_pathkeys
- *	   Convert a not-necessarily-canonical pathkeys list to canonical form.
- *
- * Note that this function must not be used until after we have completed
- * merging EquivalenceClasses.
- */
-List *
-canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
-{
-	List	   *new_pathkeys = NIL;
-	ListCell   *l;
-
-	foreach(l, pathkeys)
-	{
-		PathKey    *pathkey = (PathKey *) lfirst(l);
-		EquivalenceClass *eclass;
-		PathKey    *cpathkey;
-
-		/* Find the canonical (merged) EquivalenceClass */
-		eclass = pathkey->pk_eclass;
-		while (eclass->ec_merged)
-			eclass = eclass->ec_merged;
-
-		/*
-		 * If we can tell it's redundant just from the EC, skip.
-		 * pathkey_is_redundant would notice that, but we needn't even bother
-		 * constructing the node...
-		 */
-		if (EC_MUST_BE_REDUNDANT(eclass))
-			continue;
-
-		/* OK, build a canonicalized PathKey struct */
-		cpathkey = make_canonical_pathkey(root,
-										  eclass,
-										  pathkey->pk_opfamily,
-										  pathkey->pk_strategy,
-										  pathkey->pk_nulls_first);
-
-		/* Add to list unless redundant */
-		if (!pathkey_is_redundant(cpathkey, new_pathkeys))
-			new_pathkeys = lappend(new_pathkeys, cpathkey);
-	}
-	return new_pathkeys;
-}
-
 /*
  * make_pathkey_from_sortinfo
  *	  Given an expression and sort-order information, create a PathKey.
- *	  If canonicalize = true, the result is a "canonical" PathKey,
- *	  otherwise not.  (But note it might be redundant anyway.)
+ *	  The result is always a "canonical" PathKey, but it might be redundant.
  *
  * If the PathKey is being generated from a SortGroupClause, sortref should be
  * the SortGroupClause's SortGroupRef; otherwise zero.
@@ -229,9 +165,6 @@ canonicalize_pathkeys(PlannerInfo *root, List *pathkeys)
  * create_it is TRUE if we should create any missing EquivalenceClass
  * needed to represent the sort key.  If it's FALSE, we return NULL if the
  * sort key isn't already present in any EquivalenceClass.
- *
- * canonicalize should always be TRUE after EquivalenceClass merging has
- * been performed, but FALSE if we haven't done EquivalenceClass merging yet.
  */
 static PathKey *
 make_pathkey_from_sortinfo(PlannerInfo *root,
@@ -243,8 +176,7 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
 						   bool nulls_first,
 						   Index sortref,
 						   Relids rel,
-						   bool create_it,
-						   bool canonicalize)
+						   bool create_it)
 {
 	int16		strategy;
 	Oid			equality_op;
@@ -281,11 +213,8 @@ make_pathkey_from_sortinfo(PlannerInfo *root,
 		return NULL;
 
 	/* And finally we can find or create a PathKey node */
-	if (canonicalize)
-		return make_canonical_pathkey(root, eclass, opfamily,
-									  strategy, nulls_first);
-	else
-		return makePathKey(eclass, opfamily, strategy, nulls_first);
+	return make_canonical_pathkey(root, eclass, opfamily,
+								  strategy, nulls_first);
 }
 
 /*
@@ -301,8 +230,7 @@ make_pathkey_from_sortop(PlannerInfo *root,
 						 Oid ordering_op,
 						 bool nulls_first,
 						 Index sortref,
-						 bool create_it,
-						 bool canonicalize)
+						 bool create_it)
 {
 	Oid			opfamily,
 				opcintype,
@@ -327,8 +255,7 @@ make_pathkey_from_sortop(PlannerInfo *root,
 									  nulls_first,
 									  sortref,
 									  NULL,
-									  create_it,
-									  canonicalize);
+									  create_it);
 }
 
 
@@ -341,9 +268,8 @@ make_pathkey_from_sortop(PlannerInfo *root,
  *	  Compare two pathkeys to see if they are equivalent, and if not whether
  *	  one is "better" than the other.
  *
- *	  This function may only be applied to canonicalized pathkey lists.
- *	  In the canonical representation, pathkeys can be checked for equality
- *	  by simple pointer comparison.
+ *	  We assume the pathkeys are canonical, and so they can be checked for
+ *	  equality by simple pointer comparison.
  */
 PathKeysComparison
 compare_pathkeys(List *keys1, List *keys2)
@@ -364,15 +290,6 @@ compare_pathkeys(List *keys1, List *keys2)
 		PathKey    *pathkey1 = (PathKey *) lfirst(key1);
 		PathKey    *pathkey2 = (PathKey *) lfirst(key2);
 
-		/*
-		 * XXX would like to check that we've been given canonicalized input,
-		 * but PlannerInfo not accessible here...
-		 */
-#ifdef NOT_USED
-		Assert(list_member_ptr(root->canon_pathkeys, pathkey1));
-		Assert(list_member_ptr(root->canon_pathkeys, pathkey2));
-#endif
-
 		if (pathkey1 != pathkey2)
 			return PATHKEYS_DIFFERENT;	/* no need to keep looking */
 	}
@@ -414,7 +331,7 @@ pathkeys_contained_in(List *keys1, List *keys2)
  *	  Return NULL if no such path.
  *
  * 'paths' is a list of possible paths that all generate the same relation
- * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'pathkeys' represents a required ordering (in canonical form!)
  * 'required_outer' denotes allowable outer relations for parameterized paths
  * 'cost_criterion' is STARTUP_COST or TOTAL_COST
  */
@@ -455,7 +372,7 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
  * parameter.
  *
  * 'paths' is a list of possible paths that all generate the same relation
- * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'pathkeys' represents a required ordering (in canonical form!)
  * 'required_outer' denotes allowable outer relations for parameterized paths
  * 'fraction' is the fraction of the total tuples expected to be retrieved
  */
@@ -554,8 +471,7 @@ build_index_pathkeys(PlannerInfo *root,
 											  nulls_first,
 											  0,
 											  index->rel->relids,
-											  false,
-											  true);
+											  false);
 
 		/*
 		 * If the sort key isn't already present in any EquivalenceClass, then
@@ -829,12 +745,8 @@ build_join_pathkeys(PlannerInfo *root,
  *		Generate a pathkeys list that represents the sort order specified
  *		by a list of SortGroupClauses
  *
- * If canonicalize is TRUE, the resulting PathKeys are all in canonical form;
- * otherwise not.  canonicalize should always be TRUE after EquivalenceClass
- * merging has been performed, but FALSE if we haven't done EquivalenceClass
- * merging yet.  (We provide this option because grouping_planner() needs to
- * be able to represent requested pathkeys before the equivalence classes have
- * been created for the query.)
+ * The resulting PathKeys are always in canonical form.  (Actually, there
+ * is no longer any code anywhere that creates non-canonical PathKeys.)
  *
  * 'sortclauses' is a list of SortGroupClause nodes
  * 'tlist' is the targetlist to find the referenced tlist entries in
@@ -842,8 +754,7 @@ build_join_pathkeys(PlannerInfo *root,
 List *
 make_pathkeys_for_sortclauses(PlannerInfo *root,
 							  List *sortclauses,
-							  List *tlist,
-							  bool canonicalize)
+							  List *tlist)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;
@@ -861,16 +772,10 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 										   sortcl->sortop,
 										   sortcl->nulls_first,
 										   sortcl->tleSortGroupRef,
-										   true,
-										   canonicalize);
+										   true);
 
 		/* Canonical form eliminates redundant ordering keys */
-		if (canonicalize)
-		{
-			if (!pathkey_is_redundant(pathkey, pathkeys))
-				pathkeys = lappend(pathkeys, pathkey);
-		}
-		else
+		if (!pathkey_is_redundant(pathkey, pathkeys))
 			pathkeys = lappend(pathkeys, pathkey);
 	}
 	return pathkeys;
diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c
index 4007ac4558..5bbfd2377e 100644
--- a/src/backend/optimizer/plan/planagg.c
+++ b/src/backend/optimizer/plan/planagg.c
@@ -48,6 +48,7 @@
 static bool find_minmax_aggs_walker(Node *node, List **context);
 static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 				  Oid eqop, Oid sortop, bool nulls_first);
+static void minmax_qp_callback(PlannerInfo *root, void *extra);
 static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *mminfo);
 static Node *replace_aggs_with_params_mutator(Node *node, PlannerInfo *root);
 static Oid	fetch_agg_sort_op(Oid aggfnoid);
@@ -445,26 +446,12 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 										   Int64GetDatum(1), false,
 										   FLOAT8PASSBYVAL);
 
-	/*
-	 * Set up requested pathkeys.
-	 */
-	subroot->group_pathkeys = NIL;
-	subroot->window_pathkeys = NIL;
-	subroot->distinct_pathkeys = NIL;
-
-	subroot->sort_pathkeys =
-		make_pathkeys_for_sortclauses(subroot,
-									  parse->sortClause,
-									  parse->targetList,
-									  false);
-
-	subroot->query_pathkeys = subroot->sort_pathkeys;
-
 	/*
 	 * Generate the best paths for this query, telling query_planner that we
 	 * have LIMIT 1.
 	 */
 	query_planner(subroot, parse->targetList, 1.0, 1.0,
+				  minmax_qp_callback, NULL,
 				  &cheapest_path, &sorted_path, &dNumGroups);
 
 	/*
@@ -505,6 +492,24 @@ build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
 	return true;
 }
 
+/*
+ * Compute query_pathkeys and other pathkeys during plan generation
+ */
+static void
+minmax_qp_callback(PlannerInfo *root, void *extra)
+{
+	root->group_pathkeys = NIL;
+	root->window_pathkeys = NIL;
+	root->distinct_pathkeys = NIL;
+
+	root->sort_pathkeys =
+		make_pathkeys_for_sortclauses(root,
+									  root->parse->sortClause,
+									  root->parse->targetList);
+
+	root->query_pathkeys = root->sort_pathkeys;
+}
+
 /*
  * Construct a suitable plan for a converted aggregate query
  */
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index a919914f94..42a98945a3 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -30,10 +30,6 @@
 #include "utils/selfuncs.h"
 
 
-/* Local functions */
-static void canonicalize_all_pathkeys(PlannerInfo *root);
-
-
 /*
  * query_planner
  *	  Generate a path (that is, a simplified plan) for a basic query,
@@ -55,6 +51,8 @@ static void canonicalize_all_pathkeys(PlannerInfo *root);
  * tuple_fraction is the fraction of tuples we expect will be retrieved
  * limit_tuples is a hard limit on number of tuples to retrieve,
  *		or -1 if no limit
+ * qp_callback is a function to compute query_pathkeys once it's safe to do so
+ * qp_extra is optional extra data to pass to qp_callback
  *
  * Output parameters:
  * *cheapest_path receives the overall-cheapest path for the query
@@ -63,18 +61,11 @@ static void canonicalize_all_pathkeys(PlannerInfo *root);
  * *num_groups receives the estimated number of groups, or 1 if query
  *				does not use grouping
  *
- * Note: the PlannerInfo node also includes a query_pathkeys field, which is
- * both an input and an output of query_planner().	The input value signals
- * query_planner that the indicated sort order is wanted in the final output
- * plan.  But this value has not yet been "canonicalized", since the needed
- * info does not get computed until we scan the qual clauses.  We canonicalize
- * it as soon as that task is done.  (The main reason query_pathkeys is a
- * PlannerInfo field and not a passed parameter is that the low-level routines
- * in indxpath.c need to see it.)
- *
- * Note: the PlannerInfo node includes other pathkeys fields besides
- * query_pathkeys, all of which need to be canonicalized once the info is
- * available.  See canonicalize_all_pathkeys.
+ * Note: the PlannerInfo node also includes a query_pathkeys field, which
+ * tells query_planner the sort order that is desired in the final output
+ * plan.  This value is *not* available at call time, but is computed by
+ * qp_callback once we have completed merging the query's equivalence classes.
+ * (We cannot construct canonical pathkeys until that's done.)
  *
  * tuple_fraction is interpreted as follows:
  *	  0: expect all tuples to be retrieved (normal case)
@@ -89,6 +80,7 @@ static void canonicalize_all_pathkeys(PlannerInfo *root);
 void
 query_planner(PlannerInfo *root, List *tlist,
 			  double tuple_fraction, double limit_tuples,
+			  query_pathkeys_callback qp_callback, void *qp_extra,
 			  Path **cheapest_path, Path **sorted_path,
 			  double *num_groups)
 {
@@ -118,11 +110,11 @@ query_planner(PlannerInfo *root, List *tlist,
 		*sorted_path = NULL;
 
 		/*
-		 * We still are required to canonicalize any pathkeys, in case it's
-		 * something like "SELECT 2+2 ORDER BY 1".
+		 * We still are required to call qp_callback, in case it's something
+		 * like "SELECT 2+2 ORDER BY 1".
 		 */
 		root->canon_pathkeys = NIL;
-		canonicalize_all_pathkeys(root);
+		(*qp_callback) (root, qp_extra);
 		return;
 	}
 
@@ -205,10 +197,10 @@ query_planner(PlannerInfo *root, List *tlist,
 
 	/*
 	 * We have completed merging equivalence sets, so it's now possible to
-	 * convert previously generated pathkeys (in particular, the requested
-	 * query_pathkeys) to canonical form.
+	 * generate pathkeys in canonical form; so compute query_pathkeys and
+	 * other pathkeys fields in PlannerInfo.
 	 */
-	canonicalize_all_pathkeys(root);
+	(*qp_callback) (root, qp_extra);
 
 	/*
 	 * Examine any "placeholder" expressions generated during subquery pullup.
@@ -429,19 +421,3 @@ query_planner(PlannerInfo *root, List *tlist,
 	*cheapest_path = cheapestpath;
 	*sorted_path = sortedpath;
 }
-
-
-/*
- * canonicalize_all_pathkeys
- *		Canonicalize all pathkeys that were generated before entering
- *		query_planner and then stashed in PlannerInfo.
- */
-static void
-canonicalize_all_pathkeys(PlannerInfo *root)
-{
-	root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
-	root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
-	root->window_pathkeys = canonicalize_pathkeys(root, root->window_pathkeys);
-	root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
-	root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
-}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index db91b8277d..df274fe783 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -59,7 +59,14 @@ planner_hook_type planner_hook = NULL;
 #define EXPRKIND_APPINFO		7
 #define EXPRKIND_PHV			8
 
+/* Passthrough data for standard_qp_callback */
+typedef struct
+{
+	List	   *tlist;			/* preprocessed query targetlist */
+	List	   *activeWindows;	/* active windows, if any */
+} standard_qp_extra;
 
+/* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
 static Plan *inheritance_planner(PlannerInfo *root);
@@ -70,6 +77,7 @@ static double preprocess_limit(PlannerInfo *root,
 				 int64 *offset_est, int64 *count_est);
 static bool limit_needed(Query *parse);
 static void preprocess_groupclause(PlannerInfo *root);
+static void standard_qp_callback(PlannerInfo *root, void *extra);
 static bool choose_hashed_grouping(PlannerInfo *root,
 					   double tuple_fraction, double limit_tuples,
 					   double path_rows, int path_width,
@@ -94,7 +102,7 @@ static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
 static List *make_windowInputTargetList(PlannerInfo *root,
 						   List *tlist, List *activeWindows);
 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
-						 List *tlist, bool canonicalize);
+						 List *tlist);
 static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
 						   List *tlist,
 						   int numSortCols, AttrNumber *sortColIdx,
@@ -1052,8 +1060,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		 */
 		current_pathkeys = make_pathkeys_for_sortclauses(root,
 														 set_sortclauses,
-													 result_plan->targetlist,
-														 true);
+													 result_plan->targetlist);
 
 		/*
 		 * We should not need to call preprocess_targetlist, since we must be
@@ -1082,8 +1089,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		Assert(parse->distinctClause == NIL);
 		root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
 															parse->sortClause,
-															tlist,
-															true);
+															tlist);
 	}
 	else
 	{
@@ -1092,6 +1098,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		double		sub_limit_tuples;
 		AttrNumber *groupColIdx = NULL;
 		bool		need_tlist_eval = true;
+		standard_qp_extra qp_extra;
 		Path	   *cheapest_path;
 		Path	   *sorted_path;
 		Path	   *best_path;
@@ -1167,82 +1174,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 			preprocess_minmax_aggregates(root, tlist);
 		}
 
-		/*
-		 * Calculate pathkeys that represent grouping/ordering requirements.
-		 * Stash them in PlannerInfo so that query_planner can canonicalize
-		 * them after EquivalenceClasses have been formed.	The sortClause is
-		 * certainly sort-able, but GROUP BY and DISTINCT might not be, in
-		 * which case we just leave their pathkeys empty.
-		 */
-		if (parse->groupClause &&
-			grouping_is_sortable(parse->groupClause))
-			root->group_pathkeys =
-				make_pathkeys_for_sortclauses(root,
-											  parse->groupClause,
-											  tlist,
-											  false);
-		else
-			root->group_pathkeys = NIL;
-
-		/* We consider only the first (bottom) window in pathkeys logic */
-		if (activeWindows != NIL)
-		{
-			WindowClause *wc = (WindowClause *) linitial(activeWindows);
-
-			root->window_pathkeys = make_pathkeys_for_window(root,
-															 wc,
-															 tlist,
-															 false);
-		}
-		else
-			root->window_pathkeys = NIL;
-
-		if (parse->distinctClause &&
-			grouping_is_sortable(parse->distinctClause))
-			root->distinct_pathkeys =
-				make_pathkeys_for_sortclauses(root,
-											  parse->distinctClause,
-											  tlist,
-											  false);
-		else
-			root->distinct_pathkeys = NIL;
-
-		root->sort_pathkeys =
-			make_pathkeys_for_sortclauses(root,
-										  parse->sortClause,
-										  tlist,
-										  false);
-
-		/*
-		 * Figure out whether we want a sorted result from query_planner.
-		 *
-		 * If we have a sortable GROUP BY clause, then we want a result sorted
-		 * properly for grouping.  Otherwise, if we have window functions to
-		 * evaluate, we try to sort for the first window.  Otherwise, if
-		 * there's a sortable DISTINCT clause that's more rigorous than the
-		 * ORDER BY clause, we try to produce output that's sufficiently well
-		 * sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
-		 * clause, we want to sort by the ORDER BY clause.
-		 *
-		 * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
-		 * superset of GROUP BY, it would be tempting to request sort by ORDER
-		 * BY --- but that might just leave us failing to exploit an available
-		 * sort order at all.  Needs more thought.	The choice for DISTINCT
-		 * versus ORDER BY is much easier, since we know that the parser
-		 * ensured that one is a superset of the other.
-		 */
-		if (root->group_pathkeys)
-			root->query_pathkeys = root->group_pathkeys;
-		else if (root->window_pathkeys)
-			root->query_pathkeys = root->window_pathkeys;
-		else if (list_length(root->distinct_pathkeys) >
-				 list_length(root->sort_pathkeys))
-			root->query_pathkeys = root->distinct_pathkeys;
-		else if (root->sort_pathkeys)
-			root->query_pathkeys = root->sort_pathkeys;
-		else
-			root->query_pathkeys = NIL;
-
 		/*
 		 * Figure out whether there's a hard limit on the number of rows that
 		 * query_planner's result subplan needs to return.  Even if we know a
@@ -1258,13 +1189,19 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		else
 			sub_limit_tuples = limit_tuples;
 
+		/* Set up data needed by standard_qp_callback */
+		qp_extra.tlist = tlist;
+		qp_extra.activeWindows = activeWindows;
+
 		/*
 		 * Generate the best unsorted and presorted paths for this Query (but
-		 * note there may not be any presorted path).  query_planner will also
-		 * estimate the number of groups in the query, and canonicalize all
-		 * the pathkeys.
+		 * note there may not be any presorted path).  We also generate (in
+		 * standard_qp_callback) pathkey representations of the query's sort
+		 * clause, distinct clause, etc.  query_planner will also estimate the
+		 * number of groups in the query.
 		 */
 		query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
+					  standard_qp_callback, &qp_extra,
 					  &cheapest_path, &sorted_path, &dNumGroups);
 
 		/*
@@ -1597,8 +1534,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 
 				window_pathkeys = make_pathkeys_for_window(root,
 														   wc,
-														   tlist,
-														   true);
+														   tlist);
 
 				/*
 				 * This is a bit tricky: we build a sort node even if we don't
@@ -2439,6 +2375,88 @@ preprocess_groupclause(PlannerInfo *root)
 	parse->groupClause = new_groupclause;
 }
 
+/*
+ * query_planner callback: compute query_pathkeys and related pathkey lists
+ */
+static void
+standard_qp_callback(PlannerInfo *root, void *extra)
+{
+	Query	   *parse = root->parse;
+	standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
+	List	   *tlist = qp_extra->tlist;
+	List	   *activeWindows = qp_extra->activeWindows;
+
+	/*
+	 * Calculate pathkeys that represent grouping/ordering requirements.  The
+	 * sortClause is certainly sortable, but GROUP BY and DISTINCT might not
+	 * be, in which case we just leave their pathkeys empty.
+	 */
+	if (parse->groupClause &&
+		grouping_is_sortable(parse->groupClause))
+		root->group_pathkeys =
+			make_pathkeys_for_sortclauses(root,
+										  parse->groupClause,
+										  tlist);
+	else
+		root->group_pathkeys = NIL;
+
+	/* We consider only the first (bottom) window in pathkeys logic */
+	if (activeWindows != NIL)
+	{
+		WindowClause *wc = (WindowClause *) linitial(activeWindows);
+
+		root->window_pathkeys = make_pathkeys_for_window(root,
+														 wc,
+														 tlist);
+	}
+	else
+		root->window_pathkeys = NIL;
+
+	if (parse->distinctClause &&
+		grouping_is_sortable(parse->distinctClause))
+		root->distinct_pathkeys =
+			make_pathkeys_for_sortclauses(root,
+										  parse->distinctClause,
+										  tlist);
+	else
+		root->distinct_pathkeys = NIL;
+
+	root->sort_pathkeys =
+		make_pathkeys_for_sortclauses(root,
+									  parse->sortClause,
+									  tlist);
+
+	/*
+	 * Figure out whether we want a sorted result from query_planner.
+	 *
+	 * If we have a sortable GROUP BY clause, then we want a result sorted
+	 * properly for grouping.  Otherwise, if we have window functions to
+	 * evaluate, we try to sort for the first window.  Otherwise, if there's a
+	 * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
+	 * we try to produce output that's sufficiently well sorted for the
+	 * DISTINCT.  Otherwise, if there is an ORDER BY clause, we want to sort
+	 * by the ORDER BY clause.
+	 *
+	 * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
+	 * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
+	 * that might just leave us failing to exploit an available sort order at
+	 * all.  Needs more thought.  The choice for DISTINCT versus ORDER BY is
+	 * much easier, since we know that the parser ensured that one is a
+	 * superset of the other.
+	 */
+	if (root->group_pathkeys)
+		root->query_pathkeys = root->group_pathkeys;
+	else if (root->window_pathkeys)
+		root->query_pathkeys = root->window_pathkeys;
+	else if (list_length(root->distinct_pathkeys) >
+			 list_length(root->sort_pathkeys))
+		root->query_pathkeys = root->distinct_pathkeys;
+	else if (root->sort_pathkeys)
+		root->query_pathkeys = root->sort_pathkeys;
+	else
+		root->query_pathkeys = NIL;		/* no sorted result wanted */
+}
+
 /*
  * choose_hashed_grouping - should we use hashed grouping?
  *
@@ -3235,7 +3253,7 @@ make_windowInputTargetList(PlannerInfo *root,
  */
 static List *
 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
-						 List *tlist, bool canonicalize)
+						 List *tlist)
 {
 	List	   *window_pathkeys;
 	List	   *window_sortclauses;
@@ -3257,8 +3275,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 									 list_copy(wc->orderClause));
 	window_pathkeys = make_pathkeys_for_sortclauses(root,
 													window_sortclauses,
-													tlist,
-													canonicalize);
+													tlist);
 	list_free(window_sortclauses);
 	return window_pathkeys;
 }
@@ -3336,8 +3353,7 @@ get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
 			sortclauses = lappend(sortclauses, sgc);
 			new_pathkeys = make_pathkeys_for_sortclauses(root,
 														 sortclauses,
-														 tlist,
-														 true);
+														 tlist);
 			if (list_length(new_pathkeys) > list_length(pathkeys))
 			{
 				/* this sort clause is actually significant */
@@ -3355,8 +3371,7 @@ get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
 			sortclauses = lappend(sortclauses, sgc);
 			new_pathkeys = make_pathkeys_for_sortclauses(root,
 														 sortclauses,
-														 tlist,
-														 true);
+														 tlist);
 			if (list_length(new_pathkeys) > list_length(pathkeys))
 			{
 				/* this sort clause is actually significant */
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index f34f6d5a99..15407e6a4b 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -206,7 +206,7 @@ typedef struct PlannerInfo
 	List	   *placeholder_list;		/* list of PlaceHolderInfos */
 
 	List	   *query_pathkeys; /* desired pathkeys for query_planner(), and
-								 * actual pathkeys afterwards */
+								 * actual pathkeys after planning */
 
 	List	   *group_pathkeys; /* groupClause pathkeys, if any */
 	List	   *window_pathkeys;	/* pathkeys of bottom window, if any */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 88ab4630fe..9ef93c70c6 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -154,7 +154,6 @@ typedef enum
 	PATHKEYS_DIFFERENT			/* neither pathkey includes the other */
 } PathKeysComparison;
 
-extern List *canonicalize_pathkeys(PlannerInfo *root, List *pathkeys);
 extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
 extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
@@ -174,8 +173,7 @@ extern List *build_join_pathkeys(PlannerInfo *root,
 					List *outer_pathkeys);
 extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
 							  List *sortclauses,
-							  List *tlist,
-							  bool canonicalize);
+							  List *tlist);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 								RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 16d685846e..33eaf32627 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -21,11 +21,15 @@
 #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1
 extern double cursor_tuple_fraction;
 
+/* query_planner callback to compute query_pathkeys */
+typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
+
 /*
  * prototypes for plan/planmain.c
  */
 extern void query_planner(PlannerInfo *root, List *tlist,
 			  double tuple_fraction, double limit_tuples,
+			  query_pathkeys_callback qp_callback, void *qp_extra,
 			  Path **cheapest_path, Path **sorted_path,
 			  double *num_groups);
 
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 3421a559f2..31c2a320a6 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2826,6 +2826,35 @@ select b.unique1 from
         
 (5 rows)
 
+explain (costs off)
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+                           QUERY PLAN                            
+-----------------------------------------------------------------
+ Nested Loop Left Join
+   Filter: ((COALESCE(tenk1.unique1, (-1)) + int8_tbl.q1) = 122)
+   ->  Seq Scan on int8_tbl
+   ->  Index Scan using tenk1_unique2 on tenk1
+         Index Cond: (int8_tbl.q2 = unique2)
+(5 rows)
+
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+ unique1 | q1  | fault 
+---------+-----+-------
+         | 123 |   122
+(1 row)
+
 --
 -- test handling of potential equivalence clauses above outer joins
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 6f51b85327..656766acd3 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -749,6 +749,23 @@ select b.unique1 from
   right join int4_tbl i2 on i2.f1 = b.tenthous
   order by 1;
 
+explain (costs off)
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+
+select * from
+(
+  select unique1, q1, coalesce(unique1, -1) + q1 as fault
+  from int8_tbl left join tenk1 on (q2 = unique2)
+) ss
+where fault = 122
+order by fault;
+
 --
 -- test handling of potential equivalence clauses above outer joins
 --
-- 
2.40.0