From 9e8b99420fe5f80495ada8dc50aeb7b954b33093 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 8 Mar 2016 22:32:03 -0500
Subject: [PATCH] Improve handling of group-column indexes in GroupingSetsPath.

Instead of having planner.c compute a groupColIdx array and store it in
GroupingSetsPaths, make create_groupingsets_plan() find the grouping
columns by searching in the child plan node's tlist.  Although that's
probably a bit slower for create_groupingsets_plan(), it's more like
the way every other plan node type does this, and it provides positive
confirmation that we know which child output columns we're supposed to be
grouping on.  (Indeed, looking at this now, I'm not at all sure that it
wasn't broken before, because create_groupingsets_plan() isn't demanding
an exact tlist match from its child node.)  Also, this allows substantial
simplification in planner.c, because it no longer needs to compute the
groupColIdx array at all; no other cases were using it.

I'd intended to put off this refactoring until later (like 9.7), but
in view of the likely bug fix and the need to rationalize planner.c's
tlist handling so we can do something sane with Konstantin Knizhnik's
function-evaluation-postponement patch, I think it can't wait.
---
 src/backend/nodes/outfuncs.c            |   1 -
 src/backend/optimizer/plan/createplan.c |  12 +--
 src/backend/optimizer/plan/planner.c    | 130 +++++-------------------
 src/backend/optimizer/util/pathnode.c   |   5 +-
 src/backend/optimizer/util/tlist.c      |  20 ++++
 src/include/nodes/relation.h            |   1 -
 src/include/optimizer/pathnode.h        |   1 -
 src/include/optimizer/tlist.h           |   2 +
 8 files changed, 57 insertions(+), 115 deletions(-)

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 3119b9ea01..eb0fc1e121 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1875,7 +1875,6 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node)
 	_outPathInfo(str, (const Path *) node);
 
 	WRITE_NODE_FIELD(subpath);
-	/* we don't bother to print groupColIdx */
 	WRITE_NODE_FIELD(rollup_groupclauses);
 	WRITE_NODE_FIELD(rollup_lists);
 	WRITE_NODE_FIELD(qual);
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 3024ff9f78..0c2d593499 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1640,13 +1640,11 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 {
 	Agg		   *plan;
 	Plan	   *subplan;
-	AttrNumber *groupColIdx = best_path->groupColIdx;
 	List	   *rollup_groupclauses = best_path->rollup_groupclauses;
 	List	   *rollup_lists = best_path->rollup_lists;
 	AttrNumber *grouping_map;
 	int			maxref;
 	List	   *chain;
-	int			i;
 	ListCell   *lc,
 			   *lc2;
 
@@ -1662,8 +1660,9 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 	subplan = create_plan_recurse(root, best_path->subpath, CP_LABEL_TLIST);
 
 	/*
-	 * Compute the mapping from tleSortGroupRef to column index.  First,
-	 * identify max SortGroupRef in groupClause, for array sizing.
+	 * Compute the mapping from tleSortGroupRef to column index in the child's
+	 * tlist.  First, identify max SortGroupRef in groupClause, for array
+	 * sizing.
 	 */
 	maxref = 0;
 	foreach(lc, root->parse->groupClause)
@@ -1676,12 +1675,13 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 
 	grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber));
 
-	i = 0;
+	/* Now look up the column numbers in the child's tlist */
 	foreach(lc, root->parse->groupClause)
 	{
 		SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
+		TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist);
 
-		grouping_map[gc->tleSortGroupRef] = groupColIdx[i++];
+		grouping_map[gc->tleSortGroupRef] = tle->resno;
 	}
 
 	/*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index cbeceeb1fb..896f73d3e4 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -104,7 +104,6 @@ static double get_number_of_groups(PlannerInfo *root,
 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
-					  AttrNumber *groupColIdx,
 					  List *rollup_lists,
 					  List *rollup_groupclauses);
 static RelOptInfo *create_window_paths(PlannerInfo *root,
@@ -125,9 +124,7 @@ static RelOptInfo *create_distinct_paths(PlannerInfo *root,
 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
 					 RelOptInfo *input_rel,
 					 double limit_tuples);
-static PathTarget *make_scanjoin_target(PlannerInfo *root, List *tlist,
-					 AttrNumber **groupColIdx);
-static int	get_grouping_column_index(Query *parse, TargetEntry *tle);
+static PathTarget *make_scanjoin_target(PlannerInfo *root, List *tlist);
 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
 static List *make_windowInputTargetList(PlannerInfo *root,
@@ -1462,7 +1459,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	{
 		/* No set operations, do regular planning */
 		PathTarget *sub_target;
-		AttrNumber *groupColIdx;
 		double		tlist_rows;
 		List	   *grouping_tlist;
 		WindowFuncLists *wflists = NULL;
@@ -1656,8 +1652,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 		 * because the target width estimates can use per-Var width numbers
 		 * that were obtained within query_planner().
 		 */
-		sub_target = make_scanjoin_target(root, tlist,
-										  &groupColIdx);
+		sub_target = make_scanjoin_target(root, tlist);
 
 		/*
 		 * Forcibly apply that tlist to all the Paths for the scan/join rel.
@@ -1714,7 +1709,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 												current_rel,
 												create_pathtarget(root,
 															 grouping_tlist),
-												groupColIdx,
 												rollup_lists,
 												rollup_groupclauses);
 		}
@@ -3077,7 +3071,6 @@ get_number_of_groups(PlannerInfo *root,
  *
  * input_rel: contains the source-data Paths
  * target: the pathtarget for the result Paths to compute
- * groupColIdx: array of indexes of grouping columns in the source data
  * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
  * rollup_groupclauses: list of grouping clauses for grouping sets,
  *		or NIL if not doing grouping sets
@@ -3092,7 +3085,6 @@ static RelOptInfo *
 create_grouping_paths(PlannerInfo *root,
 					  RelOptInfo *input_rel,
 					  PathTarget *target,
-					  AttrNumber *groupColIdx,
 					  List *rollup_lists,
 					  List *rollup_groupclauses)
 {
@@ -3236,7 +3228,6 @@ create_grouping_paths(PlannerInfo *root,
 													  path,
 													  target,
 												  (List *) parse->havingQual,
-													  groupColIdx,
 													  rollup_lists,
 													  rollup_groupclauses,
 													  &agg_costs,
@@ -3766,24 +3757,19 @@ create_ordered_paths(PlannerInfo *root,
  * into PathTarget format, which is more compact and includes cost/width.
  *
  * 'tlist' is the query's target list.
- * 'groupColIdx' receives an array of column numbers for the GROUP BY
- *			expressions (if there are any) in the returned target list.
  *
  * The result is the PathTarget to be applied to the Paths returned from
  * query_planner().
  */
 static PathTarget *
 make_scanjoin_target(PlannerInfo *root,
-					 List *tlist,
-					 AttrNumber **groupColIdx)
+					 List *tlist)
 {
 	Query	   *parse = root->parse;
 	List	   *sub_tlist;
 	List	   *non_group_cols;
 	List	   *non_group_vars;
-	int			numCols;
-
-	*groupColIdx = NULL;
+	ListCell   *tl;
 
 	/*
 	 * If we're not grouping or aggregating or windowing, there's nothing to
@@ -3800,64 +3786,35 @@ make_scanjoin_target(PlannerInfo *root,
 	sub_tlist = NIL;
 	non_group_cols = NIL;
 
-	numCols = list_length(parse->groupClause);
-	if (numCols > 0)
+	foreach(tl, tlist)
 	{
-		/*
-		 * If grouping, create sub_tlist entries for all GROUP BY columns, and
-		 * make an array showing where the group columns are in the sub_tlist.
-		 *
-		 * Note: with this implementation, the array entries will always be
-		 * 1..N, but we don't want callers to assume that.
-		 */
-		AttrNumber *grpColIdx;
-		ListCell   *tl;
-
-		grpColIdx = (AttrNumber *) palloc0(sizeof(AttrNumber) * numCols);
-		*groupColIdx = grpColIdx;
+		TargetEntry *tle = (TargetEntry *) lfirst(tl);
 
-		foreach(tl, tlist)
+		if (tle->ressortgroupref && parse->groupClause &&
+			get_sortgroupref_clause_noerr(tle->ressortgroupref,
+										  parse->groupClause) != NULL)
 		{
-			TargetEntry *tle = (TargetEntry *) lfirst(tl);
-			int			colno;
-
-			colno = get_grouping_column_index(parse, tle);
-			if (colno >= 0)
-			{
-				/*
-				 * It's a grouping column, so add it to the result tlist and
-				 * remember its resno in grpColIdx[].
-				 */
-				TargetEntry *newtle;
-
-				newtle = makeTargetEntry(tle->expr,
-										 list_length(sub_tlist) + 1,
-										 NULL,
-										 false);
-				newtle->ressortgroupref = tle->ressortgroupref;
-				sub_tlist = lappend(sub_tlist, newtle);
+			/*
+			 * It's a grouping column, so add it to the result tlist as-is.
+			 */
+			TargetEntry *newtle;
 
-				Assert(grpColIdx[colno] == 0);	/* no dups expected */
-				grpColIdx[colno] = newtle->resno;
-			}
-			else
-			{
-				/*
-				 * Non-grouping column, so just remember the expression for
-				 * later call to pull_var_clause.  There's no need for
-				 * pull_var_clause to examine the TargetEntry node itself.
-				 */
-				non_group_cols = lappend(non_group_cols, tle->expr);
-			}
+			newtle = makeTargetEntry(tle->expr,
+									 list_length(sub_tlist) + 1,
+									 NULL,
+									 false);
+			newtle->ressortgroupref = tle->ressortgroupref;
+			sub_tlist = lappend(sub_tlist, newtle);
+		}
+		else
+		{
+			/*
+			 * Non-grouping column, so just remember the expression for later
+			 * call to pull_var_clause.  There's no need for pull_var_clause
+			 * to examine the TargetEntry node itself.
+			 */
+			non_group_cols = lappend(non_group_cols, tle->expr);
 		}
-	}
-	else
-	{
-		/*
-		 * With no grouping columns, just pass whole tlist to pull_var_clause.
-		 * Need (shallow) copy to avoid damaging input tlist below.
-		 */
-		non_group_cols = list_copy(tlist);
 	}
 
 	/*
@@ -3886,37 +3843,6 @@ make_scanjoin_target(PlannerInfo *root,
 	return create_pathtarget(root, sub_tlist);
 }
 
-/*
- * get_grouping_column_index
- *		Get the GROUP BY column position, if any, of a targetlist entry.
- *
- * Returns the index (counting from 0) of the TLE in the GROUP BY list, or -1
- * if it's not a grouping column.  Note: the result is unique because the
- * parser won't make multiple groupClause entries for the same TLE.
- */
-static int
-get_grouping_column_index(Query *parse, TargetEntry *tle)
-{
-	int			colno = 0;
-	Index		ressortgroupref = tle->ressortgroupref;
-	ListCell   *gl;
-
-	/* No need to search groupClause if TLE hasn't got a sortgroupref */
-	if (ressortgroupref == 0)
-		return -1;
-
-	foreach(gl, parse->groupClause)
-	{
-		SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
-
-		if (grpcl->tleSortGroupRef == ressortgroupref)
-			return colno;
-		colno++;
-	}
-
-	return -1;
-}
-
 /*
  * postprocess_setop_tlist
  *	  Fix up targetlist returned by plan_set_operations().
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index fe5e830385..6e7980041f 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2438,13 +2438,12 @@ create_agg_path(PlannerInfo *root,
  *
  * GroupingSetsPath represents sorted grouping with one or more grouping sets.
  * The input path's result must be sorted to match the last entry in
- * rollup_groupclauses, and groupColIdx[] identifies its sort columns.
+ * rollup_groupclauses.
  *
  * 'rel' is the parent relation associated with the result
  * 'subpath' is the path representing the source of data
  * 'target' is the PathTarget to be computed
  * 'having_qual' is the HAVING quals if any
- * 'groupColIdx' is an array of indexes of grouping columns in the source data
  * 'rollup_lists' is a list of grouping sets
  * 'rollup_groupclauses' is a list of grouping clauses for grouping sets
  * 'agg_costs' contains cost info about the aggregate functions to be computed
@@ -2456,7 +2455,6 @@ create_groupingsets_path(PlannerInfo *root,
 						 Path *subpath,
 						 PathTarget *target,
 						 List *having_qual,
-						 AttrNumber *groupColIdx,
 						 List *rollup_lists,
 						 List *rollup_groupclauses,
 						 const AggClauseCosts *agg_costs,
@@ -2487,7 +2485,6 @@ create_groupingsets_path(PlannerInfo *root,
 	else
 		pathnode->path.pathkeys = NIL;
 
-	pathnode->groupColIdx = groupColIdx;
 	pathnode->rollup_groupclauses = rollup_groupclauses;
 	pathnode->rollup_lists = rollup_lists;
 	pathnode->qual = having_qual;
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 2e90ecb4a6..c51642fde5 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -444,6 +444,26 @@ get_sortgroupref_clause(Index sortref, List *clauses)
 	return NULL;				/* keep compiler quiet */
 }
 
+/*
+ * get_sortgroupref_clause_noerr
+ *		As above, but return NULL rather than throwing an error if not found.
+ */
+SortGroupClause *
+get_sortgroupref_clause_noerr(Index sortref, List *clauses)
+{
+	ListCell   *l;
+
+	foreach(l, clauses)
+	{
+		SortGroupClause *cl = (SortGroupClause *) lfirst(l);
+
+		if (cl->tleSortGroupRef == sortref)
+			return cl;
+	}
+
+	return NULL;
+}
+
 /*
  * extract_grouping_ops - make an array of the equality operator OIDs
  *		for a SortGroupClause list
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 098a48690f..641728bb0f 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1311,7 +1311,6 @@ typedef struct GroupingSetsPath
 {
 	Path		path;
 	Path	   *subpath;		/* path representing input source */
-	AttrNumber *groupColIdx;	/* grouping col indexes */
 	List	   *rollup_groupclauses;	/* list of lists of SortGroupClause's */
 	List	   *rollup_lists;	/* parallel list of lists of grouping sets */
 	List	   *qual;			/* quals (HAVING quals), if any */
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index df4be93bae..3007adb8c2 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -173,7 +173,6 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
 						 Path *subpath,
 						 PathTarget *target,
 						 List *having_qual,
-						 AttrNumber *groupColIdx,
 						 List *rollup_lists,
 						 List *rollup_groupclauses,
 						 const AggClauseCosts *agg_costs,
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index fc6bc088b1..4493ff9e27 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -46,6 +46,8 @@ extern List *get_sortgrouplist_exprs(List *sgClauses,
 
 extern SortGroupClause *get_sortgroupref_clause(Index sortref,
 						List *clauses);
+extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref,
+							  List *clauses);
 
 extern Oid *extract_grouping_ops(List *groupClause);
 extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
-- 
2.49.0