From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Tue, 31 Jul 2001 17:56:31 +0000 (+0000)
Subject: Fix optimizer to not try to push WHERE clauses down into a sub-SELECT that
X-Git-Tag: REL7_2_BETA1~798
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=421467cdc8231a9da7ae6116ce6030d7d756f137;p=postgresql

Fix optimizer to not try to push WHERE clauses down into a sub-SELECT that
has a DISTINCT ON clause, per bug report from Anthony Wood.  While at it,
improve the DISTINCT-ON-clause recognizer routine to not be fooled by out-
of-order DISTINCT lists.
---

diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index f378486c83..8a6e9b9374 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.77 2001/07/16 17:57:02 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.78 2001/07/31 17:56:30 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -296,8 +296,12 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
 	 * the quals down into the component queries of the setop, but getting it
 	 * right seems nontrivial.  Work on this later.)
 	 *
-	 * 2. If the subquery has a LIMIT clause we must not push down any quals,
-	 * since that could change the set of rows returned.
+	 * 2. If the subquery has a LIMIT clause or a DISTINCT ON clause, we must
+	 * not push down any quals, since that could change the set of rows
+	 * returned.  (Actually, we could push down quals into a DISTINCT ON
+	 * subquery if they refer only to DISTINCT-ed output columns, but checking
+	 * that seems more work than it's worth.  In any case, a plain DISTINCT is
+	 * safe to push down past.)
 	 *
 	 * 3. We do not push down clauses that contain subselects, mainly because
 	 * I'm not sure it will work correctly (the subplan hasn't yet transformed
@@ -311,7 +315,8 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,
 	 */
 	if (subquery->setOperations == NULL &&
 		subquery->limitOffset == NULL &&
-		subquery->limitCount == NULL)
+		subquery->limitCount == NULL &&
+		!has_distinct_on_clause(subquery))
 	{
 		/* OK to consider pushing down individual quals */
 		List	   *upperrestrictlist = NIL;
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 6ee962fd75..5bb850e0da 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.86 2001/06/19 22:39:11 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.87 2001/07/31 17:56:31 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -728,6 +728,58 @@ pull_constant_clauses(List *quals, List **constantQual)
 }
 
 
+/*****************************************************************************
+ *		Tests on clauses of queries
+ *
+ * Possibly this code should go someplace else, since this isn't quite the
+ * same meaning of "clause" as is used elsewhere in this module.  But I can't
+ * think of a better place for it...
+ *****************************************************************************/
+
+/*
+ * Test whether a query uses DISTINCT ON, ie, has a distinct-list that is
+ * just a subset of the output columns.
+ */
+bool
+has_distinct_on_clause(Query *query)
+{
+	List   *targetList;
+
+	/* Is there a DISTINCT clause at all? */
+	if (query->distinctClause == NIL)
+		return false;
+	/*
+	 * If the DISTINCT list contains all the nonjunk targetlist items,
+	 * then it's a simple DISTINCT, else it's DISTINCT ON.  We do not
+	 * require the lists to be in the same order (since the parser may
+	 * have adjusted the DISTINCT clause ordering to agree with ORDER BY).
+	 */
+	foreach(targetList, query->targetList)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(targetList);
+		Index		ressortgroupref;
+		List	   *distinctClause;
+
+		if (tle->resdom->resjunk)
+			continue;
+		ressortgroupref = tle->resdom->ressortgroupref;
+		if (ressortgroupref == 0)
+			return true;		/* definitely not in DISTINCT list */
+		foreach(distinctClause, query->distinctClause)
+		{
+			SortClause *scl = (SortClause *) lfirst(distinctClause);
+
+			if (scl->tleSortGroupRef == ressortgroupref)
+				break;			/* found TLE in DISTINCT */
+		}
+		if (distinctClause == NIL)
+			return true;		/* this TLE is not in DISTINCT list */
+	}
+	/* It's a simple DISTINCT */
+	return false;
+}
+
+
 /*****************************************************************************
  *																			 *
  *		General clause-manipulating routines								 *
@@ -735,7 +787,7 @@ pull_constant_clauses(List *quals, List **constantQual)
  *****************************************************************************/
 
 /*
- * clause_relids_vars
+ * clause_get_relids_vars
  *	  Retrieves distinct relids and vars appearing within a clause.
  *
  * '*relids' is set to an integer list of all distinct "varno"s appearing
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index afc2b63430..bda842e794 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -3,7 +3,7 @@
  *				back to source text
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.80 2001/07/16 05:06:59 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.81 2001/07/31 17:56:31 tgl Exp $
  *
  *	  This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -119,7 +119,6 @@ static void get_utility_query_def(Query *query, deparse_context *context);
 static void get_basic_select_query(Query *query, deparse_context *context);
 static void get_setop_query(Node *setOp, Query *query,
 				deparse_context *context, bool toplevel);
-static bool simple_distinct(List *distinctClause, List *targetList);
 static void get_rule_sortgroupclause(SortClause *srt, List *tlist,
 									 bool force_colno,
 									 deparse_context *context);
@@ -1064,9 +1063,7 @@ get_basic_select_query(Query *query, deparse_context *context)
 	/* Add the DISTINCT clause if given */
 	if (query->distinctClause != NIL)
 	{
-		if (simple_distinct(query->distinctClause, query->targetList))
-			appendStringInfo(buf, " DISTINCT");
-		else
+		if (has_distinct_on_clause(query))
 		{
 			appendStringInfo(buf, " DISTINCT ON (");
 			sep = "";
@@ -1081,6 +1078,8 @@ get_basic_select_query(Query *query, deparse_context *context)
 			}
 			appendStringInfo(buf, ")");
 		}
+		else
+			appendStringInfo(buf, " DISTINCT");
 	}
 
 	/* Then we tell what to select (the targetlist) */
@@ -1207,34 +1206,6 @@ get_setop_query(Node *setOp, Query *query, deparse_context *context,
 	}
 }
 
-/*
- * Detect whether a DISTINCT list can be represented as just DISTINCT
- * or needs DISTINCT ON.  It's simple if it contains exactly the nonjunk
- * targetlist items.
- */
-static bool
-simple_distinct(List *distinctClause, List *targetList)
-{
-	while (targetList)
-	{
-		TargetEntry *tle = (TargetEntry *) lfirst(targetList);
-
-		if (!tle->resdom->resjunk)
-		{
-			if (distinctClause == NIL)
-				return false;
-			if (((SortClause *) lfirst(distinctClause))->tleSortGroupRef !=
-				tle->resdom->ressortgroupref)
-				return false;
-			distinctClause = lnext(distinctClause);
-		}
-		targetList = lnext(targetList);
-	}
-	if (distinctClause != NIL)
-		return false;
-	return true;
-}
-
 /*
  * Display a sort/group clause.
  */
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 79bb7a1359..1ed5c25cd7 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: clauses.h,v 1.44 2001/05/20 20:28:20 tgl Exp $
+ * $Id: clauses.h,v 1.45 2001/07/31 17:56:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -51,6 +51,8 @@ extern bool contain_noncachable_functions(Node *clause);
 extern bool is_pseudo_constant_clause(Node *clause);
 extern List *pull_constant_clauses(List *quals, List **constantQual);
 
+extern bool has_distinct_on_clause(Query *query);
+
 extern void clause_get_relids_vars(Node *clause, Relids *relids, List **vars);
 extern int	NumRelids(Node *clause);
 extern void CommuteClause(Expr *clause);