From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 7 Apr 2016 17:11:30 +0000 (-0400)
Subject: Refactor join_is_removable() to separate out distinctness-proving logic.
X-Git-Tag: REL9_6_BETA1~256
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f338dd7585cab45da9053e883ad65a440a99d3be;p=postgresql

Refactor join_is_removable() to separate out distinctness-proving logic.

Extracted from pending unique-join patch, since this is a rather large
delta but it's simply moving code out into separately-accessible
subroutines.

I (tgl) did choose to add a bit more logic to rel_supports_distinctness,
so that it verifies that there's at least one potentially usable unique
index rather than just checking indexlist != NIL.  Otherwise there's
no functional change here.

David Rowley
---

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index f7f5714e62..3d305eb9d8 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -37,6 +37,9 @@ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
 static void remove_rel_from_query(PlannerInfo *root, int relid,
 					  Relids joinrelids);
 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
+static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
+static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
+					List *clause_list);
 static Oid	distinct_col_search(int colno, List *colnos, List *opids);
 
 
@@ -152,7 +155,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 {
 	int			innerrelid;
 	RelOptInfo *innerrel;
-	Query	   *subquery = NULL;
 	Relids		joinrelids;
 	List	   *clause_list = NIL;
 	ListCell   *l;
@@ -171,38 +173,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 
 	innerrel = find_base_rel(root, innerrelid);
 
-	if (innerrel->reloptkind != RELOPT_BASEREL)
-		return false;
-
 	/*
 	 * Before we go to the effort of checking whether any innerrel variables
 	 * are needed above the join, make a quick check to eliminate cases in
 	 * which we will surely be unable to prove uniqueness of the innerrel.
 	 */
-	if (innerrel->rtekind == RTE_RELATION)
-	{
-		/*
-		 * For a plain-relation innerrel, we only know how to prove uniqueness
-		 * by reference to unique indexes.  If there are no indexes then
-		 * there's certainly no unique indexes so there's no point in going
-		 * further.
-		 */
-		if (innerrel->indexlist == NIL)
-			return false;
-	}
-	else if (innerrel->rtekind == RTE_SUBQUERY)
-	{
-		subquery = root->simple_rte_array[innerrelid]->subquery;
-
-		/*
-		 * If the subquery has no qualities that support distinctness proofs
-		 * then there's no point in going further.
-		 */
-		if (!query_supports_distinctness(subquery))
-			return false;
-	}
-	else
-		return false;			/* unsupported rtekind */
+	if (!rel_supports_distinctness(root, innerrel))
+		return false;
 
 	/* Compute the relid set for the join we are considering */
 	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
@@ -291,7 +268,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 			continue;			/* not mergejoinable */
 
 		/*
-		 * Check if clause has the form "outer op inner" or "inner op outer".
+		 * Check if clause has the form "outer op inner" or "inner op outer",
+		 * and if so mark which side is inner.
 		 */
 		if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
 									 innerrel->relids))
@@ -302,65 +280,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
 	}
 
 	/*
-	 * relation_has_unique_index_for automatically adds any usable restriction
-	 * clauses for the innerrel, so we needn't do that here.  (XXX we are not
-	 * considering restriction clauses for subqueries; is that worth doing?)
+	 * Now that we have the relevant equality join clauses, try to prove the
+	 * innerrel distinct.
 	 */
-
-	if (innerrel->rtekind == RTE_RELATION)
-	{
-		/* Now examine the indexes to see if we have a matching unique index */
-		if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
-			return true;
-	}
-	else	/* innerrel->rtekind == RTE_SUBQUERY */
-	{
-		List	   *colnos = NIL;
-		List	   *opids = NIL;
-
-		/*
-		 * Build the argument lists for query_is_distinct_for: a list of
-		 * output column numbers that the query needs to be distinct over, and
-		 * a list of equality operators that the output columns need to be
-		 * distinct according to.
-		 */
-		foreach(l, clause_list)
-		{
-			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-			Oid			op;
-			Var		   *var;
-
-			/*
-			 * Get the equality operator we need uniqueness according to.
-			 * (This might be a cross-type operator and thus not exactly the
-			 * same operator the subquery would consider; that's all right
-			 * since query_is_distinct_for can resolve such cases.)  The
-			 * mergejoinability test above should have selected only OpExprs.
-			 */
-			Assert(IsA(rinfo->clause, OpExpr));
-			op = ((OpExpr *) rinfo->clause)->opno;
-
-			/* clause_sides_match_join identified the inner side for us */
-			if (rinfo->outer_is_left)
-				var = (Var *) get_rightop(rinfo->clause);
-			else
-				var = (Var *) get_leftop(rinfo->clause);
-
-			/*
-			 * If inner side isn't a Var referencing a subquery output column,
-			 * this clause doesn't help us.
-			 */
-			if (!var || !IsA(var, Var) ||
-				var->varno != innerrelid || var->varlevelsup != 0)
-				continue;
-
-			colnos = lappend_int(colnos, var->varattno);
-			opids = lappend_oid(opids, op);
-		}
-
-		if (query_is_distinct_for(subquery, colnos, opids))
-			return true;
-	}
+	if (rel_is_distinct_for(root, innerrel, clause_list))
+		return true;
 
 	/*
 	 * Some day it would be nice to check for other methods of establishing
@@ -561,6 +485,152 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
 }
 
 
+/*
+ * rel_supports_distinctness
+ *		Could the relation possibly be proven distinct on some set of columns?
+ *
+ * This is effectively a pre-checking function for rel_is_distinct_for().
+ * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
+ * with this rel, but it should not expend a lot of cycles.  The idea is
+ * that callers can avoid doing possibly-expensive processing to compute
+ * rel_is_distinct_for()'s argument lists if the call could not possibly
+ * succeed.
+ */
+static bool
+rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
+{
+	/* We only know about baserels ... */
+	if (rel->reloptkind != RELOPT_BASEREL)
+		return false;
+	if (rel->rtekind == RTE_RELATION)
+	{
+		/*
+		 * For a plain relation, we only know how to prove uniqueness by
+		 * reference to unique indexes.  Make sure there's at least one
+		 * suitable unique index.  It must be immediately enforced, and if
+		 * it's a partial index, it must match the query.  (Keep these
+		 * conditions in sync with relation_has_unique_index_for!)
+		 */
+		ListCell   *lc;
+
+		foreach(lc, rel->indexlist)
+		{
+			IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
+
+			if (ind->unique && ind->immediate &&
+				(ind->indpred == NIL || ind->predOK))
+				return true;
+		}
+	}
+	else if (rel->rtekind == RTE_SUBQUERY)
+	{
+		Query	   *subquery = root->simple_rte_array[rel->relid]->subquery;
+
+		/* Check if the subquery has any qualities that support distinctness */
+		if (query_supports_distinctness(subquery))
+			return true;
+	}
+	/* We have no proof rules for any other rtekinds. */
+	return false;
+}
+
+/*
+ * rel_is_distinct_for
+ *		Does the relation return only distinct rows according to clause_list?
+ *
+ * clause_list is a list of join restriction clauses involving this rel and
+ * some other one.  Return true if no two rows emitted by this rel could
+ * possibly join to the same row of the other rel.
+ *
+ * The caller must have already determined that each condition is a
+ * mergejoinable equality with an expression in this relation on one side, and
+ * an expression not involving this relation on the other.  The transient
+ * outer_is_left flag is used to identify which side references this relation:
+ * left side if outer_is_left is false, right side if it is true.
+ *
+ * Note that the passed-in clause_list may be destructively modified!  This
+ * is OK for current uses, because the clause_list is built by the caller for
+ * the sole purpose of passing to this function.
+ */
+static bool
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+{
+	/*
+	 * We could skip a couple of tests here if we assume all callers checked
+	 * rel_supports_distinctness first, but it doesn't seem worth taking any
+	 * risk for.
+	 */
+	if (rel->reloptkind != RELOPT_BASEREL)
+		return false;
+	if (rel->rtekind == RTE_RELATION)
+	{
+		/*
+		 * Examine the indexes to see if we have a matching unique index.
+		 * relation_has_unique_index_for automatically adds any usable
+		 * restriction clauses for the rel, so we needn't do that here.
+		 */
+		if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
+			return true;
+	}
+	else if (rel->rtekind == RTE_SUBQUERY)
+	{
+		Index		relid = rel->relid;
+		Query	   *subquery = root->simple_rte_array[relid]->subquery;
+		List	   *colnos = NIL;
+		List	   *opids = NIL;
+		ListCell   *l;
+
+		/*
+		 * Build the argument lists for query_is_distinct_for: a list of
+		 * output column numbers that the query needs to be distinct over, and
+		 * a list of equality operators that the output columns need to be
+		 * distinct according to.
+		 *
+		 * (XXX we are not considering restriction clauses attached to the
+		 * subquery; is that worth doing?)
+		 */
+		foreach(l, clause_list)
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+			Oid			op;
+			Var		   *var;
+
+			/*
+			 * Get the equality operator we need uniqueness according to.
+			 * (This might be a cross-type operator and thus not exactly the
+			 * same operator the subquery would consider; that's all right
+			 * since query_is_distinct_for can resolve such cases.)  The
+			 * caller's mergejoinability test should have selected only
+			 * OpExprs.
+			 */
+			Assert(IsA(rinfo->clause, OpExpr));
+			op = ((OpExpr *) rinfo->clause)->opno;
+
+			/* caller identified the inner side for us */
+			if (rinfo->outer_is_left)
+				var = (Var *) get_rightop(rinfo->clause);
+			else
+				var = (Var *) get_leftop(rinfo->clause);
+
+			/*
+			 * If inner side isn't a Var referencing a subquery output column,
+			 * this clause doesn't help us.
+			 */
+			if (!var || !IsA(var, Var) ||
+				var->varno != relid || var->varlevelsup != 0)
+				continue;
+
+			colnos = lappend_int(colnos, var->varattno);
+			opids = lappend_oid(opids, op);
+		}
+
+		if (query_is_distinct_for(subquery, colnos, opids))
+			return true;
+	}
+	return false;
+}
+
+
 /*
  * query_supports_distinctness - could the query possibly be proven distinct
  *		on some set of output columns?