static void remove_rel_from_query(PlannerInfo *root, int relid,
Relids joinrelids);
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
+static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
+static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
+ List *clause_list);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
{
int innerrelid;
RelOptInfo *innerrel;
- Query *subquery = NULL;
Relids joinrelids;
List *clause_list = NIL;
ListCell *l;
innerrel = find_base_rel(root, innerrelid);
- if (innerrel->reloptkind != RELOPT_BASEREL)
- return false;
-
/*
* Before we go to the effort of checking whether any innerrel variables
* are needed above the join, make a quick check to eliminate cases in
* which we will surely be unable to prove uniqueness of the innerrel.
*/
- if (innerrel->rtekind == RTE_RELATION)
- {
- /*
- * For a plain-relation innerrel, we only know how to prove uniqueness
- * by reference to unique indexes. If there are no indexes then
- * there's certainly no unique indexes so there's no point in going
- * further.
- */
- if (innerrel->indexlist == NIL)
- return false;
- }
- else if (innerrel->rtekind == RTE_SUBQUERY)
- {
- subquery = root->simple_rte_array[innerrelid]->subquery;
-
- /*
- * If the subquery has no qualities that support distinctness proofs
- * then there's no point in going further.
- */
- if (!query_supports_distinctness(subquery))
- return false;
- }
- else
- return false; /* unsupported rtekind */
+ if (!rel_supports_distinctness(root, innerrel))
+ return false;
/* Compute the relid set for the join we are considering */
joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
continue; /* not mergejoinable */
/*
- * Check if clause has the form "outer op inner" or "inner op outer".
+ * Check if clause has the form "outer op inner" or "inner op outer",
+ * and if so mark which side is inner.
*/
if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
innerrel->relids))
}
/*
- * relation_has_unique_index_for automatically adds any usable restriction
- * clauses for the innerrel, so we needn't do that here. (XXX we are not
- * considering restriction clauses for subqueries; is that worth doing?)
+ * Now that we have the relevant equality join clauses, try to prove the
+ * innerrel distinct.
*/
-
- if (innerrel->rtekind == RTE_RELATION)
- {
- /* Now examine the indexes to see if we have a matching unique index */
- if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
- return true;
- }
- else /* innerrel->rtekind == RTE_SUBQUERY */
- {
- List *colnos = NIL;
- List *opids = NIL;
-
- /*
- * Build the argument lists for query_is_distinct_for: a list of
- * output column numbers that the query needs to be distinct over, and
- * a list of equality operators that the output columns need to be
- * distinct according to.
- */
- foreach(l, clause_list)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
- Oid op;
- Var *var;
-
- /*
- * Get the equality operator we need uniqueness according to.
- * (This might be a cross-type operator and thus not exactly the
- * same operator the subquery would consider; that's all right
- * since query_is_distinct_for can resolve such cases.) The
- * mergejoinability test above should have selected only OpExprs.
- */
- Assert(IsA(rinfo->clause, OpExpr));
- op = ((OpExpr *) rinfo->clause)->opno;
-
- /* clause_sides_match_join identified the inner side for us */
- if (rinfo->outer_is_left)
- var = (Var *) get_rightop(rinfo->clause);
- else
- var = (Var *) get_leftop(rinfo->clause);
-
- /*
- * If inner side isn't a Var referencing a subquery output column,
- * this clause doesn't help us.
- */
- if (!var || !IsA(var, Var) ||
- var->varno != innerrelid || var->varlevelsup != 0)
- continue;
-
- colnos = lappend_int(colnos, var->varattno);
- opids = lappend_oid(opids, op);
- }
-
- if (query_is_distinct_for(subquery, colnos, opids))
- return true;
- }
+ if (rel_is_distinct_for(root, innerrel, clause_list))
+ return true;
/*
* Some day it would be nice to check for other methods of establishing
}
+/*
+ * rel_supports_distinctness
+ * Could the relation possibly be proven distinct on some set of columns?
+ *
+ * This is effectively a pre-checking function for rel_is_distinct_for().
+ * It must return TRUE if rel_is_distinct_for() could possibly return TRUE
+ * with this rel, but it should not expend a lot of cycles. The idea is
+ * that callers can avoid doing possibly-expensive processing to compute
+ * rel_is_distinct_for()'s argument lists if the call could not possibly
+ * succeed.
+ */
+static bool
+rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
+{
+ /* We only know about baserels ... */
+ if (rel->reloptkind != RELOPT_BASEREL)
+ return false;
+ if (rel->rtekind == RTE_RELATION)
+ {
+ /*
+ * For a plain relation, we only know how to prove uniqueness by
+ * reference to unique indexes. Make sure there's at least one
+ * suitable unique index. It must be immediately enforced, and if
+ * it's a partial index, it must match the query. (Keep these
+ * conditions in sync with relation_has_unique_index_for!)
+ */
+ ListCell *lc;
+
+ foreach(lc, rel->indexlist)
+ {
+ IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc);
+
+ if (ind->unique && ind->immediate &&
+ (ind->indpred == NIL || ind->predOK))
+ return true;
+ }
+ }
+ else if (rel->rtekind == RTE_SUBQUERY)
+ {
+ Query *subquery = root->simple_rte_array[rel->relid]->subquery;
+
+ /* Check if the subquery has any qualities that support distinctness */
+ if (query_supports_distinctness(subquery))
+ return true;
+ }
+ /* We have no proof rules for any other rtekinds. */
+ return false;
+}
+
+/*
+ * rel_is_distinct_for
+ * Does the relation return only distinct rows according to clause_list?
+ *
+ * clause_list is a list of join restriction clauses involving this rel and
+ * some other one. Return true if no two rows emitted by this rel could
+ * possibly join to the same row of the other rel.
+ *
+ * The caller must have already determined that each condition is a
+ * mergejoinable equality with an expression in this relation on one side, and
+ * an expression not involving this relation on the other. The transient
+ * outer_is_left flag is used to identify which side references this relation:
+ * left side if outer_is_left is false, right side if it is true.
+ *
+ * Note that the passed-in clause_list may be destructively modified! This
+ * is OK for current uses, because the clause_list is built by the caller for
+ * the sole purpose of passing to this function.
+ */
+static bool
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+{
+ /*
+ * We could skip a couple of tests here if we assume all callers checked
+ * rel_supports_distinctness first, but it doesn't seem worth taking any
+ * risk for.
+ */
+ if (rel->reloptkind != RELOPT_BASEREL)
+ return false;
+ if (rel->rtekind == RTE_RELATION)
+ {
+ /*
+ * Examine the indexes to see if we have a matching unique index.
+ * relation_has_unique_index_for automatically adds any usable
+ * restriction clauses for the rel, so we needn't do that here.
+ */
+ if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
+ return true;
+ }
+ else if (rel->rtekind == RTE_SUBQUERY)
+ {
+ Index relid = rel->relid;
+ Query *subquery = root->simple_rte_array[relid]->subquery;
+ List *colnos = NIL;
+ List *opids = NIL;
+ ListCell *l;
+
+ /*
+ * Build the argument lists for query_is_distinct_for: a list of
+ * output column numbers that the query needs to be distinct over, and
+ * a list of equality operators that the output columns need to be
+ * distinct according to.
+ *
+ * (XXX we are not considering restriction clauses attached to the
+ * subquery; is that worth doing?)
+ */
+ foreach(l, clause_list)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ Oid op;
+ Var *var;
+
+ /*
+ * Get the equality operator we need uniqueness according to.
+ * (This might be a cross-type operator and thus not exactly the
+ * same operator the subquery would consider; that's all right
+ * since query_is_distinct_for can resolve such cases.) The
+ * caller's mergejoinability test should have selected only
+ * OpExprs.
+ */
+ Assert(IsA(rinfo->clause, OpExpr));
+ op = ((OpExpr *) rinfo->clause)->opno;
+
+ /* caller identified the inner side for us */
+ if (rinfo->outer_is_left)
+ var = (Var *) get_rightop(rinfo->clause);
+ else
+ var = (Var *) get_leftop(rinfo->clause);
+
+ /*
+ * If inner side isn't a Var referencing a subquery output column,
+ * this clause doesn't help us.
+ */
+ if (!var || !IsA(var, Var) ||
+ var->varno != relid || var->varlevelsup != 0)
+ continue;
+
+ colnos = lappend_int(colnos, var->varattno);
+ opids = lappend_oid(opids, op);
+ }
+
+ if (query_is_distinct_for(subquery, colnos, opids))
+ return true;
+ }
+ return false;
+}
+
+
/*
* query_supports_distinctness - could the query possibly be proven distinct
* on some set of output columns?