From f338dd7585cab45da9053e883ad65a440a99d3be Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 7 Apr 2016 13:11:30 -0400 Subject: [PATCH] Refactor join_is_removable() to separate out distinctness-proving logic. Extracted from pending unique-join patch, since this is a rather large delta but it's simply moving code out into separately-accessible subroutines. I (tgl) did choose to add a bit more logic to rel_supports_distinctness, so that it verifies that there's at least one potentially usable unique index rather than just checking indexlist != NIL. Otherwise there's no functional change here. David Rowley --- src/backend/optimizer/plan/analyzejoins.c | 244 ++++++++++++++-------- 1 file changed, 157 insertions(+), 87 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index f7f5714e62..3d305eb9d8 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -37,6 +37,9 @@ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); static void remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids); static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); +static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); +static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, + List *clause_list); static Oid distinct_col_search(int colno, List *colnos, List *opids); @@ -152,7 +155,6 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) { int innerrelid; RelOptInfo *innerrel; - Query *subquery = NULL; Relids joinrelids; List *clause_list = NIL; ListCell *l; @@ -171,38 +173,13 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) innerrel = find_base_rel(root, innerrelid); - if (innerrel->reloptkind != RELOPT_BASEREL) - return false; - /* * Before we go to the effort of checking whether any innerrel variables * are needed above the join, make a quick check to eliminate cases in * which we will surely be unable to prove uniqueness of the innerrel. */ - if (innerrel->rtekind == RTE_RELATION) - { - /* - * For a plain-relation innerrel, we only know how to prove uniqueness - * by reference to unique indexes. If there are no indexes then - * there's certainly no unique indexes so there's no point in going - * further. - */ - if (innerrel->indexlist == NIL) - return false; - } - else if (innerrel->rtekind == RTE_SUBQUERY) - { - subquery = root->simple_rte_array[innerrelid]->subquery; - - /* - * If the subquery has no qualities that support distinctness proofs - * then there's no point in going further. - */ - if (!query_supports_distinctness(subquery)) - return false; - } - else - return false; /* unsupported rtekind */ + if (!rel_supports_distinctness(root, innerrel)) + return false; /* Compute the relid set for the join we are considering */ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); @@ -291,7 +268,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) continue; /* not mergejoinable */ /* - * Check if clause has the form "outer op inner" or "inner op outer". + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. */ if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, innerrel->relids)) @@ -302,65 +280,11 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) } /* - * relation_has_unique_index_for automatically adds any usable restriction - * clauses for the innerrel, so we needn't do that here. (XXX we are not - * considering restriction clauses for subqueries; is that worth doing?) + * Now that we have the relevant equality join clauses, try to prove the + * innerrel distinct. */ - - if (innerrel->rtekind == RTE_RELATION) - { - /* Now examine the indexes to see if we have a matching unique index */ - if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL)) - return true; - } - else /* innerrel->rtekind == RTE_SUBQUERY */ - { - List *colnos = NIL; - List *opids = NIL; - - /* - * Build the argument lists for query_is_distinct_for: a list of - * output column numbers that the query needs to be distinct over, and - * a list of equality operators that the output columns need to be - * distinct according to. - */ - foreach(l, clause_list) - { - RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); - Oid op; - Var *var; - - /* - * Get the equality operator we need uniqueness according to. - * (This might be a cross-type operator and thus not exactly the - * same operator the subquery would consider; that's all right - * since query_is_distinct_for can resolve such cases.) The - * mergejoinability test above should have selected only OpExprs. - */ - Assert(IsA(rinfo->clause, OpExpr)); - op = ((OpExpr *) rinfo->clause)->opno; - - /* clause_sides_match_join identified the inner side for us */ - if (rinfo->outer_is_left) - var = (Var *) get_rightop(rinfo->clause); - else - var = (Var *) get_leftop(rinfo->clause); - - /* - * If inner side isn't a Var referencing a subquery output column, - * this clause doesn't help us. - */ - if (!var || !IsA(var, Var) || - var->varno != innerrelid || var->varlevelsup != 0) - continue; - - colnos = lappend_int(colnos, var->varattno); - opids = lappend_oid(opids, op); - } - - if (query_is_distinct_for(subquery, colnos, opids)) - return true; - } + if (rel_is_distinct_for(root, innerrel, clause_list)) + return true; /* * Some day it would be nice to check for other methods of establishing @@ -561,6 +485,152 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved) } +/* + * rel_supports_distinctness + * Could the relation possibly be proven distinct on some set of columns? + * + * This is effectively a pre-checking function for rel_is_distinct_for(). + * It must return TRUE if rel_is_distinct_for() could possibly return TRUE + * with this rel, but it should not expend a lot of cycles. The idea is + * that callers can avoid doing possibly-expensive processing to compute + * rel_is_distinct_for()'s argument lists if the call could not possibly + * succeed. + */ +static bool +rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) +{ + /* We only know about baserels ... */ + if (rel->reloptkind != RELOPT_BASEREL) + return false; + if (rel->rtekind == RTE_RELATION) + { + /* + * For a plain relation, we only know how to prove uniqueness by + * reference to unique indexes. Make sure there's at least one + * suitable unique index. It must be immediately enforced, and if + * it's a partial index, it must match the query. (Keep these + * conditions in sync with relation_has_unique_index_for!) + */ + ListCell *lc; + + foreach(lc, rel->indexlist) + { + IndexOptInfo *ind = (IndexOptInfo *) lfirst(lc); + + if (ind->unique && ind->immediate && + (ind->indpred == NIL || ind->predOK)) + return true; + } + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Query *subquery = root->simple_rte_array[rel->relid]->subquery; + + /* Check if the subquery has any qualities that support distinctness */ + if (query_supports_distinctness(subquery)) + return true; + } + /* We have no proof rules for any other rtekinds. */ + return false; +} + +/* + * rel_is_distinct_for + * Does the relation return only distinct rows according to clause_list? + * + * clause_list is a list of join restriction clauses involving this rel and + * some other one. Return true if no two rows emitted by this rel could + * possibly join to the same row of the other rel. + * + * The caller must have already determined that each condition is a + * mergejoinable equality with an expression in this relation on one side, and + * an expression not involving this relation on the other. The transient + * outer_is_left flag is used to identify which side references this relation: + * left side if outer_is_left is false, right side if it is true. + * + * Note that the passed-in clause_list may be destructively modified! This + * is OK for current uses, because the clause_list is built by the caller for + * the sole purpose of passing to this function. + */ +static bool +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +{ + /* + * We could skip a couple of tests here if we assume all callers checked + * rel_supports_distinctness first, but it doesn't seem worth taking any + * risk for. + */ + if (rel->reloptkind != RELOPT_BASEREL) + return false; + if (rel->rtekind == RTE_RELATION) + { + /* + * Examine the indexes to see if we have a matching unique index. + * relation_has_unique_index_for automatically adds any usable + * restriction clauses for the rel, so we needn't do that here. + */ + if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + return true; + } + else if (rel->rtekind == RTE_SUBQUERY) + { + Index relid = rel->relid; + Query *subquery = root->simple_rte_array[relid]->subquery; + List *colnos = NIL; + List *opids = NIL; + ListCell *l; + + /* + * Build the argument lists for query_is_distinct_for: a list of + * output column numbers that the query needs to be distinct over, and + * a list of equality operators that the output columns need to be + * distinct according to. + * + * (XXX we are not considering restriction clauses attached to the + * subquery; is that worth doing?) + */ + foreach(l, clause_list) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); + Oid op; + Var *var; + + /* + * Get the equality operator we need uniqueness according to. + * (This might be a cross-type operator and thus not exactly the + * same operator the subquery would consider; that's all right + * since query_is_distinct_for can resolve such cases.) The + * caller's mergejoinability test should have selected only + * OpExprs. + */ + Assert(IsA(rinfo->clause, OpExpr)); + op = ((OpExpr *) rinfo->clause)->opno; + + /* caller identified the inner side for us */ + if (rinfo->outer_is_left) + var = (Var *) get_rightop(rinfo->clause); + else + var = (Var *) get_leftop(rinfo->clause); + + /* + * If inner side isn't a Var referencing a subquery output column, + * this clause doesn't help us. + */ + if (!var || !IsA(var, Var) || + var->varno != relid || var->varlevelsup != 0) + continue; + + colnos = lappend_int(colnos, var->varattno); + opids = lappend_oid(opids, op); + } + + if (query_is_distinct_for(subquery, colnos, opids)) + return true; + } + return false; +} + + /* * query_supports_distinctness - could the query possibly be proven distinct * on some set of output columns? -- 2.40.0