1 /*-------------------------------------------------------------------------
4 * Routines for simplifying joins after initial query analysis
6 * While we do a great deal of join simplification in prep/prepjointree.c,
7 * certain optimizations cannot be performed at that stage for lack of
8 * detailed information about the query. The routines here are invoked
9 * after initsplan.c has done its work, and can do additional join removal
10 * and simplification steps based on the information extracted. The penalty
11 * is that we have to work harder to clean up after ourselves when we modify
12 * the query, since the derived data structures have to be updated too.
14 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
15 * Portions Copyright (c) 1994, Regents of the University of California
19 * src/backend/optimizer/plan/analyzejoins.c
21 *-------------------------------------------------------------------------
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/joininfo.h"
28 #include "optimizer/pathnode.h"
29 #include "optimizer/paths.h"
30 #include "optimizer/planmain.h"
31 #include "optimizer/tlist.h"
32 #include "utils/lsyscache.h"
35 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
36 static void remove_rel_from_query(PlannerInfo *root, int relid,
38 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
39 static Oid distinct_col_search(int colno, List *colnos, List *opids);
43 * remove_useless_joins
44 * Check for relations that don't actually need to be joined at all,
45 * and remove them from the query.
47 * We are passed the current joinlist and return the updated list. Other
48 * data structures that have to be updated are accessible via "root".
51 remove_useless_joins(PlannerInfo *root, List *joinlist)
56 * We are only interested in relations that are left-joined to, so we can
57 * scan the join_info_list to find them easily.
60 foreach(lc, root->join_info_list)
62 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
66 /* Skip if not removable */
67 if (!join_is_removable(root, sjinfo))
71 * Currently, join_is_removable can only succeed when the sjinfo's
72 * righthand is a single baserel. Remove that rel from the query and
75 innerrelid = bms_singleton_member(sjinfo->min_righthand);
77 remove_rel_from_query(root, innerrelid,
78 bms_union(sjinfo->min_lefthand,
79 sjinfo->min_righthand));
81 /* We verify that exactly one reference gets removed from joinlist */
83 joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
85 elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
88 * We can delete this SpecialJoinInfo from the list too, since it's no
91 root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
94 * Restart the scan. This is necessary to ensure we find all
95 * removable joins independently of ordering of the join_info_list
96 * (note that removal of attr_needed bits may make a join appear
97 * removable that did not before). Also, since we just deleted the
98 * current list cell, we'd have to have some kluge to continue the
108 * clause_sides_match_join
109 * Determine whether a join clause is of the right form to use in this join.
111 * We already know that the clause is a binary opclause referencing only the
112 * rels in the current join. The point here is to check whether it has the
113 * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
114 * rather than mixing outer and inner vars on either side. If it matches,
115 * we set the transient flag outer_is_left to identify which side is which.
118 clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
121 if (bms_is_subset(rinfo->left_relids, outerrelids) &&
122 bms_is_subset(rinfo->right_relids, innerrelids))
124 /* lefthand side is outer */
125 rinfo->outer_is_left = true;
128 else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
129 bms_is_subset(rinfo->right_relids, outerrelids))
131 /* righthand side is outer */
132 rinfo->outer_is_left = false;
135 return false; /* no good for these input relations */
140 * Check whether we need not perform this special join at all, because
141 * it will just duplicate its left input.
143 * This is true for a left join for which the join condition cannot match
144 * more than one inner-side row. (There are other possibly interesting
145 * cases, but we don't have the infrastructure to prove them.) We also
146 * have to check that the inner side doesn't generate any variables needed
150 join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
153 RelOptInfo *innerrel;
154 Query *subquery = NULL;
156 List *clause_list = NIL;
161 * Must be a non-delaying left join to a single baserel, else we aren't
162 * going to be able to do anything with it.
164 if (sjinfo->jointype != JOIN_LEFT ||
165 sjinfo->delay_upper_joins)
168 if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
171 innerrel = find_base_rel(root, innerrelid);
173 if (innerrel->reloptkind != RELOPT_BASEREL)
177 * Before we go to the effort of checking whether any innerrel variables
178 * are needed above the join, make a quick check to eliminate cases in
179 * which we will surely be unable to prove uniqueness of the innerrel.
181 if (innerrel->rtekind == RTE_RELATION)
184 * For a plain-relation innerrel, we only know how to prove uniqueness
185 * by reference to unique indexes. If there are no indexes then
186 * there's certainly no unique indexes so there's no point in going
189 if (innerrel->indexlist == NIL)
192 else if (innerrel->rtekind == RTE_SUBQUERY)
194 subquery = root->simple_rte_array[innerrelid]->subquery;
197 * If the subquery has no qualities that support distinctness proofs
198 * then there's no point in going further.
200 if (!query_supports_distinctness(subquery))
204 return false; /* unsupported rtekind */
206 /* Compute the relid set for the join we are considering */
207 joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
210 * We can't remove the join if any inner-rel attributes are used above the
213 * Note that this test only detects use of inner-rel attributes in higher
214 * join conditions and the target list. There might be such attributes in
215 * pushed-down conditions at this join, too. We check that case below.
217 * As a micro-optimization, it seems better to start with max_attr and
218 * count down rather than starting with min_attr and counting up, on the
219 * theory that the system attributes are somewhat less likely to be wanted
220 * and should be tested last.
222 for (attroff = innerrel->max_attr - innerrel->min_attr;
226 if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
231 * Similarly check that the inner rel isn't needed by any PlaceHolderVars
232 * that will be used above the join. We only need to fail if such a PHV
233 * actually references some inner-rel attributes; but the correct check
234 * for that is relatively expensive, so we first check against ph_eval_at,
235 * which must mention the inner rel if the PHV uses any inner-rel attrs as
236 * non-lateral references. Note that if the PHV's syntactic scope is just
237 * the inner rel, we can't drop the rel even if the PHV is variable-free.
239 foreach(l, root->placeholder_list)
241 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
243 if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
244 return false; /* it references innerrel laterally */
245 if (bms_is_subset(phinfo->ph_needed, joinrelids))
246 continue; /* PHV is not used above the join */
247 if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
248 continue; /* it definitely doesn't reference innerrel */
249 if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
250 return false; /* there isn't any other place to eval PHV */
251 if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
253 return false; /* it does reference innerrel */
257 * Search for mergejoinable clauses that constrain the inner rel against
258 * either the outer rel or a pseudoconstant. If an operator is
259 * mergejoinable then it behaves like equality for some btree opclass, so
260 * it's what we want. The mergejoinability test also eliminates clauses
261 * containing volatile functions, which we couldn't depend on.
263 foreach(l, innerrel->joininfo)
265 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
268 * If it's not a join clause for this outer join, we can't use it.
269 * Note that if the clause is pushed-down, then it is logically from
270 * above the outer join, even if it references no other rels (it might
271 * be from WHERE, for example).
273 if (restrictinfo->is_pushed_down ||
274 !bms_equal(restrictinfo->required_relids, joinrelids))
277 * If such a clause actually references the inner rel then join
278 * removal has to be disallowed. We have to check this despite
279 * the previous attr_needed checks because of the possibility of
280 * pushed-down clauses referencing the rel.
282 if (bms_is_member(innerrelid, restrictinfo->clause_relids))
284 continue; /* else, ignore; not useful here */
287 /* Ignore if it's not a mergejoinable clause */
288 if (!restrictinfo->can_join ||
289 restrictinfo->mergeopfamilies == NIL)
290 continue; /* not mergejoinable */
293 * Check if clause has the form "outer op inner" or "inner op outer".
295 if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
297 continue; /* no good for these input relations */
299 /* OK, add to list */
300 clause_list = lappend(clause_list, restrictinfo);
304 * relation_has_unique_index_for automatically adds any usable restriction
305 * clauses for the innerrel, so we needn't do that here. (XXX we are not
306 * considering restriction clauses for subqueries; is that worth doing?)
309 if (innerrel->rtekind == RTE_RELATION)
311 /* Now examine the indexes to see if we have a matching unique index */
312 if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
315 else /* innerrel->rtekind == RTE_SUBQUERY */
321 * Build the argument lists for query_is_distinct_for: a list of
322 * output column numbers that the query needs to be distinct over, and
323 * a list of equality operators that the output columns need to be
324 * distinct according to.
326 foreach(l, clause_list)
328 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
333 * Get the equality operator we need uniqueness according to.
334 * (This might be a cross-type operator and thus not exactly the
335 * same operator the subquery would consider; that's all right
336 * since query_is_distinct_for can resolve such cases.) The
337 * mergejoinability test above should have selected only OpExprs.
339 Assert(IsA(rinfo->clause, OpExpr));
340 op = ((OpExpr *) rinfo->clause)->opno;
342 /* clause_sides_match_join identified the inner side for us */
343 if (rinfo->outer_is_left)
344 var = (Var *) get_rightop(rinfo->clause);
346 var = (Var *) get_leftop(rinfo->clause);
349 * If inner side isn't a Var referencing a subquery output column,
350 * this clause doesn't help us.
352 if (!var || !IsA(var, Var) ||
353 var->varno != innerrelid || var->varlevelsup != 0)
356 colnos = lappend_int(colnos, var->varattno);
357 opids = lappend_oid(opids, op);
360 if (query_is_distinct_for(subquery, colnos, opids))
365 * Some day it would be nice to check for other methods of establishing
373 * Remove the target relid from the planner's data structures, having
374 * determined that there is no need to include it in the query.
376 * We are not terribly thorough here. We must make sure that the rel is
377 * no longer treated as a baserel, and that attributes of other baserels
378 * are no longer marked as being needed at joins involving this rel.
379 * Also, join quals involving the rel have to be removed from the joininfo
380 * lists, but only if they belong to the outer join identified by joinrelids.
383 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
385 RelOptInfo *rel = find_base_rel(root, relid);
392 * Mark the rel as "dead" to show it is no longer part of the join tree.
393 * (Removing it from the baserel array altogether seems too risky.)
395 rel->reloptkind = RELOPT_DEADREL;
398 * Remove references to the rel from other baserels' attr_needed arrays.
400 for (rti = 1; rti < root->simple_rel_array_size; rti++)
402 RelOptInfo *otherrel = root->simple_rel_array[rti];
405 /* there may be empty slots corresponding to non-baserel RTEs */
406 if (otherrel == NULL)
409 Assert(otherrel->relid == rti); /* sanity check on array */
411 /* no point in processing target rel itself */
415 for (attroff = otherrel->max_attr - otherrel->min_attr;
419 otherrel->attr_needed[attroff] =
420 bms_del_member(otherrel->attr_needed[attroff], relid);
425 * Likewise remove references from SpecialJoinInfo data structures.
427 * This is relevant in case the outer join we're deleting is nested inside
428 * other outer joins: the upper joins' relid sets have to be adjusted. The
429 * RHS of the target outer join will be made empty here, but that's OK
430 * since caller will delete that SpecialJoinInfo entirely.
432 foreach(l, root->join_info_list)
434 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
436 sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
437 sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
438 sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
439 sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
443 * Likewise remove references from PlaceHolderVar data structures,
444 * removing any no-longer-needed placeholders entirely.
446 * Removal is a bit tricker than it might seem: we can remove PHVs that
447 * are used at the target rel and/or in the join qual, but not those that
448 * are used at join partner rels or above the join. It's not that easy to
449 * distinguish PHVs used at partner rels from those used in the join qual,
450 * since they will both have ph_needed sets that are subsets of
451 * joinrelids. However, a PHV used at a partner rel could not have the
452 * target rel in ph_eval_at, so we check that while deciding whether to
453 * remove or just update the PHV. There is no corresponding test in
454 * join_is_removable because it doesn't need to distinguish those cases.
456 for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
458 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
461 Assert(!bms_is_member(relid, phinfo->ph_lateral));
462 if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
463 bms_is_member(relid, phinfo->ph_eval_at))
464 root->placeholder_list = list_delete_ptr(root->placeholder_list,
468 phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
469 Assert(!bms_is_empty(phinfo->ph_eval_at));
470 phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
475 * Remove any joinquals referencing the rel from the joininfo lists.
477 * In some cases, a joinqual has to be put back after deleting its
478 * reference to the target rel. This can occur for pseudoconstant and
479 * outerjoin-delayed quals, which can get marked as requiring the rel in
480 * order to force them to be evaluated at or above the join. We can't
481 * just discard them, though. Only quals that logically belonged to the
482 * outer join being discarded should be removed from the query.
484 * We must make a copy of the rel's old joininfo list before starting the
485 * loop, because otherwise remove_join_clause_from_rels would destroy the
486 * list while we're scanning it.
488 joininfos = list_copy(rel->joininfo);
489 foreach(l, joininfos)
491 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
493 remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
495 if (rinfo->is_pushed_down ||
496 !bms_equal(rinfo->required_relids, joinrelids))
498 /* Recheck that qual doesn't actually reference the target rel */
499 Assert(!bms_is_member(relid, rinfo->clause_relids));
502 * The required_relids probably aren't shared with anything else,
503 * but let's copy them just to be sure.
505 rinfo->required_relids = bms_copy(rinfo->required_relids);
506 rinfo->required_relids = bms_del_member(rinfo->required_relids,
508 distribute_restrictinfo_to_rels(root, rinfo);
514 * Remove any occurrences of the target relid from a joinlist structure.
516 * It's easiest to build a whole new list structure, so we handle it that
517 * way. Efficiency is not a big deal here.
519 * *nremoved is incremented by the number of occurrences removed (there
520 * should be exactly one, but the caller checks that).
523 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
528 foreach(jl, joinlist)
530 Node *jlnode = (Node *) lfirst(jl);
532 if (IsA(jlnode, RangeTblRef))
534 int varno = ((RangeTblRef *) jlnode)->rtindex;
539 result = lappend(result, jlnode);
541 else if (IsA(jlnode, List))
543 /* Recurse to handle subproblem */
546 sublist = remove_rel_from_joinlist((List *) jlnode,
548 /* Avoid including empty sub-lists in the result */
550 result = lappend(result, sublist);
554 elog(ERROR, "unrecognized joinlist node type: %d",
555 (int) nodeTag(jlnode));
564 * query_supports_distinctness - could the query possibly be proven distinct
565 * on some set of output columns?
567 * This is effectively a pre-checking function for query_is_distinct_for().
568 * It must return TRUE if query_is_distinct_for() could possibly return TRUE
569 * with this query, but it should not expend a lot of cycles. The idea is
570 * that callers can avoid doing possibly-expensive processing to compute
571 * query_is_distinct_for()'s argument lists if the call could not possibly
575 query_supports_distinctness(Query *query)
577 if (query->distinctClause != NIL ||
578 query->groupClause != NIL ||
579 query->groupingSets != NIL ||
582 query->setOperations)
589 * query_is_distinct_for - does query never return duplicates of the
592 * query is a not-yet-planned subquery (in current usage, it's always from
593 * a subquery RTE, which the planner avoids scribbling on).
595 * colnos is an integer list of output column numbers (resno's). We are
596 * interested in whether rows consisting of just these columns are certain
597 * to be distinct. "Distinctness" is defined according to whether the
598 * corresponding upper-level equality operators listed in opids would think
599 * the values are distinct. (Note: the opids entries could be cross-type
600 * operators, and thus not exactly the equality operators that the subquery
601 * would use itself. We use equality_ops_are_compatible() to check
602 * compatibility. That looks at btree or hash opfamily membership, and so
603 * should give trustworthy answers for all operators that we might need
604 * to deal with here.)
607 query_is_distinct_for(Query *query, List *colnos, List *opids)
612 Assert(list_length(colnos) == list_length(opids));
615 * A set-returning function in the query's targetlist can result in
616 * returning duplicate rows, if the SRF is evaluated after the
617 * de-duplication step; so we play it safe and say "no" if there are any
618 * SRFs. (We could be certain that it's okay if SRFs appear only in the
619 * specified columns, since those must be evaluated before de-duplication;
620 * but it doesn't presently seem worth the complication to check that.)
622 if (expression_returns_set((Node *) query->targetList))
626 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
627 * columns in the DISTINCT clause appear in colnos and operator semantics
630 if (query->distinctClause)
632 foreach(l, query->distinctClause)
634 SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
635 TargetEntry *tle = get_sortgroupclause_tle(sgc,
638 opid = distinct_col_search(tle->resno, colnos, opids);
639 if (!OidIsValid(opid) ||
640 !equality_ops_are_compatible(opid, sgc->eqop))
641 break; /* exit early if no match */
643 if (l == NULL) /* had matches for all? */
648 * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
649 * the grouped columns appear in colnos and operator semantics match.
651 if (query->groupClause && !query->groupingSets)
653 foreach(l, query->groupClause)
655 SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
656 TargetEntry *tle = get_sortgroupclause_tle(sgc,
659 opid = distinct_col_search(tle->resno, colnos, opids);
660 if (!OidIsValid(opid) ||
661 !equality_ops_are_compatible(opid, sgc->eqop))
662 break; /* exit early if no match */
664 if (l == NULL) /* had matches for all? */
667 else if (query->groupingSets)
670 * If we have grouping sets with expressions, we probably don't have
671 * uniqueness and analysis would be hard. Punt.
673 if (query->groupClause)
677 * If we have no groupClause (therefore no grouping expressions), we
678 * might have one or many empty grouping sets. If there's just one,
679 * then we're returning only one row and are certainly unique. But
680 * otherwise, we know we're certainly not unique.
682 if (list_length(query->groupingSets) == 1 &&
683 ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
691 * If we have no GROUP BY, but do have aggregates or HAVING, then the
692 * result is at most one row so it's surely unique, for any operators.
694 if (query->hasAggs || query->havingQual)
699 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
702 if (query->setOperations)
704 SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
706 Assert(IsA(topop, SetOperationStmt));
707 Assert(topop->op != SETOP_NONE);
713 /* We're good if all the nonjunk output columns are in colnos */
714 lg = list_head(topop->groupClauses);
715 foreach(l, query->targetList)
717 TargetEntry *tle = (TargetEntry *) lfirst(l);
718 SortGroupClause *sgc;
721 continue; /* ignore resjunk columns */
723 /* non-resjunk columns should have grouping clauses */
725 sgc = (SortGroupClause *) lfirst(lg);
728 opid = distinct_col_search(tle->resno, colnos, opids);
729 if (!OidIsValid(opid) ||
730 !equality_ops_are_compatible(opid, sgc->eqop))
731 break; /* exit early if no match */
733 if (l == NULL) /* had matches for all? */
739 * XXX Are there any other cases in which we can easily see the result
742 * If you do add more smarts to this function, be sure to update
743 * query_supports_distinctness() to match.
750 * distinct_col_search - subroutine for query_is_distinct_for
752 * If colno is in colnos, return the corresponding element of opids,
753 * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
754 * but if it does, we arbitrarily select the first match.)
757 distinct_col_search(int colno, List *colnos, List *opids)
762 forboth(lc1, colnos, lc2, opids)
764 if (colno == lfirst_int(lc1))
765 return lfirst_oid(lc2);