]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/plan/analyzejoins.c
d188d9724aff0d3faf561d18d0f034614da01e68
[postgresql] / src / backend / optimizer / plan / analyzejoins.c
1 /*-------------------------------------------------------------------------
2  *
3  * analyzejoins.c
4  *        Routines for simplifying joins after initial query analysis
5  *
6  * While we do a great deal of join simplification in prep/prepjointree.c,
7  * certain optimizations cannot be performed at that stage for lack of
8  * detailed information about the query.  The routines here are invoked
9  * after initsplan.c has done its work, and can do additional join removal
10  * and simplification steps based on the information extracted.  The penalty
11  * is that we have to work harder to clean up after ourselves when we modify
12  * the query, since the derived data structures have to be updated too.
13  *
14  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
15  * Portions Copyright (c) 1994, Regents of the University of California
16  *
17  *
18  * IDENTIFICATION
19  *        src/backend/optimizer/plan/analyzejoins.c
20  *
21  *-------------------------------------------------------------------------
22  */
23 #include "postgres.h"
24
25 #include "nodes/nodeFuncs.h"
26 #include "optimizer/clauses.h"
27 #include "optimizer/joininfo.h"
28 #include "optimizer/pathnode.h"
29 #include "optimizer/paths.h"
30 #include "optimizer/planmain.h"
31 #include "optimizer/tlist.h"
32 #include "utils/lsyscache.h"
33
34 /* local functions */
35 static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
36 static void remove_rel_from_query(PlannerInfo *root, int relid,
37                                           Relids joinrelids);
38 static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
39 static Oid      distinct_col_search(int colno, List *colnos, List *opids);
40
41
42 /*
43  * remove_useless_joins
44  *              Check for relations that don't actually need to be joined at all,
45  *              and remove them from the query.
46  *
47  * We are passed the current joinlist and return the updated list.  Other
48  * data structures that have to be updated are accessible via "root".
49  */
50 List *
51 remove_useless_joins(PlannerInfo *root, List *joinlist)
52 {
53         ListCell   *lc;
54
55         /*
56          * We are only interested in relations that are left-joined to, so we can
57          * scan the join_info_list to find them easily.
58          */
59 restart:
60         foreach(lc, root->join_info_list)
61         {
62                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
63                 int                     innerrelid;
64                 int                     nremoved;
65
66                 /* Skip if not removable */
67                 if (!join_is_removable(root, sjinfo))
68                         continue;
69
70                 /*
71                  * Currently, join_is_removable can only succeed when the sjinfo's
72                  * righthand is a single baserel.  Remove that rel from the query and
73                  * joinlist.
74                  */
75                 innerrelid = bms_singleton_member(sjinfo->min_righthand);
76
77                 remove_rel_from_query(root, innerrelid,
78                                                           bms_union(sjinfo->min_lefthand,
79                                                                                 sjinfo->min_righthand));
80
81                 /* We verify that exactly one reference gets removed from joinlist */
82                 nremoved = 0;
83                 joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
84                 if (nremoved != 1)
85                         elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
86
87                 /*
88                  * We can delete this SpecialJoinInfo from the list too, since it's no
89                  * longer of interest.
90                  */
91                 root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
92
93                 /*
94                  * Restart the scan.  This is necessary to ensure we find all
95                  * removable joins independently of ordering of the join_info_list
96                  * (note that removal of attr_needed bits may make a join appear
97                  * removable that did not before).  Also, since we just deleted the
98                  * current list cell, we'd have to have some kluge to continue the
99                  * list scan anyway.
100                  */
101                 goto restart;
102         }
103
104         return joinlist;
105 }
106
107 /*
108  * clause_sides_match_join
109  *        Determine whether a join clause is of the right form to use in this join.
110  *
111  * We already know that the clause is a binary opclause referencing only the
112  * rels in the current join.  The point here is to check whether it has the
113  * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
114  * rather than mixing outer and inner vars on either side.  If it matches,
115  * we set the transient flag outer_is_left to identify which side is which.
116  */
117 static inline bool
118 clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
119                                                 Relids innerrelids)
120 {
121         if (bms_is_subset(rinfo->left_relids, outerrelids) &&
122                 bms_is_subset(rinfo->right_relids, innerrelids))
123         {
124                 /* lefthand side is outer */
125                 rinfo->outer_is_left = true;
126                 return true;
127         }
128         else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
129                          bms_is_subset(rinfo->right_relids, outerrelids))
130         {
131                 /* righthand side is outer */
132                 rinfo->outer_is_left = false;
133                 return true;
134         }
135         return false;                           /* no good for these input relations */
136 }
137
138 /*
139  * join_is_removable
140  *        Check whether we need not perform this special join at all, because
141  *        it will just duplicate its left input.
142  *
143  * This is true for a left join for which the join condition cannot match
144  * more than one inner-side row.  (There are other possibly interesting
145  * cases, but we don't have the infrastructure to prove them.)  We also
146  * have to check that the inner side doesn't generate any variables needed
147  * above the join.
148  */
149 static bool
150 join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
151 {
152         int                     innerrelid;
153         RelOptInfo *innerrel;
154         Query      *subquery = NULL;
155         Relids          joinrelids;
156         List       *clause_list = NIL;
157         ListCell   *l;
158         int                     attroff;
159
160         /*
161          * Must be a non-delaying left join to a single baserel, else we aren't
162          * going to be able to do anything with it.
163          */
164         if (sjinfo->jointype != JOIN_LEFT ||
165                 sjinfo->delay_upper_joins)
166                 return false;
167
168         if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
169                 return false;
170
171         innerrel = find_base_rel(root, innerrelid);
172
173         if (innerrel->reloptkind != RELOPT_BASEREL)
174                 return false;
175
176         /*
177          * Before we go to the effort of checking whether any innerrel variables
178          * are needed above the join, make a quick check to eliminate cases in
179          * which we will surely be unable to prove uniqueness of the innerrel.
180          */
181         if (innerrel->rtekind == RTE_RELATION)
182         {
183                 /*
184                  * For a plain-relation innerrel, we only know how to prove uniqueness
185                  * by reference to unique indexes.  If there are no indexes then
186                  * there's certainly no unique indexes so there's no point in going
187                  * further.
188                  */
189                 if (innerrel->indexlist == NIL)
190                         return false;
191         }
192         else if (innerrel->rtekind == RTE_SUBQUERY)
193         {
194                 subquery = root->simple_rte_array[innerrelid]->subquery;
195
196                 /*
197                  * If the subquery has no qualities that support distinctness proofs
198                  * then there's no point in going further.
199                  */
200                 if (!query_supports_distinctness(subquery))
201                         return false;
202         }
203         else
204                 return false;                   /* unsupported rtekind */
205
206         /* Compute the relid set for the join we are considering */
207         joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
208
209         /*
210          * We can't remove the join if any inner-rel attributes are used above the
211          * join.
212          *
213          * Note that this test only detects use of inner-rel attributes in higher
214          * join conditions and the target list.  There might be such attributes in
215          * pushed-down conditions at this join, too.  We check that case below.
216          *
217          * As a micro-optimization, it seems better to start with max_attr and
218          * count down rather than starting with min_attr and counting up, on the
219          * theory that the system attributes are somewhat less likely to be wanted
220          * and should be tested last.
221          */
222         for (attroff = innerrel->max_attr - innerrel->min_attr;
223                  attroff >= 0;
224                  attroff--)
225         {
226                 if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
227                         return false;
228         }
229
230         /*
231          * Similarly check that the inner rel isn't needed by any PlaceHolderVars
232          * that will be used above the join.  We only need to fail if such a PHV
233          * actually references some inner-rel attributes; but the correct check
234          * for that is relatively expensive, so we first check against ph_eval_at,
235          * which must mention the inner rel if the PHV uses any inner-rel attrs as
236          * non-lateral references.  Note that if the PHV's syntactic scope is just
237          * the inner rel, we can't drop the rel even if the PHV is variable-free.
238          */
239         foreach(l, root->placeholder_list)
240         {
241                 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
242
243                 if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
244                         return false;           /* it references innerrel laterally */
245                 if (bms_is_subset(phinfo->ph_needed, joinrelids))
246                         continue;                       /* PHV is not used above the join */
247                 if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
248                         continue;                       /* it definitely doesn't reference innerrel */
249                 if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
250                         return false;           /* there isn't any other place to eval PHV */
251                 if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
252                                                 innerrel->relids))
253                         return false;           /* it does reference innerrel */
254         }
255
256         /*
257          * Search for mergejoinable clauses that constrain the inner rel against
258          * either the outer rel or a pseudoconstant.  If an operator is
259          * mergejoinable then it behaves like equality for some btree opclass, so
260          * it's what we want.  The mergejoinability test also eliminates clauses
261          * containing volatile functions, which we couldn't depend on.
262          */
263         foreach(l, innerrel->joininfo)
264         {
265                 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
266
267                 /*
268                  * If it's not a join clause for this outer join, we can't use it.
269                  * Note that if the clause is pushed-down, then it is logically from
270                  * above the outer join, even if it references no other rels (it might
271                  * be from WHERE, for example).
272                  */
273                 if (restrictinfo->is_pushed_down ||
274                         !bms_equal(restrictinfo->required_relids, joinrelids))
275                 {
276                         /*
277                          * If such a clause actually references the inner rel then join
278                          * removal has to be disallowed.  We have to check this despite
279                          * the previous attr_needed checks because of the possibility of
280                          * pushed-down clauses referencing the rel.
281                          */
282                         if (bms_is_member(innerrelid, restrictinfo->clause_relids))
283                                 return false;
284                         continue;                       /* else, ignore; not useful here */
285                 }
286
287                 /* Ignore if it's not a mergejoinable clause */
288                 if (!restrictinfo->can_join ||
289                         restrictinfo->mergeopfamilies == NIL)
290                         continue;                       /* not mergejoinable */
291
292                 /*
293                  * Check if clause has the form "outer op inner" or "inner op outer".
294                  */
295                 if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
296                                                                          innerrel->relids))
297                         continue;                       /* no good for these input relations */
298
299                 /* OK, add to list */
300                 clause_list = lappend(clause_list, restrictinfo);
301         }
302
303         /*
304          * relation_has_unique_index_for automatically adds any usable restriction
305          * clauses for the innerrel, so we needn't do that here.  (XXX we are not
306          * considering restriction clauses for subqueries; is that worth doing?)
307          */
308
309         if (innerrel->rtekind == RTE_RELATION)
310         {
311                 /* Now examine the indexes to see if we have a matching unique index */
312                 if (relation_has_unique_index_for(root, innerrel, clause_list, NIL, NIL))
313                         return true;
314         }
315         else    /* innerrel->rtekind == RTE_SUBQUERY */
316         {
317                 List       *colnos = NIL;
318                 List       *opids = NIL;
319
320                 /*
321                  * Build the argument lists for query_is_distinct_for: a list of
322                  * output column numbers that the query needs to be distinct over, and
323                  * a list of equality operators that the output columns need to be
324                  * distinct according to.
325                  */
326                 foreach(l, clause_list)
327                 {
328                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
329                         Oid                     op;
330                         Var                *var;
331
332                         /*
333                          * Get the equality operator we need uniqueness according to.
334                          * (This might be a cross-type operator and thus not exactly the
335                          * same operator the subquery would consider; that's all right
336                          * since query_is_distinct_for can resolve such cases.)  The
337                          * mergejoinability test above should have selected only OpExprs.
338                          */
339                         Assert(IsA(rinfo->clause, OpExpr));
340                         op = ((OpExpr *) rinfo->clause)->opno;
341
342                         /* clause_sides_match_join identified the inner side for us */
343                         if (rinfo->outer_is_left)
344                                 var = (Var *) get_rightop(rinfo->clause);
345                         else
346                                 var = (Var *) get_leftop(rinfo->clause);
347
348                         /*
349                          * If inner side isn't a Var referencing a subquery output column,
350                          * this clause doesn't help us.
351                          */
352                         if (!var || !IsA(var, Var) ||
353                                 var->varno != innerrelid || var->varlevelsup != 0)
354                                 continue;
355
356                         colnos = lappend_int(colnos, var->varattno);
357                         opids = lappend_oid(opids, op);
358                 }
359
360                 if (query_is_distinct_for(subquery, colnos, opids))
361                         return true;
362         }
363
364         /*
365          * Some day it would be nice to check for other methods of establishing
366          * distinctness.
367          */
368         return false;
369 }
370
371
372 /*
373  * Remove the target relid from the planner's data structures, having
374  * determined that there is no need to include it in the query.
375  *
376  * We are not terribly thorough here.  We must make sure that the rel is
377  * no longer treated as a baserel, and that attributes of other baserels
378  * are no longer marked as being needed at joins involving this rel.
379  * Also, join quals involving the rel have to be removed from the joininfo
380  * lists, but only if they belong to the outer join identified by joinrelids.
381  */
382 static void
383 remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
384 {
385         RelOptInfo *rel = find_base_rel(root, relid);
386         List       *joininfos;
387         Index           rti;
388         ListCell   *l;
389         ListCell   *nextl;
390
391         /*
392          * Mark the rel as "dead" to show it is no longer part of the join tree.
393          * (Removing it from the baserel array altogether seems too risky.)
394          */
395         rel->reloptkind = RELOPT_DEADREL;
396
397         /*
398          * Remove references to the rel from other baserels' attr_needed arrays.
399          */
400         for (rti = 1; rti < root->simple_rel_array_size; rti++)
401         {
402                 RelOptInfo *otherrel = root->simple_rel_array[rti];
403                 int                     attroff;
404
405                 /* there may be empty slots corresponding to non-baserel RTEs */
406                 if (otherrel == NULL)
407                         continue;
408
409                 Assert(otherrel->relid == rti); /* sanity check on array */
410
411                 /* no point in processing target rel itself */
412                 if (otherrel == rel)
413                         continue;
414
415                 for (attroff = otherrel->max_attr - otherrel->min_attr;
416                          attroff >= 0;
417                          attroff--)
418                 {
419                         otherrel->attr_needed[attroff] =
420                                 bms_del_member(otherrel->attr_needed[attroff], relid);
421                 }
422         }
423
424         /*
425          * Likewise remove references from SpecialJoinInfo data structures.
426          *
427          * This is relevant in case the outer join we're deleting is nested inside
428          * other outer joins: the upper joins' relid sets have to be adjusted. The
429          * RHS of the target outer join will be made empty here, but that's OK
430          * since caller will delete that SpecialJoinInfo entirely.
431          */
432         foreach(l, root->join_info_list)
433         {
434                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
435
436                 sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
437                 sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
438                 sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
439                 sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
440         }
441
442         /*
443          * Likewise remove references from PlaceHolderVar data structures,
444          * removing any no-longer-needed placeholders entirely.
445          *
446          * Removal is a bit tricker than it might seem: we can remove PHVs that
447          * are used at the target rel and/or in the join qual, but not those that
448          * are used at join partner rels or above the join.  It's not that easy to
449          * distinguish PHVs used at partner rels from those used in the join qual,
450          * since they will both have ph_needed sets that are subsets of
451          * joinrelids.  However, a PHV used at a partner rel could not have the
452          * target rel in ph_eval_at, so we check that while deciding whether to
453          * remove or just update the PHV.  There is no corresponding test in
454          * join_is_removable because it doesn't need to distinguish those cases.
455          */
456         for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
457         {
458                 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
459
460                 nextl = lnext(l);
461                 Assert(!bms_is_member(relid, phinfo->ph_lateral));
462                 if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
463                         bms_is_member(relid, phinfo->ph_eval_at))
464                         root->placeholder_list = list_delete_ptr(root->placeholder_list,
465                                                                                                          phinfo);
466                 else
467                 {
468                         phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
469                         Assert(!bms_is_empty(phinfo->ph_eval_at));
470                         phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
471                 }
472         }
473
474         /*
475          * Remove any joinquals referencing the rel from the joininfo lists.
476          *
477          * In some cases, a joinqual has to be put back after deleting its
478          * reference to the target rel.  This can occur for pseudoconstant and
479          * outerjoin-delayed quals, which can get marked as requiring the rel in
480          * order to force them to be evaluated at or above the join.  We can't
481          * just discard them, though.  Only quals that logically belonged to the
482          * outer join being discarded should be removed from the query.
483          *
484          * We must make a copy of the rel's old joininfo list before starting the
485          * loop, because otherwise remove_join_clause_from_rels would destroy the
486          * list while we're scanning it.
487          */
488         joininfos = list_copy(rel->joininfo);
489         foreach(l, joininfos)
490         {
491                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
492
493                 remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
494
495                 if (rinfo->is_pushed_down ||
496                         !bms_equal(rinfo->required_relids, joinrelids))
497                 {
498                         /* Recheck that qual doesn't actually reference the target rel */
499                         Assert(!bms_is_member(relid, rinfo->clause_relids));
500
501                         /*
502                          * The required_relids probably aren't shared with anything else,
503                          * but let's copy them just to be sure.
504                          */
505                         rinfo->required_relids = bms_copy(rinfo->required_relids);
506                         rinfo->required_relids = bms_del_member(rinfo->required_relids,
507                                                                                                         relid);
508                         distribute_restrictinfo_to_rels(root, rinfo);
509                 }
510         }
511 }
512
513 /*
514  * Remove any occurrences of the target relid from a joinlist structure.
515  *
516  * It's easiest to build a whole new list structure, so we handle it that
517  * way.  Efficiency is not a big deal here.
518  *
519  * *nremoved is incremented by the number of occurrences removed (there
520  * should be exactly one, but the caller checks that).
521  */
522 static List *
523 remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
524 {
525         List       *result = NIL;
526         ListCell   *jl;
527
528         foreach(jl, joinlist)
529         {
530                 Node       *jlnode = (Node *) lfirst(jl);
531
532                 if (IsA(jlnode, RangeTblRef))
533                 {
534                         int                     varno = ((RangeTblRef *) jlnode)->rtindex;
535
536                         if (varno == relid)
537                                 (*nremoved)++;
538                         else
539                                 result = lappend(result, jlnode);
540                 }
541                 else if (IsA(jlnode, List))
542                 {
543                         /* Recurse to handle subproblem */
544                         List       *sublist;
545
546                         sublist = remove_rel_from_joinlist((List *) jlnode,
547                                                                                            relid, nremoved);
548                         /* Avoid including empty sub-lists in the result */
549                         if (sublist)
550                                 result = lappend(result, sublist);
551                 }
552                 else
553                 {
554                         elog(ERROR, "unrecognized joinlist node type: %d",
555                                  (int) nodeTag(jlnode));
556                 }
557         }
558
559         return result;
560 }
561
562
563 /*
564  * query_supports_distinctness - could the query possibly be proven distinct
565  *              on some set of output columns?
566  *
567  * This is effectively a pre-checking function for query_is_distinct_for().
568  * It must return TRUE if query_is_distinct_for() could possibly return TRUE
569  * with this query, but it should not expend a lot of cycles.  The idea is
570  * that callers can avoid doing possibly-expensive processing to compute
571  * query_is_distinct_for()'s argument lists if the call could not possibly
572  * succeed.
573  */
574 bool
575 query_supports_distinctness(Query *query)
576 {
577         if (query->distinctClause != NIL ||
578                 query->groupClause != NIL ||
579                 query->groupingSets != NIL ||
580                 query->hasAggs ||
581                 query->havingQual ||
582                 query->setOperations)
583                 return true;
584
585         return false;
586 }
587
588 /*
589  * query_is_distinct_for - does query never return duplicates of the
590  *              specified columns?
591  *
592  * query is a not-yet-planned subquery (in current usage, it's always from
593  * a subquery RTE, which the planner avoids scribbling on).
594  *
595  * colnos is an integer list of output column numbers (resno's).  We are
596  * interested in whether rows consisting of just these columns are certain
597  * to be distinct.  "Distinctness" is defined according to whether the
598  * corresponding upper-level equality operators listed in opids would think
599  * the values are distinct.  (Note: the opids entries could be cross-type
600  * operators, and thus not exactly the equality operators that the subquery
601  * would use itself.  We use equality_ops_are_compatible() to check
602  * compatibility.  That looks at btree or hash opfamily membership, and so
603  * should give trustworthy answers for all operators that we might need
604  * to deal with here.)
605  */
606 bool
607 query_is_distinct_for(Query *query, List *colnos, List *opids)
608 {
609         ListCell   *l;
610         Oid                     opid;
611
612         Assert(list_length(colnos) == list_length(opids));
613
614         /*
615          * A set-returning function in the query's targetlist can result in
616          * returning duplicate rows, if the SRF is evaluated after the
617          * de-duplication step; so we play it safe and say "no" if there are any
618          * SRFs.  (We could be certain that it's okay if SRFs appear only in the
619          * specified columns, since those must be evaluated before de-duplication;
620          * but it doesn't presently seem worth the complication to check that.)
621          */
622         if (expression_returns_set((Node *) query->targetList))
623                 return false;
624
625         /*
626          * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
627          * columns in the DISTINCT clause appear in colnos and operator semantics
628          * match.
629          */
630         if (query->distinctClause)
631         {
632                 foreach(l, query->distinctClause)
633                 {
634                         SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
635                         TargetEntry *tle = get_sortgroupclause_tle(sgc,
636                                                                                                            query->targetList);
637
638                         opid = distinct_col_search(tle->resno, colnos, opids);
639                         if (!OidIsValid(opid) ||
640                                 !equality_ops_are_compatible(opid, sgc->eqop))
641                                 break;                  /* exit early if no match */
642                 }
643                 if (l == NULL)                  /* had matches for all? */
644                         return true;
645         }
646
647         /*
648          * Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
649          * the grouped columns appear in colnos and operator semantics match.
650          */
651         if (query->groupClause && !query->groupingSets)
652         {
653                 foreach(l, query->groupClause)
654                 {
655                         SortGroupClause *sgc = (SortGroupClause *) lfirst(l);
656                         TargetEntry *tle = get_sortgroupclause_tle(sgc,
657                                                                                                            query->targetList);
658
659                         opid = distinct_col_search(tle->resno, colnos, opids);
660                         if (!OidIsValid(opid) ||
661                                 !equality_ops_are_compatible(opid, sgc->eqop))
662                                 break;                  /* exit early if no match */
663                 }
664                 if (l == NULL)                  /* had matches for all? */
665                         return true;
666         }
667         else if (query->groupingSets)
668         {
669                 /*
670                  * If we have grouping sets with expressions, we probably don't have
671                  * uniqueness and analysis would be hard. Punt.
672                  */
673                 if (query->groupClause)
674                         return false;
675
676                 /*
677                  * If we have no groupClause (therefore no grouping expressions), we
678                  * might have one or many empty grouping sets. If there's just one,
679                  * then we're returning only one row and are certainly unique. But
680                  * otherwise, we know we're certainly not unique.
681                  */
682                 if (list_length(query->groupingSets) == 1 &&
683                         ((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
684                         return true;
685                 else
686                         return false;
687         }
688         else
689         {
690                 /*
691                  * If we have no GROUP BY, but do have aggregates or HAVING, then the
692                  * result is at most one row so it's surely unique, for any operators.
693                  */
694                 if (query->hasAggs || query->havingQual)
695                         return true;
696         }
697
698         /*
699          * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
700          * except with ALL.
701          */
702         if (query->setOperations)
703         {
704                 SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
705
706                 Assert(IsA(topop, SetOperationStmt));
707                 Assert(topop->op != SETOP_NONE);
708
709                 if (!topop->all)
710                 {
711                         ListCell   *lg;
712
713                         /* We're good if all the nonjunk output columns are in colnos */
714                         lg = list_head(topop->groupClauses);
715                         foreach(l, query->targetList)
716                         {
717                                 TargetEntry *tle = (TargetEntry *) lfirst(l);
718                                 SortGroupClause *sgc;
719
720                                 if (tle->resjunk)
721                                         continue;       /* ignore resjunk columns */
722
723                                 /* non-resjunk columns should have grouping clauses */
724                                 Assert(lg != NULL);
725                                 sgc = (SortGroupClause *) lfirst(lg);
726                                 lg = lnext(lg);
727
728                                 opid = distinct_col_search(tle->resno, colnos, opids);
729                                 if (!OidIsValid(opid) ||
730                                         !equality_ops_are_compatible(opid, sgc->eqop))
731                                         break;          /* exit early if no match */
732                         }
733                         if (l == NULL)          /* had matches for all? */
734                                 return true;
735                 }
736         }
737
738         /*
739          * XXX Are there any other cases in which we can easily see the result
740          * must be distinct?
741          *
742          * If you do add more smarts to this function, be sure to update
743          * query_supports_distinctness() to match.
744          */
745
746         return false;
747 }
748
749 /*
750  * distinct_col_search - subroutine for query_is_distinct_for
751  *
752  * If colno is in colnos, return the corresponding element of opids,
753  * else return InvalidOid.  (Ordinarily colnos would not contain duplicates,
754  * but if it does, we arbitrarily select the first match.)
755  */
756 static Oid
757 distinct_col_search(int colno, List *colnos, List *opids)
758 {
759         ListCell   *lc1,
760                            *lc2;
761
762         forboth(lc1, colnos, lc2, opids)
763         {
764                 if (colno == lfirst_int(lc1))
765                         return lfirst_oid(lc2);
766         }
767         return InvalidOid;
768 }