]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/util/restrictinfo.c
Don't try to remove duplicate OR-subclauses in create_bitmap_subplan and
[postgresql] / src / backend / optimizer / util / restrictinfo.c
1 /*-------------------------------------------------------------------------
2  *
3  * restrictinfo.c
4  *        RestrictInfo node manipulation routines.
5  *
6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.40 2005/10/13 00:06:46 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "optimizer/clauses.h"
18 #include "optimizer/cost.h"
19 #include "optimizer/paths.h"
20 #include "optimizer/predtest.h"
21 #include "optimizer/restrictinfo.h"
22 #include "optimizer/var.h"
23
24
25 static RestrictInfo *make_restrictinfo_internal(Expr *clause,
26                                                    Expr *orclause,
27                                                    bool is_pushed_down,
28                                                    Relids required_relids);
29 static Expr *make_sub_restrictinfos(Expr *clause,
30                                            bool is_pushed_down);
31 static RestrictInfo *join_clause_is_redundant(PlannerInfo *root,
32                                                  RestrictInfo *rinfo,
33                                                  List *reference_list,
34                                                  bool isouterjoin);
35
36
37 /*
38  * make_restrictinfo
39  *
40  * Build a RestrictInfo node containing the given subexpression.
41  *
42  * The is_pushed_down flag must be supplied by the caller.
43  * required_relids can be NULL, in which case it defaults to the
44  * actual clause contents (i.e., clause_relids).
45  *
46  * We initialize fields that depend only on the given subexpression, leaving
47  * others that depend on context (or may never be needed at all) to be filled
48  * later.
49  */
50 RestrictInfo *
51 make_restrictinfo(Expr *clause, bool is_pushed_down, Relids required_relids)
52 {
53         /*
54          * If it's an OR clause, build a modified copy with RestrictInfos
55          * inserted above each subclause of the top-level AND/OR structure.
56          */
57         if (or_clause((Node *) clause))
58                 return (RestrictInfo *) make_sub_restrictinfos(clause, is_pushed_down);
59
60         /* Shouldn't be an AND clause, else AND/OR flattening messed up */
61         Assert(!and_clause((Node *) clause));
62
63         return make_restrictinfo_internal(clause, NULL, is_pushed_down,
64                                                                           required_relids);
65 }
66
67 /*
68  * make_restrictinfo_from_bitmapqual
69  *
70  * Given the bitmapqual Path structure for a bitmap indexscan, generate
71  * RestrictInfo node(s) equivalent to the condition represented by the
72  * indexclauses of the Path structure.
73  *
74  * The result is a List (effectively, implicit-AND representation) of
75  * RestrictInfos.
76  *
77  * If include_predicates is true, we add any partial index predicates to
78  * the explicit index quals.  When this is not true, we return a condition
79  * that might be weaker than the actual scan represents.
80  *
81  * To do this through the normal make_restrictinfo() API, callers would have
82  * to strip off the RestrictInfo nodes present in the indexclauses lists, and
83  * then make_restrictinfo() would have to build new ones.  It's better to have
84  * a specialized routine to allow sharing of RestrictInfos.
85  *
86  * The qual manipulations here are much the same as in create_bitmap_subplan;
87  * keep the two routines in sync!
88  */
89 List *
90 make_restrictinfo_from_bitmapqual(Path *bitmapqual,
91                                                                   bool is_pushed_down,
92                                                                   bool include_predicates)
93 {
94         List       *result;
95         ListCell   *l;
96
97         if (IsA(bitmapqual, BitmapAndPath))
98         {
99                 BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
100
101                 /*
102                  * There may well be redundant quals among the subplans, since a
103                  * top-level WHERE qual might have gotten used to form several
104                  * different index quals.  We don't try exceedingly hard to
105                  * eliminate redundancies, but we do eliminate obvious duplicates
106                  * by using list_concat_unique.
107                  */
108                 result = NIL;
109                 foreach(l, apath->bitmapquals)
110                 {
111                         List       *sublist;
112
113                         sublist = make_restrictinfo_from_bitmapqual((Path *) lfirst(l),
114                                                                                                                 is_pushed_down,
115                                                                                                                 include_predicates);
116                         result = list_concat_unique(result, sublist);
117                 }
118         }
119         else if (IsA(bitmapqual, BitmapOrPath))
120         {
121                 BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
122                 List       *withris = NIL;
123                 List       *withoutris = NIL;
124
125                 /*
126                  * Here, we only detect qual-free subplans.  A qual-free subplan would
127                  * cause us to generate "... OR true ..."  which we may as well reduce
128                  * to just "true".  We do not try to eliminate redundant subclauses
129                  * because (a) it's not as likely as in the AND case, and (b) we might
130                  * well be working with hundreds or even thousands of OR conditions,
131                  * perhaps from a long IN list.  The performance of list_append_unique
132                  * would be unacceptable.
133                  */
134                 foreach(l, opath->bitmapquals)
135                 {
136                         List       *sublist;
137
138                         sublist = make_restrictinfo_from_bitmapqual((Path *) lfirst(l),
139                                                                                                                 is_pushed_down,
140                                                                                                                 include_predicates);
141                         if (sublist == NIL)
142                         {
143                                 /*
144                                  * If we find a qual-less subscan, it represents a constant
145                                  * TRUE, and hence the OR result is also constant TRUE, so
146                                  * we can stop here.
147                                  */
148                                 return NIL;
149                         }
150                         /* Create AND subclause with RestrictInfos */
151                         withris = lappend(withris,
152                                                           make_ands_explicit(sublist));
153                         /* And one without */
154                         sublist = get_actual_clauses(sublist);
155                         withoutris = lappend(withoutris,
156                                                                  make_ands_explicit(sublist));
157                 }
158
159                 /*
160                  * Avoid generating one-element ORs, which could happen
161                  * due to redundancy elimination.
162                  */
163                 if (list_length(withris) <= 1)
164                         result = withris;
165                 else
166                 {
167                         /* Here's the magic part not available to outside callers */
168                         result =
169                                 list_make1(make_restrictinfo_internal(make_orclause(withoutris),
170                                                                                                           make_orclause(withris),
171                                                                                                           is_pushed_down,
172                                                                                                           NULL));
173                 }
174         }
175         else if (IsA(bitmapqual, IndexPath))
176         {
177                 IndexPath *ipath = (IndexPath *) bitmapqual;
178
179                 result = list_copy(ipath->indexclauses);
180                 if (include_predicates && ipath->indexinfo->indpred != NIL)
181                 {
182                         foreach(l, ipath->indexinfo->indpred)
183                         {
184                                 Expr   *pred = (Expr *) lfirst(l);
185
186                                 /*
187                                  * We know that the index predicate must have been implied
188                                  * by the query condition as a whole, but it may or may not
189                                  * be implied by the conditions that got pushed into the
190                                  * bitmapqual.  Avoid generating redundant conditions.
191                                  */
192                                 if (!predicate_implied_by(list_make1(pred), result))
193                                         result = lappend(result,
194                                                                          make_restrictinfo(pred,
195                                                                                                            is_pushed_down,
196                                                                                                            NULL));
197                         }
198                 }
199         }
200         else
201         {
202                 elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
203                 result = NIL;                   /* keep compiler quiet */
204         }
205
206         return result;
207 }
208
209 /*
210  * make_restrictinfo_internal
211  *
212  * Common code for the main entry points and the recursive cases.
213  */
214 static RestrictInfo *
215 make_restrictinfo_internal(Expr *clause, Expr *orclause,
216                                                    bool is_pushed_down, Relids required_relids)
217 {
218         RestrictInfo *restrictinfo = makeNode(RestrictInfo);
219
220         restrictinfo->clause = clause;
221         restrictinfo->orclause = orclause;
222         restrictinfo->is_pushed_down = is_pushed_down;
223         restrictinfo->can_join = false;         /* may get set below */
224
225         /*
226          * If it's a binary opclause, set up left/right relids info. In any
227          * case set up the total clause relids info.
228          */
229         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
230         {
231                 restrictinfo->left_relids = pull_varnos(get_leftop(clause));
232                 restrictinfo->right_relids = pull_varnos(get_rightop(clause));
233
234                 restrictinfo->clause_relids = bms_union(restrictinfo->left_relids,
235                                                                                          restrictinfo->right_relids);
236
237                 /*
238                  * Does it look like a normal join clause, i.e., a binary operator
239                  * relating expressions that come from distinct relations? If so
240                  * we might be able to use it in a join algorithm.      Note that this
241                  * is a purely syntactic test that is made regardless of context.
242                  */
243                 if (!bms_is_empty(restrictinfo->left_relids) &&
244                         !bms_is_empty(restrictinfo->right_relids) &&
245                         !bms_overlap(restrictinfo->left_relids,
246                                                  restrictinfo->right_relids))
247                         restrictinfo->can_join = true;
248         }
249         else
250         {
251                 /* Not a binary opclause, so mark left/right relid sets as empty */
252                 restrictinfo->left_relids = NULL;
253                 restrictinfo->right_relids = NULL;
254                 /* and get the total relid set the hard way */
255                 restrictinfo->clause_relids = pull_varnos((Node *) clause);
256         }
257
258         /* required_relids defaults to clause_relids */
259         if (required_relids != NULL)
260                 restrictinfo->required_relids = required_relids;
261         else
262                 restrictinfo->required_relids = restrictinfo->clause_relids;
263
264         /*
265          * Fill in all the cacheable fields with "not yet set" markers. None
266          * of these will be computed until/unless needed.  Note in particular
267          * that we don't mark a binary opclause as mergejoinable or
268          * hashjoinable here; that happens only if it appears in the right
269          * context (top level of a joinclause list).
270          */
271         restrictinfo->eval_cost.startup = -1;
272         restrictinfo->this_selec = -1;
273
274         restrictinfo->mergejoinoperator = InvalidOid;
275         restrictinfo->left_sortop = InvalidOid;
276         restrictinfo->right_sortop = InvalidOid;
277
278         restrictinfo->left_pathkey = NIL;
279         restrictinfo->right_pathkey = NIL;
280
281         restrictinfo->left_mergescansel = -1;
282         restrictinfo->right_mergescansel = -1;
283
284         restrictinfo->hashjoinoperator = InvalidOid;
285
286         restrictinfo->left_bucketsize = -1;
287         restrictinfo->right_bucketsize = -1;
288
289         return restrictinfo;
290 }
291
292 /*
293  * Recursively insert sub-RestrictInfo nodes into a boolean expression.
294  *
295  * We put RestrictInfos above simple (non-AND/OR) clauses and above
296  * sub-OR clauses, but not above sub-AND clauses, because there's no need.
297  * This may seem odd but it is closely related to the fact that we use
298  * implicit-AND lists at top level of RestrictInfo lists.  Only ORs and
299  * simple clauses are valid RestrictInfos.
300  */
301 static Expr *
302 make_sub_restrictinfos(Expr *clause, bool is_pushed_down)
303 {
304         if (or_clause((Node *) clause))
305         {
306                 List       *orlist = NIL;
307                 ListCell   *temp;
308
309                 foreach(temp, ((BoolExpr *) clause)->args)
310                         orlist = lappend(orlist,
311                                                          make_sub_restrictinfos(lfirst(temp),
312                                                                                                         is_pushed_down));
313                 return (Expr *) make_restrictinfo_internal(clause,
314                                                                                                    make_orclause(orlist),
315                                                                                                    is_pushed_down,
316                                                                                                    NULL);
317         }
318         else if (and_clause((Node *) clause))
319         {
320                 List       *andlist = NIL;
321                 ListCell   *temp;
322
323                 foreach(temp, ((BoolExpr *) clause)->args)
324                         andlist = lappend(andlist,
325                                                           make_sub_restrictinfos(lfirst(temp),
326                                                                                                          is_pushed_down));
327                 return make_andclause(andlist);
328         }
329         else
330                 return (Expr *) make_restrictinfo_internal(clause,
331                                                                                                    NULL,
332                                                                                                    is_pushed_down,
333                                                                                                    NULL);
334 }
335
336 /*
337  * restriction_is_or_clause
338  *
339  * Returns t iff the restrictinfo node contains an 'or' clause.
340  */
341 bool
342 restriction_is_or_clause(RestrictInfo *restrictinfo)
343 {
344         if (restrictinfo->orclause != NULL)
345                 return true;
346         else
347                 return false;
348 }
349
350 /*
351  * get_actual_clauses
352  *
353  * Returns a list containing the bare clauses from 'restrictinfo_list'.
354  */
355 List *
356 get_actual_clauses(List *restrictinfo_list)
357 {
358         List       *result = NIL;
359         ListCell   *temp;
360
361         foreach(temp, restrictinfo_list)
362         {
363                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(temp);
364
365                 Assert(IsA(rinfo, RestrictInfo));
366
367                 result = lappend(result, rinfo->clause);
368         }
369         return result;
370 }
371
372 /*
373  * get_actual_join_clauses
374  *
375  * Extract clauses from 'restrictinfo_list', separating those that
376  * syntactically match the join level from those that were pushed down.
377  */
378 void
379 get_actual_join_clauses(List *restrictinfo_list,
380                                                 List **joinquals, List **otherquals)
381 {
382         ListCell   *temp;
383
384         *joinquals = NIL;
385         *otherquals = NIL;
386
387         foreach(temp, restrictinfo_list)
388         {
389                 RestrictInfo *clause = (RestrictInfo *) lfirst(temp);
390
391                 if (clause->is_pushed_down)
392                         *otherquals = lappend(*otherquals, clause->clause);
393                 else
394                         *joinquals = lappend(*joinquals, clause->clause);
395         }
396 }
397
398 /*
399  * remove_redundant_join_clauses
400  *
401  * Given a list of RestrictInfo clauses that are to be applied in a join,
402  * remove any duplicate or redundant clauses.
403  *
404  * We must eliminate duplicates when forming the restrictlist for a joinrel,
405  * since we will see many of the same clauses arriving from both input
406  * relations. Also, if a clause is a mergejoinable clause, it's possible that
407  * it is redundant with previous clauses (see optimizer/README for
408  * discussion). We detect that case and omit the redundant clause from the
409  * result list.
410  *
411  * The result is a fresh List, but it points to the same member nodes
412  * as were in the input.
413  */
414 List *
415 remove_redundant_join_clauses(PlannerInfo *root, List *restrictinfo_list,
416                                                           bool isouterjoin)
417 {
418         List       *result = NIL;
419         ListCell   *item;
420         QualCost        cost;
421
422         /*
423          * If there are any redundant clauses, we want to eliminate the ones
424          * that are more expensive in favor of the ones that are less so. Run
425          * cost_qual_eval() to ensure the eval_cost fields are set up.
426          */
427         cost_qual_eval(&cost, restrictinfo_list);
428
429         /*
430          * We don't have enough knowledge yet to be able to estimate the
431          * number of times a clause might be evaluated, so it's hard to weight
432          * the startup and per-tuple costs appropriately.  For now just weight
433          * 'em the same.
434          */
435 #define CLAUSECOST(r)  ((r)->eval_cost.startup + (r)->eval_cost.per_tuple)
436
437         foreach(item, restrictinfo_list)
438         {
439                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
440                 RestrictInfo *prevrinfo;
441
442                 /* is it redundant with any prior clause? */
443                 prevrinfo = join_clause_is_redundant(root, rinfo, result, isouterjoin);
444                 if (prevrinfo == NULL)
445                 {
446                         /* no, so add it to result list */
447                         result = lappend(result, rinfo);
448                 }
449                 else if (CLAUSECOST(rinfo) < CLAUSECOST(prevrinfo))
450                 {
451                         /* keep this one, drop the previous one */
452                         result = list_delete_ptr(result, prevrinfo);
453                         result = lappend(result, rinfo);
454                 }
455                 /* else, drop this one */
456         }
457
458         return result;
459 }
460
461 /*
462  * select_nonredundant_join_clauses
463  *
464  * Given a list of RestrictInfo clauses that are to be applied in a join,
465  * select the ones that are not redundant with any clause in the
466  * reference_list.
467  *
468  * This is similar to remove_redundant_join_clauses, but we are looking for
469  * redundancies with a separate list of clauses (i.e., clauses that have
470  * already been applied below the join itself).
471  *
472  * Note that we assume the given restrictinfo_list has already been checked
473  * for local redundancies, so we don't check again.
474  */
475 List *
476 select_nonredundant_join_clauses(PlannerInfo *root,
477                                                                  List *restrictinfo_list,
478                                                                  List *reference_list,
479                                                                  bool isouterjoin)
480 {
481         List       *result = NIL;
482         ListCell   *item;
483
484         foreach(item, restrictinfo_list)
485         {
486                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
487
488                 /* drop it if redundant with any reference clause */
489                 if (join_clause_is_redundant(root, rinfo, reference_list, isouterjoin) != NULL)
490                         continue;
491
492                 /* otherwise, add it to result list */
493                 result = lappend(result, rinfo);
494         }
495
496         return result;
497 }
498
499 /*
500  * join_clause_is_redundant
501  *              If rinfo is redundant with any clause in reference_list,
502  *              return one such clause; otherwise return NULL.
503  *
504  * This is the guts of both remove_redundant_join_clauses and
505  * select_nonredundant_join_clauses.  See the docs above for motivation.
506  *
507  * We can detect redundant mergejoinable clauses very cheaply by using their
508  * left and right pathkeys, which uniquely identify the sets of equijoined
509  * variables in question.  All the members of a pathkey set that are in the
510  * left relation have already been forced to be equal; likewise for those in
511  * the right relation.  So, we need to have only one clause that checks
512  * equality between any set member on the left and any member on the right;
513  * by transitivity, all the rest are then equal.
514  *
515  * However, clauses that are of the form "var expr = const expr" cannot be
516  * eliminated as redundant.  This is because when there are const expressions
517  * in a pathkey set, generate_implied_equalities() suppresses "var = var"
518  * clauses in favor of "var = const" clauses.  We cannot afford to drop any
519  * of the latter, even though they might seem redundant by the pathkey
520  * membership test.
521  *
522  * Weird special case: if we have two clauses that seem redundant
523  * except one is pushed down into an outer join and the other isn't,
524  * then they're not really redundant, because one constrains the
525  * joined rows after addition of null fill rows, and the other doesn't.
526  */
527 static RestrictInfo *
528 join_clause_is_redundant(PlannerInfo *root,
529                                                  RestrictInfo *rinfo,
530                                                  List *reference_list,
531                                                  bool isouterjoin)
532 {
533         ListCell   *refitem;
534
535         /* always consider exact duplicates redundant */
536         foreach(refitem, reference_list)
537         {
538                 RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
539
540                 if (equal(rinfo, refrinfo))
541                         return refrinfo;
542         }
543
544         /* check for redundant merge clauses */
545         if (rinfo->mergejoinoperator != InvalidOid)
546         {
547                 /* do the cheap test first: is it a "var = const" clause? */
548                 if (bms_is_empty(rinfo->left_relids) ||
549                         bms_is_empty(rinfo->right_relids))
550                         return NULL;            /* var = const, so not redundant */
551
552                 cache_mergeclause_pathkeys(root, rinfo);
553
554                 foreach(refitem, reference_list)
555                 {
556                         RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
557
558                         if (refrinfo->mergejoinoperator != InvalidOid)
559                         {
560                                 cache_mergeclause_pathkeys(root, refrinfo);
561
562                                 if (rinfo->left_pathkey == refrinfo->left_pathkey &&
563                                         rinfo->right_pathkey == refrinfo->right_pathkey &&
564                                         (rinfo->is_pushed_down == refrinfo->is_pushed_down ||
565                                          !isouterjoin))
566                                 {
567                                         /* Yup, it's redundant */
568                                         return refrinfo;
569                                 }
570                         }
571                 }
572         }
573
574         /* otherwise, not redundant */
575         return NULL;
576 }