]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/util/restrictinfo.c
Teach planner about some cases where a restriction clause can be
[postgresql] / src / backend / optimizer / util / restrictinfo.c
1 /*-------------------------------------------------------------------------
2  *
3  * restrictinfo.c
4  *        RestrictInfo node manipulation routines.
5  *
6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.38 2005/07/02 23:00:41 tgl Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16
17 #include "optimizer/clauses.h"
18 #include "optimizer/cost.h"
19 #include "optimizer/paths.h"
20 #include "optimizer/restrictinfo.h"
21 #include "optimizer/var.h"
22
23
24 static RestrictInfo *make_restrictinfo_internal(Expr *clause,
25                                                    Expr *orclause,
26                                                    bool is_pushed_down,
27                                                    Relids required_relids);
28 static Expr *make_sub_restrictinfos(Expr *clause,
29                                            bool is_pushed_down);
30 static RestrictInfo *join_clause_is_redundant(PlannerInfo *root,
31                                                  RestrictInfo *rinfo,
32                                                  List *reference_list,
33                                                  bool isouterjoin);
34
35
36 /*
37  * make_restrictinfo
38  *
39  * Build a RestrictInfo node containing the given subexpression.
40  *
41  * The is_pushed_down flag must be supplied by the caller.
42  * required_relids can be NULL, in which case it defaults to the
43  * actual clause contents (i.e., clause_relids).
44  *
45  * We initialize fields that depend only on the given subexpression, leaving
46  * others that depend on context (or may never be needed at all) to be filled
47  * later.
48  */
49 RestrictInfo *
50 make_restrictinfo(Expr *clause, bool is_pushed_down, Relids required_relids)
51 {
52         /*
53          * If it's an OR clause, build a modified copy with RestrictInfos
54          * inserted above each subclause of the top-level AND/OR structure.
55          */
56         if (or_clause((Node *) clause))
57                 return (RestrictInfo *) make_sub_restrictinfos(clause, is_pushed_down);
58
59         /* Shouldn't be an AND clause, else AND/OR flattening messed up */
60         Assert(!and_clause((Node *) clause));
61
62         return make_restrictinfo_internal(clause, NULL, is_pushed_down,
63                                                                           required_relids);
64 }
65
66 /*
67  * make_restrictinfo_from_bitmapqual
68  *
69  * Given the bitmapqual Path structure for a bitmap indexscan, generate
70  * RestrictInfo node(s) equivalent to the condition represented by the
71  * indexclauses of the Path structure.
72  *
73  * The result is a List since we might need to return multiple RestrictInfos.
74  *
75  * To do this through the normal make_restrictinfo() API, callers would have
76  * to strip off the RestrictInfo nodes present in the indexclauses lists, and
77  * then make_restrictinfo() would have to build new ones.  It's better to have
78  * a specialized routine to allow sharing of RestrictInfos.
79  */
80 List *
81 make_restrictinfo_from_bitmapqual(Path *bitmapqual, bool is_pushed_down)
82 {
83         List       *result;
84
85         if (IsA(bitmapqual, BitmapAndPath))
86         {
87                 BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
88                 ListCell   *l;
89
90                 result = NIL;
91                 foreach(l, apath->bitmapquals)
92                 {
93                         List       *sublist;
94
95                         sublist = make_restrictinfo_from_bitmapqual((Path *) lfirst(l),
96                                                                                                                 is_pushed_down);
97                         result = list_concat(result, sublist);
98                 }
99         }
100         else if (IsA(bitmapqual, BitmapOrPath))
101         {
102                 BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
103                 List       *withris = NIL;
104                 List       *withoutris = NIL;
105                 ListCell   *l;
106
107                 foreach(l, opath->bitmapquals)
108                 {
109                         List       *sublist;
110
111                         sublist = make_restrictinfo_from_bitmapqual((Path *) lfirst(l),
112                                                                                                                 is_pushed_down);
113                         if (sublist == NIL)
114                         {
115                                 /* constant TRUE input yields constant TRUE OR result */
116                                 /* (though this probably cannot happen) */
117                                 return NIL;
118                         }
119                         /* Create AND subclause with RestrictInfos */
120                         withris = lappend(withris, make_ands_explicit(sublist));
121                         /* And one without */
122                         sublist = get_actual_clauses(sublist);
123                         withoutris = lappend(withoutris, make_ands_explicit(sublist));
124                 }
125                 /* Here's the magic part not available to outside callers */
126                 result =
127                         list_make1(make_restrictinfo_internal(make_orclause(withoutris),
128                                                                                                   make_orclause(withris),
129                                                                                                   is_pushed_down,
130                                                                                                   NULL));
131         }
132         else if (IsA(bitmapqual, IndexPath))
133         {
134                 IndexPath *ipath = (IndexPath *) bitmapqual;
135
136                 result = list_copy(ipath->indexclauses);
137         }
138         else
139         {
140                 elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
141                 result = NIL;                   /* keep compiler quiet */
142         }
143
144         return result;
145 }
146
147 /*
148  * make_restrictinfo_internal
149  *
150  * Common code for the main entry points and the recursive cases.
151  */
152 static RestrictInfo *
153 make_restrictinfo_internal(Expr *clause, Expr *orclause,
154                                                    bool is_pushed_down, Relids required_relids)
155 {
156         RestrictInfo *restrictinfo = makeNode(RestrictInfo);
157
158         restrictinfo->clause = clause;
159         restrictinfo->orclause = orclause;
160         restrictinfo->is_pushed_down = is_pushed_down;
161         restrictinfo->can_join = false;         /* may get set below */
162
163         /*
164          * If it's a binary opclause, set up left/right relids info. In any
165          * case set up the total clause relids info.
166          */
167         if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
168         {
169                 restrictinfo->left_relids = pull_varnos(get_leftop(clause));
170                 restrictinfo->right_relids = pull_varnos(get_rightop(clause));
171
172                 restrictinfo->clause_relids = bms_union(restrictinfo->left_relids,
173                                                                                          restrictinfo->right_relids);
174
175                 /*
176                  * Does it look like a normal join clause, i.e., a binary operator
177                  * relating expressions that come from distinct relations? If so
178                  * we might be able to use it in a join algorithm.      Note that this
179                  * is a purely syntactic test that is made regardless of context.
180                  */
181                 if (!bms_is_empty(restrictinfo->left_relids) &&
182                         !bms_is_empty(restrictinfo->right_relids) &&
183                         !bms_overlap(restrictinfo->left_relids,
184                                                  restrictinfo->right_relids))
185                         restrictinfo->can_join = true;
186         }
187         else
188         {
189                 /* Not a binary opclause, so mark left/right relid sets as empty */
190                 restrictinfo->left_relids = NULL;
191                 restrictinfo->right_relids = NULL;
192                 /* and get the total relid set the hard way */
193                 restrictinfo->clause_relids = pull_varnos((Node *) clause);
194         }
195
196         /* required_relids defaults to clause_relids */
197         if (required_relids != NULL)
198                 restrictinfo->required_relids = required_relids;
199         else
200                 restrictinfo->required_relids = restrictinfo->clause_relids;
201
202         /*
203          * Fill in all the cacheable fields with "not yet set" markers. None
204          * of these will be computed until/unless needed.  Note in particular
205          * that we don't mark a binary opclause as mergejoinable or
206          * hashjoinable here; that happens only if it appears in the right
207          * context (top level of a joinclause list).
208          */
209         restrictinfo->eval_cost.startup = -1;
210         restrictinfo->this_selec = -1;
211
212         restrictinfo->mergejoinoperator = InvalidOid;
213         restrictinfo->left_sortop = InvalidOid;
214         restrictinfo->right_sortop = InvalidOid;
215
216         restrictinfo->left_pathkey = NIL;
217         restrictinfo->right_pathkey = NIL;
218
219         restrictinfo->left_mergescansel = -1;
220         restrictinfo->right_mergescansel = -1;
221
222         restrictinfo->hashjoinoperator = InvalidOid;
223
224         restrictinfo->left_bucketsize = -1;
225         restrictinfo->right_bucketsize = -1;
226
227         return restrictinfo;
228 }
229
230 /*
231  * Recursively insert sub-RestrictInfo nodes into a boolean expression.
232  *
233  * We put RestrictInfos above simple (non-AND/OR) clauses and above
234  * sub-OR clauses, but not above sub-AND clauses, because there's no need.
235  * This may seem odd but it is closely related to the fact that we use
236  * implicit-AND lists at top level of RestrictInfo lists.  Only ORs and
237  * simple clauses are valid RestrictInfos.
238  */
239 static Expr *
240 make_sub_restrictinfos(Expr *clause, bool is_pushed_down)
241 {
242         if (or_clause((Node *) clause))
243         {
244                 List       *orlist = NIL;
245                 ListCell   *temp;
246
247                 foreach(temp, ((BoolExpr *) clause)->args)
248                         orlist = lappend(orlist,
249                                                          make_sub_restrictinfos(lfirst(temp),
250                                                                                                         is_pushed_down));
251                 return (Expr *) make_restrictinfo_internal(clause,
252                                                                                                    make_orclause(orlist),
253                                                                                                    is_pushed_down,
254                                                                                                    NULL);
255         }
256         else if (and_clause((Node *) clause))
257         {
258                 List       *andlist = NIL;
259                 ListCell   *temp;
260
261                 foreach(temp, ((BoolExpr *) clause)->args)
262                         andlist = lappend(andlist,
263                                                           make_sub_restrictinfos(lfirst(temp),
264                                                                                                          is_pushed_down));
265                 return make_andclause(andlist);
266         }
267         else
268                 return (Expr *) make_restrictinfo_internal(clause,
269                                                                                                    NULL,
270                                                                                                    is_pushed_down,
271                                                                                                    NULL);
272 }
273
274 /*
275  * restriction_is_or_clause
276  *
277  * Returns t iff the restrictinfo node contains an 'or' clause.
278  */
279 bool
280 restriction_is_or_clause(RestrictInfo *restrictinfo)
281 {
282         if (restrictinfo->orclause != NULL)
283                 return true;
284         else
285                 return false;
286 }
287
288 /*
289  * get_actual_clauses
290  *
291  * Returns a list containing the bare clauses from 'restrictinfo_list'.
292  */
293 List *
294 get_actual_clauses(List *restrictinfo_list)
295 {
296         List       *result = NIL;
297         ListCell   *temp;
298
299         foreach(temp, restrictinfo_list)
300         {
301                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(temp);
302
303                 Assert(IsA(rinfo, RestrictInfo));
304
305                 result = lappend(result, rinfo->clause);
306         }
307         return result;
308 }
309
310 /*
311  * get_actual_join_clauses
312  *
313  * Extract clauses from 'restrictinfo_list', separating those that
314  * syntactically match the join level from those that were pushed down.
315  */
316 void
317 get_actual_join_clauses(List *restrictinfo_list,
318                                                 List **joinquals, List **otherquals)
319 {
320         ListCell   *temp;
321
322         *joinquals = NIL;
323         *otherquals = NIL;
324
325         foreach(temp, restrictinfo_list)
326         {
327                 RestrictInfo *clause = (RestrictInfo *) lfirst(temp);
328
329                 if (clause->is_pushed_down)
330                         *otherquals = lappend(*otherquals, clause->clause);
331                 else
332                         *joinquals = lappend(*joinquals, clause->clause);
333         }
334 }
335
336 /*
337  * remove_redundant_join_clauses
338  *
339  * Given a list of RestrictInfo clauses that are to be applied in a join,
340  * remove any duplicate or redundant clauses.
341  *
342  * We must eliminate duplicates when forming the restrictlist for a joinrel,
343  * since we will see many of the same clauses arriving from both input
344  * relations. Also, if a clause is a mergejoinable clause, it's possible that
345  * it is redundant with previous clauses (see optimizer/README for
346  * discussion). We detect that case and omit the redundant clause from the
347  * result list.
348  *
349  * The result is a fresh List, but it points to the same member nodes
350  * as were in the input.
351  */
352 List *
353 remove_redundant_join_clauses(PlannerInfo *root, List *restrictinfo_list,
354                                                           bool isouterjoin)
355 {
356         List       *result = NIL;
357         ListCell   *item;
358         QualCost        cost;
359
360         /*
361          * If there are any redundant clauses, we want to eliminate the ones
362          * that are more expensive in favor of the ones that are less so. Run
363          * cost_qual_eval() to ensure the eval_cost fields are set up.
364          */
365         cost_qual_eval(&cost, restrictinfo_list);
366
367         /*
368          * We don't have enough knowledge yet to be able to estimate the
369          * number of times a clause might be evaluated, so it's hard to weight
370          * the startup and per-tuple costs appropriately.  For now just weight
371          * 'em the same.
372          */
373 #define CLAUSECOST(r)  ((r)->eval_cost.startup + (r)->eval_cost.per_tuple)
374
375         foreach(item, restrictinfo_list)
376         {
377                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
378                 RestrictInfo *prevrinfo;
379
380                 /* is it redundant with any prior clause? */
381                 prevrinfo = join_clause_is_redundant(root, rinfo, result, isouterjoin);
382                 if (prevrinfo == NULL)
383                 {
384                         /* no, so add it to result list */
385                         result = lappend(result, rinfo);
386                 }
387                 else if (CLAUSECOST(rinfo) < CLAUSECOST(prevrinfo))
388                 {
389                         /* keep this one, drop the previous one */
390                         result = list_delete_ptr(result, prevrinfo);
391                         result = lappend(result, rinfo);
392                 }
393                 /* else, drop this one */
394         }
395
396         return result;
397 }
398
399 /*
400  * select_nonredundant_join_clauses
401  *
402  * Given a list of RestrictInfo clauses that are to be applied in a join,
403  * select the ones that are not redundant with any clause in the
404  * reference_list.
405  *
406  * This is similar to remove_redundant_join_clauses, but we are looking for
407  * redundancies with a separate list of clauses (i.e., clauses that have
408  * already been applied below the join itself).
409  *
410  * Note that we assume the given restrictinfo_list has already been checked
411  * for local redundancies, so we don't check again.
412  */
413 List *
414 select_nonredundant_join_clauses(PlannerInfo *root,
415                                                                  List *restrictinfo_list,
416                                                                  List *reference_list,
417                                                                  bool isouterjoin)
418 {
419         List       *result = NIL;
420         ListCell   *item;
421
422         foreach(item, restrictinfo_list)
423         {
424                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
425
426                 /* drop it if redundant with any reference clause */
427                 if (join_clause_is_redundant(root, rinfo, reference_list, isouterjoin) != NULL)
428                         continue;
429
430                 /* otherwise, add it to result list */
431                 result = lappend(result, rinfo);
432         }
433
434         return result;
435 }
436
437 /*
438  * join_clause_is_redundant
439  *              If rinfo is redundant with any clause in reference_list,
440  *              return one such clause; otherwise return NULL.
441  *
442  * This is the guts of both remove_redundant_join_clauses and
443  * select_nonredundant_join_clauses.  See the docs above for motivation.
444  *
445  * We can detect redundant mergejoinable clauses very cheaply by using their
446  * left and right pathkeys, which uniquely identify the sets of equijoined
447  * variables in question.  All the members of a pathkey set that are in the
448  * left relation have already been forced to be equal; likewise for those in
449  * the right relation.  So, we need to have only one clause that checks
450  * equality between any set member on the left and any member on the right;
451  * by transitivity, all the rest are then equal.
452  *
453  * However, clauses that are of the form "var expr = const expr" cannot be
454  * eliminated as redundant.  This is because when there are const expressions
455  * in a pathkey set, generate_implied_equalities() suppresses "var = var"
456  * clauses in favor of "var = const" clauses.  We cannot afford to drop any
457  * of the latter, even though they might seem redundant by the pathkey
458  * membership test.
459  *
460  * Weird special case: if we have two clauses that seem redundant
461  * except one is pushed down into an outer join and the other isn't,
462  * then they're not really redundant, because one constrains the
463  * joined rows after addition of null fill rows, and the other doesn't.
464  */
465 static RestrictInfo *
466 join_clause_is_redundant(PlannerInfo *root,
467                                                  RestrictInfo *rinfo,
468                                                  List *reference_list,
469                                                  bool isouterjoin)
470 {
471         ListCell   *refitem;
472
473         /* always consider exact duplicates redundant */
474         foreach(refitem, reference_list)
475         {
476                 RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
477
478                 if (equal(rinfo, refrinfo))
479                         return refrinfo;
480         }
481
482         /* check for redundant merge clauses */
483         if (rinfo->mergejoinoperator != InvalidOid)
484         {
485                 /* do the cheap test first: is it a "var = const" clause? */
486                 if (bms_is_empty(rinfo->left_relids) ||
487                         bms_is_empty(rinfo->right_relids))
488                         return NULL;            /* var = const, so not redundant */
489
490                 cache_mergeclause_pathkeys(root, rinfo);
491
492                 foreach(refitem, reference_list)
493                 {
494                         RestrictInfo *refrinfo = (RestrictInfo *) lfirst(refitem);
495
496                         if (refrinfo->mergejoinoperator != InvalidOid)
497                         {
498                                 cache_mergeclause_pathkeys(root, refrinfo);
499
500                                 if (rinfo->left_pathkey == refrinfo->left_pathkey &&
501                                         rinfo->right_pathkey == refrinfo->right_pathkey &&
502                                         (rinfo->is_pushed_down == refrinfo->is_pushed_down ||
503                                          !isouterjoin))
504                                 {
505                                         /* Yup, it's redundant */
506                                         return refrinfo;
507                                 }
508                         }
509                 }
510         }
511
512         /* otherwise, not redundant */
513         return NULL;
514 }