]> granicus.if.org Git - postgresql/blob - src/backend/optimizer/prep/prepqual.c
Stamp copyrights for year 2011.
[postgresql] / src / backend / optimizer / prep / prepqual.c
1 /*-------------------------------------------------------------------------
2  *
3  * prepqual.c
4  *        Routines for preprocessing qualification expressions
5  *
6  *
7  * The parser regards AND and OR as purely binary operators, so a qual like
8  *              (A = 1) OR (A = 2) OR (A = 3) ...
9  * will produce a nested parsetree
10  *              (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
11  * In reality, the optimizer and executor regard AND and OR as N-argument
12  * operators, so this tree can be flattened to
13  *              (OR (A = 1) (A = 2) (A = 3) ...)
14  *
15  * Formerly, this module was responsible for doing the initial flattening,
16  * but now we leave it to eval_const_expressions to do that since it has to
17  * make a complete pass over the expression tree anyway.  Instead, we just
18  * have to ensure that our manipulations preserve AND/OR flatness.
19  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
20  * tree after local transformations that might introduce nested AND/ORs.
21  *
22  *
23  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
24  * Portions Copyright (c) 1994, Regents of the University of California
25  *
26  *
27  * IDENTIFICATION
28  *        src/backend/optimizer/prep/prepqual.c
29  *
30  *-------------------------------------------------------------------------
31  */
32
33 #include "postgres.h"
34
35 #include "nodes/makefuncs.h"
36 #include "optimizer/clauses.h"
37 #include "optimizer/prep.h"
38 #include "utils/lsyscache.h"
39
40
41 static List *pull_ands(List *andlist);
42 static List *pull_ors(List *orlist);
43 static Expr *find_duplicate_ors(Expr *qual);
44 static Expr *process_duplicate_ors(List *orlist);
45
46
47 /*
48  * negate_clause
49  *        Negate a Boolean expression.
50  *
51  * Input is a clause to be negated (e.g., the argument of a NOT clause).
52  * Returns a new clause equivalent to the negation of the given clause.
53  *
54  * Although this can be invoked on its own, it's mainly intended as a helper
55  * for eval_const_expressions(), and that context drives several design
56  * decisions.  In particular, if the input is already AND/OR flat, we must
57  * preserve that property.  We also don't bother to recurse in situations
58  * where we can assume that lower-level executions of eval_const_expressions
59  * would already have simplified sub-clauses of the input.
60  *
61  * The difference between this and a simple make_notclause() is that this
62  * tries to get rid of the NOT node by logical simplification.  It's clearly
63  * always a win if the NOT node can be eliminated altogether.  However, our
64  * use of DeMorgan's laws could result in having more NOT nodes rather than
65  * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
66  * important to expose as much top-level AND/OR structure as possible.
67  * Also, eliminating an intermediate NOT may allow us to flatten two levels
68  * of AND or OR together that we couldn't have otherwise.  Finally, one of
69  * the motivations for doing this is to ensure that logically equivalent
70  * expressions will be seen as physically equal(), so we should always apply
71  * the same transformations.
72  */
73 Node *
74 negate_clause(Node *node)
75 {
76         if (node == NULL)                       /* should not happen */
77                 elog(ERROR, "can't negate an empty subexpression");
78         switch (nodeTag(node))
79         {
80                 case T_Const:
81                         {
82                                 Const      *c = (Const *) node;
83
84                                 /* NOT NULL is still NULL */
85                                 if (c->constisnull)
86                                         return makeBoolConst(false, true);
87                                 /* otherwise pretty easy */
88                                 return makeBoolConst(!DatumGetBool(c->constvalue), false);
89                         }
90                         break;
91                 case T_OpExpr:
92                         {
93                                 /*
94                                  * Negate operator if possible: (NOT (< A B)) => (>= A B)
95                                  */
96                                 OpExpr     *opexpr = (OpExpr *) node;
97                                 Oid                     negator = get_negator(opexpr->opno);
98
99                                 if (negator)
100                                 {
101                                         OpExpr     *newopexpr = makeNode(OpExpr);
102
103                                         newopexpr->opno = negator;
104                                         newopexpr->opfuncid = InvalidOid;
105                                         newopexpr->opresulttype = opexpr->opresulttype;
106                                         newopexpr->opretset = opexpr->opretset;
107                                         newopexpr->args = opexpr->args;
108                                         newopexpr->location = opexpr->location;
109                                         return (Node *) newopexpr;
110                                 }
111                         }
112                         break;
113                 case T_ScalarArrayOpExpr:
114                         {
115                                 /*
116                                  * Negate a ScalarArrayOpExpr if its operator has a negator;
117                                  * for example x = ANY (list) becomes x <> ALL (list)
118                                  */
119                                 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
120                                 Oid                     negator = get_negator(saopexpr->opno);
121
122                                 if (negator)
123                                 {
124                                         ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
125
126                                         newopexpr->opno = negator;
127                                         newopexpr->opfuncid = InvalidOid;
128                                         newopexpr->useOr = !saopexpr->useOr;
129                                         newopexpr->args = saopexpr->args;
130                                         newopexpr->location = saopexpr->location;
131                                         return (Node *) newopexpr;
132                                 }
133                         }
134                         break;
135                 case T_BoolExpr:
136                         {
137                                 BoolExpr   *expr = (BoolExpr *) node;
138
139                                 switch (expr->boolop)
140                                 {
141                                         /*--------------------
142                                          * Apply DeMorgan's Laws:
143                                          *              (NOT (AND A B)) => (OR (NOT A) (NOT B))
144                                          *              (NOT (OR A B))  => (AND (NOT A) (NOT B))
145                                          * i.e., swap AND for OR and negate each subclause.
146                                          *
147                                          * If the input is already AND/OR flat and has no NOT
148                                          * directly above AND or OR, this transformation preserves
149                                          * those properties.  For example, if no direct child of
150                                          * the given AND clause is an AND or a NOT-above-OR, then
151                                          * the recursive calls of negate_clause() can't return any
152                                          * OR clauses.  So we needn't call pull_ors() before
153                                          * building a new OR clause.  Similarly for the OR case.
154                                          *--------------------
155                                          */
156                                         case AND_EXPR:
157                                                 {
158                                                         List       *nargs = NIL;
159                                                         ListCell   *lc;
160
161                                                         foreach(lc, expr->args)
162                                                         {
163                                                                 nargs = lappend(nargs,
164                                                                                                 negate_clause(lfirst(lc)));
165                                                         }
166                                                         return (Node *) make_orclause(nargs);
167                                                 }
168                                                 break;
169                                         case OR_EXPR:
170                                                 {
171                                                         List       *nargs = NIL;
172                                                         ListCell   *lc;
173
174                                                         foreach(lc, expr->args)
175                                                         {
176                                                                 nargs = lappend(nargs,
177                                                                                                 negate_clause(lfirst(lc)));
178                                                         }
179                                                         return (Node *) make_andclause(nargs);
180                                                 }
181                                                 break;
182                                         case NOT_EXPR:
183                                                 /*
184                                                  * NOT underneath NOT: they cancel.  We assume the
185                                                  * input is already simplified, so no need to recurse.
186                                                  */
187                                                 return (Node *) linitial(expr->args);
188                                         default:
189                                                 elog(ERROR, "unrecognized boolop: %d",
190                                                          (int) expr->boolop);
191                                                 break;
192                                 }
193                         }
194                         break;
195                 case T_NullTest:
196                         {
197                                 NullTest   *expr = (NullTest *) node;
198
199                                 /*
200                                  * In the rowtype case, the two flavors of NullTest are *not*
201                                  * logical inverses, so we can't simplify.  But it does work
202                                  * for scalar datatypes.
203                                  */
204                                 if (!expr->argisrow)
205                                 {
206                                         NullTest   *newexpr = makeNode(NullTest);
207
208                                         newexpr->arg = expr->arg;
209                                         newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
210                                                                                          IS_NOT_NULL : IS_NULL);
211                                         newexpr->argisrow = expr->argisrow;
212                                         return (Node *) newexpr;
213                                 }
214                         }
215                         break;
216                 case T_BooleanTest:
217                         {
218                                 BooleanTest   *expr = (BooleanTest *) node;
219                                 BooleanTest   *newexpr = makeNode(BooleanTest);
220
221                                 newexpr->arg = expr->arg;
222                                 switch (expr->booltesttype)
223                                 {
224                                         case IS_TRUE:
225                                                 newexpr->booltesttype = IS_NOT_TRUE;
226                                                 break;
227                                         case IS_NOT_TRUE:
228                                                 newexpr->booltesttype = IS_TRUE;
229                                                 break;
230                                         case IS_FALSE:
231                                                 newexpr->booltesttype = IS_NOT_FALSE;
232                                                 break;
233                                         case IS_NOT_FALSE:
234                                                 newexpr->booltesttype = IS_FALSE;
235                                                 break;
236                                         case IS_UNKNOWN:
237                                                 newexpr->booltesttype = IS_NOT_UNKNOWN;
238                                                 break;
239                                         case IS_NOT_UNKNOWN:
240                                                 newexpr->booltesttype = IS_UNKNOWN;
241                                                 break;
242                                         default:
243                                                 elog(ERROR, "unrecognized booltesttype: %d",
244                                                          (int) expr->booltesttype);
245                                                 break;
246                                 }
247                                 return (Node *) newexpr;
248                         }
249                         break;
250                 default:
251                         /* else fall through */
252                         break;
253         }
254
255         /*
256          * Otherwise we don't know how to simplify this, so just tack on an
257          * explicit NOT node.
258          */
259         return (Node *) make_notclause((Expr *) node);
260 }
261
262
263 /*
264  * canonicalize_qual
265  *        Convert a qualification expression to the most useful form.
266  *
267  * The name of this routine is a holdover from a time when it would try to
268  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
269  * Eventually, we recognized that that had more theoretical purity than
270  * actual usefulness, and so now the transformation doesn't involve any
271  * notion of reaching a canonical form.
272  *
273  * NOTE: we assume the input has already been through eval_const_expressions
274  * and therefore possesses AND/OR flatness.  Formerly this function included
275  * its own flattening logic, but that requires a useless extra pass over the
276  * tree.
277  *
278  * Returns the modified qualification.
279  */
280 Expr *
281 canonicalize_qual(Expr *qual)
282 {
283         Expr       *newqual;
284
285         /* Quick exit for empty qual */
286         if (qual == NULL)
287                 return NULL;
288
289         /*
290          * Pull up redundant subclauses in OR-of-AND trees.  We do this only
291          * within the top-level AND/OR structure; there's no point in looking
292          * deeper.
293          */
294         newqual = find_duplicate_ors(qual);
295
296         return newqual;
297 }
298
299
300 /*
301  * pull_ands
302  *        Recursively flatten nested AND clauses into a single and-clause list.
303  *
304  * Input is the arglist of an AND clause.
305  * Returns the rebuilt arglist (note original list structure is not touched).
306  */
307 static List *
308 pull_ands(List *andlist)
309 {
310         List       *out_list = NIL;
311         ListCell   *arg;
312
313         foreach(arg, andlist)
314         {
315                 Node       *subexpr = (Node *) lfirst(arg);
316
317                 /*
318                  * Note: we can destructively concat the subexpression's arglist
319                  * because we know the recursive invocation of pull_ands will have
320                  * built a new arglist not shared with any other expr. Otherwise we'd
321                  * need a list_copy here.
322                  */
323                 if (and_clause(subexpr))
324                         out_list = list_concat(out_list,
325                                                                    pull_ands(((BoolExpr *) subexpr)->args));
326                 else
327                         out_list = lappend(out_list, subexpr);
328         }
329         return out_list;
330 }
331
332 /*
333  * pull_ors
334  *        Recursively flatten nested OR clauses into a single or-clause list.
335  *
336  * Input is the arglist of an OR clause.
337  * Returns the rebuilt arglist (note original list structure is not touched).
338  */
339 static List *
340 pull_ors(List *orlist)
341 {
342         List       *out_list = NIL;
343         ListCell   *arg;
344
345         foreach(arg, orlist)
346         {
347                 Node       *subexpr = (Node *) lfirst(arg);
348
349                 /*
350                  * Note: we can destructively concat the subexpression's arglist
351                  * because we know the recursive invocation of pull_ors will have
352                  * built a new arglist not shared with any other expr. Otherwise we'd
353                  * need a list_copy here.
354                  */
355                 if (or_clause(subexpr))
356                         out_list = list_concat(out_list,
357                                                                    pull_ors(((BoolExpr *) subexpr)->args));
358                 else
359                         out_list = lappend(out_list, subexpr);
360         }
361         return out_list;
362 }
363
364
365 /*--------------------
366  * The following code attempts to apply the inverse OR distributive law:
367  *              ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
368  * That is, locate OR clauses in which every subclause contains an
369  * identical term, and pull out the duplicated terms.
370  *
371  * This may seem like a fairly useless activity, but it turns out to be
372  * applicable to many machine-generated queries, and there are also queries
373  * in some of the TPC benchmarks that need it.  This was in fact almost the
374  * sole useful side-effect of the old prepqual code that tried to force
375  * the query into canonical AND-of-ORs form: the canonical equivalent of
376  *              ((A AND B) OR (A AND C))
377  * is
378  *              ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
379  * which the code was able to simplify to
380  *              (A AND (A OR C) AND (B OR A) AND (B OR C))
381  * thus successfully extracting the common condition A --- but at the cost
382  * of cluttering the qual with many redundant clauses.
383  *--------------------
384  */
385
386 /*
387  * find_duplicate_ors
388  *        Given a qualification tree with the NOTs pushed down, search for
389  *        OR clauses to which the inverse OR distributive law might apply.
390  *        Only the top-level AND/OR structure is searched.
391  *
392  * Returns the modified qualification.  AND/OR flatness is preserved.
393  */
394 static Expr *
395 find_duplicate_ors(Expr *qual)
396 {
397         if (or_clause((Node *) qual))
398         {
399                 List       *orlist = NIL;
400                 ListCell   *temp;
401
402                 /* Recurse */
403                 foreach(temp, ((BoolExpr *) qual)->args)
404                         orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
405
406                 /*
407                  * Don't need pull_ors() since this routine will never introduce an OR
408                  * where there wasn't one before.
409                  */
410                 return process_duplicate_ors(orlist);
411         }
412         else if (and_clause((Node *) qual))
413         {
414                 List       *andlist = NIL;
415                 ListCell   *temp;
416
417                 /* Recurse */
418                 foreach(temp, ((BoolExpr *) qual)->args)
419                         andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
420                 /* Flatten any ANDs introduced just below here */
421                 andlist = pull_ands(andlist);
422                 /* The AND list can't get shorter, so result is always an AND */
423                 return make_andclause(andlist);
424         }
425         else
426                 return qual;
427 }
428
429 /*
430  * process_duplicate_ors
431  *        Given a list of exprs which are ORed together, try to apply
432  *        the inverse OR distributive law.
433  *
434  * Returns the resulting expression (could be an AND clause, an OR
435  * clause, or maybe even a single subexpression).
436  */
437 static Expr *
438 process_duplicate_ors(List *orlist)
439 {
440         List       *reference = NIL;
441         int                     num_subclauses = 0;
442         List       *winners;
443         List       *neworlist;
444         ListCell   *temp;
445
446         if (orlist == NIL)
447                 return NULL;                    /* probably can't happen */
448         if (list_length(orlist) == 1)           /* single-expression OR (can this
449                                                                                  * happen?) */
450                 return linitial(orlist);
451
452         /*
453          * Choose the shortest AND clause as the reference list --- obviously, any
454          * subclause not in this clause isn't in all the clauses. If we find a
455          * clause that's not an AND, we can treat it as a one-element AND clause,
456          * which necessarily wins as shortest.
457          */
458         foreach(temp, orlist)
459         {
460                 Expr       *clause = (Expr *) lfirst(temp);
461
462                 if (and_clause((Node *) clause))
463                 {
464                         List       *subclauses = ((BoolExpr *) clause)->args;
465                         int                     nclauses = list_length(subclauses);
466
467                         if (reference == NIL || nclauses < num_subclauses)
468                         {
469                                 reference = subclauses;
470                                 num_subclauses = nclauses;
471                         }
472                 }
473                 else
474                 {
475                         reference = list_make1(clause);
476                         break;
477                 }
478         }
479
480         /*
481          * Just in case, eliminate any duplicates in the reference list.
482          */
483         reference = list_union(NIL, reference);
484
485         /*
486          * Check each element of the reference list to see if it's in all the OR
487          * clauses.  Build a new list of winning clauses.
488          */
489         winners = NIL;
490         foreach(temp, reference)
491         {
492                 Expr       *refclause = (Expr *) lfirst(temp);
493                 bool            win = true;
494                 ListCell   *temp2;
495
496                 foreach(temp2, orlist)
497                 {
498                         Expr       *clause = (Expr *) lfirst(temp2);
499
500                         if (and_clause((Node *) clause))
501                         {
502                                 if (!list_member(((BoolExpr *) clause)->args, refclause))
503                                 {
504                                         win = false;
505                                         break;
506                                 }
507                         }
508                         else
509                         {
510                                 if (!equal(refclause, clause))
511                                 {
512                                         win = false;
513                                         break;
514                                 }
515                         }
516                 }
517
518                 if (win)
519                         winners = lappend(winners, refclause);
520         }
521
522         /*
523          * If no winners, we can't transform the OR
524          */
525         if (winners == NIL)
526                 return make_orclause(orlist);
527
528         /*
529          * Generate new OR list consisting of the remaining sub-clauses.
530          *
531          * If any clause degenerates to empty, then we have a situation like (A
532          * AND B) OR (A), which can be reduced to just A --- that is, the
533          * additional conditions in other arms of the OR are irrelevant.
534          *
535          * Note that because we use list_difference, any multiple occurrences of a
536          * winning clause in an AND sub-clause will be removed automatically.
537          */
538         neworlist = NIL;
539         foreach(temp, orlist)
540         {
541                 Expr       *clause = (Expr *) lfirst(temp);
542
543                 if (and_clause((Node *) clause))
544                 {
545                         List       *subclauses = ((BoolExpr *) clause)->args;
546
547                         subclauses = list_difference(subclauses, winners);
548                         if (subclauses != NIL)
549                         {
550                                 if (list_length(subclauses) == 1)
551                                         neworlist = lappend(neworlist, linitial(subclauses));
552                                 else
553                                         neworlist = lappend(neworlist, make_andclause(subclauses));
554                         }
555                         else
556                         {
557                                 neworlist = NIL;        /* degenerate case, see above */
558                                 break;
559                         }
560                 }
561                 else
562                 {
563                         if (!list_member(winners, clause))
564                                 neworlist = lappend(neworlist, clause);
565                         else
566                         {
567                                 neworlist = NIL;        /* degenerate case, see above */
568                                 break;
569                         }
570                 }
571         }
572
573         /*
574          * Append reduced OR to the winners list, if it's not degenerate, handling
575          * the special case of one element correctly (can that really happen?).
576          * Also be careful to maintain AND/OR flatness in case we pulled up a
577          * sub-sub-OR-clause.
578          */
579         if (neworlist != NIL)
580         {
581                 if (list_length(neworlist) == 1)
582                         winners = lappend(winners, linitial(neworlist));
583                 else
584                         winners = lappend(winners, make_orclause(pull_ors(neworlist)));
585         }
586
587         /*
588          * And return the constructed AND clause, again being wary of a single
589          * element and AND/OR flatness.
590          */
591         if (list_length(winners) == 1)
592                 return (Expr *) linitial(winners);
593         else
594                 return make_andclause(pull_ands(winners));
595 }