1 /*-------------------------------------------------------------------------
4 * Routines for preprocessing qualification expressions
7 * The parser regards AND and OR as purely binary operators, so a qual like
8 * (A = 1) OR (A = 2) OR (A = 3) ...
9 * will produce a nested parsetree
10 * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
11 * In reality, the optimizer and executor regard AND and OR as N-argument
12 * operators, so this tree can be flattened to
13 * (OR (A = 1) (A = 2) (A = 3) ...)
15 * Formerly, this module was responsible for doing the initial flattening,
16 * but now we leave it to eval_const_expressions to do that since it has to
17 * make a complete pass over the expression tree anyway. Instead, we just
18 * have to ensure that our manipulations preserve AND/OR flatness.
19 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
20 * tree after local transformations that might introduce nested AND/ORs.
23 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
28 * src/backend/optimizer/prep/prepqual.c
30 *-------------------------------------------------------------------------
35 #include "nodes/makefuncs.h"
36 #include "optimizer/clauses.h"
37 #include "optimizer/prep.h"
38 #include "utils/lsyscache.h"
41 static List *pull_ands(List *andlist);
42 static List *pull_ors(List *orlist);
43 static Expr *find_duplicate_ors(Expr *qual);
44 static Expr *process_duplicate_ors(List *orlist);
49 * Negate a Boolean expression.
51 * Input is a clause to be negated (e.g., the argument of a NOT clause).
52 * Returns a new clause equivalent to the negation of the given clause.
54 * Although this can be invoked on its own, it's mainly intended as a helper
55 * for eval_const_expressions(), and that context drives several design
56 * decisions. In particular, if the input is already AND/OR flat, we must
57 * preserve that property. We also don't bother to recurse in situations
58 * where we can assume that lower-level executions of eval_const_expressions
59 * would already have simplified sub-clauses of the input.
61 * The difference between this and a simple make_notclause() is that this
62 * tries to get rid of the NOT node by logical simplification. It's clearly
63 * always a win if the NOT node can be eliminated altogether. However, our
64 * use of DeMorgan's laws could result in having more NOT nodes rather than
65 * fewer. We do that unconditionally anyway, because in WHERE clauses it's
66 * important to expose as much top-level AND/OR structure as possible.
67 * Also, eliminating an intermediate NOT may allow us to flatten two levels
68 * of AND or OR together that we couldn't have otherwise. Finally, one of
69 * the motivations for doing this is to ensure that logically equivalent
70 * expressions will be seen as physically equal(), so we should always apply
71 * the same transformations.
74 negate_clause(Node *node)
76 if (node == NULL) /* should not happen */
77 elog(ERROR, "can't negate an empty subexpression");
78 switch (nodeTag(node))
82 Const *c = (Const *) node;
84 /* NOT NULL is still NULL */
86 return makeBoolConst(false, true);
87 /* otherwise pretty easy */
88 return makeBoolConst(!DatumGetBool(c->constvalue), false);
94 * Negate operator if possible: (NOT (< A B)) => (>= A B)
96 OpExpr *opexpr = (OpExpr *) node;
97 Oid negator = get_negator(opexpr->opno);
101 OpExpr *newopexpr = makeNode(OpExpr);
103 newopexpr->opno = negator;
104 newopexpr->opfuncid = InvalidOid;
105 newopexpr->opresulttype = opexpr->opresulttype;
106 newopexpr->opretset = opexpr->opretset;
107 newopexpr->args = opexpr->args;
108 newopexpr->location = opexpr->location;
109 return (Node *) newopexpr;
113 case T_ScalarArrayOpExpr:
116 * Negate a ScalarArrayOpExpr if its operator has a negator;
117 * for example x = ANY (list) becomes x <> ALL (list)
119 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
120 Oid negator = get_negator(saopexpr->opno);
124 ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
126 newopexpr->opno = negator;
127 newopexpr->opfuncid = InvalidOid;
128 newopexpr->useOr = !saopexpr->useOr;
129 newopexpr->args = saopexpr->args;
130 newopexpr->location = saopexpr->location;
131 return (Node *) newopexpr;
137 BoolExpr *expr = (BoolExpr *) node;
139 switch (expr->boolop)
141 /*--------------------
142 * Apply DeMorgan's Laws:
143 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
144 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
145 * i.e., swap AND for OR and negate each subclause.
147 * If the input is already AND/OR flat and has no NOT
148 * directly above AND or OR, this transformation preserves
149 * those properties. For example, if no direct child of
150 * the given AND clause is an AND or a NOT-above-OR, then
151 * the recursive calls of negate_clause() can't return any
152 * OR clauses. So we needn't call pull_ors() before
153 * building a new OR clause. Similarly for the OR case.
154 *--------------------
161 foreach(lc, expr->args)
163 nargs = lappend(nargs,
164 negate_clause(lfirst(lc)));
166 return (Node *) make_orclause(nargs);
174 foreach(lc, expr->args)
176 nargs = lappend(nargs,
177 negate_clause(lfirst(lc)));
179 return (Node *) make_andclause(nargs);
184 * NOT underneath NOT: they cancel. We assume the
185 * input is already simplified, so no need to recurse.
187 return (Node *) linitial(expr->args);
189 elog(ERROR, "unrecognized boolop: %d",
197 NullTest *expr = (NullTest *) node;
200 * In the rowtype case, the two flavors of NullTest are *not*
201 * logical inverses, so we can't simplify. But it does work
202 * for scalar datatypes.
206 NullTest *newexpr = makeNode(NullTest);
208 newexpr->arg = expr->arg;
209 newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
210 IS_NOT_NULL : IS_NULL);
211 newexpr->argisrow = expr->argisrow;
212 return (Node *) newexpr;
218 BooleanTest *expr = (BooleanTest *) node;
219 BooleanTest *newexpr = makeNode(BooleanTest);
221 newexpr->arg = expr->arg;
222 switch (expr->booltesttype)
225 newexpr->booltesttype = IS_NOT_TRUE;
228 newexpr->booltesttype = IS_TRUE;
231 newexpr->booltesttype = IS_NOT_FALSE;
234 newexpr->booltesttype = IS_FALSE;
237 newexpr->booltesttype = IS_NOT_UNKNOWN;
240 newexpr->booltesttype = IS_UNKNOWN;
243 elog(ERROR, "unrecognized booltesttype: %d",
244 (int) expr->booltesttype);
247 return (Node *) newexpr;
251 /* else fall through */
256 * Otherwise we don't know how to simplify this, so just tack on an
259 return (Node *) make_notclause((Expr *) node);
265 * Convert a qualification expression to the most useful form.
267 * The name of this routine is a holdover from a time when it would try to
268 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
269 * Eventually, we recognized that that had more theoretical purity than
270 * actual usefulness, and so now the transformation doesn't involve any
271 * notion of reaching a canonical form.
273 * NOTE: we assume the input has already been through eval_const_expressions
274 * and therefore possesses AND/OR flatness. Formerly this function included
275 * its own flattening logic, but that requires a useless extra pass over the
278 * Returns the modified qualification.
281 canonicalize_qual(Expr *qual)
285 /* Quick exit for empty qual */
290 * Pull up redundant subclauses in OR-of-AND trees. We do this only
291 * within the top-level AND/OR structure; there's no point in looking
294 newqual = find_duplicate_ors(qual);
302 * Recursively flatten nested AND clauses into a single and-clause list.
304 * Input is the arglist of an AND clause.
305 * Returns the rebuilt arglist (note original list structure is not touched).
308 pull_ands(List *andlist)
310 List *out_list = NIL;
313 foreach(arg, andlist)
315 Node *subexpr = (Node *) lfirst(arg);
318 * Note: we can destructively concat the subexpression's arglist
319 * because we know the recursive invocation of pull_ands will have
320 * built a new arglist not shared with any other expr. Otherwise we'd
321 * need a list_copy here.
323 if (and_clause(subexpr))
324 out_list = list_concat(out_list,
325 pull_ands(((BoolExpr *) subexpr)->args));
327 out_list = lappend(out_list, subexpr);
334 * Recursively flatten nested OR clauses into a single or-clause list.
336 * Input is the arglist of an OR clause.
337 * Returns the rebuilt arglist (note original list structure is not touched).
340 pull_ors(List *orlist)
342 List *out_list = NIL;
347 Node *subexpr = (Node *) lfirst(arg);
350 * Note: we can destructively concat the subexpression's arglist
351 * because we know the recursive invocation of pull_ors will have
352 * built a new arglist not shared with any other expr. Otherwise we'd
353 * need a list_copy here.
355 if (or_clause(subexpr))
356 out_list = list_concat(out_list,
357 pull_ors(((BoolExpr *) subexpr)->args));
359 out_list = lappend(out_list, subexpr);
365 /*--------------------
366 * The following code attempts to apply the inverse OR distributive law:
367 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
368 * That is, locate OR clauses in which every subclause contains an
369 * identical term, and pull out the duplicated terms.
371 * This may seem like a fairly useless activity, but it turns out to be
372 * applicable to many machine-generated queries, and there are also queries
373 * in some of the TPC benchmarks that need it. This was in fact almost the
374 * sole useful side-effect of the old prepqual code that tried to force
375 * the query into canonical AND-of-ORs form: the canonical equivalent of
376 * ((A AND B) OR (A AND C))
378 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
379 * which the code was able to simplify to
380 * (A AND (A OR C) AND (B OR A) AND (B OR C))
381 * thus successfully extracting the common condition A --- but at the cost
382 * of cluttering the qual with many redundant clauses.
383 *--------------------
388 * Given a qualification tree with the NOTs pushed down, search for
389 * OR clauses to which the inverse OR distributive law might apply.
390 * Only the top-level AND/OR structure is searched.
392 * Returns the modified qualification. AND/OR flatness is preserved.
395 find_duplicate_ors(Expr *qual)
397 if (or_clause((Node *) qual))
403 foreach(temp, ((BoolExpr *) qual)->args)
404 orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
407 * Don't need pull_ors() since this routine will never introduce an OR
408 * where there wasn't one before.
410 return process_duplicate_ors(orlist);
412 else if (and_clause((Node *) qual))
418 foreach(temp, ((BoolExpr *) qual)->args)
419 andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
420 /* Flatten any ANDs introduced just below here */
421 andlist = pull_ands(andlist);
422 /* The AND list can't get shorter, so result is always an AND */
423 return make_andclause(andlist);
430 * process_duplicate_ors
431 * Given a list of exprs which are ORed together, try to apply
432 * the inverse OR distributive law.
434 * Returns the resulting expression (could be an AND clause, an OR
435 * clause, or maybe even a single subexpression).
438 process_duplicate_ors(List *orlist)
440 List *reference = NIL;
441 int num_subclauses = 0;
447 return NULL; /* probably can't happen */
448 if (list_length(orlist) == 1) /* single-expression OR (can this
450 return linitial(orlist);
453 * Choose the shortest AND clause as the reference list --- obviously, any
454 * subclause not in this clause isn't in all the clauses. If we find a
455 * clause that's not an AND, we can treat it as a one-element AND clause,
456 * which necessarily wins as shortest.
458 foreach(temp, orlist)
460 Expr *clause = (Expr *) lfirst(temp);
462 if (and_clause((Node *) clause))
464 List *subclauses = ((BoolExpr *) clause)->args;
465 int nclauses = list_length(subclauses);
467 if (reference == NIL || nclauses < num_subclauses)
469 reference = subclauses;
470 num_subclauses = nclauses;
475 reference = list_make1(clause);
481 * Just in case, eliminate any duplicates in the reference list.
483 reference = list_union(NIL, reference);
486 * Check each element of the reference list to see if it's in all the OR
487 * clauses. Build a new list of winning clauses.
490 foreach(temp, reference)
492 Expr *refclause = (Expr *) lfirst(temp);
496 foreach(temp2, orlist)
498 Expr *clause = (Expr *) lfirst(temp2);
500 if (and_clause((Node *) clause))
502 if (!list_member(((BoolExpr *) clause)->args, refclause))
510 if (!equal(refclause, clause))
519 winners = lappend(winners, refclause);
523 * If no winners, we can't transform the OR
526 return make_orclause(orlist);
529 * Generate new OR list consisting of the remaining sub-clauses.
531 * If any clause degenerates to empty, then we have a situation like (A
532 * AND B) OR (A), which can be reduced to just A --- that is, the
533 * additional conditions in other arms of the OR are irrelevant.
535 * Note that because we use list_difference, any multiple occurrences of a
536 * winning clause in an AND sub-clause will be removed automatically.
539 foreach(temp, orlist)
541 Expr *clause = (Expr *) lfirst(temp);
543 if (and_clause((Node *) clause))
545 List *subclauses = ((BoolExpr *) clause)->args;
547 subclauses = list_difference(subclauses, winners);
548 if (subclauses != NIL)
550 if (list_length(subclauses) == 1)
551 neworlist = lappend(neworlist, linitial(subclauses));
553 neworlist = lappend(neworlist, make_andclause(subclauses));
557 neworlist = NIL; /* degenerate case, see above */
563 if (!list_member(winners, clause))
564 neworlist = lappend(neworlist, clause);
567 neworlist = NIL; /* degenerate case, see above */
574 * Append reduced OR to the winners list, if it's not degenerate, handling
575 * the special case of one element correctly (can that really happen?).
576 * Also be careful to maintain AND/OR flatness in case we pulled up a
579 if (neworlist != NIL)
581 if (list_length(neworlist) == 1)
582 winners = lappend(winners, linitial(neworlist));
584 winners = lappend(winners, make_orclause(pull_ors(neworlist)));
588 * And return the constructed AND clause, again being wary of a single
589 * element and AND/OR flatness.
591 if (list_length(winners) == 1)
592 return (Expr *) linitial(winners);
594 return make_andclause(pull_ands(winners));