1 /*-------------------------------------------------------------------------
4 * Routines for preprocessing qualification expressions
7 * The parser regards AND and OR as purely binary operators, so a qual like
8 * (A = 1) OR (A = 2) OR (A = 3) ...
9 * will produce a nested parsetree
10 * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
11 * In reality, the optimizer and executor regard AND and OR as N-argument
12 * operators, so this tree can be flattened to
13 * (OR (A = 1) (A = 2) (A = 3) ...)
15 * Formerly, this module was responsible for doing the initial flattening,
16 * but now we leave it to eval_const_expressions to do that since it has to
17 * make a complete pass over the expression tree anyway. Instead, we just
18 * have to ensure that our manipulations preserve AND/OR flatness.
19 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
20 * tree after local transformations that might introduce nested AND/ORs.
23 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
28 * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.51 2005/10/15 02:49:21 momjian Exp $
30 *-------------------------------------------------------------------------
35 #include "nodes/makefuncs.h"
36 #include "optimizer/clauses.h"
37 #include "optimizer/prep.h"
38 #include "utils/lsyscache.h"
41 static List *pull_ands(List *andlist);
42 static List *pull_ors(List *orlist);
43 static Expr *find_nots(Expr *qual);
44 static Expr *push_nots(Expr *qual);
45 static Expr *find_duplicate_ors(Expr *qual);
46 static Expr *process_duplicate_ors(List *orlist);
51 * Convert a qualification expression to the most useful form.
53 * The name of this routine is a holdover from a time when it would try to
54 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
55 * Eventually, we recognized that that had more theoretical purity than
56 * actual usefulness, and so now the transformation doesn't involve any
57 * notion of reaching a canonical form.
59 * NOTE: we assume the input has already been through eval_const_expressions
60 * and therefore possesses AND/OR flatness. Formerly this function included
61 * its own flattening logic, but that requires a useless extra pass over the
64 * Returns the modified qualification.
67 canonicalize_qual(Expr *qual)
71 /* Quick exit for empty qual */
76 * Push down NOTs. We do this only in the top-level boolean expression,
77 * without examining arguments of operators/functions. The main reason for
78 * doing this is to expose as much top-level AND/OR structure as we can,
79 * so there's no point in descending further.
81 newqual = find_nots(qual);
84 * Pull up redundant subclauses in OR-of-AND trees. Again, we do this
85 * only within the top-level AND/OR structure.
87 newqual = find_duplicate_ors(newqual);
95 * Recursively flatten nested AND clauses into a single and-clause list.
97 * Input is the arglist of an AND clause.
98 * Returns the rebuilt arglist (note original list structure is not touched).
101 pull_ands(List *andlist)
103 List *out_list = NIL;
106 foreach(arg, andlist)
108 Node *subexpr = (Node *) lfirst(arg);
111 * Note: we can destructively concat the subexpression's arglist
112 * because we know the recursive invocation of pull_ands will have
113 * built a new arglist not shared with any other expr. Otherwise we'd
114 * need a list_copy here.
116 if (and_clause(subexpr))
117 out_list = list_concat(out_list,
118 pull_ands(((BoolExpr *) subexpr)->args));
120 out_list = lappend(out_list, subexpr);
127 * Recursively flatten nested OR clauses into a single or-clause list.
129 * Input is the arglist of an OR clause.
130 * Returns the rebuilt arglist (note original list structure is not touched).
133 pull_ors(List *orlist)
135 List *out_list = NIL;
140 Node *subexpr = (Node *) lfirst(arg);
143 * Note: we can destructively concat the subexpression's arglist
144 * because we know the recursive invocation of pull_ors will have
145 * built a new arglist not shared with any other expr. Otherwise we'd
146 * need a list_copy here.
148 if (or_clause(subexpr))
149 out_list = list_concat(out_list,
150 pull_ors(((BoolExpr *) subexpr)->args));
152 out_list = lappend(out_list, subexpr);
160 * Traverse the qualification, looking for NOTs to take care of.
161 * For NOT clauses, apply push_nots() to try to push down the NOT.
162 * For AND and OR clause types, simply recurse. Otherwise stop
163 * recursing (we do not worry about structure below the top AND/OR tree).
165 * Returns the modified qualification. AND/OR flatness is preserved.
168 find_nots(Expr *qual)
170 if (and_clause((Node *) qual))
175 foreach(temp, ((BoolExpr *) qual)->args)
176 t_list = lappend(t_list, find_nots(lfirst(temp)));
177 return make_andclause(pull_ands(t_list));
179 else if (or_clause((Node *) qual))
184 foreach(temp, ((BoolExpr *) qual)->args)
185 t_list = lappend(t_list, find_nots(lfirst(temp)));
186 return make_orclause(pull_ors(t_list));
188 else if (not_clause((Node *) qual))
189 return push_nots(get_notclausearg(qual));
196 * Push down a NOT as far as possible.
198 * Input is an expression to be negated (e.g., the argument of a NOT clause).
199 * Returns a new qual equivalent to the negation of the given qual.
202 push_nots(Expr *qual)
204 if (is_opclause(qual))
207 * Negate an operator clause if possible: (NOT (< A B)) => (>= A B)
208 * Otherwise, retain the clause as it is (the NOT can't be pushed down
211 OpExpr *opexpr = (OpExpr *) qual;
212 Oid negator = get_negator(opexpr->opno);
215 return make_opclause(negator,
216 opexpr->opresulttype,
218 (Expr *) get_leftop(qual),
219 (Expr *) get_rightop(qual));
221 return make_notclause(qual);
223 else if (and_clause((Node *) qual))
225 /*--------------------
226 * Apply DeMorgan's Laws:
227 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
228 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
229 * i.e., swap AND for OR and negate all the subclauses.
230 *--------------------
235 foreach(temp, ((BoolExpr *) qual)->args)
236 t_list = lappend(t_list, push_nots(lfirst(temp)));
237 return make_orclause(pull_ors(t_list));
239 else if (or_clause((Node *) qual))
244 foreach(temp, ((BoolExpr *) qual)->args)
245 t_list = lappend(t_list, push_nots(lfirst(temp)));
246 return make_andclause(pull_ands(t_list));
248 else if (not_clause((Node *) qual))
251 * Another NOT cancels this NOT, so eliminate the NOT and stop
252 * negating this branch. But search the subexpression for more NOTs
255 return find_nots(get_notclausearg(qual));
260 * We don't know how to negate anything else, place a NOT at this
261 * level. No point in recursing deeper, either.
263 return make_notclause(qual);
268 /*--------------------
269 * The following code attempts to apply the inverse OR distributive law:
270 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
271 * That is, locate OR clauses in which every subclause contains an
272 * identical term, and pull out the duplicated terms.
274 * This may seem like a fairly useless activity, but it turns out to be
275 * applicable to many machine-generated queries, and there are also queries
276 * in some of the TPC benchmarks that need it. This was in fact almost the
277 * sole useful side-effect of the old prepqual code that tried to force
278 * the query into canonical AND-of-ORs form: the canonical equivalent of
279 * ((A AND B) OR (A AND C))
281 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
282 * which the code was able to simplify to
283 * (A AND (A OR C) AND (B OR A) AND (B OR C))
284 * thus successfully extracting the common condition A --- but at the cost
285 * of cluttering the qual with many redundant clauses.
286 *--------------------
291 * Given a qualification tree with the NOTs pushed down, search for
292 * OR clauses to which the inverse OR distributive law might apply.
293 * Only the top-level AND/OR structure is searched.
295 * Returns the modified qualification. AND/OR flatness is preserved.
298 find_duplicate_ors(Expr *qual)
300 if (or_clause((Node *) qual))
306 foreach(temp, ((BoolExpr *) qual)->args)
307 orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
310 * Don't need pull_ors() since this routine will never introduce an OR
311 * where there wasn't one before.
313 return process_duplicate_ors(orlist);
315 else if (and_clause((Node *) qual))
321 foreach(temp, ((BoolExpr *) qual)->args)
322 andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
323 /* Flatten any ANDs introduced just below here */
324 andlist = pull_ands(andlist);
325 /* The AND list can't get shorter, so result is always an AND */
326 return make_andclause(andlist);
333 * process_duplicate_ors
334 * Given a list of exprs which are ORed together, try to apply
335 * the inverse OR distributive law.
337 * Returns the resulting expression (could be an AND clause, an OR
338 * clause, or maybe even a single subexpression).
341 process_duplicate_ors(List *orlist)
343 List *reference = NIL;
344 int num_subclauses = 0;
350 return NULL; /* probably can't happen */
351 if (list_length(orlist) == 1) /* single-expression OR (can this
353 return linitial(orlist);
356 * Choose the shortest AND clause as the reference list --- obviously, any
357 * subclause not in this clause isn't in all the clauses. If we find a
358 * clause that's not an AND, we can treat it as a one-element AND clause,
359 * which necessarily wins as shortest.
361 foreach(temp, orlist)
363 Expr *clause = (Expr *) lfirst(temp);
365 if (and_clause((Node *) clause))
367 List *subclauses = ((BoolExpr *) clause)->args;
368 int nclauses = list_length(subclauses);
370 if (reference == NIL || nclauses < num_subclauses)
372 reference = subclauses;
373 num_subclauses = nclauses;
378 reference = list_make1(clause);
384 * Just in case, eliminate any duplicates in the reference list.
386 reference = list_union(NIL, reference);
389 * Check each element of the reference list to see if it's in all the OR
390 * clauses. Build a new list of winning clauses.
393 foreach(temp, reference)
395 Expr *refclause = (Expr *) lfirst(temp);
399 foreach(temp2, orlist)
401 Expr *clause = (Expr *) lfirst(temp2);
403 if (and_clause((Node *) clause))
405 if (!list_member(((BoolExpr *) clause)->args, refclause))
413 if (!equal(refclause, clause))
422 winners = lappend(winners, refclause);
426 * If no winners, we can't transform the OR
429 return make_orclause(orlist);
432 * Generate new OR list consisting of the remaining sub-clauses.
434 * If any clause degenerates to empty, then we have a situation like (A AND
435 * B) OR (A), which can be reduced to just A --- that is, the additional
436 * conditions in other arms of the OR are irrelevant.
438 * Note that because we use list_difference, any multiple occurrences of a
439 * winning clause in an AND sub-clause will be removed automatically.
442 foreach(temp, orlist)
444 Expr *clause = (Expr *) lfirst(temp);
446 if (and_clause((Node *) clause))
448 List *subclauses = ((BoolExpr *) clause)->args;
450 subclauses = list_difference(subclauses, winners);
451 if (subclauses != NIL)
453 if (list_length(subclauses) == 1)
454 neworlist = lappend(neworlist, linitial(subclauses));
456 neworlist = lappend(neworlist, make_andclause(subclauses));
460 neworlist = NIL; /* degenerate case, see above */
466 if (!list_member(winners, clause))
467 neworlist = lappend(neworlist, clause);
470 neworlist = NIL; /* degenerate case, see above */
477 * Append reduced OR to the winners list, if it's not degenerate, handling
478 * the special case of one element correctly (can that really happen?).
479 * Also be careful to maintain AND/OR flatness in case we pulled up a
482 if (neworlist != NIL)
484 if (list_length(neworlist) == 1)
485 winners = lappend(winners, linitial(neworlist));
487 winners = lappend(winners, make_orclause(pull_ors(neworlist)));
491 * And return the constructed AND clause, again being wary of a single
492 * element and AND/OR flatness.
494 if (list_length(winners) == 1)
495 return (Expr *) linitial(winners);
497 return make_andclause(pull_ands(winners));