1 /*-------------------------------------------------------------------------
4 * Routines for preprocessing qualification expressions
6 * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.46 2004/08/29 05:06:44 momjian Exp $
13 *-------------------------------------------------------------------------
18 #include "nodes/makefuncs.h"
19 #include "optimizer/clauses.h"
20 #include "optimizer/prep.h"
21 #include "utils/lsyscache.h"
24 static Node *flatten_andors_mutator(Node *node, void *context);
25 static List *pull_ands(List *andlist);
26 static List *pull_ors(List *orlist);
27 static Expr *find_nots(Expr *qual);
28 static Expr *push_nots(Expr *qual);
29 static Expr *find_duplicate_ors(Expr *qual);
30 static Expr *process_duplicate_ors(List *orlist);
35 * Convert a qualification expression to the most useful form.
37 * The name of this routine is a holdover from a time when it would try to
38 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
39 * Eventually, we recognized that that had more theoretical purity than
40 * actual usefulness, and so now the transformation doesn't involve any
41 * notion of reaching a canonical form.
43 * Returns the modified qualification.
46 canonicalize_qual(Expr *qual)
50 /* Quick exit for empty qual */
55 * Flatten AND and OR groups throughout the expression tree.
57 newqual = (Expr *) flatten_andors((Node *) qual);
60 * Push down NOTs. We do this only in the top-level boolean
61 * expression, without examining arguments of operators/functions. The
62 * main reason for doing this is to expose as much top-level AND/OR
63 * structure as we can, so there's no point in descending further.
65 newqual = find_nots(newqual);
68 * Pull up redundant subclauses in OR-of-AND trees. Again, we do this
69 * only within the top-level AND/OR structure.
71 newqual = find_duplicate_ors(newqual);
77 /*--------------------
78 * The parser regards AND and OR as purely binary operators, so a qual like
79 * (A = 1) OR (A = 2) OR (A = 3) ...
80 * will produce a nested parsetree
81 * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
82 * In reality, the optimizer and executor regard AND and OR as n-argument
83 * operators, so this tree can be flattened to
84 * (OR (A = 1) (A = 2) (A = 3) ...)
85 * which is the responsibility of the routines below.
87 * flatten_andors() does the basic transformation with no initial assumptions.
88 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
89 * tree after local transformations that might introduce nested AND/ORs.
95 * Given an expression tree, simplify nested AND/OR clauses into flat
96 * AND/OR clauses with more arguments. The entire tree is processed.
98 * Returns the rebuilt expr (note original structure is not touched).
100 * This is exported so that other modules can perform the part of
101 * canonicalize_qual processing that applies to entire trees, rather
102 * than just the top-level boolean expressions.
105 flatten_andors(Node *node)
107 return flatten_andors_mutator(node, NULL);
111 flatten_andors_mutator(Node *node, void *context)
115 if (IsA(node, BoolExpr))
117 BoolExpr *bexpr = (BoolExpr *) node;
119 if (bexpr->boolop == AND_EXPR)
121 List *out_list = NIL;
124 foreach(arg, bexpr->args)
126 Node *subexpr = flatten_andors((Node *) lfirst(arg));
129 * Note: we can destructively concat the subexpression's
130 * arglist because we know the recursive invocation of
131 * flatten_andors will have built a new arglist not shared
132 * with any other expr. Otherwise we'd need a list_copy
135 if (and_clause(subexpr))
136 out_list = list_concat(out_list,
137 ((BoolExpr *) subexpr)->args);
139 out_list = lappend(out_list, subexpr);
141 return (Node *) make_andclause(out_list);
143 if (bexpr->boolop == OR_EXPR)
145 List *out_list = NIL;
148 foreach(arg, bexpr->args)
150 Node *subexpr = flatten_andors((Node *) lfirst(arg));
153 * Note: we can destructively concat the subexpression's
154 * arglist because we know the recursive invocation of
155 * flatten_andors will have built a new arglist not shared
156 * with any other expr. Otherwise we'd need a list_copy
159 if (or_clause(subexpr))
160 out_list = list_concat(out_list,
161 ((BoolExpr *) subexpr)->args);
163 out_list = lappend(out_list, subexpr);
165 return (Node *) make_orclause(out_list);
167 /* else it's a NOT clause, fall through */
169 return expression_tree_mutator(node, flatten_andors_mutator, context);
174 * Recursively flatten nested AND clauses into a single and-clause list.
176 * Input is the arglist of an AND clause.
177 * Returns the rebuilt arglist (note original list structure is not touched).
180 pull_ands(List *andlist)
182 List *out_list = NIL;
185 foreach(arg, andlist)
187 Node *subexpr = (Node *) lfirst(arg);
190 * Note: we can destructively concat the subexpression's arglist
191 * because we know the recursive invocation of pull_ands will have
192 * built a new arglist not shared with any other expr. Otherwise
193 * we'd need a list_copy here.
195 if (and_clause(subexpr))
196 out_list = list_concat(out_list,
197 pull_ands(((BoolExpr *) subexpr)->args));
199 out_list = lappend(out_list, subexpr);
206 * Recursively flatten nested OR clauses into a single or-clause list.
208 * Input is the arglist of an OR clause.
209 * Returns the rebuilt arglist (note original list structure is not touched).
212 pull_ors(List *orlist)
214 List *out_list = NIL;
219 Node *subexpr = (Node *) lfirst(arg);
222 * Note: we can destructively concat the subexpression's arglist
223 * because we know the recursive invocation of pull_ors will have
224 * built a new arglist not shared with any other expr. Otherwise
225 * we'd need a list_copy here.
227 if (or_clause(subexpr))
228 out_list = list_concat(out_list,
229 pull_ors(((BoolExpr *) subexpr)->args));
231 out_list = lappend(out_list, subexpr);
239 * Traverse the qualification, looking for NOTs to take care of.
240 * For NOT clauses, apply push_nots() to try to push down the NOT.
241 * For AND and OR clause types, simply recurse. Otherwise stop
242 * recursing (we do not worry about structure below the top AND/OR tree).
244 * Returns the modified qualification. AND/OR flatness is preserved.
247 find_nots(Expr *qual)
252 if (and_clause((Node *) qual))
257 foreach(temp, ((BoolExpr *) qual)->args)
258 t_list = lappend(t_list, find_nots(lfirst(temp)));
259 return make_andclause(pull_ands(t_list));
261 else if (or_clause((Node *) qual))
266 foreach(temp, ((BoolExpr *) qual)->args)
267 t_list = lappend(t_list, find_nots(lfirst(temp)));
268 return make_orclause(pull_ors(t_list));
270 else if (not_clause((Node *) qual))
271 return push_nots(get_notclausearg(qual));
278 * Push down a NOT as far as possible.
280 * Input is an expression to be negated (e.g., the argument of a NOT clause).
281 * Returns a new qual equivalent to the negation of the given qual.
284 push_nots(Expr *qual)
287 return make_notclause(qual); /* XXX is this right? Or
291 * Negate an operator clause if possible: (NOT (< A B)) => (> A B)
292 * Otherwise, retain the clause as it is (the NOT can't be pushed down
295 if (is_opclause(qual))
297 OpExpr *opexpr = (OpExpr *) qual;
298 Oid negator = get_negator(opexpr->opno);
301 return make_opclause(negator,
302 opexpr->opresulttype,
304 (Expr *) get_leftop(qual),
305 (Expr *) get_rightop(qual));
307 return make_notclause(qual);
309 else if (and_clause((Node *) qual))
311 /*--------------------
312 * Apply DeMorgan's Laws:
313 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
314 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
315 * i.e., swap AND for OR and negate all the subclauses.
316 *--------------------
321 foreach(temp, ((BoolExpr *) qual)->args)
322 t_list = lappend(t_list, push_nots(lfirst(temp)));
323 return make_orclause(pull_ors(t_list));
325 else if (or_clause((Node *) qual))
330 foreach(temp, ((BoolExpr *) qual)->args)
331 t_list = lappend(t_list, push_nots(lfirst(temp)));
332 return make_andclause(pull_ands(t_list));
334 else if (not_clause((Node *) qual))
337 * Another NOT cancels this NOT, so eliminate the NOT and stop
338 * negating this branch.
340 return get_notclausearg(qual);
345 * We don't know how to negate anything else, place a NOT at this
348 return make_notclause(qual);
353 /*--------------------
354 * The following code attempts to apply the inverse OR distributive law:
355 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
356 * That is, locate OR clauses in which every subclause contains an
357 * identical term, and pull out the duplicated terms.
359 * This may seem like a fairly useless activity, but it turns out to be
360 * applicable to many machine-generated queries, and there are also queries
361 * in some of the TPC benchmarks that need it. This was in fact almost the
362 * sole useful side-effect of the old prepqual code that tried to force
363 * the query into canonical AND-of-ORs form: the canonical equivalent of
364 * ((A AND B) OR (A AND C))
366 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
367 * which the code was able to simplify to
368 * (A AND (A OR C) AND (B OR A) AND (B OR C))
369 * thus successfully extracting the common condition A --- but at the cost
370 * of cluttering the qual with many redundant clauses.
371 *--------------------
376 * Given a qualification tree with the NOTs pushed down, search for
377 * OR clauses to which the inverse OR distributive law might apply.
378 * Only the top-level AND/OR structure is searched.
380 * Returns the modified qualification. AND/OR flatness is preserved.
383 find_duplicate_ors(Expr *qual)
388 if (or_clause((Node *) qual))
394 foreach(temp, ((BoolExpr *) qual)->args)
395 orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
398 * Don't need pull_ors() since this routine will never introduce
399 * an OR where there wasn't one before.
401 return process_duplicate_ors(orlist);
403 else if (and_clause((Node *) qual))
409 foreach(temp, ((BoolExpr *) qual)->args)
410 andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
411 /* Flatten any ANDs introduced just below here */
412 andlist = pull_ands(andlist);
413 /* The AND list can't get shorter, so result is always an AND */
414 return make_andclause(andlist);
421 * process_duplicate_ors
422 * Given a list of exprs which are ORed together, try to apply
423 * the inverse OR distributive law.
425 * Returns the resulting expression (could be an AND clause, an OR
426 * clause, or maybe even a single subexpression).
429 process_duplicate_ors(List *orlist)
431 List *reference = NIL;
432 int num_subclauses = 0;
438 return NULL; /* probably can't happen */
439 if (list_length(orlist) == 1) /* single-expression OR (can this
441 return linitial(orlist);
444 * Choose the shortest AND clause as the reference list --- obviously,
445 * any subclause not in this clause isn't in all the clauses. If we
446 * find a clause that's not an AND, we can treat it as a one-element
447 * AND clause, which necessarily wins as shortest.
449 foreach(temp, orlist)
451 Expr *clause = (Expr *) lfirst(temp);
453 if (and_clause((Node *) clause))
455 List *subclauses = ((BoolExpr *) clause)->args;
456 int nclauses = list_length(subclauses);
458 if (reference == NIL || nclauses < num_subclauses)
460 reference = subclauses;
461 num_subclauses = nclauses;
466 reference = list_make1(clause);
472 * Just in case, eliminate any duplicates in the reference list.
474 reference = list_union(NIL, reference);
477 * Check each element of the reference list to see if it's in all the
478 * OR clauses. Build a new list of winning clauses.
481 foreach(temp, reference)
483 Expr *refclause = (Expr *) lfirst(temp);
487 foreach(temp2, orlist)
489 Expr *clause = (Expr *) lfirst(temp2);
491 if (and_clause((Node *) clause))
493 if (!list_member(((BoolExpr *) clause)->args, refclause))
501 if (!equal(refclause, clause))
510 winners = lappend(winners, refclause);
514 * If no winners, we can't transform the OR
517 return make_orclause(orlist);
520 * Generate new OR list consisting of the remaining sub-clauses.
522 * If any clause degenerates to empty, then we have a situation like (A
523 * AND B) OR (A), which can be reduced to just A --- that is, the
524 * additional conditions in other arms of the OR are irrelevant.
526 * Note that because we use list_difference, any multiple occurrences of
527 * a winning clause in an AND sub-clause will be removed
531 foreach(temp, orlist)
533 Expr *clause = (Expr *) lfirst(temp);
535 if (and_clause((Node *) clause))
537 List *subclauses = ((BoolExpr *) clause)->args;
539 subclauses = list_difference(subclauses, winners);
540 if (subclauses != NIL)
542 if (list_length(subclauses) == 1)
543 neworlist = lappend(neworlist, linitial(subclauses));
545 neworlist = lappend(neworlist, make_andclause(subclauses));
549 neworlist = NIL; /* degenerate case, see above */
555 if (!list_member(winners, clause))
556 neworlist = lappend(neworlist, clause);
559 neworlist = NIL; /* degenerate case, see above */
566 * Append reduced OR to the winners list, if it's not degenerate,
567 * handling the special case of one element correctly (can that really
568 * happen?). Also be careful to maintain AND/OR flatness in case we
569 * pulled up a sub-sub-OR-clause.
571 if (neworlist != NIL)
573 if (list_length(neworlist) == 1)
574 winners = lappend(winners, linitial(neworlist));
576 winners = lappend(winners, make_orclause(pull_ors(neworlist)));
580 * And return the constructed AND clause, again being wary of a single
581 * element and AND/OR flatness.
583 if (list_length(winners) == 1)
584 return (Expr *) linitial(winners);
586 return make_andclause(pull_ands(winners));