/*------------------------------------------------------------------------- * * prepqual.c * Routines for preprocessing qualification expressions * * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.46 2004/08/29 05:06:44 momjian Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/prep.h" #include "utils/lsyscache.h" static Node *flatten_andors_mutator(Node *node, void *context); static List *pull_ands(List *andlist); static List *pull_ors(List *orlist); static Expr *find_nots(Expr *qual); static Expr *push_nots(Expr *qual); static Expr *find_duplicate_ors(Expr *qual); static Expr *process_duplicate_ors(List *orlist); /* * canonicalize_qual * Convert a qualification expression to the most useful form. * * The name of this routine is a holdover from a time when it would try to * force the expression into canonical AND-of-ORs or OR-of-ANDs form. * Eventually, we recognized that that had more theoretical purity than * actual usefulness, and so now the transformation doesn't involve any * notion of reaching a canonical form. * * Returns the modified qualification. */ Expr * canonicalize_qual(Expr *qual) { Expr *newqual; /* Quick exit for empty qual */ if (qual == NULL) return NULL; /* * Flatten AND and OR groups throughout the expression tree. */ newqual = (Expr *) flatten_andors((Node *) qual); /* * Push down NOTs. We do this only in the top-level boolean * expression, without examining arguments of operators/functions. The * main reason for doing this is to expose as much top-level AND/OR * structure as we can, so there's no point in descending further. */ newqual = find_nots(newqual); /* * Pull up redundant subclauses in OR-of-AND trees. Again, we do this * only within the top-level AND/OR structure. */ newqual = find_duplicate_ors(newqual); return newqual; } /*-------------------- * The parser regards AND and OR as purely binary operators, so a qual like * (A = 1) OR (A = 2) OR (A = 3) ... * will produce a nested parsetree * (OR (A = 1) (OR (A = 2) (OR (A = 3) ...))) * In reality, the optimizer and executor regard AND and OR as n-argument * operators, so this tree can be flattened to * (OR (A = 1) (A = 2) (A = 3) ...) * which is the responsibility of the routines below. * * flatten_andors() does the basic transformation with no initial assumptions. * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR * tree after local transformations that might introduce nested AND/ORs. *-------------------- */ /* * flatten_andors * Given an expression tree, simplify nested AND/OR clauses into flat * AND/OR clauses with more arguments. The entire tree is processed. * * Returns the rebuilt expr (note original structure is not touched). * * This is exported so that other modules can perform the part of * canonicalize_qual processing that applies to entire trees, rather * than just the top-level boolean expressions. */ Node * flatten_andors(Node *node) { return flatten_andors_mutator(node, NULL); } static Node * flatten_andors_mutator(Node *node, void *context) { if (node == NULL) return NULL; if (IsA(node, BoolExpr)) { BoolExpr *bexpr = (BoolExpr *) node; if (bexpr->boolop == AND_EXPR) { List *out_list = NIL; ListCell *arg; foreach(arg, bexpr->args) { Node *subexpr = flatten_andors((Node *) lfirst(arg)); /* * Note: we can destructively concat the subexpression's * arglist because we know the recursive invocation of * flatten_andors will have built a new arglist not shared * with any other expr. Otherwise we'd need a list_copy * here. */ if (and_clause(subexpr)) out_list = list_concat(out_list, ((BoolExpr *) subexpr)->args); else out_list = lappend(out_list, subexpr); } return (Node *) make_andclause(out_list); } if (bexpr->boolop == OR_EXPR) { List *out_list = NIL; ListCell *arg; foreach(arg, bexpr->args) { Node *subexpr = flatten_andors((Node *) lfirst(arg)); /* * Note: we can destructively concat the subexpression's * arglist because we know the recursive invocation of * flatten_andors will have built a new arglist not shared * with any other expr. Otherwise we'd need a list_copy * here. */ if (or_clause(subexpr)) out_list = list_concat(out_list, ((BoolExpr *) subexpr)->args); else out_list = lappend(out_list, subexpr); } return (Node *) make_orclause(out_list); } /* else it's a NOT clause, fall through */ } return expression_tree_mutator(node, flatten_andors_mutator, context); } /* * pull_ands * Recursively flatten nested AND clauses into a single and-clause list. * * Input is the arglist of an AND clause. * Returns the rebuilt arglist (note original list structure is not touched). */ static List * pull_ands(List *andlist) { List *out_list = NIL; ListCell *arg; foreach(arg, andlist) { Node *subexpr = (Node *) lfirst(arg); /* * Note: we can destructively concat the subexpression's arglist * because we know the recursive invocation of pull_ands will have * built a new arglist not shared with any other expr. Otherwise * we'd need a list_copy here. */ if (and_clause(subexpr)) out_list = list_concat(out_list, pull_ands(((BoolExpr *) subexpr)->args)); else out_list = lappend(out_list, subexpr); } return out_list; } /* * pull_ors * Recursively flatten nested OR clauses into a single or-clause list. * * Input is the arglist of an OR clause. * Returns the rebuilt arglist (note original list structure is not touched). */ static List * pull_ors(List *orlist) { List *out_list = NIL; ListCell *arg; foreach(arg, orlist) { Node *subexpr = (Node *) lfirst(arg); /* * Note: we can destructively concat the subexpression's arglist * because we know the recursive invocation of pull_ors will have * built a new arglist not shared with any other expr. Otherwise * we'd need a list_copy here. */ if (or_clause(subexpr)) out_list = list_concat(out_list, pull_ors(((BoolExpr *) subexpr)->args)); else out_list = lappend(out_list, subexpr); } return out_list; } /* * find_nots * Traverse the qualification, looking for NOTs to take care of. * For NOT clauses, apply push_nots() to try to push down the NOT. * For AND and OR clause types, simply recurse. Otherwise stop * recursing (we do not worry about structure below the top AND/OR tree). * * Returns the modified qualification. AND/OR flatness is preserved. */ static Expr * find_nots(Expr *qual) { if (qual == NULL) return NULL; if (and_clause((Node *) qual)) { List *t_list = NIL; ListCell *temp; foreach(temp, ((BoolExpr *) qual)->args) t_list = lappend(t_list, find_nots(lfirst(temp))); return make_andclause(pull_ands(t_list)); } else if (or_clause((Node *) qual)) { List *t_list = NIL; ListCell *temp; foreach(temp, ((BoolExpr *) qual)->args) t_list = lappend(t_list, find_nots(lfirst(temp))); return make_orclause(pull_ors(t_list)); } else if (not_clause((Node *) qual)) return push_nots(get_notclausearg(qual)); else return qual; } /* * push_nots * Push down a NOT as far as possible. * * Input is an expression to be negated (e.g., the argument of a NOT clause). * Returns a new qual equivalent to the negation of the given qual. */ static Expr * push_nots(Expr *qual) { if (qual == NULL) return make_notclause(qual); /* XXX is this right? Or * possible? */ /* * Negate an operator clause if possible: (NOT (< A B)) => (> A B) * Otherwise, retain the clause as it is (the NOT can't be pushed down * any farther). */ if (is_opclause(qual)) { OpExpr *opexpr = (OpExpr *) qual; Oid negator = get_negator(opexpr->opno); if (negator) return make_opclause(negator, opexpr->opresulttype, opexpr->opretset, (Expr *) get_leftop(qual), (Expr *) get_rightop(qual)); else return make_notclause(qual); } else if (and_clause((Node *) qual)) { /*-------------------- * Apply DeMorgan's Laws: * (NOT (AND A B)) => (OR (NOT A) (NOT B)) * (NOT (OR A B)) => (AND (NOT A) (NOT B)) * i.e., swap AND for OR and negate all the subclauses. *-------------------- */ List *t_list = NIL; ListCell *temp; foreach(temp, ((BoolExpr *) qual)->args) t_list = lappend(t_list, push_nots(lfirst(temp))); return make_orclause(pull_ors(t_list)); } else if (or_clause((Node *) qual)) { List *t_list = NIL; ListCell *temp; foreach(temp, ((BoolExpr *) qual)->args) t_list = lappend(t_list, push_nots(lfirst(temp))); return make_andclause(pull_ands(t_list)); } else if (not_clause((Node *) qual)) { /* * Another NOT cancels this NOT, so eliminate the NOT and stop * negating this branch. */ return get_notclausearg(qual); } else { /* * We don't know how to negate anything else, place a NOT at this * level. */ return make_notclause(qual); } } /*-------------------- * The following code attempts to apply the inverse OR distributive law: * ((A AND B) OR (A AND C)) => (A AND (B OR C)) * That is, locate OR clauses in which every subclause contains an * identical term, and pull out the duplicated terms. * * This may seem like a fairly useless activity, but it turns out to be * applicable to many machine-generated queries, and there are also queries * in some of the TPC benchmarks that need it. This was in fact almost the * sole useful side-effect of the old prepqual code that tried to force * the query into canonical AND-of-ORs form: the canonical equivalent of * ((A AND B) OR (A AND C)) * is * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C)) * which the code was able to simplify to * (A AND (A OR C) AND (B OR A) AND (B OR C)) * thus successfully extracting the common condition A --- but at the cost * of cluttering the qual with many redundant clauses. *-------------------- */ /* * find_duplicate_ors * Given a qualification tree with the NOTs pushed down, search for * OR clauses to which the inverse OR distributive law might apply. * Only the top-level AND/OR structure is searched. * * Returns the modified qualification. AND/OR flatness is preserved. */ static Expr * find_duplicate_ors(Expr *qual) { if (qual == NULL) return NULL; if (or_clause((Node *) qual)) { List *orlist = NIL; ListCell *temp; /* Recurse */ foreach(temp, ((BoolExpr *) qual)->args) orlist = lappend(orlist, find_duplicate_ors(lfirst(temp))); /* * Don't need pull_ors() since this routine will never introduce * an OR where there wasn't one before. */ return process_duplicate_ors(orlist); } else if (and_clause((Node *) qual)) { List *andlist = NIL; ListCell *temp; /* Recurse */ foreach(temp, ((BoolExpr *) qual)->args) andlist = lappend(andlist, find_duplicate_ors(lfirst(temp))); /* Flatten any ANDs introduced just below here */ andlist = pull_ands(andlist); /* The AND list can't get shorter, so result is always an AND */ return make_andclause(andlist); } else return qual; } /* * process_duplicate_ors * Given a list of exprs which are ORed together, try to apply * the inverse OR distributive law. * * Returns the resulting expression (could be an AND clause, an OR * clause, or maybe even a single subexpression). */ static Expr * process_duplicate_ors(List *orlist) { List *reference = NIL; int num_subclauses = 0; List *winners; List *neworlist; ListCell *temp; if (orlist == NIL) return NULL; /* probably can't happen */ if (list_length(orlist) == 1) /* single-expression OR (can this * happen?) */ return linitial(orlist); /* * Choose the shortest AND clause as the reference list --- obviously, * any subclause not in this clause isn't in all the clauses. If we * find a clause that's not an AND, we can treat it as a one-element * AND clause, which necessarily wins as shortest. */ foreach(temp, orlist) { Expr *clause = (Expr *) lfirst(temp); if (and_clause((Node *) clause)) { List *subclauses = ((BoolExpr *) clause)->args; int nclauses = list_length(subclauses); if (reference == NIL || nclauses < num_subclauses) { reference = subclauses; num_subclauses = nclauses; } } else { reference = list_make1(clause); break; } } /* * Just in case, eliminate any duplicates in the reference list. */ reference = list_union(NIL, reference); /* * Check each element of the reference list to see if it's in all the * OR clauses. Build a new list of winning clauses. */ winners = NIL; foreach(temp, reference) { Expr *refclause = (Expr *) lfirst(temp); bool win = true; ListCell *temp2; foreach(temp2, orlist) { Expr *clause = (Expr *) lfirst(temp2); if (and_clause((Node *) clause)) { if (!list_member(((BoolExpr *) clause)->args, refclause)) { win = false; break; } } else { if (!equal(refclause, clause)) { win = false; break; } } } if (win) winners = lappend(winners, refclause); } /* * If no winners, we can't transform the OR */ if (winners == NIL) return make_orclause(orlist); /* * Generate new OR list consisting of the remaining sub-clauses. * * If any clause degenerates to empty, then we have a situation like (A * AND B) OR (A), which can be reduced to just A --- that is, the * additional conditions in other arms of the OR are irrelevant. * * Note that because we use list_difference, any multiple occurrences of * a winning clause in an AND sub-clause will be removed * automatically. */ neworlist = NIL; foreach(temp, orlist) { Expr *clause = (Expr *) lfirst(temp); if (and_clause((Node *) clause)) { List *subclauses = ((BoolExpr *) clause)->args; subclauses = list_difference(subclauses, winners); if (subclauses != NIL) { if (list_length(subclauses) == 1) neworlist = lappend(neworlist, linitial(subclauses)); else neworlist = lappend(neworlist, make_andclause(subclauses)); } else { neworlist = NIL; /* degenerate case, see above */ break; } } else { if (!list_member(winners, clause)) neworlist = lappend(neworlist, clause); else { neworlist = NIL; /* degenerate case, see above */ break; } } } /* * Append reduced OR to the winners list, if it's not degenerate, * handling the special case of one element correctly (can that really * happen?). Also be careful to maintain AND/OR flatness in case we * pulled up a sub-sub-OR-clause. */ if (neworlist != NIL) { if (list_length(neworlist) == 1) winners = lappend(winners, linitial(neworlist)); else winners = lappend(winners, make_orclause(pull_ors(neworlist))); } /* * And return the constructed AND clause, again being wary of a single * element and AND/OR flatness. */ if (list_length(winners) == 1) return (Expr *) linitial(winners); else return make_andclause(pull_ands(winners)); }