* prepqual.c
* Routines for preprocessing qualification expressions
*
- * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.22 2000/01/28 03:22:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.41 2003/12/30 21:49:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-#include <sys/types.h>
#include "postgres.h"
#include "optimizer/prep.h"
#include "utils/lsyscache.h"
-static Expr *flatten_andors(Expr *qual);
-static List *pull_ors(List *orlist);
+
+static Node *flatten_andors_mutator(Node *node, void *context);
+static void flatten_andors_and_walker(FastList *out_list, List *andlist);
+static void flatten_andors_or_walker(FastList *out_list, List *orlist);
static List *pull_ands(List *andlist);
+static void pull_ands_walker(FastList *out_list, List *andlist);
+static List *pull_ors(List *orlist);
+static void pull_ors_walker(FastList *out_list, List *orlist);
static Expr *find_nots(Expr *qual);
static Expr *push_nots(Expr *qual);
-static Expr *find_ors(Expr *qual);
-static Expr *or_normalize(List *orlist);
-static Expr *find_ands(Expr *qual);
-static Expr *and_normalize(List *andlist);
-static Expr *qual_cleanup(Expr *qual);
-static List *remove_duplicates(List *list);
-static void count_bool_nodes(Expr *qual, double *nodes,
- double *cnfnodes, double *dnfnodes);
-
-/*****************************************************************************
- *
- * CNF/DNF CONVERSION ROUTINES
- *
- * These routines convert an arbitrary boolean expression into
- * conjunctive normal form or disjunctive normal form.
- *
- * Normalization is only carried out in the top AND/OR/NOT portion
- * of the given tree; we do not attempt to normalize boolean expressions
- * that may appear as arguments of operators or functions in the tree.
- *
- * Query qualifications (WHERE clauses) are ordinarily transformed into
- * CNF, ie, AND-of-ORs form, because then the optimizer can use any one
- * of the independent AND clauses as a filtering qualification. However,
- * quals that are naturally expressed as OR-of-ANDs can suffer an
- * exponential growth in size in this transformation, so we also consider
- * converting to DNF (OR-of-ANDs), and we may also leave well enough alone
- * if both transforms cause unreasonable growth. The OR-of-ANDs format
- * is useful for indexscan implementation, so we prefer that format when
- * there is just one relation involved.
- *
- * canonicalize_qual() does "smart" conversion to either CNF or DNF, per
- * the above considerations, while cnfify() and dnfify() simply perform
- * the demanded transformation. The latter two may become dead code
- * eventually.
- *****************************************************************************/
+static Expr *find_duplicate_ors(Expr *qual);
+static Expr *process_duplicate_ors(List *orlist);
/*
* canonicalize_qual
- * Convert a qualification to the most useful normalized form.
- *
- * Returns the modified qualification.
+ * Convert a qualification expression to the most useful form.
*
- * If 'removeAndFlag' is true then it removes explicit AND at the top level,
- * producing a list of implicitly-ANDed conditions. Otherwise, a regular
- * boolean expression is returned. Since most callers pass 'true', we
- * prefer to declare the result as List *, not Expr *.
+ * The name of this routine is a holdover from a time when it would try to
+ * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
+ * Eventually, we recognized that that had more theoretical purity than
+ * actual usefulness, and so now the transformation doesn't involve any
+ * notion of reaching a canonical form.
*
- * XXX This code could be much smarter, at the cost of also being slower,
- * if we tried to compute selectivities and/or see whether there are
- * actually indexes to support an indexscan implementation of a DNF qual.
- * We could even try converting the CNF clauses that mention a single
- * relation into a single DNF clause to see if that looks cheaper to
- * implement. For now, though, we just try to avoid doing anything
- * quite as stupid as unconditionally converting to CNF was...
+ * Returns the modified qualification.
*/
-List *
-canonicalize_qual(Expr *qual, bool removeAndFlag)
+Expr *
+canonicalize_qual(Expr *qual)
{
Expr *newqual;
- double nodes,
- cnfnodes,
- dnfnodes;
- bool cnfok,
- dnfok;
+ /* Quick exit for empty qual */
if (qual == NULL)
- return NIL;
-
- /* Flatten AND and OR groups throughout the tree.
- * This improvement is always worthwhile, so do it unconditionally.
- */
- qual = flatten_andors(qual);
-
- /* Push down NOTs. We do this only in the top-level boolean
- * expression, without examining arguments of operators/functions.
- * Even so, it might not be a win if we are unable to find negators
- * for all the operators involved; perhaps we should compare before-
- * and-after tree sizes?
- */
- newqual = find_nots(qual);
+ return NULL;
/*
- * Choose whether to convert to CNF, or DNF, or leave well enough alone.
- *
- * We make an approximate estimate of the number of bottom-level nodes
- * that will appear in the CNF and DNF forms of the query.
+ * Flatten AND and OR groups throughout the expression tree.
*/
- count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes);
- /*
- * First heuristic is to forget about *both* normal forms if there are
- * a huge number of terms in the qual clause. This would only happen
- * with machine-generated queries, presumably; and most likely such
- * a query is already in either CNF or DNF.
- */
- cnfok = dnfok = true;
- if (nodes >= 500.0)
- cnfok = dnfok = false;
- /*
- * Second heuristic is to forget about either CNF or DNF if it shows
- * unreasonable growth compared to the original form of the qual,
- * where we define "unreasonable" a tad arbitrarily as 4x more
- * operators.
- */
- if (cnfnodes >= 4.0 * nodes)
- cnfok = false;
- if (dnfnodes >= 4.0 * nodes)
- dnfok = false;
- /*
- * Third heuristic is to prefer DNF if top level is already an OR,
- * and only one relation is mentioned, and DNF is no larger than
- * the CNF representation. (Pretty shaky; can we improve on this?)
- */
- if (dnfok && dnfnodes <= cnfnodes && or_clause((Node *) newqual) &&
- NumRelids((Node *) newqual) == 1)
- cnfok = false;
- /*
- * Otherwise, we prefer CNF.
- *
- * XXX obviously, these rules could be improved upon.
- */
- if (cnfok)
- {
- /* Normalize into conjunctive normal form, and clean up the result. */
- newqual = qual_cleanup(find_ors(newqual));
- }
- else if (dnfok)
- {
- /* Normalize into disjunctive normal form, and clean up the result. */
- newqual = qual_cleanup(find_ands(newqual));
- }
-
- /* Convert to implicit-AND list if requested */
- if (removeAndFlag)
- {
- newqual = (Expr *) make_ands_implicit(newqual);
- }
-
- return (List *) newqual;
-}
-
-/*
- * cnfify
- * Convert a qualification to conjunctive normal form by applying
- * successive normalizations.
- *
- * Returns the modified qualification.
- *
- * If 'removeAndFlag' is true then it removes explicit AND at the top level,
- * producing a list of implicitly-ANDed conditions. Otherwise, a regular
- * boolean expression is returned. Since most callers pass 'true', we
- * prefer to declare the result as List *, not Expr *.
- */
-List *
-cnfify(Expr *qual, bool removeAndFlag)
-{
- Expr *newqual;
-
- if (qual == NULL)
- return NIL;
+ newqual = (Expr *) flatten_andors((Node *) qual);
- /* Flatten AND and OR groups throughout the tree.
- * This improvement is always worthwhile.
- */
- newqual = flatten_andors(qual);
- /* Push down NOTs. We do this only in the top-level boolean
+ /*
+ * Push down NOTs. We do this only in the top-level boolean
* expression, without examining arguments of operators/functions.
+ * The main reason for doing this is to expose as much top-level AND/OR
+ * structure as we can, so there's no point in descending further.
*/
newqual = find_nots(newqual);
- /* Normalize into conjunctive normal form. */
- newqual = find_ors(newqual);
- /* Clean up the result. */
- newqual = qual_cleanup(newqual);
- if (removeAndFlag)
- {
- newqual = (Expr *) make_ands_implicit(newqual);
- }
-
- return (List *) newqual;
-}
-
-/*
- * dnfify
- * Convert a qualification to disjunctive normal form by applying
- * successive normalizations.
- *
- * Returns the modified qualification.
- *
- * We do not offer a 'removeOrFlag' in this case; the usages are
- * different.
- */
-Expr *
-dnfify(Expr *qual)
-{
- Expr *newqual;
-
- if (qual == NULL)
- return NULL;
-
- /* Flatten AND and OR groups throughout the tree.
- * This improvement is always worthwhile.
- */
- newqual = flatten_andors(qual);
- /* Push down NOTs. We do this only in the top-level boolean
- * expression, without examining arguments of operators/functions.
+ /*
+ * Pull up redundant subclauses in OR-of-AND trees. Again, we do this
+ * only within the top-level AND/OR structure.
*/
- newqual = find_nots(newqual);
- /* Normalize into disjunctive normal form. */
- newqual = find_ands(newqual);
- /* Clean up the result. */
- newqual = qual_cleanup(newqual);
+ newqual = find_duplicate_ors(newqual);
return newqual;
}
+
/*--------------------
* The parser regards AND and OR as purely binary operators, so a qual like
* (A = 1) OR (A = 2) OR (A = 3) ...
*--------------------
*/
-/*--------------------
+/*
* flatten_andors
- * Given a qualification, simplify nested AND/OR clauses into flat
- * AND/OR clauses with more arguments.
+ * Given an expression tree, simplify nested AND/OR clauses into flat
+ * AND/OR clauses with more arguments. The entire tree is processed.
*
- * Returns the rebuilt expr (note original list structure is not touched).
- *--------------------
+ * Returns the rebuilt expr (note original structure is not touched).
+ *
+ * This is exported so that other modules can perform the part of
+ * canonicalize_qual processing that applies to entire trees, rather
+ * than just the top-level boolean expressions.
*/
-static Expr *
-flatten_andors(Expr *qual)
+Node *
+flatten_andors(Node *node)
{
- if (qual == NULL)
- return NULL;
+ return flatten_andors_mutator(node, NULL);
+}
- if (and_clause((Node *) qual))
+static Node *
+flatten_andors_mutator(Node *node, void *context)
+{
+ if (node == NULL)
+ return NULL;
+ if (IsA(node, BoolExpr))
{
- List *out_list = NIL;
- List *arg;
+ BoolExpr *bexpr = (BoolExpr *) node;
- foreach(arg, qual->args)
+ if (bexpr->boolop == AND_EXPR)
{
- Expr *subexpr = flatten_andors((Expr *) lfirst(arg));
-
- /*
- * Note: we can destructively nconc the subexpression's arglist
- * because we know the recursive invocation of flatten_andors
- * will have built a new arglist not shared with any other expr.
- * Otherwise we'd need a listCopy here.
- */
- if (and_clause((Node *) subexpr))
- out_list = nconc(out_list, subexpr->args);
- else
- out_list = lappend(out_list, subexpr);
+ FastList out_list;
+
+ FastListInit(&out_list);
+ flatten_andors_and_walker(&out_list, bexpr->args);
+ return (Node *) make_andclause(FastListValue(&out_list));
}
- return make_andclause(out_list);
+ if (bexpr->boolop == OR_EXPR)
+ {
+ FastList out_list;
+
+ FastListInit(&out_list);
+ flatten_andors_or_walker(&out_list, bexpr->args);
+ return (Node *) make_orclause(FastListValue(&out_list));
+ }
+ /* else it's a NOT clause, fall through */
}
- else if (or_clause((Node *) qual))
+ return expression_tree_mutator(node, flatten_andors_mutator, context);
+}
+
+static void
+flatten_andors_and_walker(FastList *out_list, List *andlist)
+{
+ List *arg;
+
+ foreach(arg, andlist)
{
- List *out_list = NIL;
- List *arg;
+ Node *subexpr = (Node *) lfirst(arg);
- foreach(arg, qual->args)
- {
- Expr *subexpr = flatten_andors((Expr *) lfirst(arg));
-
- /*
- * Note: we can destructively nconc the subexpression's arglist
- * because we know the recursive invocation of flatten_andors
- * will have built a new arglist not shared with any other expr.
- * Otherwise we'd need a listCopy here.
- */
- if (or_clause((Node *) subexpr))
- out_list = nconc(out_list, subexpr->args);
- else
- out_list = lappend(out_list, subexpr);
- }
- return make_orclause(out_list);
+ if (and_clause(subexpr))
+ flatten_andors_and_walker(out_list, ((BoolExpr *) subexpr)->args);
+ else
+ FastAppend(out_list, flatten_andors(subexpr));
}
- else if (not_clause((Node *) qual))
- return make_notclause(flatten_andors(get_notclausearg(qual)));
- else if (is_opclause((Node *) qual))
+}
+
+static void
+flatten_andors_or_walker(FastList *out_list, List *orlist)
+{
+ List *arg;
+
+ foreach(arg, orlist)
{
- Expr *left = (Expr *) get_leftop(qual);
- Expr *right = (Expr *) get_rightop(qual);
-
- if (right)
- return make_clause(qual->opType, qual->oper,
- lcons(flatten_andors(left),
- lcons(flatten_andors(right),
- NIL)));
+ Node *subexpr = (Node *) lfirst(arg);
+
+ if (or_clause(subexpr))
+ flatten_andors_or_walker(out_list, ((BoolExpr *) subexpr)->args);
else
- return make_clause(qual->opType, qual->oper,
- lcons(flatten_andors(left),
- NIL));
+ FastAppend(out_list, flatten_andors(subexpr));
}
- else
- return qual;
}
/*
- * pull_ors
- * Pull the arguments of an 'or' clause nested within another 'or'
- * clause up into the argument list of the parent.
+ * pull_ands
+ * Recursively flatten nested AND clauses into a single and-clause list.
*
- * Input is the arglist of an OR clause.
+ * Input is the arglist of an AND clause.
* Returns the rebuilt arglist (note original list structure is not touched).
*/
static List *
-pull_ors(List *orlist)
+pull_ands(List *andlist)
+{
+ FastList out_list;
+
+ FastListInit(&out_list);
+ pull_ands_walker(&out_list, andlist);
+ return FastListValue(&out_list);
+}
+
+static void
+pull_ands_walker(FastList *out_list, List *andlist)
{
- List *out_list = NIL;
List *arg;
- foreach(arg, orlist)
+ foreach(arg, andlist)
{
- Expr *subexpr = (Expr *) lfirst(arg);
+ Node *subexpr = (Node *) lfirst(arg);
- /*
- * Note: we can destructively nconc the subexpression's arglist
- * because we know the recursive invocation of pull_ors
- * will have built a new arglist not shared with any other expr.
- * Otherwise we'd need a listCopy here.
- */
- if (or_clause((Node *) subexpr))
- out_list = nconc(out_list, pull_ors(subexpr->args));
+ if (and_clause(subexpr))
+ pull_ands_walker(out_list, ((BoolExpr *) subexpr)->args);
else
- out_list = lappend(out_list, subexpr);
+ FastAppend(out_list, subexpr);
}
- return out_list;
}
/*
- * pull_ands
- * Pull the arguments of an 'and' clause nested within another 'and'
- * clause up into the argument list of the parent.
+ * pull_ors
+ * Recursively flatten nested OR clauses into a single or-clause list.
*
- * Returns the modified list.
+ * Input is the arglist of an OR clause.
+ * Returns the rebuilt arglist (note original list structure is not touched).
*/
static List *
-pull_ands(List *andlist)
+pull_ors(List *orlist)
+{
+ FastList out_list;
+
+ FastListInit(&out_list);
+ pull_ors_walker(&out_list, orlist);
+ return FastListValue(&out_list);
+}
+
+static void
+pull_ors_walker(FastList *out_list, List *orlist)
{
- List *out_list = NIL;
List *arg;
- foreach(arg, andlist)
+ foreach(arg, orlist)
{
- Expr *subexpr = (Expr *) lfirst(arg);
+ Node *subexpr = (Node *) lfirst(arg);
- /*
- * Note: we can destructively nconc the subexpression's arglist
- * because we know the recursive invocation of pull_ands
- * will have built a new arglist not shared with any other expr.
- * Otherwise we'd need a listCopy here.
- */
- if (and_clause((Node *) subexpr))
- out_list = nconc(out_list, pull_ands(subexpr->args));
+ if (or_clause(subexpr))
+ pull_ors_walker(out_list, ((BoolExpr *) subexpr)->args);
else
- out_list = lappend(out_list, subexpr);
+ FastAppend(out_list, subexpr);
}
- return out_list;
}
+
/*
* find_nots
- * Traverse the qualification, looking for 'NOT's to take care of.
- * For 'NOT' clauses, apply push_not() to try to push down the 'NOT'.
- * For all other clause types, simply recurse.
+ * Traverse the qualification, looking for NOTs to take care of.
+ * For NOT clauses, apply push_nots() to try to push down the NOT.
+ * For AND and OR clause types, simply recurse. Otherwise stop
+ * recursing (we do not worry about structure below the top AND/OR tree).
*
- * Returns the modified qualification. AND/OR flatness is preserved.
+ * Returns the modified qualification. AND/OR flatness is preserved.
*/
static Expr *
find_nots(Expr *qual)
if (qual == NULL)
return NULL;
-#ifdef NOT_USED
- /* recursing into operator expressions is probably not worth it. */
- if (is_opclause((Node *) qual))
- {
- Expr *left = (Expr *) get_leftop(qual);
- Expr *right = (Expr *) get_rightop(qual);
-
- if (right)
- return make_clause(qual->opType, qual->oper,
- lcons(find_nots(left),
- lcons(find_nots(right),
- NIL)));
- else
- return make_clause(qual->opType, qual->oper,
- lcons(find_nots(left),
- NIL));
- }
-#endif
if (and_clause((Node *) qual))
{
- List *t_list = NIL;
+ FastList t_list;
List *temp;
- foreach(temp, qual->args)
- t_list = lappend(t_list, find_nots(lfirst(temp)));
- return make_andclause(pull_ands(t_list));
+ FastListInit(&t_list);
+ foreach(temp, ((BoolExpr *) qual)->args)
+ FastAppend(&t_list, find_nots(lfirst(temp)));
+ return make_andclause(pull_ands(FastListValue(&t_list)));
}
else if (or_clause((Node *) qual))
{
- List *t_list = NIL;
+ FastList t_list;
List *temp;
- foreach(temp, qual->args)
- t_list = lappend(t_list, find_nots(lfirst(temp)));
- return make_orclause(pull_ors(t_list));
+ FastListInit(&t_list);
+ foreach(temp, ((BoolExpr *) qual)->args)
+ FastAppend(&t_list, find_nots(lfirst(temp)));
+ return make_orclause(pull_ors(FastListValue(&t_list)));
}
else if (not_clause((Node *) qual))
return push_nots(get_notclausearg(qual));
/*
* push_nots
- * Push down a 'NOT' as far as possible.
+ * Push down a NOT as far as possible.
*
* Input is an expression to be negated (e.g., the argument of a NOT clause).
* Returns a new qual equivalent to the negation of the given qual.
push_nots(Expr *qual)
{
if (qual == NULL)
- return make_notclause(qual); /* XXX is this right? Or possible? */
+ return make_notclause(qual); /* XXX is this right? Or
+ * possible? */
/*
- * Negate an operator clause if possible: ("NOT" (< A B)) => (> A B)
- * Otherwise, retain the clause as it is (the 'not' can't be pushed
+ * Negate an operator clause if possible: (NOT (< A B)) => (> A B)
+ * Otherwise, retain the clause as it is (the NOT can't be pushed
* down any farther).
*/
- if (is_opclause((Node *) qual))
+ if (is_opclause(qual))
{
- Oper *oper = (Oper *) ((Expr *) qual)->oper;
- Oid negator = get_negator(oper->opno);
+ OpExpr *opexpr = (OpExpr *) qual;
+ Oid negator = get_negator(opexpr->opno);
if (negator)
- {
- Oper *op = (Oper *) makeOper(negator,
- InvalidOid,
- oper->opresulttype,
- 0, NULL);
- return make_opclause(op, get_leftop(qual), get_rightop(qual));
- }
+ return make_opclause(negator,
+ opexpr->opresulttype,
+ opexpr->opretset,
+ (Expr *) get_leftop(qual),
+ (Expr *) get_rightop(qual));
else
return make_notclause(qual);
}
{
/*--------------------
* Apply DeMorgan's Laws:
- * ("NOT" ("AND" A B)) => ("OR" ("NOT" A) ("NOT" B))
- * ("NOT" ("OR" A B)) => ("AND" ("NOT" A) ("NOT" B))
+ * (NOT (AND A B)) => (OR (NOT A) (NOT B))
+ * (NOT (OR A B)) => (AND (NOT A) (NOT B))
* i.e., swap AND for OR and negate all the subclauses.
*--------------------
*/
- List *t_list = NIL;
+ FastList t_list;
List *temp;
- foreach(temp, qual->args)
- t_list = lappend(t_list, push_nots(lfirst(temp)));
- return make_orclause(pull_ors(t_list));
+ FastListInit(&t_list);
+ foreach(temp, ((BoolExpr *) qual)->args)
+ FastAppend(&t_list, push_nots(lfirst(temp)));
+ return make_orclause(pull_ors(FastListValue(&t_list)));
}
else if (or_clause((Node *) qual))
{
- List *t_list = NIL;
+ FastList t_list;
List *temp;
- foreach(temp, qual->args)
- t_list = lappend(t_list, push_nots(lfirst(temp)));
- return make_andclause(pull_ands(t_list));
+ FastListInit(&t_list);
+ foreach(temp, ((BoolExpr *) qual)->args)
+ FastAppend(&t_list, push_nots(lfirst(temp)));
+ return make_andclause(pull_ands(FastListValue(&t_list)));
}
else if (not_clause((Node *) qual))
{
/*
- * Another 'not' cancels this 'not', so eliminate the 'not' and
- * stop negating this branch. But search the subexpression for
- * more 'not's to simplify.
+ * Another NOT cancels this NOT, so eliminate the NOT and
+ * stop negating this branch.
*/
- return find_nots(get_notclausearg(qual));
+ return get_notclausearg(qual);
}
else
{
/*
- * We don't know how to negate anything else, place a 'not' at
+ * We don't know how to negate anything else, place a NOT at
* this level.
*/
return make_notclause(qual);
}
}
+
+/*--------------------
+ * The following code attempts to apply the inverse OR distributive law:
+ * ((A AND B) OR (A AND C)) => (A AND (B OR C))
+ * That is, locate OR clauses in which every subclause contains an
+ * identical term, and pull out the duplicated terms.
+ *
+ * This may seem like a fairly useless activity, but it turns out to be
+ * applicable to many machine-generated queries, and there are also queries
+ * in some of the TPC benchmarks that need it. This was in fact almost the
+ * sole useful side-effect of the old prepqual code that tried to force
+ * the query into canonical AND-of-ORs form: the canonical equivalent of
+ * ((A AND B) OR (A AND C))
+ * is
+ * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
+ * which the code was able to simplify to
+ * (A AND (A OR C) AND (B OR A) AND (B OR C))
+ * thus successfully extracting the common condition A --- but at the cost
+ * of cluttering the qual with many redundant clauses.
+ *--------------------
+ */
+
/*
- * find_ors
- * Given a qualification tree with the 'not's pushed down, convert it
- * to a tree in CNF by repeatedly applying the rule:
- * ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C))
- *
- * Note that 'or' clauses will always be turned into 'and' clauses
- * if they contain any 'and' subclauses.
+ * find_duplicate_ors
+ * Given a qualification tree with the NOTs pushed down, search for
+ * OR clauses to which the inverse OR distributive law might apply.
+ * Only the top-level AND/OR structure is searched.
*
* Returns the modified qualification. AND/OR flatness is preserved.
*/
static Expr *
-find_ors(Expr *qual)
+find_duplicate_ors(Expr *qual)
{
if (qual == NULL)
return NULL;
- /* We used to recurse into opclauses here, but I see no reason to... */
- if (and_clause((Node *) qual))
+ if (or_clause((Node *) qual))
{
- List *andlist = NIL;
+ List *orlist = NIL;
List *temp;
- foreach(temp, qual->args)
- andlist = lappend(andlist, find_ors(lfirst(temp)));
- return make_andclause(pull_ands(andlist));
+ /* Recurse */
+ foreach(temp, ((BoolExpr *) qual)->args)
+ orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
+ /*
+ * Don't need pull_ors() since this routine will never introduce
+ * an OR where there wasn't one before.
+ */
+ return process_duplicate_ors(orlist);
}
- else if (or_clause((Node *) qual))
+ else if (and_clause((Node *) qual))
{
- List *orlist = NIL;
+ List *andlist = NIL;
List *temp;
- foreach(temp, qual->args)
- orlist = lappend(orlist, find_ors(lfirst(temp)));
- return or_normalize(pull_ors(orlist));
+ /* Recurse */
+ foreach(temp, ((BoolExpr *) qual)->args)
+ andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
+ /* Flatten any ANDs introduced just below here */
+ andlist = pull_ands(andlist);
+ /* The AND list can't get shorter, so result is always an AND */
+ return make_andclause(andlist);
}
- else if (not_clause((Node *) qual))
- return make_notclause(find_ors(get_notclausearg(qual)));
else
return qual;
}
/*
- * or_normalize
- * Given a list of exprs which are 'or'ed together, try to apply
- * the distributive law
- * ("OR" A ("AND" B C)) => ("AND" ("OR" A B) ("OR" A C))
- * to convert the top-level OR clause to a top-level AND clause.
+ * process_duplicate_ors
+ * Given a list of exprs which are ORed together, try to apply
+ * the inverse OR distributive law.
*
* Returns the resulting expression (could be an AND clause, an OR
* clause, or maybe even a single subexpression).
*/
static Expr *
-or_normalize(List *orlist)
+process_duplicate_ors(List *orlist)
{
- Expr *distributable = NULL;
- int num_subclauses = 1;
- List *andclauses = NIL;
+ List *reference = NIL;
+ int num_subclauses = 0;
+ List *winners;
+ List *neworlist;
List *temp;
+ List *temp2;
if (orlist == NIL)
return NULL; /* probably can't happen */
return lfirst(orlist); /* single-expression OR (can this happen?) */
/*
- * If we have a choice of AND clauses, pick the one with the
- * most subclauses. Because we initialized num_subclauses = 1,
- * any AND clauses with only one arg will be ignored as useless.
+ * Choose the shortest AND clause as the reference list --- obviously,
+ * any subclause not in this clause isn't in all the clauses.
+ * If we find a clause that's not an AND, we can treat it as a
+ * one-element AND clause, which necessarily wins as shortest.
*/
foreach(temp, orlist)
{
- Expr *clause = lfirst(temp);
+ Expr *clause = lfirst(temp);
if (and_clause((Node *) clause))
{
- int nclauses = length(clause->args);
+ List *subclauses = ((BoolExpr *) clause)->args;
+ int nclauses = length(subclauses);
- if (nclauses > num_subclauses)
+ if (reference == NIL || nclauses < num_subclauses)
{
- distributable = clause;
+ reference = subclauses;
num_subclauses = nclauses;
}
}
+ else
+ {
+ reference = makeList1(clause);
+ break;
+ }
}
- /* if there's no suitable AND clause, we can't transform the OR */
- if (! distributable)
- return make_orclause(orlist);
-
- /* Caution: lremove destructively modifies the input orlist.
- * This should be OK, since or_normalize is only called with
- * freshly constructed lists that are not referenced elsewhere.
+ /*
+ * Just in case, eliminate any duplicates in the reference list.
*/
- orlist = lremove(distributable, orlist);
-
- foreach(temp, distributable->args)
- {
- Expr *andclause = lfirst(temp);
-
- /* pull_ors is needed here in case andclause has a top-level OR.
- * Then we recursively apply or_normalize, since there might
- * be an AND subclause in the resulting OR-list.
- * Note: we rely on pull_ors to build a fresh list,
- * and not damage the given orlist.
- */
- andclause = or_normalize(pull_ors(lcons(andclause, orlist)));
- andclauses = lappend(andclauses, andclause);
- }
-
- /* pull_ands is needed in case any sub-or_normalize succeeded */
- return make_andclause(pull_ands(andclauses));
-}
-
-/*
- * find_ands
- * Given a qualification tree with the 'not's pushed down, convert it
- * to a tree in DNF by repeatedly applying the rule:
- * ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C))
- *
- * Note that 'and' clauses will always be turned into 'or' clauses
- * if they contain any 'or' subclauses.
- *
- * Returns the modified qualification. AND/OR flatness is preserved.
- */
-static Expr *
-find_ands(Expr *qual)
-{
- if (qual == NULL)
- return NULL;
-
- /* We used to recurse into opclauses here, but I see no reason to... */
- if (or_clause((Node *) qual))
- {
- List *orlist = NIL;
- List *temp;
-
- foreach(temp, qual->args)
- orlist = lappend(orlist, find_ands(lfirst(temp)));
- return make_orclause(pull_ors(orlist));
- }
- else if (and_clause((Node *) qual))
- {
- List *andlist = NIL;
- List *temp;
-
- foreach(temp, qual->args)
- andlist = lappend(andlist, find_ands(lfirst(temp)));
- return and_normalize(pull_ands(andlist));
- }
- else if (not_clause((Node *) qual))
- return make_notclause(find_ands(get_notclausearg(qual)));
- else
- return qual;
-}
-
-/*
- * and_normalize
- * Given a list of exprs which are 'and'ed together, try to apply
- * the distributive law
- * ("AND" A ("OR" B C)) => ("OR" ("AND" A B) ("AND" A C))
- * to convert the top-level AND clause to a top-level OR clause.
- *
- * Returns the resulting expression (could be an AND clause, an OR
- * clause, or maybe even a single subexpression).
- */
-static Expr *
-and_normalize(List *andlist)
-{
- Expr *distributable = NULL;
- int num_subclauses = 1;
- List *orclauses = NIL;
- List *temp;
-
- if (andlist == NIL)
- return NULL; /* probably can't happen */
- if (lnext(andlist) == NIL)
- return lfirst(andlist); /* single-expression AND (can this happen?) */
+ reference = set_union(NIL, reference);
/*
- * If we have a choice of OR clauses, pick the one with the
- * most subclauses. Because we initialized num_subclauses = 1,
- * any OR clauses with only one arg will be ignored as useless.
+ * Check each element of the reference list to see if it's in all the
+ * OR clauses. Build a new list of winning clauses.
*/
- foreach(temp, andlist)
+ winners = NIL;
+ foreach(temp, reference)
{
- Expr *clause = lfirst(temp);
+ Expr *refclause = lfirst(temp);
+ bool win = true;
- if (or_clause((Node *) clause))
+ foreach(temp2, orlist)
{
- int nclauses = length(clause->args);
+ Expr *clause = lfirst(temp2);
- if (nclauses > num_subclauses)
+ if (and_clause((Node *) clause))
{
- distributable = clause;
- num_subclauses = nclauses;
+ if (!member(refclause, ((BoolExpr *) clause)->args))
+ {
+ win = false;
+ break;
+ }
+ }
+ else
+ {
+ if (!equal(refclause, clause))
+ {
+ win = false;
+ break;
+ }
}
}
- }
- /* if there's no suitable OR clause, we can't transform the AND */
- if (! distributable)
- return make_andclause(andlist);
-
- /* Caution: lremove destructively modifies the input andlist.
- * This should be OK, since and_normalize is only called with
- * freshly constructed lists that are not referenced elsewhere.
- */
- andlist = lremove(distributable, andlist);
-
- foreach(temp, distributable->args)
- {
- Expr *orclause = lfirst(temp);
-
- /* pull_ands is needed here in case orclause has a top-level AND.
- * Then we recursively apply and_normalize, since there might
- * be an OR subclause in the resulting AND-list.
- * Note: we rely on pull_ands to build a fresh list,
- * and not damage the given andlist.
- */
- orclause = and_normalize(pull_ands(lcons(orclause, andlist)));
- orclauses = lappend(orclauses, orclause);
+ if (win)
+ winners = lappend(winners, refclause);
}
- /* pull_ors is needed in case any sub-and_normalize succeeded */
- return make_orclause(pull_ors(orclauses));
-}
-
-/*
- * qual_cleanup
- * Fix up a qualification by removing duplicate entries (which could be
- * created during normalization, if identical subexpressions from different
- * parts of the tree are brought together). Also, check for AND and OR
- * clauses with only one remaining subexpression, and simplify.
- *
- * Returns the modified qualification.
- */
-static Expr *
-qual_cleanup(Expr *qual)
-{
- if (qual == NULL)
- return NULL;
-
- if (and_clause((Node *) qual))
- {
- List *andlist = NIL;
- List *temp;
-
- foreach(temp, qual->args)
- andlist = lappend(andlist, qual_cleanup(lfirst(temp)));
-
- andlist = remove_duplicates(pull_ands(andlist));
+ /*
+ * If no winners, we can't transform the OR
+ */
+ if (winners == NIL)
+ return make_orclause(orlist);
- if (length(andlist) > 1)
- return make_andclause(andlist);
- else
- return lfirst(andlist);
- }
- else if (or_clause((Node *) qual))
+ /*
+ * Generate new OR list consisting of the remaining sub-clauses.
+ *
+ * If any clause degenerates to empty, then we have a situation like
+ * (A AND B) OR (A), which can be reduced to just A --- that is, the
+ * additional conditions in other arms of the OR are irrelevant.
+ *
+ * Note that because we use set_difference, any multiple occurrences of
+ * a winning clause in an AND sub-clause will be removed automatically.
+ */
+ neworlist = NIL;
+ foreach(temp, orlist)
{
- List *orlist = NIL;
- List *temp;
+ Expr *clause = lfirst(temp);
- foreach(temp, qual->args)
- orlist = lappend(orlist, qual_cleanup(lfirst(temp)));
-
- orlist = remove_duplicates(pull_ors(orlist));
+ if (and_clause((Node *) clause))
+ {
+ List *subclauses = ((BoolExpr *) clause)->args;
- if (length(orlist) > 1)
- return make_orclause(orlist);
+ subclauses = set_difference(subclauses, winners);
+ if (subclauses != NIL)
+ {
+ if (lnext(subclauses) == NIL)
+ neworlist = lappend(neworlist, lfirst(subclauses));
+ else
+ neworlist = lappend(neworlist, make_andclause(subclauses));
+ }
+ else
+ {
+ neworlist = NIL; /* degenerate case, see above */
+ break;
+ }
+ }
else
- return lfirst(orlist);
- }
- else if (not_clause((Node *) qual))
- return make_notclause(qual_cleanup(get_notclausearg(qual)));
- else
- return qual;
-}
-
-/*
- * remove_duplicates
- */
-static List *
-remove_duplicates(List *list)
-{
- List *result = NIL;
- List *i;
-
- if (length(list) <= 1)
- return list;
-
- foreach(i, list)
- {
- if (! member(lfirst(i), result))
- result = lappend(result, lfirst(i));
- }
- return result;
-}
-
-/*
- * count_bool_nodes
- * Support for heuristics in canonicalize_qual(): count the
- * number of nodes that are inputs to the top level AND/OR/NOT
- * part of a qual tree, and estimate how many nodes will appear
- * in the CNF'ified or DNF'ified equivalent of the expression.
- *
- * This is just an approximate calculation; it doesn't deal with NOTs
- * very well, and of course it cannot detect possible simplifications
- * from eliminating duplicate subclauses. The idea is just to cheaply
- * determine whether CNF will be markedly worse than DNF or vice versa.
- *
- * The counts/estimates are represented as doubles to avoid risk of overflow.
- */
-static void
-count_bool_nodes(Expr *qual,
- double *nodes,
- double *cnfnodes,
- double *dnfnodes)
-{
- List *temp;
- double subnodes, subcnfnodes, subdnfnodes;
-
- if (and_clause((Node *) qual))
- {
- *nodes = *cnfnodes = 0.0;
- *dnfnodes = 1.0; /* DNF nodes will be product of sub-counts */
-
- foreach(temp, qual->args)
{
- count_bool_nodes(lfirst(temp),
- &subnodes, &subcnfnodes, &subdnfnodes);
- *nodes += subnodes;
- *cnfnodes += subcnfnodes;
- *dnfnodes *= subdnfnodes;
+ if (!member(clause, winners))
+ neworlist = lappend(neworlist, clause);
+ else
+ {
+ neworlist = NIL; /* degenerate case, see above */
+ break;
+ }
}
- /* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
- * simple ones with count 1. Make sure dnfnodes isn't too small.
- */
- if (*dnfnodes < *cnfnodes)
- *dnfnodes = *cnfnodes;
}
- else if (or_clause((Node *) qual))
- {
- *nodes = *dnfnodes = 0.0;
- *cnfnodes = 1.0; /* CNF nodes will be product of sub-counts */
- foreach(temp, qual->args)
- {
- count_bool_nodes(lfirst(temp),
- &subnodes, &subcnfnodes, &subdnfnodes);
- *nodes += subnodes;
- *cnfnodes *= subcnfnodes;
- *dnfnodes += subdnfnodes;
- }
- /* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
- * simple ones with count 1. Make sure cnfnodes isn't too small.
- */
- if (*cnfnodes < *dnfnodes)
- *cnfnodes = *dnfnodes;
- }
- else if (not_clause((Node *) qual))
+ /*
+ * Append reduced OR to the winners list, if it's not degenerate, handling
+ * the special case of one element correctly (can that really happen?).
+ * Also be careful to maintain AND/OR flatness in case we pulled up a
+ * sub-sub-OR-clause.
+ */
+ if (neworlist != NIL)
{
- count_bool_nodes(get_notclausearg(qual),
- nodes, cnfnodes, dnfnodes);
+ if (lnext(neworlist) == NIL)
+ winners = lappend(winners, lfirst(neworlist));
+ else
+ winners = lappend(winners, make_orclause(pull_ors(neworlist)));
}
+
+ /*
+ * And return the constructed AND clause, again being wary of a single
+ * element and AND/OR flatness.
+ */
+ if (lnext(winners) == NIL)
+ return (Expr *) lfirst(winners);
else
- {
- /* anything else counts 1 for my purposes */
- *nodes = *cnfnodes = *dnfnodes = 1.0;
- }
+ return make_andclause(pull_ands(winners));
}