granicus.if.org Git - postgresql/blob - src/backend/optimizer/prep/prepqual.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * prepqual.c
   4  *        Routines for preprocessing qualification expressions
   5  *
   6  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.38 2003/08/08 21:41:52 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "nodes/makefuncs.h"
  19 #include "optimizer/clauses.h"
  20 #include "optimizer/prep.h"
  21 #include "utils/lsyscache.h"
  22
  23 static Expr *flatten_andors(Expr *qual);
  24 static void flatten_andors_and_walker(FastList *out_list, List *andlist);
  25 static void flatten_andors_or_walker(FastList *out_list, List *orlist);
  26 static List *pull_ands(List *andlist);
  27 static void pull_ands_walker(FastList *out_list, List *andlist);
  28 static List *pull_ors(List *orlist);
  29 static void pull_ors_walker(FastList *out_list, List *orlist);
  30 static Expr *find_nots(Expr *qual);
  31 static Expr *push_nots(Expr *qual);
  32 static Expr *find_ors(Expr *qual);
  33 static Expr *or_normalize(List *orlist);
  34 static Expr *find_ands(Expr *qual);
  35 static Expr *and_normalize(List *andlist);
  36 static Expr *qual_cleanup(Expr *qual);
  37 static List *remove_duplicates(List *list);
  38 static void count_bool_nodes(Expr *qual, double *nodes,
  39                                  double *cnfnodes, double *dnfnodes);
  40
  41 /*****************************************************************************
  42  *
  43  *              CNF/DNF CONVERSION ROUTINES
  44  *
  45  *              These routines convert an arbitrary boolean expression into
  46  *              conjunctive normal form or disjunctive normal form.
  47  *
  48  *              Normalization is only carried out in the top AND/OR/NOT portion
  49  *              of the given tree; we do not attempt to normalize boolean expressions
  50  *              that may appear as arguments of operators or functions in the tree.
  51  *
  52  *              Query qualifications (WHERE clauses) are ordinarily transformed into
  53  *              CNF, ie, AND-of-ORs form, because then the optimizer can use any one
  54  *              of the independent AND clauses as a filtering qualification.  However,
  55  *              quals that are naturally expressed as OR-of-ANDs can suffer an
  56  *              exponential growth in size in this transformation, so we also consider
  57  *              converting to DNF (OR-of-ANDs), and we may also leave well enough alone
  58  *              if both transforms cause unreasonable growth.  The OR-of-ANDs format
  59  *              is useful for indexscan implementation, so we prefer that format when
  60  *              there is just one relation involved.
  61  *
  62  *              canonicalize_qual() does "smart" conversion to either CNF or DNF, per
  63  *              the above considerations, while cnfify() and dnfify() simply perform
  64  *              the demanded transformation.  The latter two may become dead code
  65  *              eventually.
  66  *****************************************************************************/
  67
  68
  69 /*
  70  * canonicalize_qual
  71  *        Convert a qualification to the most useful normalized form.
  72  *
  73  * Returns the modified qualification.
  74  *
  75  * If 'removeAndFlag' is true then it removes explicit AND at the top level,
  76  * producing a list of implicitly-ANDed conditions.  Otherwise, a regular
  77  * boolean expression is returned.      Since most callers pass 'true', we
  78  * prefer to declare the result as List *, not Expr *.
  79  *
  80  * XXX This code could be much smarter, at the cost of also being slower,
  81  * if we tried to compute selectivities and/or see whether there are
  82  * actually indexes to support an indexscan implementation of a DNF qual.
  83  * We could even try converting the CNF clauses that mention a single
  84  * relation into a single DNF clause to see if that looks cheaper to
  85  * implement.  For now, though, we just try to avoid doing anything
  86  * quite as stupid as unconditionally converting to CNF was...
  87  */
  88 List *
  89 canonicalize_qual(Expr *qual, bool removeAndFlag)
  90 {
  91         Expr       *newqual;
  92         double          nodes,
  93                                 cnfnodes,
  94                                 dnfnodes;
  95         bool            cnfok,
  96                                 dnfok;
  97
  98         if (qual == NULL)
  99                 return NIL;
 100
 101         /*
 102          * Flatten AND and OR groups throughout the tree. This improvement is
 103          * always worthwhile, so do it unconditionally.
 104          */
 105         qual = flatten_andors(qual);
 106
 107         /*
 108          * Push down NOTs.      We do this only in the top-level boolean
 109          * expression, without examining arguments of operators/functions.
 110          * Even so, it might not be a win if we are unable to find negators
 111          * for all the operators involved; perhaps we should compare before-
 112          * and-after tree sizes?
 113          */
 114         newqual = find_nots(qual);
 115
 116         /*
 117          * Choose whether to convert to CNF, or DNF, or leave well enough
 118          * alone.
 119          *
 120          * We make an approximate estimate of the number of bottom-level nodes
 121          * that will appear in the CNF and DNF forms of the query.
 122          */
 123         count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes);
 124
 125         /*
 126          * First heuristic is to forget about *both* normal forms if there are
 127          * a huge number of terms in the qual clause.  This would only happen
 128          * with machine-generated queries, presumably; and most likely such a
 129          * query is already in either CNF or DNF.
 130          */
 131         cnfok = dnfok = true;
 132         if (nodes >= 500.0)
 133                 cnfok = dnfok = false;
 134
 135         /*
 136          * Second heuristic is to forget about either CNF or DNF if it shows
 137          * unreasonable growth compared to the original form of the qual,
 138          * where we define "unreasonable" a tad arbitrarily as 4x more
 139          * operators.
 140          */
 141         if (cnfnodes >= 4.0 * nodes)
 142                 cnfok = false;
 143         if (dnfnodes >= 4.0 * nodes)
 144                 dnfok = false;
 145
 146         /*
 147          * Third heuristic is to prefer DNF if top level is already an OR, and
 148          * only one relation is mentioned, and DNF is no larger than the CNF
 149          * representation.      (Pretty shaky; can we improve on this?)
 150          */
 151         if (cnfok && dnfok && dnfnodes <= cnfnodes &&
 152                 or_clause((Node *) newqual) &&
 153                 NumRelids((Node *) newqual) == 1)
 154                 cnfok = false;
 155
 156         /*
 157          * Otherwise, we prefer CNF.
 158          *
 159          * XXX obviously, these rules could be improved upon.
 160          */
 161         if (cnfok)
 162         {
 163                 /*
 164                  * Normalize into conjunctive normal form, and clean up the
 165                  * result.
 166                  */
 167                 newqual = qual_cleanup(find_ors(newqual));
 168         }
 169         else if (dnfok)
 170         {
 171                 /*
 172                  * Normalize into disjunctive normal form, and clean up the
 173                  * result.
 174                  */
 175                 newqual = qual_cleanup(find_ands(newqual));
 176         }
 177
 178         /* Convert to implicit-AND list if requested */
 179         if (removeAndFlag)
 180                 newqual = (Expr *) make_ands_implicit(newqual);
 181
 182         return (List *) newqual;
 183 }
 184
 185 /*
 186  * cnfify
 187  *        Convert a qualification to conjunctive normal form by applying
 188  *        successive normalizations.
 189  *
 190  * Returns the modified qualification.
 191  *
 192  * If 'removeAndFlag' is true then it removes explicit AND at the top level,
 193  * producing a list of implicitly-ANDed conditions.  Otherwise, a regular
 194  * boolean expression is returned.      Since most callers pass 'true', we
 195  * prefer to declare the result as List *, not Expr *.
 196  */
 197 List *
 198 cnfify(Expr *qual, bool removeAndFlag)
 199 {
 200         Expr       *newqual;
 201
 202         if (qual == NULL)
 203                 return NIL;
 204
 205         /*
 206          * Flatten AND and OR groups throughout the tree. This improvement is
 207          * always worthwhile.
 208          */
 209         newqual = flatten_andors(qual);
 210
 211         /*
 212          * Push down NOTs.      We do this only in the top-level boolean
 213          * expression, without examining arguments of operators/functions.
 214          */
 215         newqual = find_nots(newqual);
 216         /* Normalize into conjunctive normal form. */
 217         newqual = find_ors(newqual);
 218         /* Clean up the result. */
 219         newqual = qual_cleanup(newqual);
 220
 221         if (removeAndFlag)
 222                 newqual = (Expr *) make_ands_implicit(newqual);
 223
 224         return (List *) newqual;
 225 }
 226
 227 #ifdef NOT_USED
 228 /*
 229  * dnfify
 230  *        Convert a qualification to disjunctive normal form by applying
 231  *        successive normalizations.
 232  *
 233  * Returns the modified qualification.
 234  *
 235  * We do not offer a 'removeOrFlag' in this case; the usages are
 236  * different.
 237  */
 238 static Expr *
 239 dnfify(Expr *qual)
 240 {
 241         Expr       *newqual;
 242
 243         if (qual == NULL)
 244                 return NULL;
 245
 246         /*
 247          * Flatten AND and OR groups throughout the tree. This improvement is
 248          * always worthwhile.
 249          */
 250         newqual = flatten_andors(qual);
 251
 252         /*
 253          * Push down NOTs.      We do this only in the top-level boolean
 254          * expression, without examining arguments of operators/functions.
 255          */
 256         newqual = find_nots(newqual);
 257         /* Normalize into disjunctive normal form. */
 258         newqual = find_ands(newqual);
 259         /* Clean up the result. */
 260         newqual = qual_cleanup(newqual);
 261
 262         return newqual;
 263 }
 264 #endif
 265
 266 /*--------------------
 267  * The parser regards AND and OR as purely binary operators, so a qual like
 268  *              (A = 1) OR (A = 2) OR (A = 3) ...
 269  * will produce a nested parsetree
 270  *              (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
 271  * In reality, the optimizer and executor regard AND and OR as n-argument
 272  * operators, so this tree can be flattened to
 273  *              (OR (A = 1) (A = 2) (A = 3) ...)
 274  * which is the responsibility of the routines below.
 275  *
 276  * flatten_andors() does the basic transformation with no initial assumptions.
 277  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
 278  * tree after local transformations that might introduce nested AND/ORs.
 279  *--------------------
 280  */
 281
 282 /*--------------------
 283  * flatten_andors
 284  *        Given a qualification, simplify nested AND/OR clauses into flat
 285  *        AND/OR clauses with more arguments.
 286  *
 287  * Returns the rebuilt expr (note original list structure is not touched).
 288  *--------------------
 289  */
 290 static Expr *
 291 flatten_andors(Expr *qual)
 292 {
 293         if (qual == NULL)
 294                 return NULL;
 295
 296         if (and_clause((Node *) qual))
 297         {
 298                 FastList        out_list;
 299
 300                 FastListInit(&out_list);
 301                 flatten_andors_and_walker(&out_list, ((BoolExpr *) qual)->args);
 302                 return make_andclause(FastListValue(&out_list));
 303         }
 304         else if (or_clause((Node *) qual))
 305         {
 306                 FastList        out_list;
 307
 308                 FastListInit(&out_list);
 309                 flatten_andors_or_walker(&out_list, ((BoolExpr *) qual)->args);
 310                 return make_orclause(FastListValue(&out_list));
 311         }
 312         else if (not_clause((Node *) qual))
 313                 return make_notclause(flatten_andors(get_notclausearg(qual)));
 314         else if (is_opclause(qual))
 315         {
 316                 OpExpr     *opexpr = (OpExpr *) qual;
 317                 Expr       *left = (Expr *) get_leftop(qual);
 318                 Expr       *right = (Expr *) get_rightop(qual);
 319
 320                 return make_opclause(opexpr->opno,
 321                                                          opexpr->opresulttype,
 322                                                          opexpr->opretset,
 323                                                          flatten_andors(left),
 324                                                          flatten_andors(right));
 325         }
 326         else
 327                 return qual;
 328 }
 329
 330 static void
 331 flatten_andors_and_walker(FastList *out_list, List *andlist)
 332 {
 333         List       *arg;
 334
 335         foreach(arg, andlist)
 336         {
 337                 Expr       *subexpr = (Expr *) lfirst(arg);
 338
 339                 if (and_clause((Node *) subexpr))
 340                         flatten_andors_and_walker(out_list, ((BoolExpr *) subexpr)->args);
 341                 else
 342                         FastAppend(out_list, flatten_andors(subexpr));
 343         }
 344 }
 345
 346 static void
 347 flatten_andors_or_walker(FastList *out_list, List *orlist)
 348 {
 349         List       *arg;
 350
 351         foreach(arg, orlist)
 352         {
 353                 Expr       *subexpr = (Expr *) lfirst(arg);
 354
 355                 if (or_clause((Node *) subexpr))
 356                         flatten_andors_or_walker(out_list, ((BoolExpr *) subexpr)->args);
 357                 else
 358                         FastAppend(out_list, flatten_andors(subexpr));
 359         }
 360 }
 361
 362 /*
 363  * pull_ands
 364  *        Recursively flatten nested AND clauses into a single and-clause list.
 365  *
 366  * Input is the arglist of an AND clause.
 367  * Returns the rebuilt arglist (note original list structure is not touched).
 368  */
 369 static List *
 370 pull_ands(List *andlist)
 371 {
 372         FastList        out_list;
 373
 374         FastListInit(&out_list);
 375         pull_ands_walker(&out_list, andlist);
 376         return FastListValue(&out_list);
 377 }
 378
 379 static void
 380 pull_ands_walker(FastList *out_list, List *andlist)
 381 {
 382         List       *arg;
 383
 384         foreach(arg, andlist)
 385         {
 386                 Expr       *subexpr = (Expr *) lfirst(arg);
 387
 388                 if (and_clause((Node *) subexpr))
 389                         pull_ands_walker(out_list, ((BoolExpr *) subexpr)->args);
 390                 else
 391                         FastAppend(out_list, subexpr);
 392         }
 393 }
 394
 395 /*
 396  * pull_ors
 397  *        Recursively flatten nested OR clauses into a single or-clause list.
 398  *
 399  * Input is the arglist of an OR clause.
 400  * Returns the rebuilt arglist (note original list structure is not touched).
 401  */
 402 static List *
 403 pull_ors(List *orlist)
 404 {
 405         FastList        out_list;
 406
 407         FastListInit(&out_list);
 408         pull_ors_walker(&out_list, orlist);
 409         return FastListValue(&out_list);
 410 }
 411
 412 static void
 413 pull_ors_walker(FastList *out_list, List *orlist)
 414 {
 415         List       *arg;
 416
 417         foreach(arg, orlist)
 418         {
 419                 Expr       *subexpr = (Expr *) lfirst(arg);
 420
 421                 if (or_clause((Node *) subexpr))
 422                         pull_ors_walker(out_list, ((BoolExpr *) subexpr)->args);
 423                 else
 424                         FastAppend(out_list, subexpr);
 425         }
 426 }
 427
 428 /*
 429  * find_nots
 430  *        Traverse the qualification, looking for 'NOT's to take care of.
 431  *        For 'NOT' clauses, apply push_not() to try to push down the 'NOT'.
 432  *        For all other clause types, simply recurse.
 433  *
 434  * Returns the modified qualification.  AND/OR flatness is preserved.
 435  */
 436 static Expr *
 437 find_nots(Expr *qual)
 438 {
 439         if (qual == NULL)
 440                 return NULL;
 441
 442 #ifdef NOT_USED
 443         /* recursing into operator expressions is probably not worth it. */
 444         if (is_opclause(qual))
 445         {
 446                 OpExpr     *opexpr = (OpExpr *) qual;
 447                 Expr       *left = (Expr *) get_leftop(qual);
 448                 Expr       *right = (Expr *) get_rightop(qual);
 449
 450                 return make_opclause(opexpr->opno,
 451                                                          opexpr->opresulttype,
 452                                                          opexpr->opretset,
 453                                                          find_nots(left),
 454                                                          find_nots(right));
 455         }
 456 #endif
 457         if (and_clause((Node *) qual))
 458         {
 459                 FastList        t_list;
 460                 List       *temp;
 461
 462                 FastListInit(&t_list);
 463                 foreach(temp, ((BoolExpr *) qual)->args)
 464                         FastAppend(&t_list, find_nots(lfirst(temp)));
 465                 return make_andclause(pull_ands(FastListValue(&t_list)));
 466         }
 467         else if (or_clause((Node *) qual))
 468         {
 469                 FastList        t_list;
 470                 List       *temp;
 471
 472                 FastListInit(&t_list);
 473                 foreach(temp, ((BoolExpr *) qual)->args)
 474                         FastAppend(&t_list, find_nots(lfirst(temp)));
 475                 return make_orclause(pull_ors(FastListValue(&t_list)));
 476         }
 477         else if (not_clause((Node *) qual))
 478                 return push_nots(get_notclausearg(qual));
 479         else
 480                 return qual;
 481 }
 482
 483 /*
 484  * push_nots
 485  *        Push down a 'NOT' as far as possible.
 486  *
 487  * Input is an expression to be negated (e.g., the argument of a NOT clause).
 488  * Returns a new qual equivalent to the negation of the given qual.
 489  */
 490 static Expr *
 491 push_nots(Expr *qual)
 492 {
 493         if (qual == NULL)
 494                 return make_notclause(qual);    /* XXX is this right?  Or
 495                                                                                  * possible? */
 496
 497         /*
 498          * Negate an operator clause if possible: ("NOT" (< A B)) => (> A B)
 499          * Otherwise, retain the clause as it is (the 'not' can't be pushed
 500          * down any farther).
 501          */
 502         if (is_opclause(qual))
 503         {
 504                 OpExpr     *opexpr = (OpExpr *) qual;
 505                 Oid                     negator = get_negator(opexpr->opno);
 506
 507                 if (negator)
 508                         return make_opclause(negator,
 509                                                                  opexpr->opresulttype,
 510                                                                  opexpr->opretset,
 511                                                                  (Expr *) get_leftop(qual),
 512                                                                  (Expr *) get_rightop(qual));
 513                 else
 514                         return make_notclause(qual);
 515         }
 516         else if (and_clause((Node *) qual))
 517         {
 518                 /*--------------------
 519                  * Apply DeMorgan's Laws:
 520                  *              ("NOT" ("AND" A B)) => ("OR" ("NOT" A) ("NOT" B))
 521                  *              ("NOT" ("OR" A B))      => ("AND" ("NOT" A) ("NOT" B))
 522                  * i.e., swap AND for OR and negate all the subclauses.
 523                  *--------------------
 524                  */
 525                 FastList        t_list;
 526                 List       *temp;
 527
 528                 FastListInit(&t_list);
 529                 foreach(temp, ((BoolExpr *) qual)->args)
 530                         FastAppend(&t_list, push_nots(lfirst(temp)));
 531                 return make_orclause(pull_ors(FastListValue(&t_list)));
 532         }
 533         else if (or_clause((Node *) qual))
 534         {
 535                 FastList        t_list;
 536                 List       *temp;
 537
 538                 FastListInit(&t_list);
 539                 foreach(temp, ((BoolExpr *) qual)->args)
 540                         FastAppend(&t_list, push_nots(lfirst(temp)));
 541                 return make_andclause(pull_ands(FastListValue(&t_list)));
 542         }
 543         else if (not_clause((Node *) qual))
 544         {
 545                 /*
 546                  * Another 'not' cancels this 'not', so eliminate the 'not' and
 547                  * stop negating this branch.  But search the subexpression for
 548                  * more 'not's to simplify.
 549                  */
 550                 return find_nots(get_notclausearg(qual));
 551         }
 552         else
 553         {
 554                 /*
 555                  * We don't know how to negate anything else, place a 'not' at
 556                  * this level.
 557                  */
 558                 return make_notclause(qual);
 559         }
 560 }
 561
 562 /*
 563  * find_ors
 564  *        Given a qualification tree with the 'not's pushed down, convert it
 565  *        to a tree in CNF by repeatedly applying the rule:
 566  *                              ("OR" A ("AND" B C))  => ("AND" ("OR" A B) ("OR" A C))
 567  *
 568  *        Note that 'or' clauses will always be turned into 'and' clauses
 569  *        if they contain any 'and' subclauses.
 570  *
 571  * Returns the modified qualification.  AND/OR flatness is preserved.
 572  */
 573 static Expr *
 574 find_ors(Expr *qual)
 575 {
 576         if (qual == NULL)
 577                 return NULL;
 578
 579         /* We used to recurse into opclauses here, but I see no reason to... */
 580         if (and_clause((Node *) qual))
 581         {
 582                 List       *andlist = NIL;
 583                 List       *temp;
 584
 585                 foreach(temp, ((BoolExpr *) qual)->args)
 586                         andlist = lappend(andlist, find_ors(lfirst(temp)));
 587                 return make_andclause(pull_ands(andlist));
 588         }
 589         else if (or_clause((Node *) qual))
 590         {
 591                 List       *orlist = NIL;
 592                 List       *temp;
 593
 594                 foreach(temp, ((BoolExpr *) qual)->args)
 595                         orlist = lappend(orlist, find_ors(lfirst(temp)));
 596                 return or_normalize(pull_ors(orlist));
 597         }
 598         else if (not_clause((Node *) qual))
 599                 return make_notclause(find_ors(get_notclausearg(qual)));
 600         else
 601                 return qual;
 602 }
 603
 604 /*
 605  * or_normalize
 606  *        Given a list of exprs which are 'or'ed together, try to apply
 607  *        the distributive law
 608  *                              ("OR" A ("AND" B C))  => ("AND" ("OR" A B) ("OR" A C))
 609  *        to convert the top-level OR clause to a top-level AND clause.
 610  *
 611  * Returns the resulting expression (could be an AND clause, an OR
 612  * clause, or maybe even a single subexpression).
 613  */
 614 static Expr *
 615 or_normalize(List *orlist)
 616 {
 617         Expr       *distributable = NULL;
 618         int                     num_subclauses = 1;
 619         List       *andclauses = NIL;
 620         List       *temp;
 621
 622         if (orlist == NIL)
 623                 return NULL;                    /* probably can't happen */
 624         if (lnext(orlist) == NIL)
 625                 return lfirst(orlist);  /* single-expression OR (can this happen?) */
 626
 627         /*
 628          * If we have a choice of AND clauses, pick the one with the most
 629          * subclauses.  Because we initialized num_subclauses = 1, any AND
 630          * clauses with only one arg will be ignored as useless.
 631          */
 632         foreach(temp, orlist)
 633         {
 634                 Expr       *clause = lfirst(temp);
 635
 636                 if (and_clause((Node *) clause))
 637                 {
 638                         int                     nclauses = length(((BoolExpr *) clause)->args);
 639
 640                         if (nclauses > num_subclauses)
 641                         {
 642                                 distributable = clause;
 643                                 num_subclauses = nclauses;
 644                         }
 645                 }
 646         }
 647
 648         /* if there's no suitable AND clause, we can't transform the OR */
 649         if (!distributable)
 650                 return make_orclause(orlist);
 651
 652         /*
 653          * Caution: lremove destructively modifies the input orlist. This
 654          * should be OK, since or_normalize is only called with freshly
 655          * constructed lists that are not referenced elsewhere.
 656          */
 657         orlist = lremove(distributable, orlist);
 658
 659         foreach(temp, ((BoolExpr *) distributable)->args)
 660         {
 661                 Expr       *andclause = lfirst(temp);
 662                 List       *neworlist;
 663
 664                 /*
 665                  * We are going to insert the orlist into multiple places in the
 666                  * result expression.  For most expression types, it'd be OK to
 667                  * just have multiple links to the same subtree, but this fails
 668                  * badly for SubLinks (and perhaps other cases?).  For safety, we
 669                  * make a distinct copy for each place the orlist is inserted.
 670                  */
 671                 if (lnext(temp) == NIL)
 672                         neworlist = orlist; /* can use original tree at the end */
 673                 else
 674                         neworlist = copyObject(orlist);
 675
 676                 /*
 677                  * pull_ors is needed here in case andclause has a top-level OR.
 678                  * Then we recursively apply or_normalize, since there might be an
 679                  * AND subclause in the resulting OR-list.
 680                  */
 681                 andclause = or_normalize(pull_ors(lcons(andclause, neworlist)));
 682                 andclauses = lappend(andclauses, andclause);
 683         }
 684
 685         /* pull_ands is needed in case any sub-or_normalize succeeded */
 686         return make_andclause(pull_ands(andclauses));
 687 }
 688
 689 /*
 690  * find_ands
 691  *        Given a qualification tree with the 'not's pushed down, convert it
 692  *        to a tree in DNF by repeatedly applying the rule:
 693  *                              ("AND" A ("OR" B C))  => ("OR" ("AND" A B) ("AND" A C))
 694  *
 695  *        Note that 'and' clauses will always be turned into 'or' clauses
 696  *        if they contain any 'or' subclauses.
 697  *
 698  * Returns the modified qualification.  AND/OR flatness is preserved.
 699  */
 700 static Expr *
 701 find_ands(Expr *qual)
 702 {
 703         if (qual == NULL)
 704                 return NULL;
 705
 706         /* We used to recurse into opclauses here, but I see no reason to... */
 707         if (or_clause((Node *) qual))
 708         {
 709                 List       *orlist = NIL;
 710                 List       *temp;
 711
 712                 foreach(temp, ((BoolExpr *) qual)->args)
 713                         orlist = lappend(orlist, find_ands(lfirst(temp)));
 714                 return make_orclause(pull_ors(orlist));
 715         }
 716         else if (and_clause((Node *) qual))
 717         {
 718                 List       *andlist = NIL;
 719                 List       *temp;
 720
 721                 foreach(temp, ((BoolExpr *) qual)->args)
 722                         andlist = lappend(andlist, find_ands(lfirst(temp)));
 723                 return and_normalize(pull_ands(andlist));
 724         }
 725         else if (not_clause((Node *) qual))
 726                 return make_notclause(find_ands(get_notclausearg(qual)));
 727         else
 728                 return qual;
 729 }
 730
 731 /*
 732  * and_normalize
 733  *        Given a list of exprs which are 'and'ed together, try to apply
 734  *        the distributive law
 735  *                              ("AND" A ("OR" B C))  => ("OR" ("AND" A B) ("AND" A C))
 736  *        to convert the top-level AND clause to a top-level OR clause.
 737  *
 738  * Returns the resulting expression (could be an AND clause, an OR
 739  * clause, or maybe even a single subexpression).
 740  */
 741 static Expr *
 742 and_normalize(List *andlist)
 743 {
 744         Expr       *distributable = NULL;
 745         int                     num_subclauses = 1;
 746         List       *orclauses = NIL;
 747         List       *temp;
 748
 749         if (andlist == NIL)
 750                 return NULL;                    /* probably can't happen */
 751         if (lnext(andlist) == NIL)
 752                 return lfirst(andlist); /* single-expression AND (can this
 753                                                                  * happen?) */
 754
 755         /*
 756          * If we have a choice of OR clauses, pick the one with the most
 757          * subclauses.  Because we initialized num_subclauses = 1, any OR
 758          * clauses with only one arg will be ignored as useless.
 759          */
 760         foreach(temp, andlist)
 761         {
 762                 Expr       *clause = lfirst(temp);
 763
 764                 if (or_clause((Node *) clause))
 765                 {
 766                         int                     nclauses = length(((BoolExpr *) clause)->args);
 767
 768                         if (nclauses > num_subclauses)
 769                         {
 770                                 distributable = clause;
 771                                 num_subclauses = nclauses;
 772                         }
 773                 }
 774         }
 775
 776         /* if there's no suitable OR clause, we can't transform the AND */
 777         if (!distributable)
 778                 return make_andclause(andlist);
 779
 780         /*
 781          * Caution: lremove destructively modifies the input andlist. This
 782          * should be OK, since and_normalize is only called with freshly
 783          * constructed lists that are not referenced elsewhere.
 784          */
 785         andlist = lremove(distributable, andlist);
 786
 787         foreach(temp, ((BoolExpr *) distributable)->args)
 788         {
 789                 Expr       *orclause = lfirst(temp);
 790                 List       *newandlist;
 791
 792                 /*
 793                  * We are going to insert the andlist into multiple places in the
 794                  * result expression.  For most expression types, it'd be OK to
 795                  * just have multiple links to the same subtree, but this fails
 796                  * badly for SubLinks (and perhaps other cases?).  For safety, we
 797                  * make a distinct copy for each place the andlist is inserted.
 798                  */
 799                 if (lnext(temp) == NIL)
 800                         newandlist = andlist;           /* can use original tree at the
 801                                                                                  * end */
 802                 else
 803                         newandlist = copyObject(andlist);
 804
 805                 /*
 806                  * pull_ands is needed here in case orclause has a top-level AND.
 807                  * Then we recursively apply and_normalize, since there might be
 808                  * an OR subclause in the resulting AND-list.
 809                  */
 810                 orclause = and_normalize(pull_ands(lcons(orclause, newandlist)));
 811                 orclauses = lappend(orclauses, orclause);
 812         }
 813
 814         /* pull_ors is needed in case any sub-and_normalize succeeded */
 815         return make_orclause(pull_ors(orclauses));
 816 }
 817
 818 /*
 819  * qual_cleanup
 820  *        Fix up a qualification by removing duplicate entries (which could be
 821  *        created during normalization, if identical subexpressions from different
 822  *        parts of the tree are brought together).      Also, check for AND and OR
 823  *        clauses with only one remaining subexpression, and simplify.
 824  *
 825  * Returns the modified qualification.
 826  */
 827 static Expr *
 828 qual_cleanup(Expr *qual)
 829 {
 830         if (qual == NULL)
 831                 return NULL;
 832
 833         if (and_clause((Node *) qual))
 834         {
 835                 List       *andlist = NIL;
 836                 List       *temp;
 837
 838                 foreach(temp, ((BoolExpr *) qual)->args)
 839                         andlist = lappend(andlist, qual_cleanup(lfirst(temp)));
 840
 841                 andlist = remove_duplicates(pull_ands(andlist));
 842
 843                 if (length(andlist) > 1)
 844                         return make_andclause(andlist);
 845                 else
 846                         return lfirst(andlist);
 847         }
 848         else if (or_clause((Node *) qual))
 849         {
 850                 List       *orlist = NIL;
 851                 List       *temp;
 852
 853                 foreach(temp, ((BoolExpr *) qual)->args)
 854                         orlist = lappend(orlist, qual_cleanup(lfirst(temp)));
 855
 856                 orlist = remove_duplicates(pull_ors(orlist));
 857
 858                 if (length(orlist) > 1)
 859                         return make_orclause(orlist);
 860                 else
 861                         return lfirst(orlist);
 862         }
 863         else if (not_clause((Node *) qual))
 864                 return make_notclause(qual_cleanup(get_notclausearg(qual)));
 865         else
 866                 return qual;
 867 }
 868
 869 /*
 870  * remove_duplicates
 871  */
 872 static List *
 873 remove_duplicates(List *list)
 874 {
 875         List       *result = NIL;
 876         List       *i;
 877
 878         if (length(list) <= 1)
 879                 return list;
 880
 881         foreach(i, list)
 882         {
 883                 if (!member(lfirst(i), result))
 884                         result = lappend(result, lfirst(i));
 885         }
 886         return result;
 887 }
 888
 889 /*
 890  * count_bool_nodes
 891  *              Support for heuristics in canonicalize_qual(): count the
 892  *              number of nodes that are inputs to the top level AND/OR/NOT
 893  *              part of a qual tree, and estimate how many nodes will appear
 894  *              in the CNF'ified or DNF'ified equivalent of the expression.
 895  *
 896  * This is just an approximate calculation; it doesn't deal with NOTs
 897  * very well, and of course it cannot detect possible simplifications
 898  * from eliminating duplicate subclauses.  The idea is just to cheaply
 899  * determine whether CNF will be markedly worse than DNF or vice versa.
 900  *
 901  * The counts/estimates are represented as doubles to avoid risk of overflow.
 902  */
 903 static void
 904 count_bool_nodes(Expr *qual,
 905                                  double *nodes,
 906                                  double *cnfnodes,
 907                                  double *dnfnodes)
 908 {
 909         List       *temp;
 910         double          subnodes,
 911                                 subcnfnodes,
 912                                 subdnfnodes;
 913
 914         if (and_clause((Node *) qual))
 915         {
 916                 *nodes = *cnfnodes = 0.0;
 917                 *dnfnodes = 1.0;                /* DNF nodes will be product of sub-counts */
 918
 919                 foreach(temp, ((BoolExpr *) qual)->args)
 920                 {
 921                         count_bool_nodes(lfirst(temp),
 922                                                          &subnodes, &subcnfnodes, &subdnfnodes);
 923                         *nodes += subnodes;
 924                         *cnfnodes += subcnfnodes;
 925                         *dnfnodes *= subdnfnodes;
 926                 }
 927
 928                 /*
 929                  * we could get dnfnodes < cnfnodes here, if all the sub-nodes are
 930                  * simple ones with count 1.  Make sure dnfnodes isn't too small.
 931                  */
 932                 if (*dnfnodes < *cnfnodes)
 933                         *dnfnodes = *cnfnodes;
 934         }
 935         else if (or_clause((Node *) qual))
 936         {
 937                 *nodes = *dnfnodes = 0.0;
 938                 *cnfnodes = 1.0;                /* CNF nodes will be product of sub-counts */
 939
 940                 foreach(temp, ((BoolExpr *) qual)->args)
 941                 {
 942                         count_bool_nodes(lfirst(temp),
 943                                                          &subnodes, &subcnfnodes, &subdnfnodes);
 944                         *nodes += subnodes;
 945                         *cnfnodes *= subcnfnodes;
 946                         *dnfnodes += subdnfnodes;
 947                 }
 948
 949                 /*
 950                  * we could get cnfnodes < dnfnodes here, if all the sub-nodes are
 951                  * simple ones with count 1.  Make sure cnfnodes isn't too small.
 952                  */
 953                 if (*cnfnodes < *dnfnodes)
 954                         *cnfnodes = *dnfnodes;
 955         }
 956         else if (not_clause((Node *) qual))
 957         {
 958                 count_bool_nodes(get_notclausearg(qual),
 959                                                  nodes, cnfnodes, dnfnodes);
 960         }
 961         else if (contain_subplans((Node *) qual))
 962         {
 963                 /*
 964                  * charge extra for subexpressions containing sub-SELECTs, to
 965                  * discourage us from rearranging them in a way that might
 966                  * generate N copies of a subselect rather than one.  The magic
 967                  * constant here interacts with the "4x maximum growth" heuristic
 968                  * in canonicalize_qual().
 969                  */
 970                 *nodes = 1.0;
 971                 *cnfnodes = *dnfnodes = 25.0;
 972         }
 973         else
 974         {
 975                 /* anything else counts 1 for my purposes */
 976                 *nodes = *cnfnodes = *dnfnodes = 1.0;
 977         }
 978 }