granicus.if.org Git - postgresql/blob - src/backend/optimizer/prep/prepqual.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * prepqual.c
   4  *        Routines for preprocessing qualification expressions
   5  *
   6  *
   7  * The parser regards AND and OR as purely binary operators, so a qual like
   8  *              (A = 1) OR (A = 2) OR (A = 3) ...
   9  * will produce a nested parsetree
  10  *              (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
  11  * In reality, the optimizer and executor regard AND and OR as N-argument
  12  * operators, so this tree can be flattened to
  13  *              (OR (A = 1) (A = 2) (A = 3) ...)
  14  *
  15  * Formerly, this module was responsible for doing the initial flattening,
  16  * but now we leave it to eval_const_expressions to do that since it has to
  17  * make a complete pass over the expression tree anyway.  Instead, we just
  18  * have to ensure that our manipulations preserve AND/OR flatness.
  19  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
  20  * tree after local transformations that might introduce nested AND/ORs.
  21  *
  22  *
  23  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  24  * Portions Copyright (c) 1994, Regents of the University of California
  25  *
  26  *
  27  * IDENTIFICATION
  28  *        $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.51 2005/10/15 02:49:21 momjian Exp $
  29  *
  30  *-------------------------------------------------------------------------
  31  */
  32
  33 #include "postgres.h"
  34
  35 #include "nodes/makefuncs.h"
  36 #include "optimizer/clauses.h"
  37 #include "optimizer/prep.h"
  38 #include "utils/lsyscache.h"
  39
  40
  41 static List *pull_ands(List *andlist);
  42 static List *pull_ors(List *orlist);
  43 static Expr *find_nots(Expr *qual);
  44 static Expr *push_nots(Expr *qual);
  45 static Expr *find_duplicate_ors(Expr *qual);
  46 static Expr *process_duplicate_ors(List *orlist);
  47
  48
  49 /*
  50  * canonicalize_qual
  51  *        Convert a qualification expression to the most useful form.
  52  *
  53  * The name of this routine is a holdover from a time when it would try to
  54  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
  55  * Eventually, we recognized that that had more theoretical purity than
  56  * actual usefulness, and so now the transformation doesn't involve any
  57  * notion of reaching a canonical form.
  58  *
  59  * NOTE: we assume the input has already been through eval_const_expressions
  60  * and therefore possesses AND/OR flatness.  Formerly this function included
  61  * its own flattening logic, but that requires a useless extra pass over the
  62  * tree.
  63  *
  64  * Returns the modified qualification.
  65  */
  66 Expr *
  67 canonicalize_qual(Expr *qual)
  68 {
  69         Expr       *newqual;
  70
  71         /* Quick exit for empty qual */
  72         if (qual == NULL)
  73                 return NULL;
  74
  75         /*
  76          * Push down NOTs.      We do this only in the top-level boolean expression,
  77          * without examining arguments of operators/functions. The main reason for
  78          * doing this is to expose as much top-level AND/OR structure as we can,
  79          * so there's no point in descending further.
  80          */
  81         newqual = find_nots(qual);
  82
  83         /*
  84          * Pull up redundant subclauses in OR-of-AND trees.  Again, we do this
  85          * only within the top-level AND/OR structure.
  86          */
  87         newqual = find_duplicate_ors(newqual);
  88
  89         return newqual;
  90 }
  91
  92
  93 /*
  94  * pull_ands
  95  *        Recursively flatten nested AND clauses into a single and-clause list.
  96  *
  97  * Input is the arglist of an AND clause.
  98  * Returns the rebuilt arglist (note original list structure is not touched).
  99  */
 100 static List *
 101 pull_ands(List *andlist)
 102 {
 103         List       *out_list = NIL;
 104         ListCell   *arg;
 105
 106         foreach(arg, andlist)
 107         {
 108                 Node       *subexpr = (Node *) lfirst(arg);
 109
 110                 /*
 111                  * Note: we can destructively concat the subexpression's arglist
 112                  * because we know the recursive invocation of pull_ands will have
 113                  * built a new arglist not shared with any other expr. Otherwise we'd
 114                  * need a list_copy here.
 115                  */
 116                 if (and_clause(subexpr))
 117                         out_list = list_concat(out_list,
 118                                                                    pull_ands(((BoolExpr *) subexpr)->args));
 119                 else
 120                         out_list = lappend(out_list, subexpr);
 121         }
 122         return out_list;
 123 }
 124
 125 /*
 126  * pull_ors
 127  *        Recursively flatten nested OR clauses into a single or-clause list.
 128  *
 129  * Input is the arglist of an OR clause.
 130  * Returns the rebuilt arglist (note original list structure is not touched).
 131  */
 132 static List *
 133 pull_ors(List *orlist)
 134 {
 135         List       *out_list = NIL;
 136         ListCell   *arg;
 137
 138         foreach(arg, orlist)
 139         {
 140                 Node       *subexpr = (Node *) lfirst(arg);
 141
 142                 /*
 143                  * Note: we can destructively concat the subexpression's arglist
 144                  * because we know the recursive invocation of pull_ors will have
 145                  * built a new arglist not shared with any other expr. Otherwise we'd
 146                  * need a list_copy here.
 147                  */
 148                 if (or_clause(subexpr))
 149                         out_list = list_concat(out_list,
 150                                                                    pull_ors(((BoolExpr *) subexpr)->args));
 151                 else
 152                         out_list = lappend(out_list, subexpr);
 153         }
 154         return out_list;
 155 }
 156
 157
 158 /*
 159  * find_nots
 160  *        Traverse the qualification, looking for NOTs to take care of.
 161  *        For NOT clauses, apply push_nots() to try to push down the NOT.
 162  *        For AND and OR clause types, simply recurse.  Otherwise stop
 163  *        recursing (we do not worry about structure below the top AND/OR tree).
 164  *
 165  * Returns the modified qualification.  AND/OR flatness is preserved.
 166  */
 167 static Expr *
 168 find_nots(Expr *qual)
 169 {
 170         if (and_clause((Node *) qual))
 171         {
 172                 List       *t_list = NIL;
 173                 ListCell   *temp;
 174
 175                 foreach(temp, ((BoolExpr *) qual)->args)
 176                         t_list = lappend(t_list, find_nots(lfirst(temp)));
 177                 return make_andclause(pull_ands(t_list));
 178         }
 179         else if (or_clause((Node *) qual))
 180         {
 181                 List       *t_list = NIL;
 182                 ListCell   *temp;
 183
 184                 foreach(temp, ((BoolExpr *) qual)->args)
 185                         t_list = lappend(t_list, find_nots(lfirst(temp)));
 186                 return make_orclause(pull_ors(t_list));
 187         }
 188         else if (not_clause((Node *) qual))
 189                 return push_nots(get_notclausearg(qual));
 190         else
 191                 return qual;
 192 }
 193
 194 /*
 195  * push_nots
 196  *        Push down a NOT as far as possible.
 197  *
 198  * Input is an expression to be negated (e.g., the argument of a NOT clause).
 199  * Returns a new qual equivalent to the negation of the given qual.
 200  */
 201 static Expr *
 202 push_nots(Expr *qual)
 203 {
 204         if (is_opclause(qual))
 205         {
 206                 /*
 207                  * Negate an operator clause if possible: (NOT (< A B)) => (>= A B)
 208                  * Otherwise, retain the clause as it is (the NOT can't be pushed down
 209                  * any farther).
 210                  */
 211                 OpExpr     *opexpr = (OpExpr *) qual;
 212                 Oid                     negator = get_negator(opexpr->opno);
 213
 214                 if (negator)
 215                         return make_opclause(negator,
 216                                                                  opexpr->opresulttype,
 217                                                                  opexpr->opretset,
 218                                                                  (Expr *) get_leftop(qual),
 219                                                                  (Expr *) get_rightop(qual));
 220                 else
 221                         return make_notclause(qual);
 222         }
 223         else if (and_clause((Node *) qual))
 224         {
 225                 /*--------------------
 226                  * Apply DeMorgan's Laws:
 227                  *              (NOT (AND A B)) => (OR (NOT A) (NOT B))
 228                  *              (NOT (OR A B))  => (AND (NOT A) (NOT B))
 229                  * i.e., swap AND for OR and negate all the subclauses.
 230                  *--------------------
 231                  */
 232                 List       *t_list = NIL;
 233                 ListCell   *temp;
 234
 235                 foreach(temp, ((BoolExpr *) qual)->args)
 236                         t_list = lappend(t_list, push_nots(lfirst(temp)));
 237                 return make_orclause(pull_ors(t_list));
 238         }
 239         else if (or_clause((Node *) qual))
 240         {
 241                 List       *t_list = NIL;
 242                 ListCell   *temp;
 243
 244                 foreach(temp, ((BoolExpr *) qual)->args)
 245                         t_list = lappend(t_list, push_nots(lfirst(temp)));
 246                 return make_andclause(pull_ands(t_list));
 247         }
 248         else if (not_clause((Node *) qual))
 249         {
 250                 /*
 251                  * Another NOT cancels this NOT, so eliminate the NOT and stop
 252                  * negating this branch.  But search the subexpression for more NOTs
 253                  * to simplify.
 254                  */
 255                 return find_nots(get_notclausearg(qual));
 256         }
 257         else
 258         {
 259                 /*
 260                  * We don't know how to negate anything else, place a NOT at this
 261                  * level.  No point in recursing deeper, either.
 262                  */
 263                 return make_notclause(qual);
 264         }
 265 }
 266
 267
 268 /*--------------------
 269  * The following code attempts to apply the inverse OR distributive law:
 270  *              ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
 271  * That is, locate OR clauses in which every subclause contains an
 272  * identical term, and pull out the duplicated terms.
 273  *
 274  * This may seem like a fairly useless activity, but it turns out to be
 275  * applicable to many machine-generated queries, and there are also queries
 276  * in some of the TPC benchmarks that need it.  This was in fact almost the
 277  * sole useful side-effect of the old prepqual code that tried to force
 278  * the query into canonical AND-of-ORs form: the canonical equivalent of
 279  *              ((A AND B) OR (A AND C))
 280  * is
 281  *              ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
 282  * which the code was able to simplify to
 283  *              (A AND (A OR C) AND (B OR A) AND (B OR C))
 284  * thus successfully extracting the common condition A --- but at the cost
 285  * of cluttering the qual with many redundant clauses.
 286  *--------------------
 287  */
 288
 289 /*
 290  * find_duplicate_ors
 291  *        Given a qualification tree with the NOTs pushed down, search for
 292  *        OR clauses to which the inverse OR distributive law might apply.
 293  *        Only the top-level AND/OR structure is searched.
 294  *
 295  * Returns the modified qualification.  AND/OR flatness is preserved.
 296  */
 297 static Expr *
 298 find_duplicate_ors(Expr *qual)
 299 {
 300         if (or_clause((Node *) qual))
 301         {
 302                 List       *orlist = NIL;
 303                 ListCell   *temp;
 304
 305                 /* Recurse */
 306                 foreach(temp, ((BoolExpr *) qual)->args)
 307                         orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
 308
 309                 /*
 310                  * Don't need pull_ors() since this routine will never introduce an OR
 311                  * where there wasn't one before.
 312                  */
 313                 return process_duplicate_ors(orlist);
 314         }
 315         else if (and_clause((Node *) qual))
 316         {
 317                 List       *andlist = NIL;
 318                 ListCell   *temp;
 319
 320                 /* Recurse */
 321                 foreach(temp, ((BoolExpr *) qual)->args)
 322                         andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
 323                 /* Flatten any ANDs introduced just below here */
 324                 andlist = pull_ands(andlist);
 325                 /* The AND list can't get shorter, so result is always an AND */
 326                 return make_andclause(andlist);
 327         }
 328         else
 329                 return qual;
 330 }
 331
 332 /*
 333  * process_duplicate_ors
 334  *        Given a list of exprs which are ORed together, try to apply
 335  *        the inverse OR distributive law.
 336  *
 337  * Returns the resulting expression (could be an AND clause, an OR
 338  * clause, or maybe even a single subexpression).
 339  */
 340 static Expr *
 341 process_duplicate_ors(List *orlist)
 342 {
 343         List       *reference = NIL;
 344         int                     num_subclauses = 0;
 345         List       *winners;
 346         List       *neworlist;
 347         ListCell   *temp;
 348
 349         if (orlist == NIL)
 350                 return NULL;                    /* probably can't happen */
 351         if (list_length(orlist) == 1)           /* single-expression OR (can this
 352                                                                                  * happen?) */
 353                 return linitial(orlist);
 354
 355         /*
 356          * Choose the shortest AND clause as the reference list --- obviously, any
 357          * subclause not in this clause isn't in all the clauses. If we find a
 358          * clause that's not an AND, we can treat it as a one-element AND clause,
 359          * which necessarily wins as shortest.
 360          */
 361         foreach(temp, orlist)
 362         {
 363                 Expr       *clause = (Expr *) lfirst(temp);
 364
 365                 if (and_clause((Node *) clause))
 366                 {
 367                         List       *subclauses = ((BoolExpr *) clause)->args;
 368                         int                     nclauses = list_length(subclauses);
 369
 370                         if (reference == NIL || nclauses < num_subclauses)
 371                         {
 372                                 reference = subclauses;
 373                                 num_subclauses = nclauses;
 374                         }
 375                 }
 376                 else
 377                 {
 378                         reference = list_make1(clause);
 379                         break;
 380                 }
 381         }
 382
 383         /*
 384          * Just in case, eliminate any duplicates in the reference list.
 385          */
 386         reference = list_union(NIL, reference);
 387
 388         /*
 389          * Check each element of the reference list to see if it's in all the OR
 390          * clauses.  Build a new list of winning clauses.
 391          */
 392         winners = NIL;
 393         foreach(temp, reference)
 394         {
 395                 Expr       *refclause = (Expr *) lfirst(temp);
 396                 bool            win = true;
 397                 ListCell   *temp2;
 398
 399                 foreach(temp2, orlist)
 400                 {
 401                         Expr       *clause = (Expr *) lfirst(temp2);
 402
 403                         if (and_clause((Node *) clause))
 404                         {
 405                                 if (!list_member(((BoolExpr *) clause)->args, refclause))
 406                                 {
 407                                         win = false;
 408                                         break;
 409                                 }
 410                         }
 411                         else
 412                         {
 413                                 if (!equal(refclause, clause))
 414                                 {
 415                                         win = false;
 416                                         break;
 417                                 }
 418                         }
 419                 }
 420
 421                 if (win)
 422                         winners = lappend(winners, refclause);
 423         }
 424
 425         /*
 426          * If no winners, we can't transform the OR
 427          */
 428         if (winners == NIL)
 429                 return make_orclause(orlist);
 430
 431         /*
 432          * Generate new OR list consisting of the remaining sub-clauses.
 433          *
 434          * If any clause degenerates to empty, then we have a situation like (A AND
 435          * B) OR (A), which can be reduced to just A --- that is, the additional
 436          * conditions in other arms of the OR are irrelevant.
 437          *
 438          * Note that because we use list_difference, any multiple occurrences of a
 439          * winning clause in an AND sub-clause will be removed automatically.
 440          */
 441         neworlist = NIL;
 442         foreach(temp, orlist)
 443         {
 444                 Expr       *clause = (Expr *) lfirst(temp);
 445
 446                 if (and_clause((Node *) clause))
 447                 {
 448                         List       *subclauses = ((BoolExpr *) clause)->args;
 449
 450                         subclauses = list_difference(subclauses, winners);
 451                         if (subclauses != NIL)
 452                         {
 453                                 if (list_length(subclauses) == 1)
 454                                         neworlist = lappend(neworlist, linitial(subclauses));
 455                                 else
 456                                         neworlist = lappend(neworlist, make_andclause(subclauses));
 457                         }
 458                         else
 459                         {
 460                                 neworlist = NIL;        /* degenerate case, see above */
 461                                 break;
 462                         }
 463                 }
 464                 else
 465                 {
 466                         if (!list_member(winners, clause))
 467                                 neworlist = lappend(neworlist, clause);
 468                         else
 469                         {
 470                                 neworlist = NIL;        /* degenerate case, see above */
 471                                 break;
 472                         }
 473                 }
 474         }
 475
 476         /*
 477          * Append reduced OR to the winners list, if it's not degenerate, handling
 478          * the special case of one element correctly (can that really happen?).
 479          * Also be careful to maintain AND/OR flatness in case we pulled up a
 480          * sub-sub-OR-clause.
 481          */
 482         if (neworlist != NIL)
 483         {
 484                 if (list_length(neworlist) == 1)
 485                         winners = lappend(winners, linitial(neworlist));
 486                 else
 487                         winners = lappend(winners, make_orclause(pull_ors(neworlist)));
 488         }
 489
 490         /*
 491          * And return the constructed AND clause, again being wary of a single
 492          * element and AND/OR flatness.
 493          */
 494         if (list_length(winners) == 1)
 495                 return (Expr *) linitial(winners);
 496         else
 497                 return make_andclause(pull_ands(winners));
 498 }