granicus.if.org Git - postgresql/blob - src/backend/optimizer/prep/prepqual.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * prepqual.c
   4  *        Routines for preprocessing qualification expressions
   5  *
   6  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL: pgsql/src/backend/optimizer/prep/prepqual.c,v 1.46 2004/08/29 05:06:44 momjian Exp $
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "nodes/makefuncs.h"
  19 #include "optimizer/clauses.h"
  20 #include "optimizer/prep.h"
  21 #include "utils/lsyscache.h"
  22
  23
  24 static Node *flatten_andors_mutator(Node *node, void *context);
  25 static List *pull_ands(List *andlist);
  26 static List *pull_ors(List *orlist);
  27 static Expr *find_nots(Expr *qual);
  28 static Expr *push_nots(Expr *qual);
  29 static Expr *find_duplicate_ors(Expr *qual);
  30 static Expr *process_duplicate_ors(List *orlist);
  31
  32
  33 /*
  34  * canonicalize_qual
  35  *        Convert a qualification expression to the most useful form.
  36  *
  37  * The name of this routine is a holdover from a time when it would try to
  38  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
  39  * Eventually, we recognized that that had more theoretical purity than
  40  * actual usefulness, and so now the transformation doesn't involve any
  41  * notion of reaching a canonical form.
  42  *
  43  * Returns the modified qualification.
  44  */
  45 Expr *
  46 canonicalize_qual(Expr *qual)
  47 {
  48         Expr       *newqual;
  49
  50         /* Quick exit for empty qual */
  51         if (qual == NULL)
  52                 return NULL;
  53
  54         /*
  55          * Flatten AND and OR groups throughout the expression tree.
  56          */
  57         newqual = (Expr *) flatten_andors((Node *) qual);
  58
  59         /*
  60          * Push down NOTs.      We do this only in the top-level boolean
  61          * expression, without examining arguments of operators/functions. The
  62          * main reason for doing this is to expose as much top-level AND/OR
  63          * structure as we can, so there's no point in descending further.
  64          */
  65         newqual = find_nots(newqual);
  66
  67         /*
  68          * Pull up redundant subclauses in OR-of-AND trees.  Again, we do this
  69          * only within the top-level AND/OR structure.
  70          */
  71         newqual = find_duplicate_ors(newqual);
  72
  73         return newqual;
  74 }
  75
  76
  77 /*--------------------
  78  * The parser regards AND and OR as purely binary operators, so a qual like
  79  *              (A = 1) OR (A = 2) OR (A = 3) ...
  80  * will produce a nested parsetree
  81  *              (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
  82  * In reality, the optimizer and executor regard AND and OR as n-argument
  83  * operators, so this tree can be flattened to
  84  *              (OR (A = 1) (A = 2) (A = 3) ...)
  85  * which is the responsibility of the routines below.
  86  *
  87  * flatten_andors() does the basic transformation with no initial assumptions.
  88  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
  89  * tree after local transformations that might introduce nested AND/ORs.
  90  *--------------------
  91  */
  92
  93 /*
  94  * flatten_andors
  95  *        Given an expression tree, simplify nested AND/OR clauses into flat
  96  *        AND/OR clauses with more arguments.  The entire tree is processed.
  97  *
  98  * Returns the rebuilt expr (note original structure is not touched).
  99  *
 100  * This is exported so that other modules can perform the part of
 101  * canonicalize_qual processing that applies to entire trees, rather
 102  * than just the top-level boolean expressions.
 103  */
 104 Node *
 105 flatten_andors(Node *node)
 106 {
 107         return flatten_andors_mutator(node, NULL);
 108 }
 109
 110 static Node *
 111 flatten_andors_mutator(Node *node, void *context)
 112 {
 113         if (node == NULL)
 114                 return NULL;
 115         if (IsA(node, BoolExpr))
 116         {
 117                 BoolExpr   *bexpr = (BoolExpr *) node;
 118
 119                 if (bexpr->boolop == AND_EXPR)
 120                 {
 121                         List       *out_list = NIL;
 122                         ListCell   *arg;
 123
 124                         foreach(arg, bexpr->args)
 125                         {
 126                                 Node       *subexpr = flatten_andors((Node *) lfirst(arg));
 127
 128                                 /*
 129                                  * Note: we can destructively concat the subexpression's
 130                                  * arglist because we know the recursive invocation of
 131                                  * flatten_andors will have built a new arglist not shared
 132                                  * with any other expr. Otherwise we'd need a list_copy
 133                                  * here.
 134                                  */
 135                                 if (and_clause(subexpr))
 136                                         out_list = list_concat(out_list,
 137                                                                                    ((BoolExpr *) subexpr)->args);
 138                                 else
 139                                         out_list = lappend(out_list, subexpr);
 140                         }
 141                         return (Node *) make_andclause(out_list);
 142                 }
 143                 if (bexpr->boolop == OR_EXPR)
 144                 {
 145                         List       *out_list = NIL;
 146                         ListCell   *arg;
 147
 148                         foreach(arg, bexpr->args)
 149                         {
 150                                 Node       *subexpr = flatten_andors((Node *) lfirst(arg));
 151
 152                                 /*
 153                                  * Note: we can destructively concat the subexpression's
 154                                  * arglist because we know the recursive invocation of
 155                                  * flatten_andors will have built a new arglist not shared
 156                                  * with any other expr. Otherwise we'd need a list_copy
 157                                  * here.
 158                                  */
 159                                 if (or_clause(subexpr))
 160                                         out_list = list_concat(out_list,
 161                                                                                    ((BoolExpr *) subexpr)->args);
 162                                 else
 163                                         out_list = lappend(out_list, subexpr);
 164                         }
 165                         return (Node *) make_orclause(out_list);
 166                 }
 167                 /* else it's a NOT clause, fall through */
 168         }
 169         return expression_tree_mutator(node, flatten_andors_mutator, context);
 170 }
 171
 172 /*
 173  * pull_ands
 174  *        Recursively flatten nested AND clauses into a single and-clause list.
 175  *
 176  * Input is the arglist of an AND clause.
 177  * Returns the rebuilt arglist (note original list structure is not touched).
 178  */
 179 static List *
 180 pull_ands(List *andlist)
 181 {
 182         List       *out_list = NIL;
 183         ListCell   *arg;
 184
 185         foreach(arg, andlist)
 186         {
 187                 Node       *subexpr = (Node *) lfirst(arg);
 188
 189                 /*
 190                  * Note: we can destructively concat the subexpression's arglist
 191                  * because we know the recursive invocation of pull_ands will have
 192                  * built a new arglist not shared with any other expr. Otherwise
 193                  * we'd need a list_copy here.
 194                  */
 195                 if (and_clause(subexpr))
 196                         out_list = list_concat(out_list,
 197                                                                 pull_ands(((BoolExpr *) subexpr)->args));
 198                 else
 199                         out_list = lappend(out_list, subexpr);
 200         }
 201         return out_list;
 202 }
 203
 204 /*
 205  * pull_ors
 206  *        Recursively flatten nested OR clauses into a single or-clause list.
 207  *
 208  * Input is the arglist of an OR clause.
 209  * Returns the rebuilt arglist (note original list structure is not touched).
 210  */
 211 static List *
 212 pull_ors(List *orlist)
 213 {
 214         List       *out_list = NIL;
 215         ListCell   *arg;
 216
 217         foreach(arg, orlist)
 218         {
 219                 Node       *subexpr = (Node *) lfirst(arg);
 220
 221                 /*
 222                  * Note: we can destructively concat the subexpression's arglist
 223                  * because we know the recursive invocation of pull_ors will have
 224                  * built a new arglist not shared with any other expr. Otherwise
 225                  * we'd need a list_copy here.
 226                  */
 227                 if (or_clause(subexpr))
 228                         out_list = list_concat(out_list,
 229                                                                  pull_ors(((BoolExpr *) subexpr)->args));
 230                 else
 231                         out_list = lappend(out_list, subexpr);
 232         }
 233         return out_list;
 234 }
 235
 236
 237 /*
 238  * find_nots
 239  *        Traverse the qualification, looking for NOTs to take care of.
 240  *        For NOT clauses, apply push_nots() to try to push down the NOT.
 241  *        For AND and OR clause types, simply recurse.  Otherwise stop
 242  *        recursing (we do not worry about structure below the top AND/OR tree).
 243  *
 244  * Returns the modified qualification.  AND/OR flatness is preserved.
 245  */
 246 static Expr *
 247 find_nots(Expr *qual)
 248 {
 249         if (qual == NULL)
 250                 return NULL;
 251
 252         if (and_clause((Node *) qual))
 253         {
 254                 List       *t_list = NIL;
 255                 ListCell   *temp;
 256
 257                 foreach(temp, ((BoolExpr *) qual)->args)
 258                         t_list = lappend(t_list, find_nots(lfirst(temp)));
 259                 return make_andclause(pull_ands(t_list));
 260         }
 261         else if (or_clause((Node *) qual))
 262         {
 263                 List       *t_list = NIL;
 264                 ListCell   *temp;
 265
 266                 foreach(temp, ((BoolExpr *) qual)->args)
 267                         t_list = lappend(t_list, find_nots(lfirst(temp)));
 268                 return make_orclause(pull_ors(t_list));
 269         }
 270         else if (not_clause((Node *) qual))
 271                 return push_nots(get_notclausearg(qual));
 272         else
 273                 return qual;
 274 }
 275
 276 /*
 277  * push_nots
 278  *        Push down a NOT as far as possible.
 279  *
 280  * Input is an expression to be negated (e.g., the argument of a NOT clause).
 281  * Returns a new qual equivalent to the negation of the given qual.
 282  */
 283 static Expr *
 284 push_nots(Expr *qual)
 285 {
 286         if (qual == NULL)
 287                 return make_notclause(qual);    /* XXX is this right?  Or
 288                                                                                  * possible? */
 289
 290         /*
 291          * Negate an operator clause if possible: (NOT (< A B)) => (> A B)
 292          * Otherwise, retain the clause as it is (the NOT can't be pushed down
 293          * any farther).
 294          */
 295         if (is_opclause(qual))
 296         {
 297                 OpExpr     *opexpr = (OpExpr *) qual;
 298                 Oid                     negator = get_negator(opexpr->opno);
 299
 300                 if (negator)
 301                         return make_opclause(negator,
 302                                                                  opexpr->opresulttype,
 303                                                                  opexpr->opretset,
 304                                                                  (Expr *) get_leftop(qual),
 305                                                                  (Expr *) get_rightop(qual));
 306                 else
 307                         return make_notclause(qual);
 308         }
 309         else if (and_clause((Node *) qual))
 310         {
 311                 /*--------------------
 312                  * Apply DeMorgan's Laws:
 313                  *              (NOT (AND A B)) => (OR (NOT A) (NOT B))
 314                  *              (NOT (OR A B))  => (AND (NOT A) (NOT B))
 315                  * i.e., swap AND for OR and negate all the subclauses.
 316                  *--------------------
 317                  */
 318                 List       *t_list = NIL;
 319                 ListCell   *temp;
 320
 321                 foreach(temp, ((BoolExpr *) qual)->args)
 322                         t_list = lappend(t_list, push_nots(lfirst(temp)));
 323                 return make_orclause(pull_ors(t_list));
 324         }
 325         else if (or_clause((Node *) qual))
 326         {
 327                 List       *t_list = NIL;
 328                 ListCell   *temp;
 329
 330                 foreach(temp, ((BoolExpr *) qual)->args)
 331                         t_list = lappend(t_list, push_nots(lfirst(temp)));
 332                 return make_andclause(pull_ands(t_list));
 333         }
 334         else if (not_clause((Node *) qual))
 335         {
 336                 /*
 337                  * Another NOT cancels this NOT, so eliminate the NOT and stop
 338                  * negating this branch.
 339                  */
 340                 return get_notclausearg(qual);
 341         }
 342         else
 343         {
 344                 /*
 345                  * We don't know how to negate anything else, place a NOT at this
 346                  * level.
 347                  */
 348                 return make_notclause(qual);
 349         }
 350 }
 351
 352
 353 /*--------------------
 354  * The following code attempts to apply the inverse OR distributive law:
 355  *              ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
 356  * That is, locate OR clauses in which every subclause contains an
 357  * identical term, and pull out the duplicated terms.
 358  *
 359  * This may seem like a fairly useless activity, but it turns out to be
 360  * applicable to many machine-generated queries, and there are also queries
 361  * in some of the TPC benchmarks that need it.  This was in fact almost the
 362  * sole useful side-effect of the old prepqual code that tried to force
 363  * the query into canonical AND-of-ORs form: the canonical equivalent of
 364  *              ((A AND B) OR (A AND C))
 365  * is
 366  *              ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
 367  * which the code was able to simplify to
 368  *              (A AND (A OR C) AND (B OR A) AND (B OR C))
 369  * thus successfully extracting the common condition A --- but at the cost
 370  * of cluttering the qual with many redundant clauses.
 371  *--------------------
 372  */
 373
 374 /*
 375  * find_duplicate_ors
 376  *        Given a qualification tree with the NOTs pushed down, search for
 377  *        OR clauses to which the inverse OR distributive law might apply.
 378  *        Only the top-level AND/OR structure is searched.
 379  *
 380  * Returns the modified qualification.  AND/OR flatness is preserved.
 381  */
 382 static Expr *
 383 find_duplicate_ors(Expr *qual)
 384 {
 385         if (qual == NULL)
 386                 return NULL;
 387
 388         if (or_clause((Node *) qual))
 389         {
 390                 List       *orlist = NIL;
 391                 ListCell   *temp;
 392
 393                 /* Recurse */
 394                 foreach(temp, ((BoolExpr *) qual)->args)
 395                         orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
 396
 397                 /*
 398                  * Don't need pull_ors() since this routine will never introduce
 399                  * an OR where there wasn't one before.
 400                  */
 401                 return process_duplicate_ors(orlist);
 402         }
 403         else if (and_clause((Node *) qual))
 404         {
 405                 List       *andlist = NIL;
 406                 ListCell   *temp;
 407
 408                 /* Recurse */
 409                 foreach(temp, ((BoolExpr *) qual)->args)
 410                         andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
 411                 /* Flatten any ANDs introduced just below here */
 412                 andlist = pull_ands(andlist);
 413                 /* The AND list can't get shorter, so result is always an AND */
 414                 return make_andclause(andlist);
 415         }
 416         else
 417                 return qual;
 418 }
 419
 420 /*
 421  * process_duplicate_ors
 422  *        Given a list of exprs which are ORed together, try to apply
 423  *        the inverse OR distributive law.
 424  *
 425  * Returns the resulting expression (could be an AND clause, an OR
 426  * clause, or maybe even a single subexpression).
 427  */
 428 static Expr *
 429 process_duplicate_ors(List *orlist)
 430 {
 431         List       *reference = NIL;
 432         int                     num_subclauses = 0;
 433         List       *winners;
 434         List       *neworlist;
 435         ListCell   *temp;
 436
 437         if (orlist == NIL)
 438                 return NULL;                    /* probably can't happen */
 439         if (list_length(orlist) == 1)           /* single-expression OR (can this
 440                                                                                  * happen?) */
 441                 return linitial(orlist);
 442
 443         /*
 444          * Choose the shortest AND clause as the reference list --- obviously,
 445          * any subclause not in this clause isn't in all the clauses. If we
 446          * find a clause that's not an AND, we can treat it as a one-element
 447          * AND clause, which necessarily wins as shortest.
 448          */
 449         foreach(temp, orlist)
 450         {
 451                 Expr       *clause = (Expr *) lfirst(temp);
 452
 453                 if (and_clause((Node *) clause))
 454                 {
 455                         List       *subclauses = ((BoolExpr *) clause)->args;
 456                         int                     nclauses = list_length(subclauses);
 457
 458                         if (reference == NIL || nclauses < num_subclauses)
 459                         {
 460                                 reference = subclauses;
 461                                 num_subclauses = nclauses;
 462                         }
 463                 }
 464                 else
 465                 {
 466                         reference = list_make1(clause);
 467                         break;
 468                 }
 469         }
 470
 471         /*
 472          * Just in case, eliminate any duplicates in the reference list.
 473          */
 474         reference = list_union(NIL, reference);
 475
 476         /*
 477          * Check each element of the reference list to see if it's in all the
 478          * OR clauses.  Build a new list of winning clauses.
 479          */
 480         winners = NIL;
 481         foreach(temp, reference)
 482         {
 483                 Expr       *refclause = (Expr *) lfirst(temp);
 484                 bool            win = true;
 485                 ListCell   *temp2;
 486
 487                 foreach(temp2, orlist)
 488                 {
 489                         Expr       *clause = (Expr *) lfirst(temp2);
 490
 491                         if (and_clause((Node *) clause))
 492                         {
 493                                 if (!list_member(((BoolExpr *) clause)->args, refclause))
 494                                 {
 495                                         win = false;
 496                                         break;
 497                                 }
 498                         }
 499                         else
 500                         {
 501                                 if (!equal(refclause, clause))
 502                                 {
 503                                         win = false;
 504                                         break;
 505                                 }
 506                         }
 507                 }
 508
 509                 if (win)
 510                         winners = lappend(winners, refclause);
 511         }
 512
 513         /*
 514          * If no winners, we can't transform the OR
 515          */
 516         if (winners == NIL)
 517                 return make_orclause(orlist);
 518
 519         /*
 520          * Generate new OR list consisting of the remaining sub-clauses.
 521          *
 522          * If any clause degenerates to empty, then we have a situation like (A
 523          * AND B) OR (A), which can be reduced to just A --- that is, the
 524          * additional conditions in other arms of the OR are irrelevant.
 525          *
 526          * Note that because we use list_difference, any multiple occurrences of
 527          * a winning clause in an AND sub-clause will be removed
 528          * automatically.
 529          */
 530         neworlist = NIL;
 531         foreach(temp, orlist)
 532         {
 533                 Expr       *clause = (Expr *) lfirst(temp);
 534
 535                 if (and_clause((Node *) clause))
 536                 {
 537                         List       *subclauses = ((BoolExpr *) clause)->args;
 538
 539                         subclauses = list_difference(subclauses, winners);
 540                         if (subclauses != NIL)
 541                         {
 542                                 if (list_length(subclauses) == 1)
 543                                         neworlist = lappend(neworlist, linitial(subclauses));
 544                                 else
 545                                         neworlist = lappend(neworlist, make_andclause(subclauses));
 546                         }
 547                         else
 548                         {
 549                                 neworlist = NIL;        /* degenerate case, see above */
 550                                 break;
 551                         }
 552                 }
 553                 else
 554                 {
 555                         if (!list_member(winners, clause))
 556                                 neworlist = lappend(neworlist, clause);
 557                         else
 558                         {
 559                                 neworlist = NIL;        /* degenerate case, see above */
 560                                 break;
 561                         }
 562                 }
 563         }
 564
 565         /*
 566          * Append reduced OR to the winners list, if it's not degenerate,
 567          * handling the special case of one element correctly (can that really
 568          * happen?). Also be careful to maintain AND/OR flatness in case we
 569          * pulled up a sub-sub-OR-clause.
 570          */
 571         if (neworlist != NIL)
 572         {
 573                 if (list_length(neworlist) == 1)
 574                         winners = lappend(winners, linitial(neworlist));
 575                 else
 576                         winners = lappend(winners, make_orclause(pull_ors(neworlist)));
 577         }
 578
 579         /*
 580          * And return the constructed AND clause, again being wary of a single
 581          * element and AND/OR flatness.
 582          */
 583         if (list_length(winners) == 1)
 584                 return (Expr *) linitial(winners);
 585         else
 586                 return make_andclause(pull_ands(winners));
 587 }