granicus.if.org Git - postgresql/blob - src/backend/optimizer/prep/prepqual.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * prepqual.c
   4  *        Routines for preprocessing qualification expressions
   5  *
   6  *
   7  * The parser regards AND and OR as purely binary operators, so a qual like
   8  *              (A = 1) OR (A = 2) OR (A = 3) ...
   9  * will produce a nested parsetree
  10  *              (OR (A = 1) (OR (A = 2) (OR (A = 3) ...)))
  11  * In reality, the optimizer and executor regard AND and OR as N-argument
  12  * operators, so this tree can be flattened to
  13  *              (OR (A = 1) (A = 2) (A = 3) ...)
  14  *
  15  * Formerly, this module was responsible for doing the initial flattening,
  16  * but now we leave it to eval_const_expressions to do that since it has to
  17  * make a complete pass over the expression tree anyway.  Instead, we just
  18  * have to ensure that our manipulations preserve AND/OR flatness.
  19  * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
  20  * tree after local transformations that might introduce nested AND/ORs.
  21  *
  22  *
  23  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
  24  * Portions Copyright (c) 1994, Regents of the University of California
  25  *
  26  *
  27  * IDENTIFICATION
  28  *        src/backend/optimizer/prep/prepqual.c
  29  *
  30  *-------------------------------------------------------------------------
  31  */
  32
  33 #include "postgres.h"
  34
  35 #include "nodes/makefuncs.h"
  36 #include "optimizer/clauses.h"
  37 #include "optimizer/prep.h"
  38 #include "utils/lsyscache.h"
  39
  40
  41 static List *pull_ands(List *andlist);
  42 static List *pull_ors(List *orlist);
  43 static Expr *find_duplicate_ors(Expr *qual);
  44 static Expr *process_duplicate_ors(List *orlist);
  45
  46
  47 /*
  48  * negate_clause
  49  *        Negate a Boolean expression.
  50  *
  51  * Input is a clause to be negated (e.g., the argument of a NOT clause).
  52  * Returns a new clause equivalent to the negation of the given clause.
  53  *
  54  * Although this can be invoked on its own, it's mainly intended as a helper
  55  * for eval_const_expressions(), and that context drives several design
  56  * decisions.  In particular, if the input is already AND/OR flat, we must
  57  * preserve that property.  We also don't bother to recurse in situations
  58  * where we can assume that lower-level executions of eval_const_expressions
  59  * would already have simplified sub-clauses of the input.
  60  *
  61  * The difference between this and a simple make_notclause() is that this
  62  * tries to get rid of the NOT node by logical simplification.  It's clearly
  63  * always a win if the NOT node can be eliminated altogether.  However, our
  64  * use of DeMorgan's laws could result in having more NOT nodes rather than
  65  * fewer.  We do that unconditionally anyway, because in WHERE clauses it's
  66  * important to expose as much top-level AND/OR structure as possible.
  67  * Also, eliminating an intermediate NOT may allow us to flatten two levels
  68  * of AND or OR together that we couldn't have otherwise.  Finally, one of
  69  * the motivations for doing this is to ensure that logically equivalent
  70  * expressions will be seen as physically equal(), so we should always apply
  71  * the same transformations.
  72  */
  73 Node *
  74 negate_clause(Node *node)
  75 {
  76         if (node == NULL)                       /* should not happen */
  77                 elog(ERROR, "can't negate an empty subexpression");
  78         switch (nodeTag(node))
  79         {
  80                 case T_Const:
  81                         {
  82                                 Const      *c = (Const *) node;
  83
  84                                 /* NOT NULL is still NULL */
  85                                 if (c->constisnull)
  86                                         return makeBoolConst(false, true);
  87                                 /* otherwise pretty easy */
  88                                 return makeBoolConst(!DatumGetBool(c->constvalue), false);
  89                         }
  90                         break;
  91                 case T_OpExpr:
  92                         {
  93                                 /*
  94                                  * Negate operator if possible: (NOT (< A B)) => (>= A B)
  95                                  */
  96                                 OpExpr     *opexpr = (OpExpr *) node;
  97                                 Oid                     negator = get_negator(opexpr->opno);
  98
  99                                 if (negator)
 100                                 {
 101                                         OpExpr     *newopexpr = makeNode(OpExpr);
 102
 103                                         newopexpr->opno = negator;
 104                                         newopexpr->opfuncid = InvalidOid;
 105                                         newopexpr->opresulttype = opexpr->opresulttype;
 106                                         newopexpr->opretset = opexpr->opretset;
 107                                         newopexpr->args = opexpr->args;
 108                                         newopexpr->location = opexpr->location;
 109                                         return (Node *) newopexpr;
 110                                 }
 111                         }
 112                         break;
 113                 case T_ScalarArrayOpExpr:
 114                         {
 115                                 /*
 116                                  * Negate a ScalarArrayOpExpr if its operator has a negator;
 117                                  * for example x = ANY (list) becomes x <> ALL (list)
 118                                  */
 119                                 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
 120                                 Oid                     negator = get_negator(saopexpr->opno);
 121
 122                                 if (negator)
 123                                 {
 124                                         ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
 125
 126                                         newopexpr->opno = negator;
 127                                         newopexpr->opfuncid = InvalidOid;
 128                                         newopexpr->useOr = !saopexpr->useOr;
 129                                         newopexpr->args = saopexpr->args;
 130                                         newopexpr->location = saopexpr->location;
 131                                         return (Node *) newopexpr;
 132                                 }
 133                         }
 134                         break;
 135                 case T_BoolExpr:
 136                         {
 137                                 BoolExpr   *expr = (BoolExpr *) node;
 138
 139                                 switch (expr->boolop)
 140                                 {
 141                                         /*--------------------
 142                                          * Apply DeMorgan's Laws:
 143                                          *              (NOT (AND A B)) => (OR (NOT A) (NOT B))
 144                                          *              (NOT (OR A B))  => (AND (NOT A) (NOT B))
 145                                          * i.e., swap AND for OR and negate each subclause.
 146                                          *
 147                                          * If the input is already AND/OR flat and has no NOT
 148                                          * directly above AND or OR, this transformation preserves
 149                                          * those properties.  For example, if no direct child of
 150                                          * the given AND clause is an AND or a NOT-above-OR, then
 151                                          * the recursive calls of negate_clause() can't return any
 152                                          * OR clauses.  So we needn't call pull_ors() before
 153                                          * building a new OR clause.  Similarly for the OR case.
 154                                          *--------------------
 155                                          */
 156                                         case AND_EXPR:
 157                                                 {
 158                                                         List       *nargs = NIL;
 159                                                         ListCell   *lc;
 160
 161                                                         foreach(lc, expr->args)
 162                                                         {
 163                                                                 nargs = lappend(nargs,
 164                                                                                                 negate_clause(lfirst(lc)));
 165                                                         }
 166                                                         return (Node *) make_orclause(nargs);
 167                                                 }
 168                                                 break;
 169                                         case OR_EXPR:
 170                                                 {
 171                                                         List       *nargs = NIL;
 172                                                         ListCell   *lc;
 173
 174                                                         foreach(lc, expr->args)
 175                                                         {
 176                                                                 nargs = lappend(nargs,
 177                                                                                                 negate_clause(lfirst(lc)));
 178                                                         }
 179                                                         return (Node *) make_andclause(nargs);
 180                                                 }
 181                                                 break;
 182                                         case NOT_EXPR:
 183                                                 /*
 184                                                  * NOT underneath NOT: they cancel.  We assume the
 185                                                  * input is already simplified, so no need to recurse.
 186                                                  */
 187                                                 return (Node *) linitial(expr->args);
 188                                         default:
 189                                                 elog(ERROR, "unrecognized boolop: %d",
 190                                                          (int) expr->boolop);
 191                                                 break;
 192                                 }
 193                         }
 194                         break;
 195                 case T_NullTest:
 196                         {
 197                                 NullTest   *expr = (NullTest *) node;
 198
 199                                 /*
 200                                  * In the rowtype case, the two flavors of NullTest are *not*
 201                                  * logical inverses, so we can't simplify.  But it does work
 202                                  * for scalar datatypes.
 203                                  */
 204                                 if (!expr->argisrow)
 205                                 {
 206                                         NullTest   *newexpr = makeNode(NullTest);
 207
 208                                         newexpr->arg = expr->arg;
 209                                         newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
 210                                                                                          IS_NOT_NULL : IS_NULL);
 211                                         newexpr->argisrow = expr->argisrow;
 212                                         return (Node *) newexpr;
 213                                 }
 214                         }
 215                         break;
 216                 case T_BooleanTest:
 217                         {
 218                                 BooleanTest   *expr = (BooleanTest *) node;
 219                                 BooleanTest   *newexpr = makeNode(BooleanTest);
 220
 221                                 newexpr->arg = expr->arg;
 222                                 switch (expr->booltesttype)
 223                                 {
 224                                         case IS_TRUE:
 225                                                 newexpr->booltesttype = IS_NOT_TRUE;
 226                                                 break;
 227                                         case IS_NOT_TRUE:
 228                                                 newexpr->booltesttype = IS_TRUE;
 229                                                 break;
 230                                         case IS_FALSE:
 231                                                 newexpr->booltesttype = IS_NOT_FALSE;
 232                                                 break;
 233                                         case IS_NOT_FALSE:
 234                                                 newexpr->booltesttype = IS_FALSE;
 235                                                 break;
 236                                         case IS_UNKNOWN:
 237                                                 newexpr->booltesttype = IS_NOT_UNKNOWN;
 238                                                 break;
 239                                         case IS_NOT_UNKNOWN:
 240                                                 newexpr->booltesttype = IS_UNKNOWN;
 241                                                 break;
 242                                         default:
 243                                                 elog(ERROR, "unrecognized booltesttype: %d",
 244                                                          (int) expr->booltesttype);
 245                                                 break;
 246                                 }
 247                                 return (Node *) newexpr;
 248                         }
 249                         break;
 250                 default:
 251                         /* else fall through */
 252                         break;
 253         }
 254
 255         /*
 256          * Otherwise we don't know how to simplify this, so just tack on an
 257          * explicit NOT node.
 258          */
 259         return (Node *) make_notclause((Expr *) node);
 260 }
 261
 262
 263 /*
 264  * canonicalize_qual
 265  *        Convert a qualification expression to the most useful form.
 266  *
 267  * The name of this routine is a holdover from a time when it would try to
 268  * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
 269  * Eventually, we recognized that that had more theoretical purity than
 270  * actual usefulness, and so now the transformation doesn't involve any
 271  * notion of reaching a canonical form.
 272  *
 273  * NOTE: we assume the input has already been through eval_const_expressions
 274  * and therefore possesses AND/OR flatness.  Formerly this function included
 275  * its own flattening logic, but that requires a useless extra pass over the
 276  * tree.
 277  *
 278  * Returns the modified qualification.
 279  */
 280 Expr *
 281 canonicalize_qual(Expr *qual)
 282 {
 283         Expr       *newqual;
 284
 285         /* Quick exit for empty qual */
 286         if (qual == NULL)
 287                 return NULL;
 288
 289         /*
 290          * Pull up redundant subclauses in OR-of-AND trees.  We do this only
 291          * within the top-level AND/OR structure; there's no point in looking
 292          * deeper.
 293          */
 294         newqual = find_duplicate_ors(qual);
 295
 296         return newqual;
 297 }
 298
 299
 300 /*
 301  * pull_ands
 302  *        Recursively flatten nested AND clauses into a single and-clause list.
 303  *
 304  * Input is the arglist of an AND clause.
 305  * Returns the rebuilt arglist (note original list structure is not touched).
 306  */
 307 static List *
 308 pull_ands(List *andlist)
 309 {
 310         List       *out_list = NIL;
 311         ListCell   *arg;
 312
 313         foreach(arg, andlist)
 314         {
 315                 Node       *subexpr = (Node *) lfirst(arg);
 316
 317                 /*
 318                  * Note: we can destructively concat the subexpression's arglist
 319                  * because we know the recursive invocation of pull_ands will have
 320                  * built a new arglist not shared with any other expr. Otherwise we'd
 321                  * need a list_copy here.
 322                  */
 323                 if (and_clause(subexpr))
 324                         out_list = list_concat(out_list,
 325                                                                    pull_ands(((BoolExpr *) subexpr)->args));
 326                 else
 327                         out_list = lappend(out_list, subexpr);
 328         }
 329         return out_list;
 330 }
 331
 332 /*
 333  * pull_ors
 334  *        Recursively flatten nested OR clauses into a single or-clause list.
 335  *
 336  * Input is the arglist of an OR clause.
 337  * Returns the rebuilt arglist (note original list structure is not touched).
 338  */
 339 static List *
 340 pull_ors(List *orlist)
 341 {
 342         List       *out_list = NIL;
 343         ListCell   *arg;
 344
 345         foreach(arg, orlist)
 346         {
 347                 Node       *subexpr = (Node *) lfirst(arg);
 348
 349                 /*
 350                  * Note: we can destructively concat the subexpression's arglist
 351                  * because we know the recursive invocation of pull_ors will have
 352                  * built a new arglist not shared with any other expr. Otherwise we'd
 353                  * need a list_copy here.
 354                  */
 355                 if (or_clause(subexpr))
 356                         out_list = list_concat(out_list,
 357                                                                    pull_ors(((BoolExpr *) subexpr)->args));
 358                 else
 359                         out_list = lappend(out_list, subexpr);
 360         }
 361         return out_list;
 362 }
 363
 364
 365 /*--------------------
 366  * The following code attempts to apply the inverse OR distributive law:
 367  *              ((A AND B) OR (A AND C))  =>  (A AND (B OR C))
 368  * That is, locate OR clauses in which every subclause contains an
 369  * identical term, and pull out the duplicated terms.
 370  *
 371  * This may seem like a fairly useless activity, but it turns out to be
 372  * applicable to many machine-generated queries, and there are also queries
 373  * in some of the TPC benchmarks that need it.  This was in fact almost the
 374  * sole useful side-effect of the old prepqual code that tried to force
 375  * the query into canonical AND-of-ORs form: the canonical equivalent of
 376  *              ((A AND B) OR (A AND C))
 377  * is
 378  *              ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
 379  * which the code was able to simplify to
 380  *              (A AND (A OR C) AND (B OR A) AND (B OR C))
 381  * thus successfully extracting the common condition A --- but at the cost
 382  * of cluttering the qual with many redundant clauses.
 383  *--------------------
 384  */
 385
 386 /*
 387  * find_duplicate_ors
 388  *        Given a qualification tree with the NOTs pushed down, search for
 389  *        OR clauses to which the inverse OR distributive law might apply.
 390  *        Only the top-level AND/OR structure is searched.
 391  *
 392  * Returns the modified qualification.  AND/OR flatness is preserved.
 393  */
 394 static Expr *
 395 find_duplicate_ors(Expr *qual)
 396 {
 397         if (or_clause((Node *) qual))
 398         {
 399                 List       *orlist = NIL;
 400                 ListCell   *temp;
 401
 402                 /* Recurse */
 403                 foreach(temp, ((BoolExpr *) qual)->args)
 404                         orlist = lappend(orlist, find_duplicate_ors(lfirst(temp)));
 405
 406                 /*
 407                  * Don't need pull_ors() since this routine will never introduce an OR
 408                  * where there wasn't one before.
 409                  */
 410                 return process_duplicate_ors(orlist);
 411         }
 412         else if (and_clause((Node *) qual))
 413         {
 414                 List       *andlist = NIL;
 415                 ListCell   *temp;
 416
 417                 /* Recurse */
 418                 foreach(temp, ((BoolExpr *) qual)->args)
 419                         andlist = lappend(andlist, find_duplicate_ors(lfirst(temp)));
 420                 /* Flatten any ANDs introduced just below here */
 421                 andlist = pull_ands(andlist);
 422                 /* The AND list can't get shorter, so result is always an AND */
 423                 return make_andclause(andlist);
 424         }
 425         else
 426                 return qual;
 427 }
 428
 429 /*
 430  * process_duplicate_ors
 431  *        Given a list of exprs which are ORed together, try to apply
 432  *        the inverse OR distributive law.
 433  *
 434  * Returns the resulting expression (could be an AND clause, an OR
 435  * clause, or maybe even a single subexpression).
 436  */
 437 static Expr *
 438 process_duplicate_ors(List *orlist)
 439 {
 440         List       *reference = NIL;
 441         int                     num_subclauses = 0;
 442         List       *winners;
 443         List       *neworlist;
 444         ListCell   *temp;
 445
 446         if (orlist == NIL)
 447                 return NULL;                    /* probably can't happen */
 448         if (list_length(orlist) == 1)           /* single-expression OR (can this
 449                                                                                  * happen?) */
 450                 return linitial(orlist);
 451
 452         /*
 453          * Choose the shortest AND clause as the reference list --- obviously, any
 454          * subclause not in this clause isn't in all the clauses. If we find a
 455          * clause that's not an AND, we can treat it as a one-element AND clause,
 456          * which necessarily wins as shortest.
 457          */
 458         foreach(temp, orlist)
 459         {
 460                 Expr       *clause = (Expr *) lfirst(temp);
 461
 462                 if (and_clause((Node *) clause))
 463                 {
 464                         List       *subclauses = ((BoolExpr *) clause)->args;
 465                         int                     nclauses = list_length(subclauses);
 466
 467                         if (reference == NIL || nclauses < num_subclauses)
 468                         {
 469                                 reference = subclauses;
 470                                 num_subclauses = nclauses;
 471                         }
 472                 }
 473                 else
 474                 {
 475                         reference = list_make1(clause);
 476                         break;
 477                 }
 478         }
 479
 480         /*
 481          * Just in case, eliminate any duplicates in the reference list.
 482          */
 483         reference = list_union(NIL, reference);
 484
 485         /*
 486          * Check each element of the reference list to see if it's in all the OR
 487          * clauses.  Build a new list of winning clauses.
 488          */
 489         winners = NIL;
 490         foreach(temp, reference)
 491         {
 492                 Expr       *refclause = (Expr *) lfirst(temp);
 493                 bool            win = true;
 494                 ListCell   *temp2;
 495
 496                 foreach(temp2, orlist)
 497                 {
 498                         Expr       *clause = (Expr *) lfirst(temp2);
 499
 500                         if (and_clause((Node *) clause))
 501                         {
 502                                 if (!list_member(((BoolExpr *) clause)->args, refclause))
 503                                 {
 504                                         win = false;
 505                                         break;
 506                                 }
 507                         }
 508                         else
 509                         {
 510                                 if (!equal(refclause, clause))
 511                                 {
 512                                         win = false;
 513                                         break;
 514                                 }
 515                         }
 516                 }
 517
 518                 if (win)
 519                         winners = lappend(winners, refclause);
 520         }
 521
 522         /*
 523          * If no winners, we can't transform the OR
 524          */
 525         if (winners == NIL)
 526                 return make_orclause(orlist);
 527
 528         /*
 529          * Generate new OR list consisting of the remaining sub-clauses.
 530          *
 531          * If any clause degenerates to empty, then we have a situation like (A
 532          * AND B) OR (A), which can be reduced to just A --- that is, the
 533          * additional conditions in other arms of the OR are irrelevant.
 534          *
 535          * Note that because we use list_difference, any multiple occurrences of a
 536          * winning clause in an AND sub-clause will be removed automatically.
 537          */
 538         neworlist = NIL;
 539         foreach(temp, orlist)
 540         {
 541                 Expr       *clause = (Expr *) lfirst(temp);
 542
 543                 if (and_clause((Node *) clause))
 544                 {
 545                         List       *subclauses = ((BoolExpr *) clause)->args;
 546
 547                         subclauses = list_difference(subclauses, winners);
 548                         if (subclauses != NIL)
 549                         {
 550                                 if (list_length(subclauses) == 1)
 551                                         neworlist = lappend(neworlist, linitial(subclauses));
 552                                 else
 553                                         neworlist = lappend(neworlist, make_andclause(subclauses));
 554                         }
 555                         else
 556                         {
 557                                 neworlist = NIL;        /* degenerate case, see above */
 558                                 break;
 559                         }
 560                 }
 561                 else
 562                 {
 563                         if (!list_member(winners, clause))
 564                                 neworlist = lappend(neworlist, clause);
 565                         else
 566                         {
 567                                 neworlist = NIL;        /* degenerate case, see above */
 568                                 break;
 569                         }
 570                 }
 571         }
 572
 573         /*
 574          * Append reduced OR to the winners list, if it's not degenerate, handling
 575          * the special case of one element correctly (can that really happen?).
 576          * Also be careful to maintain AND/OR flatness in case we pulled up a
 577          * sub-sub-OR-clause.
 578          */
 579         if (neworlist != NIL)
 580         {
 581                 if (list_length(neworlist) == 1)
 582                         winners = lappend(winners, linitial(neworlist));
 583                 else
 584                         winners = lappend(winners, make_orclause(pull_ors(neworlist)));
 585         }
 586
 587         /*
 588          * And return the constructed AND clause, again being wary of a single
 589          * element and AND/OR flatness.
 590          */
 591         if (list_length(winners) == 1)
 592                 return (Expr *) linitial(winners);
 593         else
 594                 return make_andclause(pull_ands(winners));
 595 }