granicus.if.org Git - postgresql/blob - src/backend/optimizer/util/orclauses.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * orclauses.c
   4  *        Routines to extract restriction OR clauses from join OR clauses
   5  *
   6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/optimizer/util/orclauses.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include "optimizer/clauses.h"
  19 #include "optimizer/cost.h"
  20 #include "optimizer/orclauses.h"
  21 #include "optimizer/restrictinfo.h"
  22
  23
  24 static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
  25 static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
  26 static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
  27                                            Expr *orclause, RestrictInfo *join_or_rinfo);
  28
  29
  30 /*
  31  * extract_restriction_or_clauses
  32  *        Examine join OR-of-AND clauses to see if any useful restriction OR
  33  *        clauses can be extracted.  If so, add them to the query.
  34  *
  35  * Although a join clause must reference multiple relations overall,
  36  * an OR of ANDs clause might contain sub-clauses that reference just one
  37  * relation and can be used to build a restriction clause for that rel.
  38  * For example consider
  39  *              WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
  40  * We can transform this into
  41  *              WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
  42  *                      AND (a.x = 42 OR a.x = 44)
  43  *                      AND (b.y = 43 OR b.z = 45);
  44  * which allows the latter clauses to be applied during the scans of a and b,
  45  * perhaps as index qualifications, and in any case reducing the number of
  46  * rows arriving at the join.  In essence this is a partial transformation to
  47  * CNF (AND of ORs format).  It is not complete, however, because we do not
  48  * unravel the original OR --- doing so would usually bloat the qualification
  49  * expression to little gain.
  50  *
  51  * The added quals are partially redundant with the original OR, and therefore
  52  * would cause the size of the joinrel to be underestimated when it is finally
  53  * formed.  (This would be true of a full transformation to CNF as well; the
  54  * fault is not really in the transformation, but in clauselist_selectivity's
  55  * inability to recognize redundant conditions.)  We can compensate for this
  56  * redundancy by changing the cached selectivity of the original OR clause,
  57  * canceling out the (valid) reduction in the estimated sizes of the base
  58  * relations so that the estimated joinrel size remains the same.  This is
  59  * a MAJOR HACK: it depends on the fact that clause selectivities are cached
  60  * and on the fact that the same RestrictInfo node will appear in every
  61  * joininfo list that might be used when the joinrel is formed.
  62  * And it doesn't work in cases where the size estimation is nonlinear
  63  * (i.e., outer and IN joins).  But it beats not doing anything.
  64  *
  65  * We examine each base relation to see if join clauses associated with it
  66  * contain extractable restriction conditions.  If so, add those conditions
  67  * to the rel's baserestrictinfo and update the cached selectivities of the
  68  * join clauses.  Note that the same join clause will be examined afresh
  69  * from the point of view of each baserel that participates in it, so its
  70  * cached selectivity may get updated multiple times.
  71  */
  72 void
  73 extract_restriction_or_clauses(PlannerInfo *root)
  74 {
  75         Index           rti;
  76
  77         /* Examine each baserel for potential join OR clauses */
  78         for (rti = 1; rti < root->simple_rel_array_size; rti++)
  79         {
  80                 RelOptInfo *rel = root->simple_rel_array[rti];
  81                 ListCell   *lc;
  82
  83                 /* there may be empty slots corresponding to non-baserel RTEs */
  84                 if (rel == NULL)
  85                         continue;
  86
  87                 Assert(rel->relid == rti);      /* sanity check on array */
  88
  89                 /* ignore RTEs that are "other rels" */
  90                 if (rel->reloptkind != RELOPT_BASEREL)
  91                         continue;
  92
  93                 /*
  94                  * Find potentially interesting OR joinclauses.  We can use any
  95                  * joinclause that is considered safe to move to this rel by the
  96                  * parameterized-path machinery, even though what we are going to do
  97                  * with it is not exactly a parameterized path.
  98                  *
  99                  * However, it seems best to ignore clauses that have been marked
 100                  * redundant (by setting norm_selec > 1).  That likely can't happen
 101                  * for OR clauses, but let's be safe.
 102                  */
 103                 foreach(lc, rel->joininfo)
 104                 {
 105                         RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
 106
 107                         if (restriction_is_or_clause(rinfo) &&
 108                                 join_clause_is_movable_to(rinfo, rel) &&
 109                                 rinfo->norm_selec <= 1)
 110                         {
 111                                 /* Try to extract a qual for this rel only */
 112                                 Expr       *orclause = extract_or_clause(rinfo, rel);
 113
 114                                 /*
 115                                  * If successful, decide whether we want to use the clause,
 116                                  * and insert it into the rel's restrictinfo list if so.
 117                                  */
 118                                 if (orclause)
 119                                         consider_new_or_clause(root, rel, orclause, rinfo);
 120                         }
 121                 }
 122         }
 123 }
 124
 125 /*
 126  * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
 127  */
 128 static bool
 129 is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
 130 {
 131         /*
 132          * We want clauses that mention the rel, and only the rel.  So in
 133          * particular pseudoconstant clauses can be rejected quickly.  Then check
 134          * the clause's Var membership.
 135          */
 136         if (rinfo->pseudoconstant)
 137                 return false;
 138         if (!bms_equal(rinfo->clause_relids, rel->relids))
 139                 return false;
 140
 141         /* We don't want extra evaluations of any volatile functions */
 142         if (contain_volatile_functions((Node *) rinfo->clause))
 143                 return false;
 144
 145         return true;
 146 }
 147
 148 /*
 149  * Try to extract a restriction clause mentioning only "rel" from the given
 150  * join OR-clause.
 151  *
 152  * We must be able to extract at least one qual for this rel from each of
 153  * the arms of the OR, else we can't use it.
 154  *
 155  * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
 156  * if no OR clause could be extracted.
 157  */
 158 static Expr *
 159 extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
 160 {
 161         List       *clauselist = NIL;
 162         ListCell   *lc;
 163
 164         /*
 165          * Scan each arm of the input OR clause.  Notice we descend into
 166          * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
 167          * toplevel OR/AND structure.  This is useful because we can use the info
 168          * in those nodes to make is_safe_restriction_clause_for()'s checks
 169          * cheaper.  We'll strip those nodes from the returned tree, though,
 170          * meaning that fresh ones will be built if the clause is accepted as a
 171          * restriction clause.  This might seem wasteful --- couldn't we re-use
 172          * the existing RestrictInfos?  But that'd require assuming that
 173          * selectivity and other cached data is computed exactly the same way for
 174          * a restriction clause as for a join clause, which seems undesirable.
 175          */
 176         Assert(or_clause((Node *) or_rinfo->orclause));
 177         foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
 178         {
 179                 Node       *orarg = (Node *) lfirst(lc);
 180                 List       *subclauses = NIL;
 181                 Node       *subclause;
 182
 183                 /* OR arguments should be ANDs or sub-RestrictInfos */
 184                 if (and_clause(orarg))
 185                 {
 186                         List       *andargs = ((BoolExpr *) orarg)->args;
 187                         ListCell   *lc2;
 188
 189                         foreach(lc2, andargs)
 190                         {
 191                                 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
 192
 193                                 if (restriction_is_or_clause(rinfo))
 194                                 {
 195                                         /*
 196                                          * Recurse to deal with nested OR.  Note we *must* recurse
 197                                          * here, this isn't just overly-tense optimization: we
 198                                          * have to descend far enough to find and strip all
 199                                          * RestrictInfos in the expression.
 200                                          */
 201                                         Expr       *suborclause;
 202
 203                                         suborclause = extract_or_clause(rinfo, rel);
 204                                         if (suborclause)
 205                                                 subclauses = lappend(subclauses, suborclause);
 206                                 }
 207                                 else if (is_safe_restriction_clause_for(rinfo, rel))
 208                                         subclauses = lappend(subclauses, rinfo->clause);
 209                         }
 210                 }
 211                 else
 212                 {
 213                         RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
 214
 215                         Assert(!restriction_is_or_clause(rinfo));
 216                         if (is_safe_restriction_clause_for(rinfo, rel))
 217                                 subclauses = lappend(subclauses, rinfo->clause);
 218                 }
 219
 220                 /*
 221                  * If nothing could be extracted from this arm, we can't do anything
 222                  * with this OR clause.
 223                  */
 224                 if (subclauses == NIL)
 225                         return NULL;
 226
 227                 /*
 228                  * OK, add subclause(s) to the result OR.  If we found more than one,
 229                  * we need an AND node.  But if we found only one, and it is itself an
 230                  * OR node, add its subclauses to the result instead; this is needed
 231                  * to preserve AND/OR flatness (ie, no OR directly underneath OR).
 232                  */
 233                 subclause = (Node *) make_ands_explicit(subclauses);
 234                 if (or_clause(subclause))
 235                         clauselist = list_concat(clauselist,
 236                                                                   list_copy(((BoolExpr *) subclause)->args));
 237                 else
 238                         clauselist = lappend(clauselist, subclause);
 239         }
 240
 241         /*
 242          * If we got a restriction clause from every arm, wrap them up in an OR
 243          * node.  (In theory the OR node might be unnecessary, if there was only
 244          * one arm --- but then the input OR node was also redundant.)
 245          */
 246         if (clauselist != NIL)
 247                 return make_orclause(clauselist);
 248         return NULL;
 249 }
 250
 251 /*
 252  * Consider whether a successfully-extracted restriction OR clause is
 253  * actually worth using.  If so, add it to the planner's data structures,
 254  * and adjust the original join clause (join_or_rinfo) to compensate.
 255  */
 256 static void
 257 consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
 258                                            Expr *orclause, RestrictInfo *join_or_rinfo)
 259 {
 260         RestrictInfo *or_rinfo;
 261         Selectivity or_selec,
 262                                 orig_selec;
 263
 264         /*
 265          * Build a RestrictInfo from the new OR clause.  We can assume it's valid
 266          * as a base restriction clause.
 267          */
 268         or_rinfo = make_restrictinfo(orclause,
 269                                                                  true,
 270                                                                  false,
 271                                                                  false,
 272                                                                  join_or_rinfo->security_level,
 273                                                                  NULL,
 274                                                                  NULL,
 275                                                                  NULL);
 276
 277         /*
 278          * Estimate its selectivity.  (We could have done this earlier, but doing
 279          * it on the RestrictInfo representation allows the result to get cached,
 280          * saving work later.)
 281          */
 282         or_selec = clause_selectivity(root, (Node *) or_rinfo,
 283                                                                   0, JOIN_INNER, NULL);
 284
 285         /*
 286          * The clause is only worth adding to the query if it rejects a useful
 287          * fraction of the base relation's rows; otherwise, it's just going to
 288          * cause duplicate computation (since we will still have to check the
 289          * original OR clause when the join is formed).  Somewhat arbitrarily, we
 290          * set the selectivity threshold at 0.9.
 291          */
 292         if (or_selec > 0.9)
 293                 return;                                 /* forget it */
 294
 295         /*
 296          * OK, add it to the rel's restriction-clause list.
 297          */
 298         rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
 299         rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
 300                                                                                  or_rinfo->security_level);
 301
 302         /*
 303          * Adjust the original join OR clause's cached selectivity to compensate
 304          * for the selectivity of the added (but redundant) lower-level qual. This
 305          * should result in the join rel getting approximately the same rows
 306          * estimate as it would have gotten without all these shenanigans.
 307          *
 308          * XXX major hack alert: this depends on the assumption that the
 309          * selectivity will stay cached.
 310          *
 311          * XXX another major hack: we adjust only norm_selec, the cached
 312          * selectivity for JOIN_INNER semantics, even though the join clause
 313          * might've been an outer-join clause.  This is partly because we can't
 314          * easily identify the relevant SpecialJoinInfo here, and partly because
 315          * the linearity assumption we're making would fail anyway.  (If it is an
 316          * outer-join clause, "rel" must be on the nullable side, else we'd not
 317          * have gotten here.  So the computation of the join size is going to be
 318          * quite nonlinear with respect to the size of "rel", so it's not clear
 319          * how we ought to adjust outer_selec even if we could compute its
 320          * original value correctly.)
 321          */
 322         if (or_selec > 0)
 323         {
 324                 SpecialJoinInfo sjinfo;
 325
 326                 /*
 327                  * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare
 328                  * approx_tuple_count() in costsize.c.)
 329                  */
 330                 sjinfo.type = T_SpecialJoinInfo;
 331                 sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
 332                                                                                          rel->relids);
 333                 sjinfo.min_righthand = rel->relids;
 334                 sjinfo.syn_lefthand = sjinfo.min_lefthand;
 335                 sjinfo.syn_righthand = sjinfo.min_righthand;
 336                 sjinfo.jointype = JOIN_INNER;
 337                 /* we don't bother trying to make the remaining fields valid */
 338                 sjinfo.lhs_strict = false;
 339                 sjinfo.delay_upper_joins = false;
 340                 sjinfo.semi_can_btree = false;
 341                 sjinfo.semi_can_hash = false;
 342                 sjinfo.semi_operators = NIL;
 343                 sjinfo.semi_rhs_exprs = NIL;
 344
 345                 /* Compute inner-join size */
 346                 orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
 347                                                                                 0, JOIN_INNER, &sjinfo);
 348
 349                 /* And hack cached selectivity so join size remains the same */
 350                 join_or_rinfo->norm_selec = orig_selec / or_selec;
 351                 /* ensure result stays in sane range, in particular not "redundant" */
 352                 if (join_or_rinfo->norm_selec > 1)
 353                         join_or_rinfo->norm_selec = 1;
 354                 /* as explained above, we don't touch outer_selec */
 355         }
 356 }