+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * orindxpath.c
- * Routines to find index paths that match a set of OR clauses
- *
- * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- * src/backend/optimizer/path/orindxpath.c
- *
- *-------------------------------------------------------------------------
- */
-
-#include "postgres.h"
-
-#include "optimizer/cost.h"
-#include "optimizer/paths.h"
-#include "optimizer/restrictinfo.h"
-
-
-/*----------
- * create_or_index_quals
- * Examine join OR-of-AND quals to see if any useful restriction OR
- * clauses can be extracted. If so, add them to the query.
- *
- * Although a join clause must reference other relations overall,
- * an OR of ANDs clause might contain sub-clauses that reference just this
- * relation and can be used to build a restriction clause.
- * For example consider
- * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
- * We can transform this into
- * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
- * AND (a.x = 42 OR a.x = 44)
- * AND (b.y = 43 OR b.z = 45);
- * which opens the potential to build OR indexscans on a and b. In essence
- * this is a partial transformation to CNF (AND of ORs format). It is not
- * complete, however, because we do not unravel the original OR --- doing so
- * would usually bloat the qualification expression to little gain.
- *
- * The added quals are partially redundant with the original OR, and therefore
- * will cause the size of the joinrel to be underestimated when it is finally
- * formed. (This would be true of a full transformation to CNF as well; the
- * fault is not really in the transformation, but in clauselist_selectivity's
- * inability to recognize redundant conditions.) To minimize the collateral
- * damage, we want to minimize the number of quals added. Therefore we do
- * not add every possible extracted restriction condition to the query.
- * Instead, we search for the single restriction condition that generates
- * the most useful (cheapest) OR indexscan, and add only that condition.
- * This is a pretty ad-hoc heuristic, but quite useful.
- *
- * We can then compensate for the redundancy of the added qual by poking
- * the recorded selectivity of the original OR clause, thereby ensuring
- * the added qual doesn't change the estimated size of the joinrel when
- * it is finally formed. This is a MAJOR HACK: it depends on the fact
- * that clause selectivities are cached and on the fact that the same
- * RestrictInfo node will appear in every joininfo list that might be used
- * when the joinrel is formed. And it probably isn't right in cases where
- * the size estimation is nonlinear (i.e., outer and IN joins). But it
- * beats not doing anything.
- *
- * NOTE: one might think this messiness could be worked around by generating
- * the indexscan path with a small path->rows value, and not touching the
- * rel's baserestrictinfo or rel->rows. However, that does not work.
- * The optimizer's fundamental design assumes that every general-purpose
- * Path for a given relation generates the same number of rows. Without
- * this assumption we'd not be able to optimize solely on the cost of Paths,
- * but would have to take number of output rows into account as well.
- * (The parameterized-paths stuff almost fixes this, but not quite...)
- *
- * 'rel' is the relation entry for which quals are to be created
- *
- * If successful, adds qual(s) to rel->baserestrictinfo and returns TRUE.
- * If no quals available, returns FALSE and doesn't change rel.
- *
- * Note: check_partial_indexes() must have been run previously.
- *----------
- */
-bool
-create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
-{
- BitmapOrPath *bestpath = NULL; /* cheapest OR path seen so far, if any */
- RestrictInfo *bestrinfo = NULL; /* join clause that bestpath was built from */
- List *newrinfos; /* RestrictInfo list derived from bestpath */
- RestrictInfo *or_rinfo; /* the sole member of newrinfos, once validated */
- Selectivity or_selec,
- orig_selec;
- ListCell *i;
-
- /*
-  * Skip the whole mess if no indexes: everything below works by building
-  * bitmap OR paths, so there is nothing to try when rel->indexlist is
-  * empty.
-  */
- if (rel->indexlist == NIL)
- return false;
-
- /*
- * Find potentially interesting OR joinclauses. We can use any joinclause
- * that is considered safe to move to this rel by the parameterized-path
- * machinery, even though what we are going to do with it is not exactly a
- * parameterized path.
- */
- foreach(i, rel->joininfo)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
-
- if (restriction_is_or_clause(rinfo) &&
- join_clause_is_movable_to(rinfo, rel))
- {
- /*
- * Use the generate_bitmap_or_paths() machinery to estimate the
- * value of each OR clause. We can use regular restriction
- * clauses along with the OR clause contents to generate
- * indexquals. We pass restriction_only = true so that any
- * sub-clauses that are actually joins will be ignored.
- */
- List *orpaths;
- ListCell *k;
-
- orpaths = generate_bitmap_or_paths(root, rel,
- list_make1(rinfo),
- rel->baserestrictinfo,
- true);
-
- /*
-  * Locate the cheapest OR path.  bestpath is not reset between
-  * iterations of the outer loop, so the comparison on total_cost
-  * spans the paths of every qualifying join clause; bestrinfo is
-  * updated in step so that it always names the clause bestpath
-  * was generated from.
-  */
- foreach(k, orpaths)
- {
- BitmapOrPath *path = (BitmapOrPath *) lfirst(k);
-
- Assert(IsA(path, BitmapOrPath));
- if (bestpath == NULL ||
- path->path.total_cost < bestpath->path.total_cost)
- {
- bestpath = path;
- bestrinfo = rinfo;
- }
- }
- }
- }
-
- /* Fail if no suitable clauses found */
- if (bestpath == NULL)
- return false;
-
- /*
- * Convert the path's indexclauses structure to a RestrictInfo tree. We
- * include any partial-index predicates so as to get a reasonable
- * representation of what the path is actually scanning.
- */
- newrinfos = make_restrictinfo_from_bitmapqual((Path *) bestpath,
- true, true);
-
- /*
-  * It's possible we get back something other than a single OR clause.
-  * In that case give up; rel has not been modified at this point, so
-  * returning false here honors the "doesn't change rel" contract.
-  */
- if (list_length(newrinfos) != 1)
- return false;
- or_rinfo = (RestrictInfo *) linitial(newrinfos);
- Assert(IsA(or_rinfo, RestrictInfo));
- if (!restriction_is_or_clause(or_rinfo))
- return false;
-
- /*
- * OK, add it to the rel's restriction list.
- */
- rel->baserestrictinfo = list_concat(rel->baserestrictinfo, newrinfos);
-
- /*
- * Adjust the original OR clause's cached selectivity to compensate for
- * the selectivity of the added (but redundant) lower-level qual. This
- * should result in the join rel getting approximately the same rows
- * estimate as it would have gotten without all these shenanigans. (XXX
- * major hack alert ... this depends on the assumption that the
- * selectivity will stay cached ...)
- *
- * The adjustment is made only when the new qual's selectivity is strictly
- * between 0 and 1: that keeps the division below away from a zero
- * divisor, and a selectivity of 1 would leave nothing to compensate for.
- */
- or_selec = clause_selectivity(root, (Node *) or_rinfo,
- 0, JOIN_INNER, NULL);
- if (or_selec > 0 && or_selec < 1)
- {
- orig_selec = clause_selectivity(root, (Node *) bestrinfo,
- 0, JOIN_INNER, NULL);
- bestrinfo->norm_selec = orig_selec / or_selec; /* overwrite the cached value */
- /* clamp result to sane range */
- if (bestrinfo->norm_selec > 1)
- bestrinfo->norm_selec = 1;
- /* It isn't an outer join clause, so no need to adjust outer_selec */
- }
-
- /* Tell caller to recompute partial index status and rowcount estimate */
- return true;
-}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * orclauses.c
+ * Routines to extract restriction OR clauses from join OR clauses
+ *
+ * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/optimizer/util/orclauses.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/orclauses.h"
+#include "optimizer/restrictinfo.h"
+
+
+static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
+static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
+static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
+ Expr *orclause, RestrictInfo *join_or_rinfo);
+
+
+/*
+ * extract_restriction_or_clauses
+ * Examine join OR-of-AND clauses to see if any useful restriction OR
+ * clauses can be extracted. If so, add them to the query.
+ *
+ * Although a join clause must reference multiple relations overall,
+ * an OR of ANDs clause might contain sub-clauses that reference just one
+ * relation and can be used to build a restriction clause for that rel.
+ * For example consider
+ * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
+ * We can transform this into
+ * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
+ * AND (a.x = 42 OR a.x = 44)
+ * AND (b.y = 43 OR b.z = 45);
+ * which allows the latter clauses to be applied during the scans of a and b,
+ * perhaps as index qualifications, and in any case reducing the number of
+ * rows arriving at the join. In essence this is a partial transformation to
+ * CNF (AND of ORs format). It is not complete, however, because we do not
+ * unravel the original OR --- doing so would usually bloat the qualification
+ * expression to little gain.
+ *
+ * The added quals are partially redundant with the original OR, and therefore
+ * would cause the size of the joinrel to be underestimated when it is finally
+ * formed. (This would be true of a full transformation to CNF as well; the
+ * fault is not really in the transformation, but in clauselist_selectivity's
+ * inability to recognize redundant conditions.) We can compensate for this
+ * redundancy by changing the cached selectivity of the original OR clause,
+ * cancelling out the (valid) reduction in the estimated sizes of the base
+ * relations so that the estimated joinrel size remains the same. This is
+ * a MAJOR HACK: it depends on the fact that clause selectivities are cached
+ * and on the fact that the same RestrictInfo node will appear in every
+ * joininfo list that might be used when the joinrel is formed.
+ * And it doesn't work in cases where the size estimation is nonlinear
+ * (i.e., outer and IN joins). But it beats not doing anything.
+ *
+ * We examine each base relation to see if join clauses associated with it
+ * contain extractable restriction conditions. If so, add those conditions
+ * to the rel's baserestrictinfo and update the cached selectivities of the
+ * join clauses. Note that the same join clause will be examined afresh
+ * from the point of view of each baserel that participates in it, so its
+ * cached selectivity may get updated multiple times.
+ */
+void
+extract_restriction_or_clauses(PlannerInfo *root)
+{
+	Index		relindex;
+
+	/* Visit every slot of the simple-rel array, starting at index 1 */
+	for (relindex = 1; relindex < root->simple_rel_array_size; relindex++)
+	{
+		RelOptInfo *baserel = root->simple_rel_array[relindex];
+		ListCell   *cell;
+
+		/* slots belonging to non-baserel RTEs are left empty */
+		if (baserel == NULL)
+			continue;
+
+		/* the slot number and the rel's own relid are expected to agree */
+		Assert(baserel->relid == relindex);
+
+		/* "other rels" are not processed here, only plain base rels */
+		if (baserel->reloptkind != RELOPT_BASEREL)
+			continue;
+
+		/*
+		 * Scan the rel's join clauses for usable candidates.  A candidate
+		 * must be an OR clause, must be one the parameterized-path rules
+		 * would allow to be moved to this rel (even though we are not
+		 * really building a parameterized path), and must not have been
+		 * flagged as redundant, which is signalled by norm_selec > 1.  The
+		 * last condition probably never fails for an OR clause, but we
+		 * check anyway.
+		 *
+		 * The selectivity test is deliberately phrased as the negation of
+		 * "<= 1" so that it accepts exactly the same values as a direct
+		 * "<= 1" comparison would.
+		 */
+		foreach(cell, baserel->joininfo)
+		{
+			RestrictInfo *joinclause = (RestrictInfo *) lfirst(cell);
+			Expr	   *extracted;
+
+			if (!restriction_is_or_clause(joinclause))
+				continue;
+			if (!join_clause_is_movable_to(joinclause, baserel))
+				continue;
+			if (!(joinclause->norm_selec <= 1))
+				continue;
+
+			/* See whether a qual mentioning only this rel can be pulled out */
+			extracted = extract_or_clause(joinclause, baserel);
+
+			/*
+			 * On success, let consider_new_or_clause() judge whether the
+			 * qual is worthwhile and, if so, attach it to the rel.
+			 */
+			if (extracted != NULL)
+				consider_new_or_clause(root, baserel, extracted, joinclause);
+		}
+	}
+}
+
+/*
+ * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
+ */
+static bool
+is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+	/*
+	 * A clause qualifies only if all three of the following hold, tested in
+	 * this order so that the cheap checks come first:
+	 *
+	 * 1. It is not pseudoconstant (such a clause cannot be one that
+	 *    mentions the rel).
+	 * 2. The set of rels it references is exactly the target rel's relids,
+	 *    i.e. it mentions the rel and nothing but the rel.
+	 * 3. It contains no volatile functions, since extracting it would
+	 *    cause them to be evaluated additional times.
+	 */
+	return !rinfo->pseudoconstant &&
+		bms_equal(rinfo->clause_relids, rel->relids) &&
+		!contain_volatile_functions((Node *) rinfo->clause);
+}
+
+/*
+ * Try to extract a restriction clause mentioning only "rel" from the given
+ * join OR-clause.
+ *
+ * We must be able to extract at least one qual for this rel from each of
+ * the arms of the OR, else we can't use it.
+ *
+ * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
+ * if no OR clause could be extracted.
+ */
+static Expr *
+extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
+{
+	List	   *or_arms = NIL;
+	ListCell   *armcell;
+
+	/*
+	 * Walk the arms of the OR.  We work from or_rinfo->orclause, the form
+	 * of the clause that keeps RestrictInfo nodes beneath the top-level
+	 * OR/AND structure, because the data stored in those nodes makes
+	 * is_safe_restriction_clause_for() cheap to evaluate.  Only the bare
+	 * clauses are copied into the result, however; if the extracted qual is
+	 * accepted, new RestrictInfos get built for it.  Sharing the existing
+	 * nodes would mean trusting that selectivity and other cached fields
+	 * come out the same for a restriction clause as for a join clause, and
+	 * we would rather not rely on that.
+	 */
+	Assert(or_clause((Node *) or_rinfo->orclause));
+	foreach(armcell, ((BoolExpr *) or_rinfo->orclause)->args)
+	{
+		Node	   *arm = (Node *) lfirst(armcell);
+		List	   *usable = NIL;
+
+		/* Each arm is either an AND or a single sub-RestrictInfo */
+		if (!and_clause(arm))
+		{
+			RestrictInfo *single = (RestrictInfo *) arm;
+
+			Assert(IsA(arm, RestrictInfo));
+			Assert(!restriction_is_or_clause(single));
+			if (is_safe_restriction_clause_for(single, rel))
+				usable = lappend(usable, single->clause);
+		}
+		else
+		{
+			ListCell   *membercell;
+
+			foreach(membercell, ((BoolExpr *) arm)->args)
+			{
+				RestrictInfo *member = (RestrictInfo *) lfirst(membercell);
+				Expr	   *nested;
+
+				Assert(IsA(member, RestrictInfo));
+
+				/* Ordinary (non-OR) member: keep it if it is safe */
+				if (!restriction_is_or_clause(member))
+				{
+					if (is_safe_restriction_clause_for(member, rel))
+						usable = lappend(usable, member->clause);
+					continue;
+				}
+
+				/*
+				 * Nested OR: recursion is mandatory here, not merely an
+				 * optimization, because it is the only way to reach and
+				 * strip every RestrictInfo buried in the expression.
+				 */
+				nested = extract_or_clause(member, rel);
+				if (nested != NULL)
+					usable = lappend(usable, nested);
+			}
+		}
+
+		/*
+		 * An arm that contributes nothing for this rel makes the whole OR
+		 * unusable, so stop right away.
+		 */
+		if (usable == NIL)
+			return NULL;
+
+		/*
+		 * Record this arm's contribution; make_ands_explicit() supplies an
+		 * AND node when more than one subclause was collected.
+		 */
+		or_arms = lappend(or_arms, make_ands_explicit(usable));
+	}
+
+	/*
+	 * Every arm produced something, so combine the pieces under an OR node.
+	 * (With a single arm the OR node is technically superfluous, but then
+	 * so was the OR node we were given.)  An input with no arms at all
+	 * yields NULL.
+	 */
+	return (or_arms == NIL) ? NULL : make_orclause(or_arms);
+}
+
+/*
+ * Consider whether a successfully-extracted restriction OR clause is
+ * actually worth using. If so, add it to the planner's data structures,
+ * and adjust the original join clause (join_or_rinfo) to compensate.
+ */
+static void
+consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
+					   Expr *orclause, RestrictInfo *join_or_rinfo)
+{
+	RestrictInfo *new_rinfo;
+	Selectivity restrict_sel;
+	Selectivity join_sel;
+	Selectivity adjusted_sel;
+	SpecialJoinInfo fake_sjinfo;
+
+	/*
+	 * Wrap the extracted OR clause in a RestrictInfo.  It is taken for
+	 * granted that the clause is valid as a base restriction clause.
+	 */
+	new_rinfo = make_restrictinfo(orclause,
+								  true,
+								  false,
+								  false,
+								  NULL,
+								  NULL,
+								  NULL);
+
+	/*
+	 * Get its selectivity estimate.  Running the estimator on the
+	 * RestrictInfo rather than on the bare expression lets the result be
+	 * cached for later use.
+	 */
+	restrict_sel = clause_selectivity(root, (Node *) new_rinfo,
+									  0, JOIN_INNER, NULL);
+
+	/*
+	 * Unless the new qual throws away a worthwhile share of the base rel's
+	 * rows it only duplicates effort, because the original OR clause still
+	 * has to be checked at the join.  The cutoff of 0.9 is a somewhat
+	 * arbitrary choice.
+	 */
+	if (restrict_sel > 0.9)
+		return;					/* not selective enough to bother with */
+
+	/* Accept the qual: append it to the rel's restriction-clause list */
+	rel->baserestrictinfo = lappend(rel->baserestrictinfo, new_rinfo);
+
+	/*
+	 * What remains is to rescale the join OR clause's cached selectivity so
+	 * that it cancels the effect of the new (redundant) base-level qual,
+	 * leaving the join rel with roughly the row estimate it would have had
+	 * anyway.  That requires dividing by restrict_sel, so there is nothing
+	 * we can do unless it is positive.
+	 *
+	 * XXX major hack alert: this only works as long as the selectivity
+	 * stays cached.
+	 *
+	 * XXX another major hack: only norm_selec, the cached value for
+	 * JOIN_INNER semantics, is adjusted, even though the join clause could
+	 * have been an outer-join clause.  One reason is that the relevant
+	 * SpecialJoinInfo is not easy to identify from here; the other is that
+	 * the linearity assumption underlying this adjustment would not hold
+	 * anyway.  (For an outer-join clause to have reached this point, "rel"
+	 * must be on the nullable side, which makes the join size quite
+	 * nonlinear in the size of "rel"; so it is unclear how outer_selec
+	 * ought to be adjusted even if its original value could be computed
+	 * correctly.)
+	 */
+	if (!(restrict_sel > 0))
+		return;
+
+	/*
+	 * Fabricate a SpecialJoinInfo describing a plain inner join between
+	 * "rel" and the other rels the join clause mentions.  (Compare
+	 * approx_tuple_count() in costsize.c.)
+	 */
+	fake_sjinfo.type = T_SpecialJoinInfo;
+	fake_sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
+											  rel->relids);
+	fake_sjinfo.min_righthand = rel->relids;
+	fake_sjinfo.syn_lefthand = fake_sjinfo.min_lefthand;
+	fake_sjinfo.syn_righthand = fake_sjinfo.min_righthand;
+	fake_sjinfo.jointype = JOIN_INNER;
+	/* the fields below are filled in but not made meaningful */
+	fake_sjinfo.lhs_strict = false;
+	fake_sjinfo.delay_upper_joins = false;
+	fake_sjinfo.join_quals = NIL;
+
+	/* Selectivity of the join OR clause under inner-join semantics */
+	join_sel = clause_selectivity(root, (Node *) join_or_rinfo,
+								  0, JOIN_INNER, &fake_sjinfo);
+
+	/*
+	 * Rescale, capping at 1 so the result stays in the sane range and in
+	 * particular cannot be mistaken for the "redundant" marking.
+	 */
+	adjusted_sel = join_sel / restrict_sel;
+	if (adjusted_sel > 1)
+		adjusted_sel = 1;
+
+	/* Overwrite the cached value; outer_selec is left alone, see above */
+	join_or_rinfo->norm_selec = adjusted_sel;
+}