* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.207 2009/04/17 15:33:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.208 2009/05/09 22:51:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "optimizer/pathnode.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
RestrictInfo *rinfo,
PathKey *pathkey);
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
+static bool adjust_semi_join(PlannerInfo *root, JoinPath *path,
+ SpecialJoinInfo *sjinfo,
+ Selectivity *outer_match_frac,
+ Selectivity *match_count,
+ bool *indexed_join_quals);
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
List *quals);
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
Path *inner_path = path->innerjoinpath;
Cost startup_cost = 0;
Cost run_cost = 0;
+ Cost inner_run_cost;
Cost cpu_per_tuple;
QualCost restrict_qual_cost;
double outer_path_rows = PATH_ROWS(outer_path);
double inner_path_rows = nestloop_inner_path_rows(inner_path);
double ntuples;
+ Selectivity outer_match_frac;
+ Selectivity match_count;
+ bool indexed_join_quals;
if (!enable_nestloop)
startup_cost += disable_cost;
*/
run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
}
- run_cost += outer_path_rows *
- (inner_path->total_cost - inner_path->startup_cost);
+ inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
- /*
- * Compute number of tuples processed (not number emitted!)
- */
- ntuples = outer_path_rows * inner_path_rows;
+ if (adjust_semi_join(root, path, sjinfo,
+ &outer_match_frac,
+ &match_count,
+ &indexed_join_quals))
+ {
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
+
+ /*
+ * SEMI or ANTI join: executor will stop after first match.
+ *
+ * For an outer-rel row that has at least one match, we can expect the
+ * inner scan to stop after a fraction 1/(match_count+1) of the inner
+ * rows, if the matches are evenly distributed. Since they probably
+ * aren't quite evenly distributed, we apply a fuzz factor of 2.0 to
+ * that fraction. (If we used a larger fuzz factor, we'd have to
+ * clamp inner_scan_frac to at most 1.0; but since match_count is at
+ * least 1, no such clamp is needed now.)
+ */
+ outer_matched_rows = rint(outer_path_rows * outer_match_frac);
+ inner_scan_frac = 2.0 / (match_count + 1.0);
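+
+ /*
+ * For example, with match_count = 4 an even distribution implies
+ * stopping after 1/5 of the inner rows; the 2.0 fuzz factor raises
+ * inner_scan_frac to 0.4. With match_count = 1, inner_scan_frac is
+ * exactly 1.0, its maximum.
+ */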
+
+ /* Add inner run cost for outer tuples having matches */
+ run_cost += outer_matched_rows * inner_run_cost * inner_scan_frac;
+
+ /* Compute number of tuples processed (not number emitted!) */
+ ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
+
+ /*
+ * For unmatched outer-rel rows, there are two cases. If the inner
+ * path is an indexscan using all the joinquals as indexquals, then
+ * an unmatched row results in an indexscan returning no rows, which
+ * is probably quite cheap. We estimate this case as costing the same
+ * as returning the first tuple of a nonempty scan. Otherwise, the
+ * executor will have to scan the whole inner rel; not so cheap.
+ */
+ if (indexed_join_quals)
+ {
+ run_cost += (outer_path_rows - outer_matched_rows) *
+ inner_run_cost / inner_path_rows;
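+ /* (inner_run_cost / inner_path_rows is the inner scan's average
+ * cost per returned tuple, used here as the estimate of first-tuple
+ * cost described in the comment above.) */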
+ /* We won't be evaluating any quals at all for these rows */
+ }
+ else
+ {
+ run_cost += (outer_path_rows - outer_matched_rows) *
+ inner_run_cost;
+ ntuples += (outer_path_rows - outer_matched_rows) *
+ inner_path_rows;
+ }
+ }
+ else
+ {
+ /* Normal case; we'll scan the whole inner rel for each outer row */
+ run_cost += outer_path_rows * inner_run_cost;
+
+ /* Compute number of tuples processed (not number emitted!) */
+ ntuples = outer_path_rows * inner_path_rows;
+ }
/* CPU costs */
cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
* cpu_tuple_cost plus the cost of evaluating additional restriction
* clauses that are to be applied at the join. (This is pessimistic since
* not all of the quals may get evaluated at each tuple.)
+ *
+ * Note: we could adjust for SEMI/ANTI joins skipping some qual evaluations
+ * here, but it's probably not worth the trouble.
*/
startup_cost += qp_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
int num_skew_mcvs;
double virtualbuckets;
Selectivity innerbucketsize;
+ Selectivity outer_match_frac;
+ Selectivity match_count;
ListCell *hcl;
if (!enable_hashjoin)
qp_qual_cost.startup -= hash_qual_cost.startup;
qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
- /*
- * Get approx # tuples passing the hashquals. We use approx_tuple_count
- * here because we need an estimate done with JOIN_INNER semantics.
- */
- hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
-
/* cost of source data */
startup_cost += outer_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
/* CPU costs */
- /*
- * The number of tuple comparisons needed is the number of outer tuples
- * times the typical number of tuples in a hash bucket, which is the inner
- * relation size times its bucketsize fraction. At each one, we need to
- * evaluate the hashjoin quals. But actually, charging the full qual eval
- * cost at each tuple is pessimistic, since we don't evaluate the quals
- * unless the hash values match exactly. For lack of a better idea, halve
- * the cost estimate to allow for that.
- */
- startup_cost += hash_qual_cost.startup;
- run_cost += hash_qual_cost.per_tuple *
- outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
+ if (adjust_semi_join(root, &path->jpath, sjinfo,
+ &outer_match_frac,
+ &match_count,
+ NULL))
+ {
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
+
+ /*
+ * SEMI or ANTI join: executor will stop after first match.
+ *
+ * For an outer-rel row that has at least one match, we can expect the
+ * bucket scan to stop after a fraction 1/(match_count+1) of the
+ * bucket's rows, if the matches are evenly distributed. Since they
+ * probably aren't quite evenly distributed, we apply a fuzz factor of
+ * 2.0 to that fraction. (If we used a larger fuzz factor, we'd have
+ * to clamp inner_scan_frac to at most 1.0; but since match_count is
+ * at least 1, no such clamp is needed now.)
+ */
+ outer_matched_rows = rint(outer_path_rows * outer_match_frac);
+ inner_scan_frac = 2.0 / (match_count + 1.0);
+
+ startup_cost += hash_qual_cost.startup;
+ run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
+ clamp_row_est(inner_path_rows * innerbucketsize * inner_scan_frac) * 0.5;
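+ /* (The trailing 0.5 is presumably the same allowance for tuples
+ * whose hash values don't match exactly that the normal case below
+ * applies; see its comment.) */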
+
+ /*
+ * For unmatched outer-rel rows, the picture is quite different.
+ * In the first place, there is no reason to assume that these rows
+ * preferentially hit heavily-populated buckets; instead assume they
+ * are uncorrelated with the inner distribution and so they see an
+ * average bucket size of inner_path_rows / virtualbuckets. In the
+ * second place, it seems likely that they will have few, if any,
+ * exact hash-code matches and so very few of the tuples in the
+ * bucket will actually require eval of the hash quals. We don't
+ * have any good way to estimate how many will, but for the moment
+ * assume that the effective cost per bucket entry is one-tenth what
+ * it is for matchable tuples.
+ */
+ run_cost += hash_qual_cost.per_tuple *
+ (outer_path_rows - outer_matched_rows) *
+ clamp_row_est(inner_path_rows / virtualbuckets) * 0.05;
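+ /* (0.05 is one-tenth of the 0.5 discount applied to matchable
+ * tuples above, per the preceding comment.) */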
+
+ /* Get # of tuples that will pass the basic join */
+ if (path->jpath.jointype == JOIN_SEMI)
+ hashjointuples = outer_matched_rows;
+ else
+ hashjointuples = outer_path_rows - outer_matched_rows;
+ }
+ else
+ {
+ /*
+ * The number of tuple comparisons needed is the number of outer
+ * tuples times the typical number of tuples in a hash bucket, which
+ * is the inner relation size times its bucketsize fraction. At each
+ * one, we need to evaluate the hashjoin quals. But actually,
+ * charging the full qual eval cost at each tuple is pessimistic,
+ * since we don't evaluate the quals unless the hash values match
+ * exactly. For lack of a better idea, halve the cost estimate to
+ * allow for that.
+ */
+ startup_cost += hash_qual_cost.startup;
+ run_cost += hash_qual_cost.per_tuple * outer_path_rows *
+ clamp_row_est(inner_path_rows * innerbucketsize) * 0.5;
+
+ /*
+ * Get approx # tuples passing the hashquals. We use
+ * approx_tuple_count here because we need an estimate done with
+ * JOIN_INNER semantics.
+ */
+ hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
+ }
/*
* For each tuple that gets through the hashjoin proper, we charge
}
+/*
+ * adjust_semi_join
+ * Estimate how much of the inner input a SEMI or ANTI join
+ * can be expected to scan.
+ *
+ * In a hash or nestloop SEMI/ANTI join, the executor will stop scanning
+ * inner rows as soon as it finds a match to the current outer row.
+ * We should therefore adjust some of the cost components for this effect.
+ * This function computes some estimates needed for these adjustments.
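+ *
+ * (For reference: JOIN_SEMI typically comes from an EXISTS-style query
+ * and JOIN_ANTI from NOT EXISTS, e.g.
+ *		SELECT * FROM a WHERE EXISTS (SELECT 1 FROM b WHERE b.y = a.x);
+ * in both cases the first inner match determines the outer row's fate,
+ * so the inner scan can stop there.)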
+ *
+ * 'path' is already filled in except for the cost fields
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ *
+ * Returns TRUE if this is a SEMI or ANTI join, FALSE if not.
+ *
+ * Output parameters (set only in TRUE-result case):
+ * *outer_match_frac is set to the fraction of the outer tuples that are
+ * expected to have at least one match.
+ * *match_count is set to the average number of matches expected for
+ * outer tuples that have at least one match.
+ * *indexed_join_quals is set to TRUE if all the joinquals are used as
+ * inner index quals, FALSE if not.
+ *
+ * indexed_join_quals can be passed as NULL if that information is not
+ * relevant (it is only useful for the nestloop case).
+ */
+static bool
+adjust_semi_join(PlannerInfo *root, JoinPath *path, SpecialJoinInfo *sjinfo,
+ Selectivity *outer_match_frac,
+ Selectivity *match_count,
+ bool *indexed_join_quals)
+{
+ JoinType jointype = path->jointype;
+ Selectivity jselec;
+ Selectivity nselec;
+ Selectivity avgmatch;
+ SpecialJoinInfo norm_sjinfo;
+ List *joinquals;
+ ListCell *l;
+
+ /* Fall out if it's not JOIN_SEMI or JOIN_ANTI */
+ if (jointype != JOIN_SEMI && jointype != JOIN_ANTI)
+ return false;
+
+ /*
+ * Note: it's annoying to repeat this selectivity estimation on each call,
+ * when the joinclause list will be the same for all path pairs
+ * implementing a given join. clausesel.c will save us from the worst
+ * effects of this by caching at the RestrictInfo level; but perhaps it'd
+ * be worth finding a way to cache the results at a higher level.
+ */
+
+ /*
+ * In an ANTI join, we must ignore clauses that are "pushed down",
+ * since those won't affect the match logic. In a SEMI join, we do not
+ * distinguish joinquals from "pushed down" quals, so just use the whole
+ * restrictinfo list.
+ */
+ if (jointype == JOIN_ANTI)
+ {
+ joinquals = NIL;
+ foreach(l, path->joinrestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+
+ Assert(IsA(rinfo, RestrictInfo));
+ if (!rinfo->is_pushed_down)
+ joinquals = lappend(joinquals, rinfo);
+ }
+ }
+ else
+ joinquals = path->joinrestrictinfo;
+
+ /*
+ * Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
+ */
+ jselec = clauselist_selectivity(root,
+ joinquals,
+ 0,
+ jointype,
+ sjinfo);
+
+ /*
+ * Also get the normal inner-join selectivity of the join clauses.
+ */
+ norm_sjinfo.type = T_SpecialJoinInfo;
+ norm_sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
+ norm_sjinfo.min_righthand = path->innerjoinpath->parent->relids;
+ norm_sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
+ norm_sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
+ norm_sjinfo.jointype = JOIN_INNER;
+ /* we don't bother trying to make the remaining fields valid */
+ norm_sjinfo.lhs_strict = false;
+ norm_sjinfo.delay_upper_joins = false;
+ norm_sjinfo.join_quals = NIL;
+
+ nselec = clauselist_selectivity(root,
+ joinquals,
+ 0,
+ JOIN_INNER,
+ &norm_sjinfo);
+
+ /* Avoid leaking a lot of ListCells */
+ if (jointype == JOIN_ANTI)
+ list_free(joinquals);
+
+ /*
+ * jselec can be interpreted as the fraction of outer-rel rows that have
+ * any matches (this is true for both SEMI and ANTI cases). And nselec
+ * is the fraction of the Cartesian product that matches. So, the
+ * average number of matches for each outer-rel row that has at least
+ * one match is nselec * inner_rows / jselec.
+ *
+ * Note: it is correct to use the inner rel's "rows" count here, not
+ * PATH_ROWS(), even if the inner path under consideration is an inner
+ * indexscan. This is because we have included all the join clauses
+ * in the selectivity estimate, even ones used in an inner indexscan.
+ */
+ if (jselec > 0) /* protect against division by zero */
+ {
+ avgmatch = nselec * path->innerjoinpath->parent->rows / jselec;
+ /* Clamp to sane range */
+ avgmatch = Max(1.0, avgmatch);
+ }
+ else
+ avgmatch = 1.0;
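+
+ /*
+ * For example (illustrative numbers only): if 20% of outer rows have
+ * a match (jselec = 0.2), the inner rel has 100 rows, and the
+ * inner-join selectivity is nselec = 0.01, then matched outer rows
+ * average 0.01 * 100 / 0.2 = 5 matches apiece.
+ */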
+
+ *outer_match_frac = jselec;
+ *match_count = avgmatch;
+
+ /*
+ * If requested, check whether the inner path uses all the joinquals
+ * as indexquals. (If that's true, we can assume that an unmatched
+ * outer tuple is cheap to process, whereas otherwise it's probably
+ * expensive.)
+ */
+ if (indexed_join_quals)
+ {
+ List *nrclauses;
+
+ nrclauses = select_nonredundant_join_clauses(root,
+ path->joinrestrictinfo,
+ path->innerjoinpath);
+ *indexed_join_quals = (nrclauses == NIL);
+ }
+
+ return true;
+}
+
+
/*
* approx_tuple_count
* Quick-and-dirty estimation of the number of join rows passing
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.58 2009/04/16 20:42:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.59 2009/05/09 22:51:41 tgl Exp $
*
*-------------------------------------------------------------------------
*/
bool pseudoconstant,
Relids required_relids,
Relids nullable_relids);
-static bool join_clause_is_redundant(PlannerInfo *root,
- RestrictInfo *rinfo,
+static List *select_nonredundant_join_list(List *restrictinfo_list,
+ List *reference_list);
+static bool join_clause_is_redundant(RestrictInfo *rinfo,
List *reference_list);
}
}
+
/*
* select_nonredundant_join_clauses
*
* Given a list of RestrictInfo clauses that are to be applied in a join,
- * select the ones that are not redundant with any clause in the
- * reference_list. This is used only for nestloop-with-inner-indexscan
- * joins: any clauses being checked by the index should be removed from
- * the qpquals list.
+ * select the ones that are not redundant with any clause that's enforced
+ * by the inner_path. This is used for nestloop joins, wherein any clause
+ * being used in an inner indexscan need not be checked again at the join.
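+ *
+ * For example (hypothetical schema): given a nestloop implementing
+ *		SELECT ... FROM a JOIN b ON a.x = b.y
+ * whose inner path is an indexscan on b.y using a.x = b.y as an indexqual,
+ * the scan itself enforces a.x = b.y, so the clause is redundant at the
+ * join level.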
*
* "Redundant" means either equal() or derived from the same EquivalenceClass.
* We have to check the latter because indxqual.c may select different derived
* clauses than were selected by generate_join_implied_equalities().
*
- * Note that we assume the given restrictinfo_list has already been checked
- * for local redundancies, so we don't check again.
+ * Note that we are *not* checking for local redundancies within the given
+ * restrictinfo_list; that should have been handled elsewhere.
*/
List *
select_nonredundant_join_clauses(PlannerInfo *root,
List *restrictinfo_list,
- List *reference_list)
+ Path *inner_path)
+{
+ if (IsA(inner_path, IndexPath))
+ {
+ /*
+ * Check the index quals to see if any of them are join clauses.
+ *
+ * We can skip this if the index path is an ordinary indexpath and not
+ * a special innerjoin path, since it then wouldn't be using any join
+ * clauses.
+ */
+ IndexPath *innerpath = (IndexPath *) inner_path;
+
+ if (innerpath->isjoininner)
+ restrictinfo_list =
+ select_nonredundant_join_list(restrictinfo_list,
+ innerpath->indexclauses);
+ }
+ else if (IsA(inner_path, BitmapHeapPath))
+ {
+ /*
+ * Same deal for bitmapped index scans.
+ *
+ * Note: both here and above, we ignore any implicit index
+ * restrictions associated with the use of partial indexes. This is
+ * OK because we're only trying to prove we can dispense with some
+ * join quals; failing to prove that doesn't result in an incorrect
+ * plan. It's quite unlikely that a join qual could be proven
+ * redundant by an index predicate anyway. (Also, if we did manage
+ * to prove it, we'd have to have a special case for update targets;
+ * see notes about EvalPlanQual testing in create_indexscan_plan().)
+ */
+ BitmapHeapPath *innerpath = (BitmapHeapPath *) inner_path;
+
+ if (innerpath->isjoininner)
+ {
+ List *bitmapclauses;
+
+ bitmapclauses =
+ make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
+ true,
+ false);
+ restrictinfo_list =
+ select_nonredundant_join_list(restrictinfo_list,
+ bitmapclauses);
+ }
+ }
+
+ /*
+ * XXX the inner path of a nestloop could also be an append relation
+ * whose elements use join quals. However, they might each use different
+ * quals; we could only remove join quals that are enforced by all the
+ * appendrel members. For the moment we don't bother to try.
+ */
+
+ return restrictinfo_list;
+}
+
+/*
+ * select_nonredundant_join_list
+ * Select the members of restrictinfo_list that are not redundant with
+ * any member of reference_list. See above for more info.
+ */
+static List *
+select_nonredundant_join_list(List *restrictinfo_list,
+ List *reference_list)
{
List *result = NIL;
ListCell *item;
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
/* drop it if redundant with any reference clause */
- if (join_clause_is_redundant(root, rinfo, reference_list))
+ if (join_clause_is_redundant(rinfo, reference_list))
continue;
/* otherwise, add it to result list */
* Test whether rinfo is redundant with any clause in reference_list.
*/
static bool
-join_clause_is_redundant(PlannerInfo *root,
- RestrictInfo *rinfo,
+join_clause_is_redundant(RestrictInfo *rinfo,
List *reference_list)
{
ListCell *refitem;