void
amcostestimate (PlannerInfo *root,
IndexPath *path,
- RelOptInfo *outer_rel,
+ double loop_count,
Cost *indexStartupCost,
Cost *indexTotalCost,
Selectivity *indexSelectivity,
</varlistentry>
<varlistentry>
- <term><parameter>outer_rel</></term>
+ <term><parameter>loop_count</></term>
<listitem>
<para>
- If the index is being considered for use in a join inner indexscan,
- the planner's information about the outer side of the join. Otherwise
- <symbol>NULL</>. When non-<symbol>NULL</>, some of the qual clauses
- will be join clauses for joins
- with this rel rather than being simple restriction clauses. Also,
- the cost estimator should expect that the index scan will be repeated
- for each row of the outer rel.
+ The number of repetitions of the index scan that should be factored
+ into the cost estimates. This will typically be greater than one when
+ considering a parameterized scan for use in the inside of a nestloop
+ join. Note that the cost estimates should still be for just one scan;
+ a larger <parameter>loop_count</> means that it may be appropriate
+ to allow for some caching effects across multiple scans.
</para>
</listitem>
</varlistentry>
</para>
<para>
- In the join case, the returned numbers should be averages expected for
- any one scan of the index.
+ When <parameter>loop_count</> is greater than one, the returned numbers
+ should be averages expected for any one scan of the index.
</para>
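+<para>
+ For example (a sketch only, mirroring the generic logic that
+ <function>cost_index</> uses in <filename>costsize.c</>), a cost
+ estimator can amortize physical I/O by estimating the pages fetched
+ across all <parameter>loop_count</> scans and then pro-rating the
+ result for a single scan:
+</para>
+<programlisting>
+/* illustrative fragment; variables as in cost_index() */
+pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
+                                    baserel->pages,
+                                    (double) index->pages,
+                                    root);
+max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+</programlisting>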
<procedure>
</programlisting>
However, the above does not account for amortization of index reads
- across repeated index scans in the join case.
+ across repeated index scans.
</para>
</step>
return true;
}
+/*
+ * bms_subset_compare - compare A and B for equality/subset relationships
+ *
+ * This is more efficient than testing bms_is_subset in both directions.
+ */
+BMS_Comparison
+bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
+{
+ BMS_Comparison result;
+ int shortlen;
+ int longlen;
+ int i;
+
+ /* Handle cases where either input is NULL */
+ if (a == NULL)
+ {
+ if (b == NULL)
+ return BMS_EQUAL;
+ return bms_is_empty(b) ? BMS_EQUAL : BMS_SUBSET1;
+ }
+ if (b == NULL)
+ return bms_is_empty(a) ? BMS_EQUAL : BMS_SUBSET2;
+ /* Check common words */
+ result = BMS_EQUAL; /* status so far */
+ shortlen = Min(a->nwords, b->nwords);
+ for (i = 0; i < shortlen; i++)
+ {
+ bitmapword aword = a->words[i];
+ bitmapword bword = b->words[i];
+
+ if ((aword & ~bword) != 0)
+ {
+ /* a is not a subset of b */
+ if (result == BMS_SUBSET1)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET2;
+ }
+ if ((bword & ~aword) != 0)
+ {
+ /* b is not a subset of a */
+ if (result == BMS_SUBSET2)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET1;
+ }
+ }
+ /* Check extra words */
+ if (a->nwords > b->nwords)
+ {
+ longlen = a->nwords;
+ for (; i < longlen; i++)
+ {
+ if (a->words[i] != 0)
+ {
+ /* a is not a subset of b */
+ if (result == BMS_SUBSET1)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET2;
+ }
+ }
+ }
+ else if (a->nwords < b->nwords)
+ {
+ longlen = b->nwords;
+ for (; i < longlen; i++)
+ {
+ if (b->words[i] != 0)
+ {
+ /* b is not a subset of a */
+ if (result == BMS_SUBSET2)
+ return BMS_DIFFERENT;
+ result = BMS_SUBSET1;
+ }
+ }
+ }
+ return result;
+}
+
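+/*
+ * Usage sketch (illustrative, not planner code): a single call replaces
+ * testing bms_is_subset in both directions, e.g. when comparing the
+ * parameterization sets of two paths:
+ *
+ *		BMS_Comparison cmp = bms_subset_compare(path1->required_outer,
+ *												path2->required_outer);
+ *
+ *		if (cmp == BMS_SUBSET1 || cmp == BMS_EQUAL)
+ *			... path1 requires no more outer rels than path2 ...
+ */
+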
/*
* bms_is_member - is X a member of A?
*/
WRITE_ENUM_FIELD(pathtype, NodeTag);
appendStringInfo(str, " :parent_relids ");
_outBitmapset(str, node->parent->relids);
+ WRITE_FLOAT_FIELD(rows, "%.0f");
WRITE_FLOAT_FIELD(startup_cost, "%.2f");
WRITE_FLOAT_FIELD(total_cost, "%.2f");
WRITE_NODE_FIELD(pathkeys);
+ WRITE_BITMAPSET_FIELD(required_outer);
+ WRITE_NODE_FIELD(param_clauses);
}
/*
WRITE_NODE_FIELD(indexqualcols);
WRITE_NODE_FIELD(indexorderbys);
WRITE_NODE_FIELD(indexorderbycols);
- WRITE_BOOL_FIELD(isjoininner);
WRITE_ENUM_FIELD(indexscandir, ScanDirection);
WRITE_FLOAT_FIELD(indextotalcost, "%.2f");
WRITE_FLOAT_FIELD(indexselectivity, "%.4f");
- WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
_outPathInfo(str, (const Path *) node);
WRITE_NODE_FIELD(bitmapqual);
- WRITE_BOOL_FIELD(isjoininner);
- WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
WRITE_NODE_FIELD(in_operators);
WRITE_NODE_FIELD(uniq_exprs);
- WRITE_FLOAT_FIELD(rows, "%.0f");
}
static void
WRITE_NODE_FIELD(parse);
WRITE_NODE_FIELD(glob);
WRITE_UINT_FIELD(query_level);
+ WRITE_BITMAPSET_FIELD(all_baserels);
WRITE_NODE_FIELD(join_rel_list);
WRITE_INT_FIELD(join_cur_level);
WRITE_NODE_FIELD(init_plans);
WRITE_NODE_FIELD(cheapest_startup_path);
WRITE_NODE_FIELD(cheapest_total_path);
WRITE_NODE_FIELD(cheapest_unique_path);
+ WRITE_NODE_FIELD(cheapest_parameterized_paths);
WRITE_UINT_FIELD(relid);
WRITE_UINT_FIELD(reltablespace);
WRITE_ENUM_FIELD(rtekind, RTEKind);
WRITE_NODE_FIELD(baserestrictinfo);
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
- WRITE_BITMAPSET_FIELD(index_outer_relids);
- WRITE_NODE_FIELD(index_inner_paths);
}
static void
WRITE_OID_FIELD(hashjoinoperator);
}
-static void
-_outInnerIndexscanInfo(StringInfo str, const InnerIndexscanInfo *node)
-{
- WRITE_NODE_TYPE("INNERINDEXSCANINFO");
- WRITE_BITMAPSET_FIELD(other_relids);
- WRITE_BOOL_FIELD(isouterjoin);
- WRITE_NODE_FIELD(cheapest_startup_innerpath);
- WRITE_NODE_FIELD(cheapest_total_innerpath);
-}
-
static void
_outPlaceHolderVar(StringInfo str, const PlaceHolderVar *node)
{
case T_RestrictInfo:
_outRestrictInfo(str, obj);
break;
- case T_InnerIndexscanInfo:
- _outInnerIndexscanInfo(str, obj);
- break;
case T_PlaceHolderVar:
_outPlaceHolderVar(str, obj);
break;
RelOptInfo.pathlist. (Actually, we discard Paths that are obviously
inferior alternatives before they ever get into the pathlist --- what
ends up in the pathlist is the cheapest way of generating each potentially
-useful sort ordering of the relation.) Also create a RelOptInfo.joininfo
-list including all the join clauses that involve this relation. For
-example, the WHERE clause "tab1.col1 = tab2.col1" generates entries in
-both tab1 and tab2's joininfo lists.
+useful sort ordering and parameterization of the relation.) Also create a
+RelOptInfo.joininfo list including all the join clauses that involve this
+relation. For example, the WHERE clause "tab1.col1 = tab2.col1" generates
+entries in both tab1 and tab2's joininfo lists.
If we have only a single base relation in the query, we are done.
Otherwise we have to figure out how to join the base relations into a
than applying a sort to the cheapest other path).
If the query contains one-sided outer joins (LEFT or RIGHT joins), or
-IN or EXISTS WHERE clauses that were converted to joins, then some of
-the possible join orders may be illegal. These are excluded by having
-join_is_legal consult a side list of such "special" joins to see
-whether a proposed join is illegal. (The same consultation allows it
-to see which join style should be applied for a valid join, ie,
-JOIN_INNER, JOIN_LEFT, etc.)
+IN or EXISTS WHERE clauses that were converted to semijoins or antijoins,
+then some of the possible join orders may be illegal. These are excluded
+by having join_is_legal consult a side list of such "special" joins to see
+whether a proposed join is illegal. (The same consultation allows it to
+see which join style should be applied for a valid join, ie, JOIN_INNER,
+JOIN_LEFT, etc.)
Valid OUTER JOIN Optimizations
are two possible sort orders and two possible PathKey lists it can
generate.)
-Note that a bitmap scan or multi-pass indexscan (OR clause scan) has NIL
-pathkeys since we can say nothing about the overall order of its result.
-Also, an indexscan on an unordered type of index generates NIL pathkeys.
-However, we can always create a pathkey by doing an explicit sort. The
-pathkeys for a Sort plan's output just represent the sort key fields and
-the ordering operators used.
+Note that a bitmap scan has NIL pathkeys since we can say nothing about
+the overall order of its result. Also, an indexscan on an unordered type
+of index generates NIL pathkeys. However, we can always create a pathkey
+by doing an explicit sort. The pathkeys for a Sort plan's output just
+represent the sort key fields and the ordering operators used.
Things get more interesting when we consider joins. Suppose we do a
mergejoin between A and B using the mergeclause A.X = B.Y. The output
with different orderings, for which extra sorts are needed anyway.
+Parameterized Paths
+-------------------
+
+The naive way to join two relations using a clause like WHERE A.X = B.Y
+is to generate a nestloop plan like this:
+
+    NestLoop
+        Filter: A.X = B.Y
+        -> Seq Scan on A
+        -> Seq Scan on B
+
+We can make this better by using a merge or hash join, but it still
+requires scanning all of both input relations. If A is very small and B is
+very large, but there is an index on B.Y, it can be enormously better to do
+something like this:
+
+    NestLoop
+        -> Seq Scan on A
+        -> Index Scan using B_Y_IDX on B
+               Index Condition: B.Y = A.X
+
+Here, we are expecting that for each row scanned from A, the nestloop
+plan node will pass down the current value of A.X into the scan of B.
+That allows the indexscan to treat A.X as a constant for any one
+invocation, and thereby use it as an index key. This is the only plan type
+that can avoid fetching all of B, and for small numbers of rows coming from
+A, that will dominate every other consideration. (As A gets larger, this
+gets less attractive, and eventually a merge or hash join will win instead.
+So we have to cost out all the alternatives to decide what to do.)
+
+It can be useful for the parameter value to be passed down through
+intermediate layers of joins, for example:
+
+    NestLoop
+        -> Seq Scan on A
+        -> Hash Join
+               Join Condition: B.Y = C.W
+               -> Seq Scan on B
+               -> Index Scan using C_Z_IDX on C
+                      Index Condition: C.Z = A.X
+
+If all joins are plain inner joins then this is unnecessary, because
+it's always possible to reorder the joins so that a parameter is used
+immediately below the nestloop node that provides it. But in the
+presence of outer joins, join reordering may not be possible, and then
+this option can be critical. Before version 9.2, Postgres used ad-hoc
+methods for planning and executing such queries, and those methods could
+not handle passing parameters down through multiple join levels.
+
+To plan such queries, we now use a notion of a "parameterized path",
+which is a path that makes use of a join clause to a relation that's not
+scanned by the path. In the example just above, we would construct a
+path representing the possibility of doing this:
+
+    -> Index Scan using C_Z_IDX on C
+           Index Condition: C.Z = A.X
+
+This path will be marked as being parameterized by relation A. (Note that
+this is only one of the possible access paths for C; we'd still have a
+plain unparameterized seqscan, and perhaps other possibilities.) The
+parameterization marker does not prevent joining the path to B, so one of
+the paths generated for the joinrel {B C} will represent
+
+    Hash Join
+        Join Condition: B.Y = C.W
+        -> Seq Scan on B
+        -> Index Scan using C_Z_IDX on C
+               Index Condition: C.Z = A.X
+
+This path is still marked as being parameterized by A. When we attempt to
+join {B C} to A to form the complete join tree, such a path can only be
+used as the inner side of a nestloop join: it will be ignored for other
+possible join types. So we will form a join path representing the query
+plan shown above, and it will compete in the usual way with paths built
+from non-parameterized scans.
+
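+As a concrete sketch of the representation (the fields are the ones this
+patch adds to Path; the specific values are hypothetical), the parameterized
+scan of C above is an ordinary IndexPath whose parameterization is recorded
+as
+
+    path->required_outer = bms_make_singleton(relid_of_A);
+    path->param_clauses = list_make1(rinfo_CZ_eq_AX);
+
+that is, a Relids set naming the rels that must supply parameter values,
+plus the join clauses the scan will apply.
+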
+To limit planning time, we have to avoid generating an unreasonably large
+number of parameterized paths. We do this by only generating parameterized
+relation scan paths for index scans, and then only for indexes for which
+suitable join clauses are available. There are also heuristics in join
+planning that try to limit the number of parameterized paths considered.
+
+In particular, there's been a deliberate policy decision to favor hash
+joins over merge joins for parameterized join steps (those occurring below
+a nestloop that provides parameters to the lower join's inputs). While we
+do not ignore merge joins entirely, joinpath.c does not fully explore the
+space of potential merge joins with parameterized inputs. Also, add_path
+treats parameterized paths as having no pathkeys, so that they compete
+only on cost and don't get preference for producing a special sort order.
+This creates additional bias against merge joins, since we might discard
+a path that could have been useful for performing a merge without an
+explicit sort step. Since a parameterized path must ultimately be used
+on the inside of a nestloop, where its sort order is uninteresting, these
+choices do not affect any requirement for the final output order of a
+query --- they only make it harder to use a merge join at a lower level.
+The savings in planning work justifies that.
+
+
-- bjm & tgl
join_search_hook_type join_search_hook = NULL;
+static void set_base_rel_sizes(PlannerInfo *root);
static void set_base_rel_pathlists(PlannerInfo *root);
+static void set_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
+static void set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
+static void set_foreign_size(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ RangeTblEntry *rte);
+static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
+static void generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys,
+ Relids required_outer);
static List *accumulate_append_subpath(List *subpaths, Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
-static void set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel,
- RangeTblEntry *rte);
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
bool *differentTypes);
make_one_rel(PlannerInfo *root, List *joinlist)
{
RelOptInfo *rel;
+ Index rti;
+
+ /*
+ * Construct the all_baserels Relids set.
+ */
+ root->all_baserels = NULL;
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *brel = root->simple_rel_array[rti];
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (brel == NULL)
+ continue;
+
+ Assert(brel->relid == rti); /* sanity check on array */
+
+ /* ignore RTEs that are "other rels" */
+ if (brel->reloptkind != RELOPT_BASEREL)
+ continue;
+
+ root->all_baserels = bms_add_member(root->all_baserels, brel->relid);
+ }
/*
* Generate access paths for the base rels.
*/
+ set_base_rel_sizes(root);
set_base_rel_pathlists(root);
/*
/*
* The result should join all and only the query's base rels.
*/
-#ifdef USE_ASSERT_CHECKING
- {
- int num_base_rels = 0;
- Index rti;
-
- for (rti = 1; rti < root->simple_rel_array_size; rti++)
- {
- RelOptInfo *brel = root->simple_rel_array[rti];
-
- if (brel == NULL)
- continue;
-
- Assert(brel->relid == rti); /* sanity check on array */
-
- /* ignore RTEs that are "other rels" */
- if (brel->reloptkind != RELOPT_BASEREL)
- continue;
-
- Assert(bms_is_member(rti, rel->relids));
- num_base_rels++;
- }
-
- Assert(bms_num_members(rel->relids) == num_base_rels);
- }
-#endif
-
- return rel;
+ Assert(bms_equal(rel->relids, root->all_baserels));
+
+ return rel;
+}
+
+/*
+ * set_base_rel_sizes
+ * Set the size estimates (rows and widths) for each base-relation entry.
+ *
+ * We do this in a separate pass over the base rels so that rowcount
+ * estimates are available for parameterized path generation.
+ */
+static void
+set_base_rel_sizes(PlannerInfo *root)
+{
+ Index rti;
+
+ for (rti = 1; rti < root->simple_rel_array_size; rti++)
+ {
+ RelOptInfo *rel = root->simple_rel_array[rti];
+
+ /* there may be empty slots corresponding to non-baserel RTEs */
+ if (rel == NULL)
+ continue;
+
+ Assert(rel->relid == rti); /* sanity check on array */
+
+ /* ignore RTEs that are "other rels" */
+ if (rel->reloptkind != RELOPT_BASEREL)
+ continue;
+
+ set_rel_size(root, rel, rti, root->simple_rte_array[rti]);
+ }
}
/*
}
/*
- * set_rel_pathlist
- * Build access paths for a base relation
+ * set_rel_size
+ * Set size estimates for a base relation
*/
static void
-set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (rel->reloptkind == RELOPT_BASEREL &&
* so set up a single dummy path for it. Here we only check this for
* regular baserels; if it's an otherrel, CE was already checked in
 * set_append_rel_size().
+ *
+ * In this case, we go ahead and set up the relation's path right away
+ * instead of leaving it for set_rel_pathlist to do. This is because
+ * we don't have a convention for marking a rel as dummy except by
+ * assigning a dummy path to it.
*/
set_dummy_rel_pathlist(rel);
}
else if (rte->inh)
{
/* It's an "append relation", process accordingly */
- set_append_rel_pathlist(root, rel, rti, rte);
+ set_append_rel_size(root, rel, rti, rte);
}
else
{
if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
/* Foreign table */
- set_foreign_pathlist(root, rel, rte);
+ set_foreign_size(root, rel, rte);
}
else
{
/* Plain relation */
- set_plain_rel_pathlist(root, rel, rte);
+ set_plain_rel_size(root, rel, rte);
}
break;
case RTE_SUBQUERY:
- /* Subquery --- generate a separate plan for it */
+ /*
+ * Subqueries don't support parameterized paths, so just go
+ * ahead and build their paths immediately.
+ */
set_subquery_pathlist(root, rel, rti, rte);
break;
case RTE_FUNCTION:
- /* RangeFunction --- generate a suitable path for it */
- set_function_pathlist(root, rel, rte);
+ set_function_size_estimates(root, rel);
break;
case RTE_VALUES:
- /* Values list --- generate a suitable path for it */
- set_values_pathlist(root, rel, rte);
+ set_values_size_estimates(root, rel);
break;
case RTE_CTE:
- /* CTE reference --- generate a suitable path for it */
+ /*
+ * CTEs don't support parameterized paths, so just go ahead
+ * and build their paths immediately.
+ */
if (rte->self_reference)
set_worktable_pathlist(root, rel, rte);
else
break;
}
}
+}
+
+/*
+ * set_rel_pathlist
+ * Build access paths for a base relation
+ */
+static void
+set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ if (IS_DUMMY_REL(rel))
+ {
+ /* We already proved the relation empty, so nothing more to do */
+ }
+ else if (rte->inh)
+ {
+ /* It's an "append relation", process accordingly */
+ set_append_rel_pathlist(root, rel, rti, rte);
+ }
+ else
+ {
+ switch (rel->rtekind)
+ {
+ case RTE_RELATION:
+ if (rte->relkind == RELKIND_FOREIGN_TABLE)
+ {
+ /* Foreign table */
+ set_foreign_pathlist(root, rel, rte);
+ }
+ else
+ {
+ /* Plain relation */
+ set_plain_rel_pathlist(root, rel, rte);
+ }
+ break;
+ case RTE_SUBQUERY:
+ /* Subquery --- fully handled during set_rel_size */
+ break;
+ case RTE_FUNCTION:
+ /* RangeFunction */
+ set_function_pathlist(root, rel, rte);
+ break;
+ case RTE_VALUES:
+ /* Values list */
+ set_values_pathlist(root, rel, rte);
+ break;
+ case RTE_CTE:
+ /* CTE reference --- fully handled during set_rel_size */
+ break;
+ default:
+ elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
+ break;
+ }
+ }
#ifdef OPTIMIZER_DEBUG
 debug_print_rel(root, rel);
#endif
}
/*
- * set_plain_rel_pathlist
- * Build access paths for a plain relation (no subquery, no inheritance)
+ * set_plain_rel_size
+ * Set size estimates for a plain relation (no subquery, no inheritance)
*/
static void
-set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+set_plain_rel_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
/*
* Test any partial indexes of rel for applicability. We must do this
check_partial_indexes(root, rel);
set_baserel_size_estimates(root, rel);
}
+}
- /*
- * Generate paths and add them to the rel's pathlist.
- *
- * Note: add_path() will discard any paths that are dominated by another
- * available path, keeping only those paths that are superior along at
- * least one dimension of cost or sortedness.
- */
-
+/*
+ * set_plain_rel_pathlist
+ * Build access paths for a plain relation (no subquery, no inheritance)
+ */
+static void
+set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
/* Consider sequential scan */
add_path(rel, create_seqscan_path(root, rel));
}
/*
- * set_append_rel_pathlist
- * Build access paths for an "append relation"
+ * set_foreign_size
+ * Set size estimates for a foreign table RTE
+ */
+static void
+set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Mark rel with estimated output rows, width, etc */
+ set_foreign_size_estimates(root, rel);
+}
+
+/*
+ * set_foreign_pathlist
+ * Build the (single) access path for a foreign table RTE
+ */
+static void
+set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+{
+ /* Generate appropriate path */
+ add_path(rel, (Path *) create_foreignscan_path(root, rel));
+
+ /* Select cheapest path (pretty easy in this case...) */
+ set_cheapest(rel);
+}
+
+/*
+ * set_append_rel_size
+ * Set size estimates for an "append relation"
*
* The passed-in rel and RTE represent the entire append relation. The
* relation's contents are computed by appending together the output of
* a good thing because their outputs are not the same size.
*/
static void
-set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
- Index rti, RangeTblEntry *rte)
+set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
- List *live_childrels = NIL;
- List *subpaths = NIL;
- List *all_child_pathkeys = NIL;
double parent_rows;
double parent_size;
double *parent_attrsizes;
nattrs = rel->max_attr - rel->min_attr + 1;
parent_attrsizes = (double *) palloc0(nattrs * sizeof(double));
- /*
- * Generate access paths for each member relation, and pick the cheapest
- * path for each one.
- */
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
RelOptInfo *childrel;
List *childquals;
Node *childqual;
- ListCell *lcp;
ListCell *parentvars;
ListCell *childvars;
{
/*
* This child need not be scanned, so we can omit it from the
- * appendrel. Mark it with a dummy cheapest-path though, in case
- * best_appendrel_indexscan() looks at it later.
+ * appendrel.
*/
set_dummy_rel_pathlist(childrel);
continue;
*/
/*
- * Compute the child's access paths.
+ * Compute the child's size.
*/
- set_rel_pathlist(root, childrel, childRTindex, childRTE);
+ set_rel_size(root, childrel, childRTindex, childRTE);
/*
* It is possible that constraint exclusion detected a contradiction
* within a child subquery, even though we didn't prove one above.
- * If what we got back was a dummy path, we can skip this child.
+ * If so, we can skip this child.
*/
- if (IS_DUMMY_PATH(childrel->cheapest_total_path))
+ if (IS_DUMMY_REL(childrel))
continue;
/*
- * Child is live, so add its cheapest access path to the Append path
- * we are constructing for the parent.
- */
- subpaths = accumulate_append_subpath(subpaths,
- childrel->cheapest_total_path);
-
- /* Remember which childrels are live, for MergeAppend logic below */
- live_childrels = lappend(live_childrels, childrel);
-
- /*
- * Collect a list of all the available path orderings for all the
- * children. We use this as a heuristic to indicate which sort
- * orderings we should build MergeAppend paths for.
- */
- foreach(lcp, childrel->pathlist)
- {
- Path *childpath = (Path *) lfirst(lcp);
- List *childkeys = childpath->pathkeys;
- ListCell *lpk;
- bool found = false;
-
- /* Ignore unsorted paths */
- if (childkeys == NIL)
- continue;
-
- /* Have we already seen this ordering? */
- foreach(lpk, all_child_pathkeys)
- {
- List *existing_pathkeys = (List *) lfirst(lpk);
-
- if (compare_pathkeys(existing_pathkeys,
- childkeys) == PATHKEYS_EQUAL)
- {
- found = true;
- break;
- }
- }
- if (!found)
- {
- /* No, so add it to all_child_pathkeys */
- all_child_pathkeys = lappend(all_child_pathkeys, childkeys);
- }
- }
-
- /*
- * Accumulate size information from each child.
+ * Accumulate size information from each live child.
*/
if (childrel->rows > 0)
{
rel->tuples = parent_rows;
pfree(parent_attrsizes);
+}
+
+/*
+ * set_append_rel_pathlist
+ * Build access paths for an "append relation"
+ */
+static void
+set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
+ Index rti, RangeTblEntry *rte)
+{
+ int parentRTindex = rti;
+ List *live_childrels = NIL;
+ List *subpaths = NIL;
+ List *all_child_pathkeys = NIL;
+ List *all_child_outers = NIL;
+ ListCell *l;
/*
- * Next, build an unordered Append path for the rel. (Note: this is
- * correct even if we have zero or one live subpath due to constraint
- * exclusion.)
+ * Generate access paths for each member relation, and remember the
+ * cheapest path for each one. Also, identify all pathkeys (orderings)
+ * and parameterizations (required_outer sets) available for the member
+ * relations.
+ */
+ foreach(l, root->append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
+ int childRTindex;
+ RangeTblEntry *childRTE;
+ RelOptInfo *childrel;
+ ListCell *lcp;
+
+ /* append_rel_list contains all append rels; ignore others */
+ if (appinfo->parent_relid != parentRTindex)
+ continue;
+
+ /* Re-locate the child RTE and RelOptInfo */
+ childRTindex = appinfo->child_relid;
+ childRTE = root->simple_rte_array[childRTindex];
+ childrel = root->simple_rel_array[childRTindex];
+
+ /*
+ * Compute the child's access paths.
+ */
+ set_rel_pathlist(root, childrel, childRTindex, childRTE);
+
+ /*
+ * If child is dummy, ignore it.
+ */
+ if (IS_DUMMY_REL(childrel))
+ continue;
+
+ /*
+ * Child is live, so add its cheapest access path to the Append path
+ * we are constructing for the parent.
+ */
+ subpaths = accumulate_append_subpath(subpaths,
+ childrel->cheapest_total_path);
+
+ /* Remember which childrels are live, for logic below */
+ live_childrels = lappend(live_childrels, childrel);
+
+ /*
+ * Collect lists of all the available path orderings and
+ * parameterizations for all the children. We use these as a
+ * heuristic to indicate which sort orderings and parameterizations we
+ * should build Append and MergeAppend paths for.
+ */
+ foreach(lcp, childrel->pathlist)
+ {
+ Path *childpath = (Path *) lfirst(lcp);
+ List *childkeys = childpath->pathkeys;
+ Relids childouter = childpath->required_outer;
+
+ /* Unsorted paths don't contribute to pathkey list */
+ if (childkeys != NIL)
+ {
+ ListCell *lpk;
+ bool found = false;
+
+ /* Have we already seen this ordering? */
+ foreach(lpk, all_child_pathkeys)
+ {
+ List *existing_pathkeys = (List *) lfirst(lpk);
+
+ if (compare_pathkeys(existing_pathkeys,
+ childkeys) == PATHKEYS_EQUAL)
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ /* No, so add it to all_child_pathkeys */
+ all_child_pathkeys = lappend(all_child_pathkeys,
+ childkeys);
+ }
+ }
+
+ /* Unparameterized paths don't contribute to param-set list */
+ if (childouter)
+ {
+ ListCell *lco;
+ bool found = false;
+
+ /* Have we already seen this param set? */
+ foreach(lco, all_child_outers)
+ {
+ Relids existing_outers = (Relids) lfirst(lco);
+
+ if (bms_equal(existing_outers, childouter))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ /* No, so add it to all_child_outers */
+ all_child_outers = lappend(all_child_outers,
+ childouter);
+ }
+ }
+ }
+ }
+
+ /*
+ * Next, build an unordered, unparameterized Append path for the rel.
+ * (Note: this is correct even if we have zero or one live subpath due to
+ * constraint exclusion.)
*/
add_path(rel, (Path *) create_append_path(rel, subpaths));
/*
- * Next, build MergeAppend paths based on the collected list of child
- * pathkeys. We consider both cheapest-startup and cheapest-total cases,
- * ie, for each interesting ordering, collect all the cheapest startup
- * subpaths and all the cheapest total paths, and build a MergeAppend path
- * for each list.
+ * Build unparameterized MergeAppend paths based on the collected list of
+ * child pathkeys.
+ */
+ generate_mergeappend_paths(root, rel, live_childrels,
+ all_child_pathkeys, NULL);
+
+ /*
+ * Build Append and MergeAppend paths for each parameterization seen
+ * among the child rels. (This may look pretty expensive, but in most
+ * cases of practical interest, the child relations will tend to expose
+ * the same parameterizations and pathkeys, so that not that many cases
+ * actually get considered here.)
*/
- foreach(l, all_child_pathkeys)
+ foreach(l, all_child_outers)
+ {
+ Relids required_outer = (Relids) lfirst(l);
+ ListCell *lcr;
+
+ /* Select the child paths for an Append with this parameterization */
+ subpaths = NIL;
+ foreach(lcr, live_childrels)
+ {
+ RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+ Path *cheapest_total;
+
+ cheapest_total =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ NIL,
+ required_outer,
+ TOTAL_COST);
+ Assert(cheapest_total != NULL);
+
+ subpaths = accumulate_append_subpath(subpaths, cheapest_total);
+ }
+ add_path(rel, (Path *) create_append_path(rel, subpaths));
+
+ /* And build parameterized MergeAppend paths */
+ generate_mergeappend_paths(root, rel, live_childrels,
+ all_child_pathkeys, required_outer);
+ }
+
+ /* Select cheapest paths */
+ set_cheapest(rel);
+}
+
+/*
+ * generate_mergeappend_paths
+ * Generate MergeAppend paths for an append relation
+ *
+ * Generate a path for each ordering (pathkey list) appearing in
+ * all_child_pathkeys. If required_outer isn't NULL, accept paths having
+ * those relations as required outer relations.
+ *
+ * We consider both cheapest-startup and cheapest-total cases, ie, for each
+ * interesting ordering, collect all the cheapest startup subpaths and all the
+ * cheapest total paths, and build a MergeAppend path for each case.
+ */
+static void
+generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *live_childrels,
+ List *all_child_pathkeys,
+ Relids required_outer)
+{
+ ListCell *lcp;
+
+ foreach(lcp, all_child_pathkeys)
{
- List *pathkeys = (List *) lfirst(l);
+ List *pathkeys = (List *) lfirst(lcp);
List *startup_subpaths = NIL;
List *total_subpaths = NIL;
bool startup_neq_total = false;
cheapest_startup =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
+ required_outer,
STARTUP_COST);
cheapest_total =
get_cheapest_path_for_pathkeys(childrel->pathlist,
pathkeys,
+ required_outer,
TOTAL_COST);
/*
- * If we can't find any paths with the right order just add the
- * cheapest-total path; we'll have to sort it.
+ * If we can't find any paths with the right order just use the
+ * cheapest-total path; we'll have to sort it later. When a
+ * parameterization is required, fall back to the cheapest path having
+ * that parameterization, regardless of its ordering.
*/
- if (cheapest_startup == NULL)
- cheapest_startup = childrel->cheapest_total_path;
- if (cheapest_total == NULL)
- cheapest_total = childrel->cheapest_total_path;
+ if (cheapest_startup == NULL || cheapest_total == NULL)
+ {
+ if (required_outer)
+ cheapest_startup = cheapest_total =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ NIL,
+ required_outer,
+ TOTAL_COST);
+ else
+ cheapest_startup = cheapest_total =
+ childrel->cheapest_total_path;
+ Assert(cheapest_total != NULL);
+ }
/*
* Notice whether we actually have different paths for the
total_subpaths,
pathkeys));
}
-
- /* Select cheapest path */
- set_cheapest(rel);
}
/*
* Build a dummy path for a relation that's been excluded by constraints
*
* Rather than inventing a special "dummy" path type, we represent this as an
- * AppendPath with no members (see also IS_DUMMY_PATH macro).
+ * AppendPath with no members (see also IS_DUMMY_PATH/IS_DUMMY_REL macros).
*/
static void
set_dummy_rel_pathlist(RelOptInfo *rel)
rel->rows = 0;
rel->width = 0;
+ /* Discard any pre-existing paths; no further need for them */
+ rel->pathlist = NIL;
+
add_path(rel, (Path *) create_append_path(rel, NIL));
/* Select cheapest path (pretty easy in this case...) */
/*
* set_subquery_pathlist
* Build the (single) access path for a subquery RTE
+ *
+ * There's no need for a separate set_subquery_size phase, since we don't
+ * support parameterized paths for subqueries.
*/
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
static void
set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
- /* Mark rel with estimated output rows, width, etc */
- set_function_size_estimates(root, rel);
-
/* Generate appropriate path */
add_path(rel, create_functionscan_path(root, rel));
static void
set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
- /* Mark rel with estimated output rows, width, etc */
- set_values_size_estimates(root, rel);
-
/* Generate appropriate path */
add_path(rel, create_valuesscan_path(root, rel));
/*
* set_cte_pathlist
* Build the (single) access path for a non-self-reference CTE RTE
+ *
+ * There's no need for a separate set_cte_size phase, since we don't
+ * support parameterized paths for CTEs.
*/
static void
set_cte_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/*
* set_worktable_pathlist
* Build the (single) access path for a self-reference CTE RTE
+ *
+ * There's no need for a separate set_worktable_size phase, since we don't
+ * support parameterized paths for CTEs.
*/
static void
set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
set_cheapest(rel);
}
-/*
- * set_foreign_pathlist
- * Build the (single) access path for a foreign table RTE
- */
-static void
-set_foreign_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
-{
- /* Mark rel with estimated output rows, width, etc */
- set_foreign_size_estimates(root, rel);
-
- /* Generate appropriate path */
- add_path(rel, (Path *) create_foreignscan_path(root, rel));
-
- /* Select cheapest path (pretty easy in this case...) */
- set_cheapest(rel);
-}
-
/*
* make_rel_from_joinlist
* Build access paths using a "joinlist" to guide the join path search.
* path's result. A caller can estimate the cost of fetching a partial
* result by interpolating between startup_cost and total_cost. In detail:
* actual_cost = startup_cost +
- * (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
+ * (total_cost - startup_cost) * tuples_to_fetch / path->rows;
* Note that a base relation's rows count (and, by extension, plan_rows for
* plan nodes below the LIMIT node) are set without regard to any LIMIT, so
* that this equation works properly. (Also, these routines guarantee not to
* applied as a top-level plan node.
*
* For largely historical reasons, most of the routines in this module use
- * the passed result Path only to store their startup_cost and total_cost
- * results into. All the input data they need is passed as separate
+ * the passed result Path only to store their results (rows, startup_cost and
+ * total_cost) into. All the input data they need is passed as separate
* parameters, even though much of it could be extracted from the Path.
* An exception is made for the cost_XXXjoin() routines, which expect all
- * the non-cost fields of the passed XXXPath to be filled in, and similarly
+ * the other fields of the passed XXXPath to be filled in, and similarly
* cost_index() assumes the passed IndexPath is valid except for its output
* values.
*
#define LOG2(x) (log(x) / 0.693147180559945)
-/*
- * Some Paths return less than the nominal number of rows of their parent
- * relations; join nodes need to do this to get the correct input count:
- */
-#define PATH_ROWS(path) \
- (IsA(path, UniquePath) ? \
- ((UniquePath *) (path))->rows : \
- (path)->parent->rows)
-
double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
static void cost_rescan(PlannerInfo *root, Path *path,
Cost *rescan_startup_cost, Cost *rescan_total_cost);
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
-static bool adjust_semi_join(PlannerInfo *root, JoinPath *path,
- SpecialJoinInfo *sjinfo,
- Selectivity *outer_match_frac,
- Selectivity *match_count,
- bool *indexed_join_quals);
+static bool has_indexed_join_quals(NestPath *path, List *joinclauses);
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
List *quals);
+static void set_joinpath_size_estimate(PlannerInfo *root, JoinPath *path,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
+static double calc_joinrel_size_estimate(PlannerInfo *root,
+ double outer_rows,
+ double inner_rows,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist);
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* For now, at least, seqscans are never parameterized */
+ path->rows = baserel->rows;
+
if (!enable_seqscan)
startup_cost += disable_cost;
*
* 'path' describes the indexscan under consideration, and is complete
* except for the fields to be set by this routine
- * 'outer_rel' is the outer relation when we are considering using the index
- * scan as the inside of a nestloop join (hence, some of the indexquals
- * are join clauses, and we should expect repeated scans of the index);
- * NULL for a plain index scan
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior
*
- * In addition to startup_cost and total_cost, cost_index() sets the path's
- * indextotalcost and indexselectivity fields. These values are needed if
- * the IndexPath is used in a BitmapIndexScan.
+ * In addition to rows, startup_cost and total_cost, cost_index() sets the
+ * path's indextotalcost and indexselectivity fields. These values will be
+ * needed if the IndexPath is used in a BitmapIndexScan.
*
* NOTE: path->indexquals must contain only clauses usable as index
* restrictions. Any additional quals evaluated as qpquals may reduce the
* we have to fetch from the table, so they don't reduce the scan cost.
*/
void
-cost_index(IndexPath *path, PlannerInfo *root, RelOptInfo *outer_rel)
+cost_index(IndexPath *path, PlannerInfo *root, double loop_count)
{
IndexOptInfo *index = path->indexinfo;
RelOptInfo *baserel = index->rel;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* Estimate the number of rows returned by the indexscan */
+ if (path->path.required_outer)
+ {
+ /*
+ * The estimate should be less than baserel->rows because of the
+ * additional selectivity of the join clauses. Since indexclauses may
+ * contain both restriction and join clauses, we have to do a set
+ * union to get the full set of clauses that must be considered to
+ * compute the correct selectivity. (Without the union operation, we
+ * might have some restriction clauses appearing twice, which'd
+ * mislead clauselist_selectivity into double-counting their
+ * selectivity. However, since RestrictInfo nodes aren't copied when
+ * linking them into different lists, it should be sufficient to use
+ * pointer comparison to remove duplicates.)
+ *
+ * Note that we force the clauses to be treated as non-join clauses
+ * during selectivity estimation.
+ */
+ List *allclauses;
+
+ allclauses = list_union_ptr(baserel->baserestrictinfo,
+ path->indexclauses);
+ path->path.rows = baserel->tuples *
+ clauselist_selectivity(root,
+ allclauses,
+ baserel->relid, /* do not use 0! */
+ JOIN_INNER,
+ NULL);
+ if (path->path.rows > baserel->rows)
+ path->path.rows = baserel->rows;
+ path->path.rows = clamp_row_est(path->path.rows);
+ }
+ else
+ {
+ /*
+ * The number of rows is the same as the parent rel's estimate, since
+ * this isn't a parameterized path.
+ */
+ path->path.rows = baserel->rows;
+ }
+
if (!enable_indexscan)
startup_cost += disable_cost;
/* we don't need to check enable_indexonlyscan; indxpath.c does that */
OidFunctionCall7(index->amcostestimate,
PointerGetDatum(root),
PointerGetDatum(path),
- PointerGetDatum(outer_rel),
+ Float8GetDatum(loop_count),
PointerGetDatum(&indexStartupCost),
PointerGetDatum(&indexTotalCost),
PointerGetDatum(&indexSelectivity),
* that this query will fetch; but it's not clear how to do better.
*----------
*/
- if (outer_rel != NULL && outer_rel->rows > 1)
+ if (loop_count > 1)
{
/*
* For repeated indexscans, the appropriate estimate for the
* pro-rate the costs for one scan. In this case we assume all the
* fetches are random accesses.
*/
- double num_scans = outer_rel->rows;
-
- pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
+ pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
baserel->pages,
(double) index->pages,
root);
if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
- max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
+ max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
/*
* In the perfectly correlated case, the number of pages touched by
*/
pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
- pages_fetched = index_pages_fetched(pages_fetched * num_scans,
+ pages_fetched = index_pages_fetched(pages_fetched * loop_count,
baserel->pages,
(double) index->pages,
root);
if (indexonly)
pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
- min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
+ min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
}
else
{
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
- if (outer_rel == NULL)
+ if (path->path.required_outer == NULL)
{
QualCost index_qual_cost;
*
* 'baserel' is the relation to be scanned
* 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths
- * 'outer_rel' is the outer relation when we are considering using the bitmap
- * scan as the inside of a nestloop join (hence, some of the indexquals
- * are join clauses, and we should expect repeated scans of the table);
- * NULL for a plain bitmap scan
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior
*
- * Note: if this is a join inner path, the component IndexPaths in bitmapqual
- * should have been costed accordingly.
+ * Note: the component IndexPaths in bitmapqual should have been costed
+ * using the same loop_count.
*/
void
cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
- Path *bitmapqual, RelOptInfo *outer_rel)
+ Path *bitmapqual, double loop_count)
{
Cost startup_cost = 0;
Cost run_cost = 0;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* Estimate the number of rows returned by the bitmap scan */
+ if (path->required_outer)
+ {
+ /*
+ * The estimate should be less than baserel->rows because of the
+ * additional selectivity of the join clauses. We make use of the
+ * selectivity estimated for the bitmap to do this; this isn't really
+ * quite right since there may be restriction conditions not included
+ * in the bitmap ...
+ */
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+
+ cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
+ path->rows = baserel->tuples * indexSelectivity;
+ if (path->rows > baserel->rows)
+ path->rows = baserel->rows;
+ path->rows = clamp_row_est(path->rows);
+ }
+ else
+ {
+ /*
+ * The number of rows is the same as the parent rel's estimate, since
+ * this isn't a parameterized path.
+ */
+ path->rows = baserel->rows;
+ }
+
if (!enable_bitmapscan)
startup_cost += disable_cost;
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
- if (outer_rel != NULL && outer_rel->rows > 1)
+ if (loop_count > 1)
{
/*
* For repeated bitmap scans, scale up the number of tuples fetched in
* estimate the number of pages fetched by all the scans. Then
* pro-rate for one scan.
*/
- double num_scans = outer_rel->rows;
-
- pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
+ pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
baserel->pages,
get_indexpath_pages(bitmapqual),
root);
- pages_fetched /= num_scans;
+ pages_fetched /= loop_count;
}
else
{
* scan doesn't look to be the same cost as an indexscan to retrieve a
* single tuple.
*/
- *cost += 0.1 * cpu_operator_cost * ((IndexPath *) path)->rows;
+ *cost += 0.1 * cpu_operator_cost * path->rows;
}
else if (IsA(path, BitmapAndPath))
{
* Note that this considers only the costs of index scanning and bitmap
* creation, not the eventual heap access. In that sense the object isn't
* truly a Path, but it has enough path-like properties (costs in particular)
- * to warrant treating it as one.
+ * to warrant treating it as one. We don't bother to set the path rows field,
+ * however.
*/
void
cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
totalCost += 100.0 * cpu_operator_cost;
}
path->bitmapselectivity = selec;
+ path->path.rows = 0; /* per above, not used */
path->path.startup_cost = totalCost;
path->path.total_cost = totalCost;
}
totalCost += 100.0 * cpu_operator_cost;
}
path->bitmapselectivity = Min(selec, 1.0);
+ path->path.rows = 0; /* per above, not used */
path->path.startup_cost = totalCost;
path->path.total_cost = totalCost;
}
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_RELATION);
+ /* For now, tidscans are never parameterized */
+ path->rows = baserel->rows;
+
/* Count how many tuples we expect to retrieve */
ntuples = 0;
foreach(l, tidquals)
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_SUBQUERY);
+ /* subqueryscans are never parameterized */
+ path->rows = baserel->rows;
+
/*
* Cost of path is cost of evaluating the subplan, plus cost of evaluating
* any restriction clauses that will be attached to the SubqueryScan node,
rte = planner_rt_fetch(baserel->relid, root);
Assert(rte->rtekind == RTE_FUNCTION);
+ /* functionscans are never parameterized */
+ path->rows = baserel->rows;
+
/*
* Estimate costs of executing the function expression.
*
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_VALUES);
+ /* valuesscans are never parameterized */
+ path->rows = baserel->rows;
+
/*
* For now, estimate list evaluation cost at one operator eval per list
* (probably pretty bogus, but is it worth being smarter?)
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_CTE);
+ /* ctescans are never parameterized */
+ path->rows = baserel->rows;
+
/* Charge one CPU tuple cost per row for tuplestore manipulation */
cpu_per_tuple = cpu_tuple_cost;
if (!enable_sort)
startup_cost += disable_cost;
+ path->rows = tuples;
+
/*
* We want to be sure the cost of a sort is never estimated as zero, even
* if passed-in tuple count is zero. Besides, mustn't do log(0)...
double nbytes = relation_byte_size(tuples, width);
long work_mem_bytes = work_mem * 1024L;
+ path->rows = tuples;
+
/*
* Whether spilling or not, charge 2x cpu_operator_cost per tuple to
* reflect bookkeeping overhead. (This rate must be more than what
Cost input_startup_cost, Cost input_total_cost,
double input_tuples)
{
+ double output_tuples;
Cost startup_cost;
Cost total_cost;
AggClauseCosts dummy_aggcosts;
startup_cost += aggcosts->finalCost;
/* we aren't grouping */
total_cost = startup_cost + cpu_tuple_cost;
+ output_tuples = 1;
}
else if (aggstrategy == AGG_SORTED)
{
total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
total_cost += aggcosts->finalCost * numGroups;
total_cost += cpu_tuple_cost * numGroups;
+ output_tuples = numGroups;
}
else
{
total_cost = startup_cost;
total_cost += aggcosts->finalCost * numGroups;
total_cost += cpu_tuple_cost * numGroups;
+ output_tuples = numGroups;
}
+ path->rows = output_tuples;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
}
total_cost += cpu_operator_cost * (numPartCols + numOrderCols) * input_tuples;
total_cost += cpu_tuple_cost * input_tuples;
+ path->rows = input_tuples;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
}
*/
total_cost += cpu_operator_cost * input_tuples * numGroupCols;
+ path->rows = numGroups;
path->startup_cost = startup_cost;
path->total_cost = total_cost;
}
/*
- * If a nestloop's inner path is an indexscan, be sure to use its estimated
- * output row count, which may be lower than the restriction-clause-only row
- * count of its parent. (We don't include this case in the PATH_ROWS macro
- * because it applies *only* to a nestloop's inner relation.) We have to
- * be prepared to recurse through Append or MergeAppend nodes in case of an
- * appendrel. (It's not clear MergeAppend can be seen here, but we may as
- * well handle it if so.)
- */
-static double
-nestloop_inner_path_rows(Path *path)
-{
- double result;
-
- if (IsA(path, IndexPath))
- result = ((IndexPath *) path)->rows;
- else if (IsA(path, BitmapHeapPath))
- result = ((BitmapHeapPath *) path)->rows;
- else if (IsA(path, AppendPath))
- {
- ListCell *l;
-
- result = 0;
- foreach(l, ((AppendPath *) path)->subpaths)
- {
- result += nestloop_inner_path_rows((Path *) lfirst(l));
- }
- }
- else if (IsA(path, MergeAppendPath))
- {
- ListCell *l;
-
- result = 0;
- foreach(l, ((MergeAppendPath *) path)->subpaths)
- {
- result += nestloop_inner_path_rows((Path *) lfirst(l));
- }
- }
- else
- result = PATH_ROWS(path);
-
- return result;
-}
-
-/*
- * cost_nestloop
- * Determines and returns the cost of joining two relations using the
- * nested loop algorithm.
- *
- * 'path' is already filled in except for the cost fields
+ * initial_cost_nestloop
+ * Preliminary estimate of the cost of a nestloop join path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_nestloop will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_nestloop
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here, since that's by far the most expensive part of the
+ * calculations. The end result is that CPU-cost considerations must be
+ * left for the second phase.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_nestloop
+ * 'jointype' is the type of join to be performed
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
*/
void
-cost_nestloop(NestPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+initial_cost_nestloop(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
{
- Path *outer_path = path->outerjoinpath;
- Path *inner_path = path->innerjoinpath;
Cost startup_cost = 0;
Cost run_cost = 0;
+ double outer_path_rows = outer_path->rows;
Cost inner_rescan_start_cost;
Cost inner_rescan_total_cost;
Cost inner_run_cost;
Cost inner_rescan_run_cost;
- Cost cpu_per_tuple;
- QualCost restrict_qual_cost;
- double outer_path_rows = PATH_ROWS(outer_path);
- double inner_path_rows = nestloop_inner_path_rows(inner_path);
- double ntuples;
- Selectivity outer_match_frac;
- Selectivity match_count;
- bool indexed_join_quals;
-
- if (!enable_nestloop)
- startup_cost += disable_cost;
/* estimate costs to rescan the inner relation */
cost_rescan(root, inner_path,
inner_run_cost = inner_path->total_cost - inner_path->startup_cost;
inner_rescan_run_cost = inner_rescan_total_cost - inner_rescan_start_cost;
- if (adjust_semi_join(root, path, sjinfo,
- &outer_match_frac,
- &match_count,
- &indexed_join_quals))
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
{
double outer_matched_rows;
Selectivity inner_scan_frac;
*/
run_cost += inner_run_cost;
- outer_matched_rows = rint(outer_path_rows * outer_match_frac);
- inner_scan_frac = 2.0 / (match_count + 1.0);
+ outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+ inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
/* Add inner run cost for additional outer tuples having matches */
if (outer_matched_rows > 1)
run_cost += (outer_matched_rows - 1) * inner_rescan_run_cost * inner_scan_frac;
+ /*
+ * The cost of processing unmatched rows varies depending on the
+ * details of the joinclauses, so we leave that part for later.
+ */
+
+ /* Save private data for final_cost_nestloop */
+ workspace->outer_matched_rows = outer_matched_rows;
+ workspace->inner_scan_frac = inner_scan_frac;
+ }
+ else
+ {
+ /* Normal case; we'll scan whole input rel for each outer row */
+ run_cost += inner_run_cost;
+ if (outer_path_rows > 1)
+ run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
+ }
+
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost;
+ /* Save private data for final_cost_nestloop */
+ workspace->run_cost = run_cost;
+ workspace->inner_rescan_run_cost = inner_rescan_run_cost;
+}
+
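+/*
+ * Usage sketch (illustrative; joinpath.c follows this pattern): compute
+ * the lower-bound costs first, discard the candidate early if it cannot
+ * survive add_path_precheck, and only then build the path, at which point
+ * create_nestloop_path performs final_cost_nestloop:
+ *
+ *		JoinCostWorkspace workspace;
+ *
+ *		initial_cost_nestloop(root, &workspace, jointype,
+ *							  outer_path, inner_path, sjinfo, &semifactors);
+ *		if (add_path_precheck(joinrel, workspace.startup_cost,
+ *							  workspace.total_cost, pathkeys, required_outer))
+ *			add_path(joinrel, (Path *) create_nestloop_path(...));
+ */
+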
+/*
+ * final_cost_nestloop
+ * Final estimate of the cost and result size of a nestloop join path.
+ *
+ * 'path' is already filled in except for the rows and cost fields
+ * 'workspace' is the result from initial_cost_nestloop
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if path->jointype is SEMI or ANTI
+ */
+void
+final_cost_nestloop(PlannerInfo *root, NestPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
+{
+ Path *outer_path = path->outerjoinpath;
+ Path *inner_path = path->innerjoinpath;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ Cost inner_rescan_run_cost = workspace->inner_rescan_run_cost;
+ Cost cpu_per_tuple;
+ List *joinclauses;
+ QualCost restrict_qual_cost;
+ double ntuples;
+
+ /* Estimate the number of rows returned by the join */
+ if (path->path.required_outer)
+ {
+ /*
+ * The nestloop is (still) parameterized because of upper-level join
+ * clauses used by the input paths. So the rowcount estimate should
+ * be less than the joinrel's row count because of the additional
+ * selectivity of those join clauses. To estimate the size we need
+ * to know which of the joinrestrictinfo clauses nominally associated
+ * with the join have been applied in the inner input path.
+ *
+ * We should also assume that such clauses won't be evaluated at the
+ * join node at runtime, so exclude them from restrict_qual_cost.
+ */
+ joinclauses = select_nonredundant_join_clauses(path->joinrestrictinfo,
+ path->innerjoinpath->param_clauses);
+ set_joinpath_size_estimate(root, path, sjinfo, joinclauses);
+ }
+ else
+ {
+ joinclauses = path->joinrestrictinfo;
+ path->path.rows = path->path.parent->rows;
+ }
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_nestloop)
+ startup_cost += disable_cost;
+
+ /* cost of source data */
+
+ if (path->jointype == JOIN_SEMI || path->jointype == JOIN_ANTI)
+ {
+ double outer_matched_rows = workspace->outer_matched_rows;
+ Selectivity inner_scan_frac = workspace->inner_scan_frac;
+
+ /*
+ * SEMI or ANTI join: executor will stop after first match.
+ */
+
/* Compute number of tuples processed (not number emitted!) */
ntuples = outer_matched_rows * inner_path_rows * inner_scan_frac;
* return the first tuple of a nonempty scan. Otherwise, the executor
* will have to scan the whole inner rel; not so cheap.
*/
- if (indexed_join_quals)
+ if (has_indexed_join_quals(path, joinclauses))
{
run_cost += (outer_path_rows - outer_matched_rows) *
inner_rescan_run_cost / inner_path_rows;
}
else
{
- /* Normal case; we'll scan whole input rel for each outer row */
- run_cost += inner_run_cost;
- if (outer_path_rows > 1)
- run_cost += (outer_path_rows - 1) * inner_rescan_run_cost;
+ /* Normal-case source costs were included in preliminary estimate */
/* Compute number of tuples processed (not number emitted!) */
ntuples = outer_path_rows * inner_path_rows;
}
/* CPU costs */
- cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);
+ cost_qual_eval(&restrict_qual_cost, joinclauses, root);
startup_cost += restrict_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
run_cost += cpu_per_tuple * ntuples;
}
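/*
 * Illustration (not part of the patch), with made-up numbers: if
 * outer_path_rows = 100, semifactors->outer_match_frac = 0.4 and
 * semifactors->match_count = 3, the preliminary pass computed
 * outer_matched_rows = rint(100 * 0.4) = 40 and inner_scan_frac =
 * 2.0 / (3 + 1) = 0.5, so the join quals are charged for ntuples =
 * 40 * inner_path_rows * 0.5 tuples.  When has_indexed_join_quals()
 * holds, each of the 60 unmatched outer rows pays only
 * inner_rescan_run_cost / inner_path_rows, roughly the cost of fetching
 * a single inner tuple rather than rescanning the whole inner rel.
 */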
/*
- * cost_mergejoin
- * Determines and returns the cost of joining two relations using the
- * merge join algorithm.
- *
- * Unlike other costsize functions, this routine makes one actual decision:
- * whether we should materialize the inner path. We do that either because
- * the inner path can't support mark/restore, or because it's cheaper to
- * use an interposed Material node to handle mark/restore. When the decision
- * is cost-based it would be logically cleaner to build and cost two separate
- * paths with and without that flag set; but that would require repeating most
- * of the calculations here, which are not all that cheap. Since the choice
- * will not affect output pathkeys or startup cost, only total cost, there is
- * no possibility of wanting to keep both paths. So it seems best to make
- * the decision here and record it in the path's materialize_inner field.
- *
- * 'path' is already filled in except for the cost fields and materialize_inner
+ * initial_cost_mergejoin
+ * Preliminary estimate of the cost of a mergejoin path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_mergejoin will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_mergejoin
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here, except for obtaining the scan selectivity estimate which
+ * is really essential (but fortunately, use of caching keeps the cost of
+ * getting that down to something reasonable).
+ * We also assume that cost_sort is cheap enough to use here.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_mergejoin
+ * 'jointype' is the type of join to be performed
+ * 'mergeclauses' is the list of joinclauses to be used as merge clauses
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
+ * 'outersortkeys' is the list of sort keys for the outer path
+ * 'innersortkeys' is the list of sort keys for the inner path
* 'sjinfo' is extra info about the join for selectivity estimation
*
- * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
- * outersortkeys and innersortkeys are lists of the keys to be used
- * to sort the outer and inner relations, or NIL if no explicit
- * sort is needed because the source path is already ordered.
+ * Note: outersortkeys and innersortkeys should be NIL if no explicit
+ * sort is needed because the respective source path is already ordered.
*/
void
-cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *mergeclauses,
+ Path *outer_path, Path *inner_path,
+ List *outersortkeys, List *innersortkeys,
+ SpecialJoinInfo *sjinfo)
{
- Path *outer_path = path->jpath.outerjoinpath;
- Path *inner_path = path->jpath.innerjoinpath;
- List *mergeclauses = path->path_mergeclauses;
- List *outersortkeys = path->outersortkeys;
- List *innersortkeys = path->innersortkeys;
Cost startup_cost = 0;
Cost run_cost = 0;
- Cost cpu_per_tuple,
- inner_run_cost,
- bare_inner_cost,
- mat_inner_cost;
- QualCost merge_qual_cost;
- QualCost qp_qual_cost;
- double outer_path_rows = PATH_ROWS(outer_path);
- double inner_path_rows = PATH_ROWS(inner_path);
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ Cost inner_run_cost;
double outer_rows,
inner_rows,
outer_skip_rows,
inner_skip_rows;
- double mergejointuples,
- rescannedtuples;
- double rescanratio;
Selectivity outerstartsel,
outerendsel,
innerstartsel,
if (inner_path_rows <= 0 || isnan(inner_path_rows))
inner_path_rows = 1;
- if (!enable_mergejoin)
- startup_cost += disable_cost;
-
- /*
- * Compute cost of the mergequals and qpquals (other restriction clauses)
- * separately.
- */
- cost_qual_eval(&merge_qual_cost, mergeclauses, root);
- cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
- qp_qual_cost.startup -= merge_qual_cost.startup;
- qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
-
- /*
- * Get approx # tuples passing the mergequals. We use approx_tuple_count
- * here because we need an estimate done with JOIN_INNER semantics.
- */
- mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
-
- /*
- * When there are equal merge keys in the outer relation, the mergejoin
- * must rescan any matching tuples in the inner relation. This means
- * re-fetching inner tuples; we have to estimate how often that happens.
- *
- * For regular inner and outer joins, the number of re-fetches can be
- * estimated approximately as size of merge join output minus size of
- * inner relation. Assume that the distinct key values are 1, 2, ..., and
- * denote the number of values of each key in the outer relation as m1,
- * m2, ...; in the inner relation, n1, n2, ... Then we have
- *
- * size of join = m1 * n1 + m2 * n2 + ...
- *
- * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
- * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
- * relation
- *
- * This equation works correctly for outer tuples having no inner match
- * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
- * are effectively subtracting those from the number of rescanned tuples,
- * when we should not. Can we do better without expensive selectivity
- * computations?
- *
- * The whole issue is moot if we are working from a unique-ified outer
- * input.
- */
- if (IsA(outer_path, UniquePath))
- rescannedtuples = 0;
- else
- {
- rescannedtuples = mergejointuples - inner_path_rows;
- /* Must clamp because of possible underestimate */
- if (rescannedtuples < 0)
- rescannedtuples = 0;
- }
- /* We'll inflate various costs this much to account for rescanning */
- rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
-
/*
* A merge join will stop as soon as it exhausts either input stream
* (unless it's an outer join, in which case the outer side has to be
* mergejoinscansel() is a fairly expensive computation, we cache the
* results in the merge clause RestrictInfo.
*/
- if (mergeclauses && path->jpath.jointype != JOIN_FULL)
+ if (mergeclauses && jointype != JOIN_FULL)
{
RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);
List *opathkeys;
innerstartsel = cache->leftstartsel;
innerendsel = cache->leftendsel;
}
- if (path->jpath.jointype == JOIN_LEFT ||
- path->jpath.jointype == JOIN_ANTI)
+ if (jointype == JOIN_LEFT ||
+ jointype == JOIN_ANTI)
{
outerstartsel = 0.0;
outerendsel = 1.0;
}
- else if (path->jpath.jointype == JOIN_RIGHT)
+ else if (jointype == JOIN_RIGHT)
{
innerstartsel = 0.0;
innerendsel = 1.0;
* (innerendsel - innerstartsel);
}
+ /*
+ * We can't yet determine whether rescanning occurs, or whether
+ * materialization of the inner input should be done. The minimum
+ * possible inner input cost, regardless of rescan and materialization
+ * considerations, is inner_run_cost. We include that in
+ * workspace->total_cost, but not yet in run_cost.
+ */
+
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost + inner_run_cost;
+ /* Save private data for final_cost_mergejoin */
+ workspace->run_cost = run_cost;
+ workspace->inner_run_cost = inner_run_cost;
+ workspace->outer_rows = outer_rows;
+ workspace->inner_rows = inner_rows;
+ workspace->outer_skip_rows = outer_skip_rows;
+ workspace->inner_skip_rows = inner_skip_rows;
+}
+
+/*
+ * final_cost_mergejoin
+ * Final estimate of the cost and result size of a mergejoin path.
+ *
+ * Unlike other costsize functions, this routine makes one actual decision:
+ * whether we should materialize the inner path. We do that either because
+ * the inner path can't support mark/restore, or because it's cheaper to
+ * use an interposed Material node to handle mark/restore. When the decision
+ * is cost-based it would be logically cleaner to build and cost two separate
+ * paths with and without that flag set; but that would require repeating most
+ * of the cost calculations, which are not all that cheap. Since the choice
+ * will not affect output pathkeys or startup cost, only total cost, there is
+ * no possibility of wanting to keep both paths. So it seems best to make
+ * the decision here and record it in the path's materialize_inner field.
+ *
+ * 'path' is already filled in except for the rows and cost fields and
+ * materialize_inner
+ * 'workspace' is the result from initial_cost_mergejoin
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ */
+void
+final_cost_mergejoin(PlannerInfo *root, MergePath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo)
+{
+ Path *outer_path = path->jpath.outerjoinpath;
+ Path *inner_path = path->jpath.innerjoinpath;
+ double inner_path_rows = inner_path->rows;
+ List *mergeclauses = path->path_mergeclauses;
+ List *innersortkeys = path->innersortkeys;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ Cost inner_run_cost = workspace->inner_run_cost;
+ double outer_rows = workspace->outer_rows;
+ double inner_rows = workspace->inner_rows;
+ double outer_skip_rows = workspace->outer_skip_rows;
+ double inner_skip_rows = workspace->inner_skip_rows;
+ Cost cpu_per_tuple,
+ bare_inner_cost,
+ mat_inner_cost;
+ QualCost merge_qual_cost;
+ QualCost qp_qual_cost;
+ double mergejointuples,
+ rescannedtuples;
+ double rescanratio;
+
+ /* Protect some assumptions below that rowcounts aren't zero or NaN */
+ if (inner_path_rows <= 0 || isnan(inner_path_rows))
+ inner_path_rows = 1;
+
+ /* Estimate the number of rows returned by the join */
+ set_joinpath_size_estimate(root, &path->jpath, sjinfo,
+ path->jpath.joinrestrictinfo);
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_mergejoin)
+ startup_cost += disable_cost;
+
+ /*
+ * Compute cost of the mergequals and qpquals (other restriction clauses)
+ * separately.
+ */
+ cost_qual_eval(&merge_qual_cost, mergeclauses, root);
+ cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
+ qp_qual_cost.startup -= merge_qual_cost.startup;
+ qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;
+
+ /*
+ * Get approx # tuples passing the mergequals. We use approx_tuple_count
+ * here because we need an estimate done with JOIN_INNER semantics.
+ */
+ mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
+
+ /*
+ * When there are equal merge keys in the outer relation, the mergejoin
+ * must rescan any matching tuples in the inner relation. This means
+ * re-fetching inner tuples; we have to estimate how often that happens.
+ *
+ * For regular inner and outer joins, the number of re-fetches can be
+ * estimated approximately as size of merge join output minus size of
+ * inner relation. Assume that the distinct key values are 1, 2, ..., and
+ * denote the number of values of each key in the outer relation as m1,
+ * m2, ...; in the inner relation, n1, n2, ... Then we have
+ *
+ * size of join = m1 * n1 + m2 * n2 + ...
+ *
+ * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
+ * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
+ * relation
+ *
+ * This equation works correctly for outer tuples having no inner match
+ * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
+ * are effectively subtracting those from the number of rescanned tuples,
+ * when we should not. Can we do better without expensive selectivity
+ * computations?
+ *
+ * The whole issue is moot if we are working from a unique-ified outer
+ * input.
+ */
+ if (IsA(outer_path, UniquePath))
+ rescannedtuples = 0;
+ else
+ {
+ rescannedtuples = mergejointuples - inner_path_rows;
+ /* Must clamp because of possible underestimate */
+ if (rescannedtuples < 0)
+ rescannedtuples = 0;
+ }
+ /* We'll inflate various costs this much to account for rescanning */
+ rescanratio = 1.0 + (rescannedtuples / inner_path_rows);
+
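+ /*
+ * For example (illustrative numbers): with outer key multiplicities
+ * m = {2, 3} and inner multiplicities n = {4, 1}, size of join =
+ * 2*4 + 3*1 = 11 and size of inner relation = 5, so rescannedtuples =
+ * 11 - 5 = 6, agreeing with the direct count (2-1)*4 + (3-1)*1 = 6.
+ * Then rescanratio = 1.0 + 6/5 = 2.2: each inner tuple is expected to
+ * be fetched 2.2 times on average.
+ */
+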
/*
* Decide whether we want to materialize the inner input to shield it from
* mark/restore and performing re-fetches. Our cost model for regular
}
/*
- * cost_hashjoin
- * Determines and returns the cost of joining two relations using the
- * hash join algorithm.
- *
- * 'path' is already filled in except for the cost fields
+ * initial_cost_hashjoin
+ * Preliminary estimate of the cost of a hashjoin path.
+ *
+ * This must quickly produce lower-bound estimates of the path's startup and
+ * total costs. If we are unable to eliminate the proposed path from
+ * consideration using the lower bounds, final_cost_hashjoin will be called
+ * to obtain the final estimates.
+ *
+ * The exact division of labor between this function and final_cost_hashjoin
+ * is private to them, and represents a tradeoff between speed of the initial
+ * estimate and getting a tight lower bound. We choose to not examine the
+ * join quals here (other than by counting the number of hash clauses),
+ * so we can't do much with CPU costs. We do assume that
+ * ExecChooseHashTableSize is cheap enough to use here.
+ *
+ * 'workspace' is to be filled with startup_cost, total_cost, and perhaps
+ * other data to be used by final_cost_hashjoin
+ * 'jointype' is the type of join to be performed
+ * 'hashclauses' is the list of joinclauses to be used as hash clauses
+ * 'outer_path' is the outer input to the join
+ * 'inner_path' is the inner input to the join
* 'sjinfo' is extra info about the join for selectivity estimation
- *
- * Note: path's hashclauses should be a subset of the joinrestrictinfo list
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
*/
void
-cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
+initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *hashclauses,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
{
- Path *outer_path = path->jpath.outerjoinpath;
- Path *inner_path = path->jpath.innerjoinpath;
- List *hashclauses = path->path_hashclauses;
Cost startup_cost = 0;
Cost run_cost = 0;
- Cost cpu_per_tuple;
- QualCost hash_qual_cost;
- QualCost qp_qual_cost;
- double hashjointuples;
- double outer_path_rows = PATH_ROWS(outer_path);
- double inner_path_rows = PATH_ROWS(inner_path);
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
int num_hashclauses = list_length(hashclauses);
int numbuckets;
int numbatches;
int num_skew_mcvs;
- double virtualbuckets;
- Selectivity innerbucketsize;
- Selectivity outer_match_frac;
- Selectivity match_count;
- ListCell *hcl;
-
- if (!enable_hashjoin)
- startup_cost += disable_cost;
-
- /*
- * Compute cost of the hashquals and qpquals (other restriction clauses)
- * separately.
- */
- cost_qual_eval(&hash_qual_cost, hashclauses, root);
- cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
- qp_qual_cost.startup -= hash_qual_cost.startup;
- qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
/* cost of source data */
startup_cost += outer_path->startup_cost;
&numbuckets,
&numbatches,
&num_skew_mcvs);
- virtualbuckets = (double) numbuckets *(double) numbatches;
+
+ /*
+ * If inner relation is too big then we will need to "batch" the join,
+ * which implies writing and reading most of the tuples to disk an extra
+ * time. Charge seq_page_cost per page, since the I/O should be nice and
+ * sequential. Writing the inner rel counts as startup cost, all the rest
+ * as run cost.
+ */
+ if (numbatches > 1)
+ {
+ double outerpages = page_size(outer_path_rows,
+ outer_path->parent->width);
+ double innerpages = page_size(inner_path_rows,
+ inner_path->parent->width);
+
+ startup_cost += seq_page_cost * innerpages;
+ run_cost += seq_page_cost * (innerpages + 2 * outerpages);
+ }
+
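+ /*
+ * For example (illustrative numbers): with innerpages = 1000 and
+ * outerpages = 4000, a multi-batch join adds 1000 * seq_page_cost to
+ * startup cost (writing out the inner rel) and (1000 + 2*4000) *
+ * seq_page_cost = 9000 * seq_page_cost to run cost (reading the inner
+ * rel back, plus writing and rereading the outer rel).
+ */
+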
+ /* CPU costs left for later */
+
+ /* Public result fields */
+ workspace->startup_cost = startup_cost;
+ workspace->total_cost = startup_cost + run_cost;
+ /* Save private data for final_cost_hashjoin */
+ workspace->run_cost = run_cost;
+ workspace->numbuckets = numbuckets;
+ workspace->numbatches = numbatches;
+}
+
+/*
+ * final_cost_hashjoin
+ * Final estimate of the cost and result size of a hashjoin path.
+ *
+ * Note: the numbatches estimate is also saved into 'path' for use later
+ *
+ * 'path' is already filled in except for the rows and cost fields and
+ * num_batches
+ * 'workspace' is the result from initial_cost_hashjoin
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if path->jointype is SEMI or ANTI
+ */
+void
+final_cost_hashjoin(PlannerInfo *root, HashPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors)
+{
+ Path *outer_path = path->jpath.outerjoinpath;
+ Path *inner_path = path->jpath.innerjoinpath;
+ double outer_path_rows = outer_path->rows;
+ double inner_path_rows = inner_path->rows;
+ List *hashclauses = path->path_hashclauses;
+ Cost startup_cost = workspace->startup_cost;
+ Cost run_cost = workspace->run_cost;
+ int numbuckets = workspace->numbuckets;
+ int numbatches = workspace->numbatches;
+ Cost cpu_per_tuple;
+ QualCost hash_qual_cost;
+ QualCost qp_qual_cost;
+ double hashjointuples;
+ double virtualbuckets;
+ Selectivity innerbucketsize;
+ ListCell *hcl;
+
+ /* Estimate the number of rows returned by the join */
+ set_joinpath_size_estimate(root, &path->jpath, sjinfo,
+ path->jpath.joinrestrictinfo);
+
+ /*
+ * We could include disable_cost in the preliminary estimate, but that
+ * would amount to optimizing for the case where the join method is
+ * disabled, which doesn't seem like the way to bet.
+ */
+ if (!enable_hashjoin)
+ startup_cost += disable_cost;
/* mark the path with estimated # of batches */
path->num_batches = numbatches;
+ /* and compute the number of "virtual" buckets in the whole join */
+ virtualbuckets = (double) numbuckets *(double) numbatches;
+
/*
* Determine bucketsize fraction for inner relation. We use the smallest
* bucketsize estimated for any individual hashclause; this is undoubtedly
}
/*
- * If inner relation is too big then we will need to "batch" the join,
- * which implies writing and reading most of the tuples to disk an extra
- * time. Charge seq_page_cost per page, since the I/O should be nice and
- * sequential. Writing the inner rel counts as startup cost, all the rest
- * as run cost.
+ * Compute cost of the hashquals and qpquals (other restriction clauses)
+ * separately.
*/
- if (numbatches > 1)
- {
- double outerpages = page_size(outer_path_rows,
- outer_path->parent->width);
- double innerpages = page_size(inner_path_rows,
- inner_path->parent->width);
-
- startup_cost += seq_page_cost * innerpages;
- run_cost += seq_page_cost * (innerpages + 2 * outerpages);
- }
+ cost_qual_eval(&hash_qual_cost, hashclauses, root);
+ cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);
+ qp_qual_cost.startup -= hash_qual_cost.startup;
+ qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;
/* CPU costs */
- if (adjust_semi_join(root, &path->jpath, sjinfo,
- &outer_match_frac,
- &match_count,
- NULL))
+ if (path->jpath.jointype == JOIN_SEMI || path->jpath.jointype == JOIN_ANTI)
{
double outer_matched_rows;
Selectivity inner_scan_frac;
* to clamp inner_scan_frac to at most 1.0; but since match_count is
* at least 1, no such clamp is needed now.)
*/
- outer_matched_rows = rint(outer_path_rows * outer_match_frac);
- inner_scan_frac = 2.0 / (match_count + 1.0);
+ outer_matched_rows = rint(outer_path_rows * semifactors->outer_match_frac);
+ inner_scan_frac = 2.0 / (semifactors->match_count + 1.0);
startup_cost += hash_qual_cost.startup;
run_cost += hash_qual_cost.per_tuple * outer_matched_rows *
* cpu_tuple_cost per tuple, unless the result is large enough
* to spill to disk.
*/
- Cost run_cost = cpu_tuple_cost * path->parent->rows;
- double nbytes = relation_byte_size(path->parent->rows,
+ Cost run_cost = cpu_tuple_cost * path->rows;
+ double nbytes = relation_byte_size(path->rows,
path->parent->width);
long work_mem_bytes = work_mem * 1024L;
* the run_cost charge in cost_sort, and also see comments in
* cost_material before you change it.)
*/
- Cost run_cost = cpu_operator_cost * path->parent->rows;
- double nbytes = relation_byte_size(path->parent->rows,
+ Cost run_cost = cpu_operator_cost * path->rows;
+ double nbytes = relation_byte_size(path->rows,
path->parent->width);
long work_mem_bytes = work_mem * 1024L;
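/*
 * Illustration (not part of the patch), with made-up numbers: rescanning
 * a materialized path with path->rows = 10000 at the default
 * cpu_operator_cost = 0.0025 gives run_cost = 25.  relation_byte_size()
 * then estimates the stored size (roughly rows * width plus per-tuple
 * overhead); only when that exceeds work_mem * 1024 bytes is a further
 * seq_page_cost-per-page charge added for the spilled data.
 */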
/*
- * adjust_semi_join
+ * compute_semi_anti_join_factors
* Estimate how much of the inner input a SEMI or ANTI join
* can be expected to scan.
*
* inner rows as soon as it finds a match to the current outer row.
* We should therefore adjust some of the cost components for this effect.
* This function computes some estimates needed for these adjustments.
- *
- * 'path' is already filled in except for the cost fields
- * 'sjinfo' is extra info about the join for selectivity estimation
- *
- * Returns TRUE if this is a SEMI or ANTI join, FALSE if not.
- *
- * Output parameters (set only in TRUE-result case):
- * *outer_match_frac is set to the fraction of the outer tuples that are
- * expected to have at least one match.
- * *match_count is set to the average number of matches expected for
- * outer tuples that have at least one match.
- * *indexed_join_quals is set to TRUE if all the joinquals are used as
- * inner index quals, FALSE if not.
- *
- * indexed_join_quals can be passed as NULL if that information is not
- * relevant (it is only useful for the nestloop case).
+ * These estimates will be the same regardless of the particular paths used
+ * for the outer and inner relation, so we compute these once and then pass
+ * them to all the join cost estimation functions.
+ *
+ * Input parameters:
+ * outerrel: outer relation under consideration
+ * innerrel: inner relation under consideration
+ * jointype: must be JOIN_SEMI or JOIN_ANTI
+ * sjinfo: SpecialJoinInfo relevant to this join
+ * restrictlist: join quals
+ * Output parameters:
+ * *semifactors is filled in (see relation.h for field definitions)
*/
-static bool
-adjust_semi_join(PlannerInfo *root, JoinPath *path, SpecialJoinInfo *sjinfo,
- Selectivity *outer_match_frac,
- Selectivity *match_count,
- bool *indexed_join_quals)
+void
+compute_semi_anti_join_factors(PlannerInfo *root,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist,
+ SemiAntiJoinFactors *semifactors)
{
- JoinType jointype = path->jointype;
Selectivity jselec;
Selectivity nselec;
Selectivity avgmatch;
List *joinquals;
ListCell *l;
- /* Fall out if it's not JOIN_SEMI or JOIN_ANTI */
- if (jointype != JOIN_SEMI && jointype != JOIN_ANTI)
- return false;
-
- /*
- * Note: it's annoying to repeat this selectivity estimation on each call,
- * when the joinclause list will be the same for all path pairs
- * implementing a given join. clausesel.c will save us from the worst
- * effects of this by caching at the RestrictInfo level; but perhaps it'd
- * be worth finding a way to cache the results at a higher level.
- */
+ /* Should only be called in these cases */
+ Assert(jointype == JOIN_SEMI || jointype == JOIN_ANTI);
/*
* In an ANTI join, we must ignore clauses that are "pushed down", since
if (jointype == JOIN_ANTI)
{
joinquals = NIL;
- foreach(l, path->joinrestrictinfo)
+ foreach(l, restrictlist)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
}
}
else
- joinquals = path->joinrestrictinfo;
+ joinquals = restrictlist;
/*
* Get the JOIN_SEMI or JOIN_ANTI selectivity of the join clauses.
* Also get the normal inner-join selectivity of the join clauses.
*/
norm_sjinfo.type = T_SpecialJoinInfo;
- norm_sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
- norm_sjinfo.min_righthand = path->innerjoinpath->parent->relids;
- norm_sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
- norm_sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
+ norm_sjinfo.min_lefthand = outerrel->relids;
+ norm_sjinfo.min_righthand = innerrel->relids;
+ norm_sjinfo.syn_lefthand = outerrel->relids;
+ norm_sjinfo.syn_righthand = innerrel->relids;
norm_sjinfo.jointype = JOIN_INNER;
/* we don't bother trying to make the remaining fields valid */
norm_sjinfo.lhs_strict = false;
* number of matches for each outer-rel row that has at least one match is
* nselec * inner_rows / jselec.
*
- * Note: it is correct to use the inner rel's "rows" count here, not
- * PATH_ROWS(), even if the inner path under consideration is an inner
- * indexscan. This is because we have included all the join clauses in
- * the selectivity estimate, even ones used in an inner indexscan.
+ * Note: it is correct to use the inner rel's "rows" count here, even
+ * though we might later be considering a parameterized inner path with
+ * fewer rows. This is because we have included all the join clauses
+ * in the selectivity estimate.
*/
if (jselec > 0) /* protect against zero divide */
{
- avgmatch = nselec * path->innerjoinpath->parent->rows / jselec;
+ avgmatch = nselec * innerrel->rows / jselec;
/* Clamp to sane range */
avgmatch = Max(1.0, avgmatch);
}
else
avgmatch = 1.0;
- *outer_match_frac = jselec;
- *match_count = avgmatch;
+ semifactors->outer_match_frac = jselec;
+ semifactors->match_count = avgmatch;
+}
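/*
 * Illustration (not part of the patch), with made-up numbers: if the
 * SEMI-join selectivity of the quals is jselec = 0.4 (40% of outer rows
 * have at least one match), the inner-join selectivity is nselec = 0.012,
 * and innerrel->rows = 100, then avgmatch = 0.012 * 100 / 0.4 = 3 matches
 * per matched outer row.  The cost functions above turn that into
 * inner_scan_frac = 2.0 / (3 + 1) = 0.5, the expected fraction of the
 * inner input scanned before the first match stops the scan.
 */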
- /*
- * If requested, check whether the inner path uses all the joinquals as
- * indexquals. (If that's true, we can assume that an unmatched outer
- * tuple is cheap to process, whereas otherwise it's probably expensive.)
- */
- if (indexed_join_quals)
+/*
+ * has_indexed_join_quals
+ * Check whether all the joinquals of a nestloop join are used as
+ * inner index quals.
+ *
+ * If the inner path of a SEMI/ANTI join is an indexscan (including bitmap
+ * indexscan) that uses all the joinquals as indexquals, we can assume that an
+ * unmatched outer tuple is cheap to process, whereas otherwise it's probably
+ * expensive.
+ */
+static bool
+has_indexed_join_quals(NestPath *path, List *joinclauses)
+{
+ NodeTag pathtype = path->innerjoinpath->pathtype;
+
+ if (pathtype == T_IndexScan ||
+ pathtype == T_IndexOnlyScan ||
+ pathtype == T_BitmapHeapScan)
{
if (path->joinrestrictinfo != NIL)
{
- List *nrclauses;
-
- nrclauses = select_nonredundant_join_clauses(root,
- path->joinrestrictinfo,
- path->innerjoinpath);
- *indexed_join_quals = (nrclauses == NIL);
+ /* OK if all those clauses were found to be redundant */
+ return (joinclauses == NIL);
}
else
{
/* a clauseless join does NOT qualify */
- *indexed_join_quals = false;
+ return false;
}
}
-
- return true;
+ else
+ {
+ /*
+ * If it's not a simple indexscan, it probably doesn't run quickly for
+ * zero rows out, even if it's a parameterized path using all the
+ * joinquals.
+ */
+ return false;
+ }
}
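/*
 * Illustration (not part of the patch): for a nestloop whose inner side
 * is an indexscan on inner.x, with the single joinqual outer.y = inner.x
 * used as an index qual, select_nonredundant_join_clauses() strips that
 * clause as redundant, the joinclauses list arrives here as NIL, and the
 * function returns true: an unmatched outer row ends its inner scan
 * almost immediately.
 */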
approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
{
double tuples;
- double outer_tuples = path->outerjoinpath->parent->rows;
- double inner_tuples = path->innerjoinpath->parent->rows;
+ double outer_tuples = path->outerjoinpath->rows;
+ double inner_tuples = path->innerjoinpath->rows;
SpecialJoinInfo sjinfo;
Selectivity selec = 1.0;
ListCell *l;
RelOptInfo *inner_rel,
SpecialJoinInfo *sjinfo,
List *restrictlist)
+{
+ rel->rows = calc_joinrel_size_estimate(root,
+ outer_rel->rows,
+ inner_rel->rows,
+ sjinfo,
+ restrictlist);
+}
+
+/*
+ * set_joinpath_size_estimate
+ * Set the rows estimate for the given join path.
+ *
+ * If the join is not parameterized by any joinclauses from higher joins, the
+ * estimate is the same as previously computed by set_joinrel_size_estimates.
+ * Otherwise, we estimate afresh using the identical logic, but with the rows
+ * estimates from the input paths (which are typically less than their rels'
+ * regular row estimates) and the restriction clauses actually being applied
+ * at the join.
+ */
+static void
+set_joinpath_size_estimate(PlannerInfo *root, JoinPath *path,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist)
+{
+ if (path->path.required_outer)
+ {
+ path->path.rows = calc_joinrel_size_estimate(root,
+ path->outerjoinpath->rows,
+ path->innerjoinpath->rows,
+ sjinfo,
+ restrictlist);
+ /* For safety, make sure result is not more than the base estimate */
+ if (path->path.rows > path->path.parent->rows)
+ path->path.rows = path->path.parent->rows;
+ }
+ else
+ path->path.rows = path->path.parent->rows;
+}
+
+/*
+ * calc_joinrel_size_estimate
+ * Workhorse for set_joinrel_size_estimates and set_joinpath_size_estimate
+ */
+static double
+calc_joinrel_size_estimate(PlannerInfo *root,
+ double outer_rows,
+ double inner_rows,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist)
{
JoinType jointype = sjinfo->jointype;
Selectivity jselec;
switch (jointype)
{
case JOIN_INNER:
- nrows = outer_rel->rows * inner_rel->rows * jselec;
+ nrows = outer_rows * inner_rows * jselec;
break;
case JOIN_LEFT:
- nrows = outer_rel->rows * inner_rel->rows * jselec;
- if (nrows < outer_rel->rows)
- nrows = outer_rel->rows;
+ nrows = outer_rows * inner_rows * jselec;
+ if (nrows < outer_rows)
+ nrows = outer_rows;
nrows *= pselec;
break;
case JOIN_FULL:
- nrows = outer_rel->rows * inner_rel->rows * jselec;
- if (nrows < outer_rel->rows)
- nrows = outer_rel->rows;
- if (nrows < inner_rel->rows)
- nrows = inner_rel->rows;
+ nrows = outer_rows * inner_rows * jselec;
+ if (nrows < outer_rows)
+ nrows = outer_rows;
+ if (nrows < inner_rows)
+ nrows = inner_rows;
nrows *= pselec;
break;
case JOIN_SEMI:
- nrows = outer_rel->rows * jselec;
+ nrows = outer_rows * jselec;
/* pselec not used */
break;
case JOIN_ANTI:
- nrows = outer_rel->rows * (1.0 - jselec);
+ nrows = outer_rows * (1.0 - jselec);
nrows *= pselec;
break;
default:
break;
}
- rel->rows = clamp_row_est(nrows);
+ return clamp_row_est(nrows);
}
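/*
 * Illustration (not part of the patch), with made-up numbers: given
 * outer_rows = 1000, inner_rows = 50 and jselec = 0.02, the cases above
 * yield JOIN_INNER: 1000 * 50 * 0.02 = 1000 rows; JOIN_SEMI: 1000 * 0.02
 * = 20 rows; JOIN_ANTI: 1000 * (1 - 0.02) = 980 rows (before the pselec
 * factor for pushed-down quals).  JOIN_LEFT and JOIN_FULL first clamp
 * the inner-join figure to at least the outer (and, for FULL, the inner)
 * row count.
 */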
/*
bool outer_on_left);
static bool reconsider_full_join_clause(PlannerInfo *root,
RestrictInfo *rinfo);
+static Index get_parent_relid(PlannerInfo *root, RelOptInfo *rel);
/*
*
* The results are sufficient for use in merge, hash, and plain nestloop join
* methods. We do not worry here about selecting clauses that are optimal
- * for use in a nestloop-with-inner-indexscan join, however. indxpath.c makes
+ * for use in a nestloop-with-parameterized-inner-scan. indxpath.c makes
* its own selections of clauses to use, and if the ones we pick here are
* redundant with those, the extras will be eliminated in createplan.c.
*
/*
- * find_eclass_clauses_for_index_join
- * Create joinclauses usable for a nestloop-with-inner-indexscan
- * scanning the given inner rel with the specified set of outer rels.
+ * generate_implied_equalities_for_indexcol
+ * Create EC-derived joinclauses usable with a specific index column.
+ *
+ * We assume that any given index column could appear in only one EC.
+ * (This should be true in all but the most pathological cases, and if it
+ * isn't, we stop on the first match anyway.) Therefore, what we return
+ * is a redundant list of clauses equating the index column to each of
+ * the other-relation values it is known to be equal to. Any one of
+ * these clauses can be used to create a parameterized indexscan, and there
+ * is no value in using more than one. (But it *is* worthwhile to create
+ * a separate parameterized path for each one, since that leads to different
+ * join orders.)
*/
List *
-find_eclass_clauses_for_index_join(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids)
+generate_implied_equalities_for_indexcol(PlannerInfo *root,
+ IndexOptInfo *index,
+ int indexcol)
{
List *result = NIL;
+ RelOptInfo *rel = index->rel;
bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ Index parent_relid;
ListCell *lc1;
+ /* If it's a child rel, we'll need to know what its parent is */
+ if (is_child_rel)
+ parent_relid = get_parent_relid(root, rel);
+ else
+ parent_relid = 0; /* not used, but keep compiler quiet */
+
foreach(lc1, root->eq_classes)
{
EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
+ EquivalenceMember *cur_em;
ListCell *lc2;
/*
if (!is_child_rel &&
!bms_is_subset(rel->relids, cur_ec->ec_relids))
continue;
- /* ... nor if no overlap with outer_relids */
- if (!bms_overlap(outer_relids, cur_ec->ec_relids))
+
+ /* Scan members, looking for a match to the indexable column */
+ cur_em = NULL;
+ foreach(lc2, cur_ec->ec_members)
+ {
+ cur_em = (EquivalenceMember *) lfirst(lc2);
+ if (bms_equal(cur_em->em_relids, rel->relids) &&
+ eclass_member_matches_indexcol(cur_ec, cur_em,
+ index, indexcol))
+ break;
+ cur_em = NULL;
+ }
+
+ if (!cur_em)
continue;
- /* Scan members, looking for indexable columns */
+ /*
+ * Found our match. Scan the other EC members and attempt to generate
+ * joinclauses.
+ */
foreach(lc2, cur_ec->ec_members)
{
- EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
- EquivalenceMember *best_outer_em = NULL;
- Oid best_eq_op = InvalidOid;
- ListCell *lc3;
+ EquivalenceMember *other_em = (EquivalenceMember *) lfirst(lc2);
+ Oid eq_op;
+ RestrictInfo *rinfo;
- if (!bms_equal(cur_em->em_relids, rel->relids) ||
- !eclass_matches_any_index(cur_ec, cur_em, rel))
+ /* Make sure it'll be a join to a different rel */
+ if (other_em == cur_em ||
+ bms_overlap(other_em->em_relids, rel->relids))
continue;
/*
- * Found one, so try to generate a join clause. This is like
- * generate_join_implied_equalities_normal, except simpler since
- * our only preference item is to pick a Var on the outer side. We
- * only need one join clause per index col.
+ * Also, if this is a child rel, avoid generating a useless join
+ * to its parent rel.
*/
- foreach(lc3, cur_ec->ec_members)
- {
- EquivalenceMember *outer_em = (EquivalenceMember *) lfirst(lc3);
- Oid eq_op;
-
- if (!bms_is_subset(outer_em->em_relids, outer_relids))
- continue;
- eq_op = select_equality_operator(cur_ec,
- cur_em->em_datatype,
- outer_em->em_datatype);
- if (!OidIsValid(eq_op))
- continue;
- best_outer_em = outer_em;
- best_eq_op = eq_op;
- if (IsA(outer_em->em_expr, Var) ||
- (IsA(outer_em->em_expr, RelabelType) &&
- IsA(((RelabelType *) outer_em->em_expr)->arg, Var)))
- break; /* no need to look further */
- }
+ if (is_child_rel &&
+ bms_is_member(parent_relid, other_em->em_relids))
+ continue;
- if (best_outer_em)
- {
- /* Found a suitable joinclause */
- RestrictInfo *rinfo;
+ eq_op = select_equality_operator(cur_ec,
+ cur_em->em_datatype,
+ other_em->em_datatype);
+ if (!OidIsValid(eq_op))
+ continue;
- /* set parent_ec to mark as redundant with other joinclauses */
- rinfo = create_join_clause(root, cur_ec, best_eq_op,
- cur_em, best_outer_em,
- cur_ec);
+ /* set parent_ec to mark as redundant with other joinclauses */
+ rinfo = create_join_clause(root, cur_ec, eq_op,
+ cur_em, other_em,
+ cur_ec);
- result = lappend(result, rinfo);
-
- /*
- * Note: we keep scanning here because we want to provide a
- * clause for every possible indexcol.
- */
- }
+ result = lappend(result, rinfo);
}
+
+ /*
+ * If somehow we failed to create any join clauses, we might as well
+ * keep scanning the ECs for another match. But if we did make any,
+ * we're done, because we don't want to return non-redundant clauses.
+ */
+ if (result)
+ break;
}
return result;
}
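/*
 * Illustration (not part of the patch): given an EquivalenceClass
 * {a.x, b.y, c.z} and an index column on a.x, the loop above returns
 * {a.x = b.y, a.x = c.z}.  Either clause alone can parameterize an
 * indexscan on "a" (by "b" or by "c" respectively), which is why the
 * list is deliberately redundant and why the EC search stops as soon as
 * any clauses have been generated.
 */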
+/*
+ * get_parent_relid
+ * Get the relid of a child rel's parent appendrel
+ *
+ * Possibly this should be somewhere else, but right now the logic is only
+ * needed here.
+ */
+static Index
+get_parent_relid(PlannerInfo *root, RelOptInfo *rel)
+{
+ ListCell *lc;
+
+ foreach(lc, root->append_rel_list)
+ {
+ AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
+
+ if (appinfo->child_relid == rel->relid)
+ return appinfo->parent_relid;
+ }
+ /* should have found the entry ... */
+ elog(ERROR, "child rel not found in append_rel_list");
+ return 0;
+}
/*
* have_relevant_eclass_joinclause
ST_ANYSCAN /* either is okay */
} ScanTypeControl;
+/* Data structure for collecting qual clauses that match an index */
+typedef struct
+{
+ bool nonempty; /* True if lists are not all empty */
+ /* Lists of RestrictInfos, one per index column */
+ List *indexclauses[INDEX_MAX_KEYS];
+} IndexClauseSet;
+
/* Per-path data used within choose_bitmap_and() */
typedef struct
{
} PathClauseUsage;
-static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel,
- SaOpControl saop_control, ScanTypeControl scantype);
-static List *find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel);
+static void consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths);
+static void expand_eclass_clause_combinations(PlannerInfo *root,
+ RelOptInfo *rel,
+ IndexOptInfo *index,
+ int thiscol, int lastcol,
+ IndexClauseSet *clauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths);
+static void get_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ List **bitindexpaths);
+static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ bool useful_predicate,
+ SaOpControl saop_control, ScanTypeControl scantype);
+static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses);
+static List *drop_indexable_join_clauses(RelOptInfo *rel, List *clauses);
static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel);
+ List *paths);
static int path_usage_comparator(const void *a, const void *b);
static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
- Path *ipath, RelOptInfo *outer_rel);
+ Path *ipath);
static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel);
+ List *paths);
static PathClauseUsage *classify_index_clause_usage(Path *path,
List **clauselist);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
static int find_list_position(Node *node, List **nodelist);
static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static double get_loop_count(PlannerInfo *root, Relids outer_relids);
+static void match_restriction_clauses_to_index(RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
+static void match_join_clauses_to_index(PlannerInfo *root,
+ RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset,
+ List **joinorclauses);
+static void match_eclass_clauses_to_index(PlannerInfo *root,
+ IndexOptInfo *index,
+ IndexClauseSet *clauseset);
static void match_clauses_to_index(IndexOptInfo *index,
- List *clauses, List *outer_clauses,
- Relids outer_relids,
- SaOpControl saop_control,
- List **index_clauses_p,
- List **clause_columns_p,
- bool *found_clause);
+ List *clauses,
+ IndexClauseSet *clauseset);
+static void match_clause_to_index(IndexOptInfo *index,
+ RestrictInfo *rinfo,
+ IndexClauseSet *clauseset);
static bool match_clause_to_indexcol(IndexOptInfo *index,
int indexcol,
- RestrictInfo *rinfo,
- Relids outer_relids,
- SaOpControl saop_control);
+ RestrictInfo *rinfo);
static bool is_indexable_operator(Oid expr_op, Oid opfamily,
bool indexkey_on_left);
static bool match_rowcompare_to_indexcol(IndexOptInfo *index,
int indexcol,
Oid opfamily,
Oid idxcollation,
- RowCompareExpr *clause,
- Relids outer_relids);
+ RowCompareExpr *clause);
static void match_pathkeys_to_index(IndexOptInfo *index, List *pathkeys,
List **orderby_clauses_p,
List **clause_columns_p);
static Expr *match_clause_to_ordering_op(IndexOptInfo *index,
int indexcol, Expr *clause, Oid pk_opfamily);
-static Relids indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel);
-static bool matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel,
- Relids outer_relids);
-static List *find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, bool isouterjoin);
static bool match_boolean_index_clause(Node *clause, int indexcol,
IndexOptInfo *index);
static bool match_special_index_operator(Expr *clause,
*
* There are two basic kinds of index scans. A "plain" index scan uses
* only restriction clauses (possibly none at all) in its indexqual,
- * so it can be applied in any context. An "innerjoin" index scan uses
+ * so it can be applied in any context. A "parameterized" index scan uses
* join clauses (plus restriction clauses, if available) in its indexqual.
- * Therefore it can only be used as the inner relation of a nestloop
- * join against an outer rel that includes all the other rels mentioned
- * in its join clauses. In that context, values for the other rels'
+ * When joining such a scan to one of the relations supplying the other
+ * variables used in its indexqual, the parameterized scan must appear as
+ * the inner relation of a nestloop join; it can't be used on the outer side,
+ * nor in a merge or hash join. In that context, values for the other rels'
* attributes are available and fixed during any one scan of the indexpath.
*
- * An IndexPath is generated and submitted to add_path() for each plain index
- * scan this routine deems potentially interesting for the current query.
- *
- * We also determine the set of other relids that participate in join
- * clauses that could be used with each index. The actually best innerjoin
- * path will be generated for each outer relation later on, but knowing the
- * set of potential otherrels allows us to identify equivalent outer relations
- * and avoid repeated computation.
+ * An IndexPath is generated and submitted to add_path() for each plain or
+ * parameterized index scan this routine deems potentially interesting for
+ * the current query.
*
* 'rel' is the relation for which we want to generate index paths
*
{
List *indexpaths;
List *bitindexpaths;
- ListCell *l;
+ List *bitjoinpaths;
+ List *joinorclauses;
+ IndexClauseSet rclauseset;
+ IndexClauseSet jclauseset;
+ IndexClauseSet eclauseset;
+ ListCell *ilist;
/* Skip the whole mess if no indexes */
if (rel->indexlist == NIL)
- {
- rel->index_outer_relids = NULL;
return;
+
+ /* Bitmap paths are collected and then dealt with at the end */
+ bitindexpaths = bitjoinpaths = joinorclauses = NIL;
+
+ /* Examine each index in turn */
+ foreach(ilist, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
+
+ /* Protect limited-size array in IndexClauseSets */
+ Assert(index->ncolumns <= INDEX_MAX_KEYS);
+
+ /*
+ * Ignore partial indexes that do not match the query.
+ * (generate_bitmap_or_paths() might be able to do something with
+ * them, but that's of no concern here.)
+ */
+ if (index->indpred != NIL && !index->predOK)
+ continue;
+
+ /*
+ * Identify the restriction clauses that can match the index.
+ */
+ MemSet(&rclauseset, 0, sizeof(rclauseset));
+ match_restriction_clauses_to_index(rel, index, &rclauseset);
+
+ /*
+ * Build index paths from the restriction clauses. These will be
+ * non-parameterized paths. Plain paths go directly to add_path(),
+ * bitmap paths are added to bitindexpaths to be handled below.
+ */
+ get_index_paths(root, rel, index, &rclauseset,
+ &bitindexpaths);
+
+ /*
+ * Identify the join clauses that can match the index. For the moment
+ * we keep them separate from the restriction clauses. Note that
+ * this finds only "loose" join clauses that have not been merged
+ * into EquivalenceClasses. Also, collect join OR clauses for later.
+ */
+ MemSet(&jclauseset, 0, sizeof(jclauseset));
+ match_join_clauses_to_index(root, rel, index,
+ &jclauseset, &joinorclauses);
+
+ /*
+ * Look for EquivalenceClasses that can generate joinclauses
+ * matching the index.
+ */
+ MemSet(&eclauseset, 0, sizeof(eclauseset));
+ match_eclass_clauses_to_index(root, index, &eclauseset);
+
+ /*
+ * If we found any plain or eclass join clauses, decide what to
+ * do with 'em.
+ */
+ if (jclauseset.nonempty || eclauseset.nonempty)
+ consider_index_join_clauses(root, rel, index,
+ &rclauseset,
+ &jclauseset,
+ &eclauseset,
+ &bitjoinpaths);
}
/*
- * Examine join clauses to see which ones are potentially usable with
- * indexes of this rel, and generate the set of all other relids that
- * participate in such join clauses. We'll use this set later to
- * recognize outer rels that are equivalent for joining purposes.
+ * Generate BitmapOrPaths for any suitable OR-clauses present in the
+ * restriction list. Add these to bitindexpaths.
*/
- rel->index_outer_relids = indexable_outerrelids(root, rel);
+ indexpaths = generate_bitmap_or_paths(root, rel,
+ rel->baserestrictinfo, NIL,
+ false);
+ bitindexpaths = list_concat(bitindexpaths, indexpaths);
/*
- * Find all the index paths that are directly usable for this relation
- * (ie, are valid without considering OR or JOIN clauses).
+ * Likewise, generate BitmapOrPaths for any suitable OR-clauses present in
+ * the joinclause list. Add these to bitjoinpaths.
*/
- indexpaths = find_usable_indexes(root, rel,
- rel->baserestrictinfo, NIL,
- true, NULL, SAOP_PER_AM, ST_ANYSCAN);
+ indexpaths = generate_bitmap_or_paths(root, rel,
+ joinorclauses, rel->baserestrictinfo,
+ false);
+ bitjoinpaths = list_concat(bitjoinpaths, indexpaths);
+
+ /*
+ * If we found anything usable, generate a BitmapHeapPath for the most
+ * promising combination of restriction bitmap index paths. Note there
+ * will be only one such path no matter how many indexes exist. This
+ * should be sufficient since there's basically only one figure of merit
+ * (total cost) for such a path.
+ */
+ if (bitindexpaths != NIL)
+ {
+ Path *bitmapqual;
+ BitmapHeapPath *bpath;
+
+ bitmapqual = choose_bitmap_and(root, rel, bitindexpaths);
+ bpath = create_bitmap_heap_path(root, rel, bitmapqual, 1.0);
+ add_path(rel, (Path *) bpath);
+ }
+
+ /*
+ * Likewise, if we found anything usable, generate a BitmapHeapPath for
+ * the most promising combination of join bitmap index paths. Note there
+ * will be only one such path no matter how many join clauses are
+ * available. (XXX is that good enough, or do we need to consider even
+ * more paths for different subsets of possible join partners? Also,
+ * should we add in restriction bitmap paths as well?)
+ */
+ if (bitjoinpaths != NIL)
+ {
+ Path *bitmapqual;
+ double loop_count;
+ BitmapHeapPath *bpath;
+
+ bitmapqual = choose_bitmap_and(root, rel, bitjoinpaths);
+ loop_count = get_loop_count(root, bitmapqual->required_outer);
+ bpath = create_bitmap_heap_path(root, rel, bitmapqual, loop_count);
+ add_path(rel, (Path *) bpath);
+ }
+}
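/*
 * Illustration (not part of the patch): for a query like
 *     SELECT ... FROM a JOIN b ON a.x = b.y WHERE a.z > 10
 * with an index on a(x), the loop above finds no matching restriction
 * clauses, records a.x = b.y in jclauseset for column 1, and
 * consider_index_join_clauses() then builds an indexscan on "a"
 * parameterized by "b", usable on the inside of a nestloop; any
 * bitmap-capable variants are collected in bitjoinpaths and combined
 * into a single BitmapHeapPath at the end.
 */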
+
+/*
+ * consider_index_join_clauses
+ * Given sets of join clauses for an index, decide which parameterized
+ * index paths to build.
+ *
+ * Plain indexpaths are sent directly to add_path, while potential
+ * bitmap indexpaths are added to *bitindexpaths for later processing.
+ *
+ * 'rel' is the index's heap relation
+ * 'index' is the index for which we want to generate paths
+ * 'rclauseset' is the collection of indexable restriction clauses
+ * 'jclauseset' is the collection of indexable simple join clauses
+ * 'eclauseset' is the collection of indexable clauses from EquivalenceClasses
+ * '*bitindexpaths' is the list to add bitmap paths to
+ *
+ * Note: this changes the clause lists contained in the passed clausesets,
+ * but we don't care since the caller is done with them.
+ */
+static void
+consider_index_join_clauses(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ IndexClauseSet *rclauseset,
+ IndexClauseSet *jclauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths)
+{
+ IndexClauseSet clauseset;
+ int last_eclass_col;
+ int indexcol;
+
+ /*
+ * We can always include any restriction clauses in the index clauses.
+ * However, it's not obvious which subsets of the join clauses are worth
+ * generating paths from, and it's unlikely that considering every
+ * possible subset is worth the cycles. Our current heuristic is based
+ * on the index columns, with the idea that later index columns are less
+ * useful than earlier ones; therefore it's unlikely to be worth trying
+ * combinations that would remove a clause from an earlier index column
+ * while adding one to a later column. Also, we know that all the
+ * eclass clauses for a particular column are redundant, so we should
+ * use only one of them. However, eclass clauses will always represent
+ * equality which is the strongest type of index constraint, so those
+ * are high-value and we should try every available combination when we
+ * have eclass clauses for more than one column. Furthermore, it's
+ * unlikely to be useful to combine an eclass clause with non-eclass
+ * clauses for the same index column. These considerations lead to the
+ * following heuristics:
+ *
+ * First, start with the restriction clauses, and add on all simple join
+ * clauses for column 1. If there are any such join clauses, generate
+ * paths with this collection of clauses. Then, if there are eclass
+ * clauses for column 1, generate paths with each one of them replacing
+ * any other clauses we have for column 1.
+ *
+ * Next, add on all simple join clauses for column 2. If there are any
+ * such join clauses, generate paths with this collection. If there are
+ * eclass clauses for columns 1 or 2, generate paths with each such clause
+ * replacing other clauses for its index column, including cases where we
+ * use restriction or simple join clauses for one column and an eclass
+ * clause for the other.
+ *
+ * Repeat for each additional index column.
+ */
+
+ /* Set up working set with just the restriction clauses */
+ memcpy(&clauseset, rclauseset, sizeof(clauseset));
+ /* Even if it's empty right now, it won't be by the time we use it */
+ clauseset.nonempty = true;
+
+ last_eclass_col = -1;
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ /*
+ * If we don't have either simple join clauses or eclass clauses for
+ * this column, no new paths can be created in this iteration.
+ */
+ if (jclauseset->indexclauses[indexcol] == NIL &&
+ eclauseset->indexclauses[indexcol] == NIL)
+ continue;
+
+ /* Add any simple join clauses to the working set */
+ clauseset.indexclauses[indexcol] =
+ list_concat(clauseset.indexclauses[indexcol],
+ jclauseset->indexclauses[indexcol]);
+
+ /* Set recursion depth to reach last col with eclass clauses */
+ if (eclauseset->indexclauses[indexcol] != NIL)
+ last_eclass_col = indexcol;
+
+ /* Do we have eclass clauses for any column now under consideration? */
+ if (last_eclass_col >= 0)
+ {
+ /* Yes, so recursively generate all eclass clause combinations */
+ expand_eclass_clause_combinations(root, rel, index,
+ 0, last_eclass_col,
+ &clauseset, eclauseset,
+ bitindexpaths);
+ }
+ else
+ {
+ /* No, consider the newly-enlarged set of simple join clauses */
+ get_index_paths(root, rel, index, &clauseset, bitindexpaths);
+ }
+ }
+}
+
+/*
+ * expand_eclass_clause_combinations
+ * Generate all combinations of eclass join clauses for first N columns,
+ * and construct parameterized index paths for each combination.
+ *
+ * Workhorse for consider_index_join_clauses; see notes therein for rationale.
+ * It's convenient to use recursion to implement the enumeration, since we
+ * can have at most INDEX_MAX_KEYS recursion levels.
+ *
+ * 'rel', 'index', 'eclauseset', 'bitindexpaths' as above
+ * 'thiscol' is the current index column number/recursion level
+ * 'lastcol' is the last index column we should consider eclass clauses for
+ * 'clauseset' is the current collection of indexable clauses
+ */
+static void
+expand_eclass_clause_combinations(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index,
+ int thiscol, int lastcol,
+ IndexClauseSet *clauseset,
+ IndexClauseSet *eclauseset,
+ List **bitindexpaths)
+{
+ List *save_clauses;
+ ListCell *lc;
+
+ /* If past last eclass column, end the recursion and generate paths */
+ if (thiscol > lastcol)
+ {
+ get_index_paths(root, rel, index, clauseset, bitindexpaths);
+ return;
+ }
+
+ /* If no eclass clauses to consider for this column, just recurse */
+ if (eclauseset->indexclauses[thiscol] == NIL)
+ {
+ Assert(thiscol < lastcol);
+ expand_eclass_clause_combinations(root, rel, index,
+ thiscol + 1, lastcol,
+ clauseset, eclauseset,
+ bitindexpaths);
+ return;
+ }
+
+ /* We'll momentarily save and restore the list of non-eclass clauses */
+ save_clauses = clauseset->indexclauses[thiscol];
+
+ /* If we have non-eclass clauses for this column, first try with those */
+ if (save_clauses)
+ expand_eclass_clause_combinations(root, rel, index,
+ thiscol + 1, lastcol,
+ clauseset, eclauseset,
+ bitindexpaths);
+
+ /* For each eclass clause alternative ... */
+ foreach(lc, eclauseset->indexclauses[thiscol])
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Replace any existing clauses with the eclass clause */
+ clauseset->indexclauses[thiscol] = list_make1(rinfo);
+
+ /* Recurse to advance to next column */
+ expand_eclass_clause_combinations(root, rel, index,
+ thiscol + 1, lastcol,
+ clauseset, eclauseset,
+ bitindexpaths);
+ }
+
+ /* Restore previous list contents */
+ clauseset->indexclauses[thiscol] = save_clauses;
+}
+
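/*
 * Illustration (not part of the patch): consider an index on (x, y)
 * with rclauseset = {x > 10}, a simple join clause y = t2.c for column
 * 2, and an eclass clause x = t3.d for column 1.  The column-1 pass
 * sets last_eclass_col = 0 and the recursion emits paths for {x > 10}
 * and {x = t3.d}; the column-2 pass appends y = t2.c and emits
 * {x > 10, y = t2.c} (parameterized by t2) and {x = t3.d, y = t2.c}
 * (parameterized by t2 and t3).  Any duplicate of the already-built
 * restriction-only path simply loses out in add_path().
 */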
+
+/*
+ * get_index_paths
+ * Given an index and a set of index clauses for it, construct IndexPaths.
+ *
+ * Plain indexpaths are sent directly to add_path, while potential
+ * bitmap indexpaths are added to *bitindexpaths for later processing.
+ *
+ * This is a fairly simple frontend to build_index_paths(). Its reason for
+ * existence is mainly to handle ScalarArrayOpExpr quals properly. If the
+ * index AM supports them natively, we should just include them in simple
+ * index paths. If not, we should exclude them while building simple index
+ * paths, and then make a separate attempt to include them in bitmap paths.
+ */
+static void
+get_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ List **bitindexpaths)
+{
+ List *indexpaths;
+ ListCell *lc;
+
+ /*
+ * Build simple index paths using the clauses. Allow ScalarArrayOpExpr
+ * clauses only if the index AM supports them natively.
+ */
+ indexpaths = build_index_paths(root, rel,
+ index, clauses,
+ index->predOK,
+ SAOP_PER_AM, ST_ANYSCAN);
/*
* Submit all the ones that can form plain IndexScan plans to add_path.
- * (A plain IndexPath might represent either a plain IndexScan or an
- * IndexOnlyScan, but for our purposes here the distinction does not
+ * (A plain IndexPath can represent either a plain IndexScan or an
+ * IndexOnlyScan, but for our purposes here that distinction does not
* matter. However, some of the indexes might support only bitmap scans,
- * and those we mustn't submit to add_path here.) Also, pick out the ones
- * that might be useful as bitmap scans. For that, we must discard
- * indexes that don't support bitmap scans, and we also are only
- * interested in paths that have some selectivity; we should discard
- * anything that was generated solely for ordering purposes.
+ * and those we mustn't submit to add_path here.)
+ *
+ * Also, pick out the ones that are usable as bitmap scans. For that,
+ * we must discard indexes that don't support bitmap scans, and we
+ * also are only interested in paths that have some selectivity; we
+ * should discard anything that was generated solely for ordering
+ * purposes.
*/
- bitindexpaths = NIL;
- foreach(l, indexpaths)
+ foreach(lc, indexpaths)
{
- IndexPath *ipath = (IndexPath *) lfirst(l);
+ IndexPath *ipath = (IndexPath *) lfirst(lc);
- if (ipath->indexinfo->amhasgettuple)
+ if (index->amhasgettuple)
add_path(rel, (Path *) ipath);
- if (ipath->indexinfo->amhasgetbitmap &&
+ if (index->amhasgetbitmap &&
(ipath->path.pathkeys == NIL ||
ipath->indexselectivity < 1.0))
- bitindexpaths = lappend(bitindexpaths, ipath);
+ *bitindexpaths = lappend(*bitindexpaths, ipath);
}
/*
- * Generate BitmapOrPaths for any suitable OR-clauses present in the
- * restriction list. Add these to bitindexpaths.
+ * If the index doesn't handle ScalarArrayOpExpr clauses natively, check
+ * to see if there are any such clauses, and if so generate bitmap scan
+ * paths relying on executor-managed ScalarArrayOpExpr handling.
*/
- indexpaths = generate_bitmap_or_paths(root, rel,
- rel->baserestrictinfo, NIL,
- NULL);
- bitindexpaths = list_concat(bitindexpaths, indexpaths);
+ if (!index->amsearcharray)
+ {
+ indexpaths = build_index_paths(root, rel,
+ index, clauses,
+ false,
+ SAOP_REQUIRE, ST_BITMAPSCAN);
+ *bitindexpaths = list_concat(*bitindexpaths, indexpaths);
+ }
+}
+
+/*
+ * build_index_paths
+ * Given an index and a set of index clauses for it, construct zero
+ * or more IndexPaths.
+ *
+ * We return a list of paths because (1) this routine checks some cases
+ * in which no IndexPath should be generated at all, and (2) in some
+ * cases we want to consider both a forward and a backward scan, so as
+ * to obtain both sort orders. Note that the paths are just returned
+ * to the caller and not immediately fed to add_path().
+ *
+ * At top level, useful_predicate should be exactly the index's predOK flag
+ * (ie, true if it has a predicate that was proven from the restriction
+ * clauses). When working on an arm of an OR clause, useful_predicate
+ * should be true if the predicate required the current OR list to be proven.
+ * Note that this routine should never be called at all if the index has an
+ * unprovable predicate.
+ *
+ * saop_control indicates whether ScalarArrayOpExpr clauses can be used.
+ * When it's SAOP_REQUIRE, index paths are created only if we found at least
+ * one ScalarArrayOpExpr clause.
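+ * (SAOP_PER_AM, as passed from get_index_paths(), additionally means a
+ * ScalarArrayOpExpr clause is accepted only if the index AM reports
+ * amsearcharray; see the clause-collection loop below.)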
+ *
+ * scantype indicates whether we want to create plain indexscans, bitmap
+ * indexscans, or both. When it's ST_BITMAPSCAN, we will not consider
+ * index ordering while deciding if a Path is worth generating.
+ *
+ * 'rel' is the index's heap relation
+ * 'index' is the index for which we want to generate paths
+ * 'clauses' is the collection of indexable clauses (RestrictInfo nodes)
+ * 'useful_predicate' indicates whether the index has a useful predicate
+ * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used
+ * 'scantype' indicates whether we need plain or bitmap scan support
+ */
+static List *
+build_index_paths(PlannerInfo *root, RelOptInfo *rel,
+ IndexOptInfo *index, IndexClauseSet *clauses,
+ bool useful_predicate,
+ SaOpControl saop_control, ScanTypeControl scantype)
+{
+ List *result = NIL;
+ IndexPath *ipath;
+ List *index_clauses;
+ List *clause_columns;
+ Relids outer_relids;
+ double loop_count;
+ List *orderbyclauses;
+ List *orderbyclausecols;
+ List *index_pathkeys;
+ List *useful_pathkeys;
+ bool found_clause;
+ bool pathkeys_possibly_useful;
+ bool index_is_ordered;
+ bool index_only_scan;
+ int indexcol;
/*
- * Likewise, generate paths using executor-managed ScalarArrayOpExpr
- * clauses; these can't be simple indexscans but they can be used in
- * bitmap scans.
+ * Check that index supports the desired scan type(s)
*/
- indexpaths = find_saop_paths(root, rel,
- rel->baserestrictinfo, NIL,
- true, NULL);
- bitindexpaths = list_concat(bitindexpaths, indexpaths);
+ switch (scantype)
+ {
+ case ST_INDEXSCAN:
+ if (!index->amhasgettuple)
+ return NIL;
+ break;
+ case ST_BITMAPSCAN:
+ if (!index->amhasgetbitmap)
+ return NIL;
+ break;
+ case ST_ANYSCAN:
+ /* either or both are OK */
+ break;
+ }
/*
- * If we found anything usable, generate a BitmapHeapPath for the most
- * promising combination of bitmap index paths.
+ * 1. Collect the index clauses into a single list.
+ *
+ * We build a list of RestrictInfo nodes for clauses to be used with
+ * this index, along with an integer list of the index column numbers
+ * (zero based) that each clause should be used with. The clauses are
+ * ordered by index key, so that the column numbers form a nondecreasing
+ * sequence. (This order is depended on by btree and possibly other
+ * places.) The lists can be empty, if the index AM allows that.
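+ * For example (hypothetical), clauses matching columns 0, 0, and 2 come
+ * out in exactly that order, with no entries for column 1.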
+ *
+ * found_clause is set true only if there's at least one index clause;
+ * and if saop_control is SAOP_REQUIRE, it has to be a ScalarArrayOpExpr
+ * clause.
+ *
+ * We also build a Relids set showing which outer rels are required
+ * by the selected clauses.
*/
- if (bitindexpaths != NIL)
+ index_clauses = NIL;
+ clause_columns = NIL;
+ found_clause = false;
+ outer_relids = NULL;
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
{
- Path *bitmapqual;
- BitmapHeapPath *bpath;
+ ListCell *lc;
- bitmapqual = choose_bitmap_and(root, rel, bitindexpaths, NULL);
- bpath = create_bitmap_heap_path(root, rel, bitmapqual, NULL);
- add_path(rel, (Path *) bpath);
+ foreach(lc, clauses->indexclauses[indexcol])
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (IsA(rinfo->clause, ScalarArrayOpExpr))
+ {
+ /* Ignore if not supported by index */
+ if (saop_control == SAOP_PER_AM && !index->amsearcharray)
+ continue;
+ found_clause = true;
+ }
+ else
+ {
+ if (saop_control != SAOP_REQUIRE)
+ found_clause = true;
+ }
+ index_clauses = lappend(index_clauses, rinfo);
+ clause_columns = lappend_int(clause_columns, indexcol);
+ outer_relids = bms_add_members(outer_relids,
+ rinfo->clause_relids);
+ }
+
+ /*
+ * If no clauses match the first index column, check for amoptionalkey
+ * restriction. We can't generate a scan over an index with
+ * amoptionalkey = false unless there's at least one index clause.
+ * (When working on columns after the first, this test cannot fail: it
+ * is always okay for later columns to have no clauses, and if the first
+ * column had none with amoptionalkey false, we'd have returned already.)
+ */
+ if (index_clauses == NIL && !index->amoptionalkey)
+ return NIL;
+ }
+
+ /* We do not want the index's rel itself listed in outer_relids */
+ outer_relids = bms_del_member(outer_relids, rel->relid);
+ /* Enforce convention that outer_relids is exactly NULL if empty */
+ if (bms_is_empty(outer_relids))
+ outer_relids = NULL;
+
+ /* Compute loop_count for cost estimation purposes */
+ loop_count = get_loop_count(root, outer_relids);
+
+ /*
+ * 2. Compute pathkeys describing index's ordering, if any, then see how
+ * many of them are actually useful for this query. This is not relevant
+ * if we are only trying to build bitmap indexscans.
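+ * (For example, a hypothetical btree index on (x, y) yields forward-scan
+ * pathkeys on x then y; truncate_useless_pathkeys keeps only the prefix
+ * that is actually useful to the query.)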
+ */
+ pathkeys_possibly_useful = (scantype != ST_BITMAPSCAN &&
+ has_useful_pathkeys(root, rel));
+ index_is_ordered = (index->sortopfamily != NULL);
+ if (index_is_ordered && pathkeys_possibly_useful)
+ {
+ index_pathkeys = build_index_pathkeys(root, index,
+ ForwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ orderbyclauses = NIL;
+ orderbyclausecols = NIL;
+ }
+ else if (index->amcanorderbyop && pathkeys_possibly_useful)
+ {
+ /* see if we can generate ordering operators for query_pathkeys */
+ match_pathkeys_to_index(index, root->query_pathkeys,
+ &orderbyclauses,
+ &orderbyclausecols);
+ if (orderbyclauses)
+ useful_pathkeys = root->query_pathkeys;
+ else
+ useful_pathkeys = NIL;
+ }
+ else
+ {
+ useful_pathkeys = NIL;
+ orderbyclauses = NIL;
+ orderbyclausecols = NIL;
+ }
+
+ /*
+ * 3. Check if an index-only scan is possible. If we're not building
+ * plain indexscans, this isn't relevant since bitmap scans don't support
+ * index data retrieval anyway.
+ */
+ index_only_scan = (scantype != ST_BITMAPSCAN &&
+ check_index_only(rel, index));
+
+ /*
+ * 4. Generate an indexscan path if there are relevant restriction clauses
+ * in the current clauses, OR the index ordering is potentially useful for
+ * later merging or final output ordering, OR the index has a useful
+ * predicate, OR an index-only scan is possible.
+ */
+ if (found_clause || useful_pathkeys != NIL || useful_predicate ||
+ index_only_scan)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ clause_columns,
+ orderbyclauses,
+ orderbyclausecols,
+ useful_pathkeys,
+ index_is_ordered ?
+ ForwardScanDirection :
+ NoMovementScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count);
+ result = lappend(result, ipath);
+ }
+
+ /*
+ * 5. If the index is ordered, a backwards scan might be interesting.
+ */
+ if (index_is_ordered && pathkeys_possibly_useful)
+ {
+ index_pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ useful_pathkeys = truncate_useless_pathkeys(root, rel,
+ index_pathkeys);
+ if (useful_pathkeys != NIL)
+ {
+ ipath = create_index_path(root, index,
+ index_clauses,
+ clause_columns,
+ NIL,
+ NIL,
+ useful_pathkeys,
+ BackwardScanDirection,
+ index_only_scan,
+ outer_relids,
+ loop_count);
+ result = lappend(result, ipath);
+ }
}
-}
+ return result;
+}
-/*----------
- * find_usable_indexes
- * Given a list of restriction clauses, find all the potentially usable
- * indexes for the given relation, and return a list of IndexPaths.
+/*
+ * build_paths_for_OR
+ * Given a list of restriction clauses from one arm of an OR clause,
+ * construct all matching IndexPaths for the relation.
+ *
+ * Here we must scan all indexes of the relation, since a bitmap OR tree
+ * can use multiple indexes.
*
* The caller actually supplies two lists of restriction clauses: some
- * "current" ones and some "outer" ones. Both lists can be used freely
+ * "current" ones and some "other" ones. Both lists can be used freely
* to match keys of the index, but an index must use at least one of the
* "current" clauses to be considered usable. The motivation for this is
* examples like
* When dealing with a partial index, a match of the index predicate to
* one of the "current" clauses also makes the index usable.
*
- * If istoplevel is true (indicating we are considering the top level of a
- * rel's restriction clauses), we will include indexes in the result that
- * have an interesting sort order, even if they have no matching restriction
- * clauses.
- *
* 'rel' is the relation for which we want to generate index paths
* 'clauses' is the current list of clauses (RestrictInfo nodes)
- * 'outer_clauses' is the list of additional upper-level clauses
- * 'istoplevel' is true if clauses are the rel's top-level restriction list
- * (outer_clauses must be NIL when this is true)
- * 'outer_rel' is the outer side of the join if forming an inner indexscan
- * (so some of the given clauses are join clauses); NULL if not
- * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used
- * 'scantype' indicates whether we need plain or bitmap scan support
- *
- * Note: check_partial_indexes() must have been run previously.
- *----------
+ * 'other_clauses' is the list of additional upper-level clauses
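+ *
+ * In the OR case, generate_bitmap_or_paths() passes one arm's clauses as
+ * 'clauses' and the entire surrounding clause list as 'other_clauses'.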
*/
static List *
-find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel,
- SaOpControl saop_control, ScanTypeControl scantype)
+build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses)
{
- Relids outer_relids = outer_rel ? outer_rel->relids : NULL;
- bool possibly_useful_pathkeys = has_useful_pathkeys(root, rel);
List *result = NIL;
List *all_clauses = NIL; /* not computed till needed */
- ListCell *ilist;
+ ListCell *lc;
- foreach(ilist, rel->indexlist)
+ foreach(lc, rel->indexlist)
{
- IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
- IndexPath *ipath;
- List *restrictclauses;
- List *restrictclausecols;
- List *orderbyclauses;
- List *orderbyclausecols;
- List *index_pathkeys;
- List *useful_pathkeys;
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+ IndexClauseSet clauseset;
+ List *indexpaths;
bool useful_predicate;
- bool found_clause;
- bool index_is_ordered;
- bool index_only_scan;
- /*
- * Check that index supports the desired scan type(s)
- */
- switch (scantype)
- {
- case ST_INDEXSCAN:
- if (!index->amhasgettuple)
- continue;
- break;
- case ST_BITMAPSCAN:
- if (!index->amhasgetbitmap)
- continue;
- break;
- case ST_ANYSCAN:
- /* either or both are OK */
- break;
- }
-
- /*
- * If we're doing find_saop_paths(), we can skip indexes that support
- * ScalarArrayOpExpr natively. We already generated all the potential
- * indexpaths for them, so no need to do anything more.
- */
- if (saop_control == SAOP_REQUIRE && index->amsearcharray)
+ /* Ignore index if it doesn't support bitmap scans */
+ if (!index->amhasgetbitmap)
continue;
/*
* Ignore partial indexes that do not match the query. If a partial
- * index is marked predOK then we know it's OK; otherwise, if we are
- * at top level we know it's not OK (since predOK is exactly whether
- * its predicate could be proven from the toplevel clauses).
- * Otherwise, we have to test whether the added clauses are sufficient
- * to imply the predicate. If so, we could use the index in the
- * current context.
+ * index is marked predOK then we know it's OK. Otherwise, we have
+ * to test whether the added clauses are sufficient to imply the
+ * predicate. If so, we can use the index in the current context.
*
* We set useful_predicate to true iff the predicate was proven using
* the current set of clauses. This is needed to prevent matching a
{
if (index->predOK)
{
- if (istoplevel)
- {
- /* we know predicate was proven from these clauses */
- useful_predicate = true;
- }
+ /* Usable, but don't set useful_predicate */
}
else
{
- if (istoplevel)
- continue; /* no point in trying to prove it */
-
/* Form all_clauses if not done already */
if (all_clauses == NIL)
all_clauses = list_concat(list_copy(clauses),
- outer_clauses);
+ other_clauses);
if (!predicate_implied_by(index->indpred, all_clauses))
continue; /* can't use it at all */
- if (!predicate_implied_by(index->indpred, outer_clauses))
+ if (!predicate_implied_by(index->indpred, other_clauses))
useful_predicate = true;
}
}
/*
- * 1. Match the index against the available restriction clauses.
- * found_clause is set true only if at least one of the current
- * clauses was used (and, if saop_control is SAOP_REQUIRE, it has to
- * have been a ScalarArrayOpExpr clause).
+ * Identify the restriction clauses that can match the index.
*/
- match_clauses_to_index(index,
- clauses,
- outer_clauses,
- outer_relids,
- saop_control,
- &restrictclauses,
- &restrictclausecols,
- &found_clause);
+ MemSet(&clauseset, 0, sizeof(clauseset));
+ match_clauses_to_index(index, clauses, &clauseset);
/*
- * Not all index AMs support scans with no restriction clauses. We
- * can't generate a scan over an index with amoptionalkey = false
- * unless there's at least one restriction clause.
+ * If no matches so far, and the index predicate isn't useful,
+ * we don't want it.
*/
- if (restrictclauses == NIL && !index->amoptionalkey)
+ if (!clauseset.nonempty && !useful_predicate)
continue;
/*
- * 2. Compute pathkeys describing index's ordering, if any, then see
- * how many of them are actually useful for this query. This is not
- * relevant unless we are at top level.
+ * Add "other" restriction clauses to the clauseset.
*/
- index_is_ordered = (index->sortopfamily != NULL);
- if (index_is_ordered && possibly_useful_pathkeys &&
- istoplevel && outer_rel == NULL)
- {
- index_pathkeys = build_index_pathkeys(root, index,
- ForwardScanDirection);
- useful_pathkeys = truncate_useless_pathkeys(root, rel,
- index_pathkeys);
- orderbyclauses = NIL;
- orderbyclausecols = NIL;
- }
- else if (index->amcanorderbyop && possibly_useful_pathkeys &&
- istoplevel && outer_rel == NULL && scantype != ST_BITMAPSCAN)
- {
- /* see if we can generate ordering operators for query_pathkeys */
- match_pathkeys_to_index(index, root->query_pathkeys,
- &orderbyclauses,
- &orderbyclausecols);
- if (orderbyclauses)
- useful_pathkeys = root->query_pathkeys;
- else
- useful_pathkeys = NIL;
- }
- else
- {
- useful_pathkeys = NIL;
- orderbyclauses = NIL;
- orderbyclausecols = NIL;
- }
-
- /*
- * 3. Check if an index-only scan is possible.
- */
- index_only_scan = check_index_only(rel, index);
-
- /*
- * 4. Generate an indexscan path if there are relevant restriction
- * clauses in the current clauses, OR the index ordering is
- * potentially useful for later merging or final output ordering, OR
- * the index has a predicate that was proven by the current clauses,
- * OR an index-only scan is possible.
- */
- if (found_clause || useful_pathkeys != NIL || useful_predicate ||
- index_only_scan)
- {
- ipath = create_index_path(root, index,
- restrictclauses,
- restrictclausecols,
- orderbyclauses,
- orderbyclausecols,
- useful_pathkeys,
- index_is_ordered ?
- ForwardScanDirection :
- NoMovementScanDirection,
- index_only_scan,
- outer_rel);
- result = lappend(result, ipath);
- }
+ match_clauses_to_index(index, other_clauses, &clauseset);
/*
- * 5. If the index is ordered, a backwards scan might be interesting.
- * Again, this is only interesting at top level.
+ * Construct paths if possible.
*/
- if (index_is_ordered && possibly_useful_pathkeys &&
- istoplevel && outer_rel == NULL)
- {
- index_pathkeys = build_index_pathkeys(root, index,
- BackwardScanDirection);
- useful_pathkeys = truncate_useless_pathkeys(root, rel,
- index_pathkeys);
- if (useful_pathkeys != NIL)
- {
- ipath = create_index_path(root, index,
- restrictclauses,
- restrictclausecols,
- NIL,
- NIL,
- useful_pathkeys,
- BackwardScanDirection,
- index_only_scan,
- outer_rel);
- result = lappend(result, ipath);
- }
- }
+ indexpaths = build_index_paths(root, rel,
+ index, &clauseset,
+ useful_predicate,
+ SAOP_ALLOW, ST_BITMAPSCAN);
+ result = list_concat(result, indexpaths);
}
return result;
}
-
-/*
- * find_saop_paths
- * Find all the potential indexpaths that make use of executor-managed
- * ScalarArrayOpExpr clauses. The executor only supports these in bitmap
- * scans, not plain indexscans, so we need to segregate them from the
- * normal case. Otherwise, same API as find_usable_indexes().
- * Returns a list of IndexPaths.
- */
-static List *
-find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- bool istoplevel, RelOptInfo *outer_rel)
-{
- bool have_saop = false;
- ListCell *l;
-
- /*
- * Since find_usable_indexes is relatively expensive, don't bother to run
- * it unless there are some top-level ScalarArrayOpExpr clauses.
- */
- foreach(l, clauses)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-
- Assert(IsA(rinfo, RestrictInfo));
- if (IsA(rinfo->clause, ScalarArrayOpExpr))
- {
- have_saop = true;
- break;
- }
- }
- if (!have_saop)
- return NIL;
-
- return find_usable_indexes(root, rel,
- clauses, outer_clauses,
- istoplevel, outer_rel,
- SAOP_REQUIRE, ST_BITMAPSCAN);
-}
-
-
/*
* generate_bitmap_or_paths
* Look through the list of clauses to find OR clauses, and generate
* a BitmapOrPath for each one we can handle that way. Return a list
* of the generated BitmapOrPaths.
*
- * outer_clauses is a list of additional clauses that can be assumed true
+ * other_clauses is a list of additional clauses that can be assumed true
* for the purpose of generating indexquals, but are not to be searched for
- * ORs. (See find_usable_indexes() for motivation.) outer_rel is the outer
- * side when we are considering a nestloop inner indexpath.
+ * ORs. (See build_paths_for_OR() for motivation.)
+ *
+ * If restriction_only is true, ignore OR elements that are join clauses.
+ * When using this feature, it is the caller's responsibility to ensure that
+ * neither clauses nor other_clauses contain any join clauses that are not
+ * ORs, as we do not re-filter those lists.
*/
List *
generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- RelOptInfo *outer_rel)
+ List *clauses, List *other_clauses,
+ bool restriction_only)
{
List *result = NIL;
List *all_clauses;
- ListCell *l;
+ ListCell *lc;
/*
- * We can use both the current and outer clauses as context for
- * find_usable_indexes
+ * We can use both the current and other clauses as context for
+ * build_paths_for_OR; no need to remove ORs from the lists.
*/
- all_clauses = list_concat(list_copy(clauses), outer_clauses);
+ all_clauses = list_concat(list_copy(clauses), other_clauses);
- foreach(l, clauses)
+ foreach(lc, clauses)
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
List *pathlist;
Path *bitmapqual;
ListCell *j;
{
List *andargs = ((BoolExpr *) orarg)->args;
- indlist = find_usable_indexes(root, rel,
- andargs,
- all_clauses,
- false,
- outer_rel,
- SAOP_ALLOW,
- ST_BITMAPSCAN);
+ if (restriction_only)
+ andargs = drop_indexable_join_clauses(rel, andargs);
+
+ indlist = build_paths_for_OR(root, rel,
+ andargs,
+ all_clauses);
+
/* Recurse in case there are sub-ORs */
indlist = list_concat(indlist,
generate_bitmap_or_paths(root, rel,
andargs,
all_clauses,
- outer_rel));
+ restriction_only));
}
else
{
+ List *orargs;
+
Assert(IsA(orarg, RestrictInfo));
Assert(!restriction_is_or_clause((RestrictInfo *) orarg));
- indlist = find_usable_indexes(root, rel,
- list_make1(orarg),
- all_clauses,
- false,
- outer_rel,
- SAOP_ALLOW,
- ST_BITMAPSCAN);
+ orargs = list_make1(orarg);
+
+ if (restriction_only)
+ orargs = drop_indexable_join_clauses(rel, orargs);
+
+ indlist = build_paths_for_OR(root, rel,
+ orargs,
+ all_clauses);
}
/*
* OK, pick the most promising AND combination, and add it to
* pathlist.
*/
- bitmapqual = choose_bitmap_and(root, rel, indlist, outer_rel);
+ bitmapqual = choose_bitmap_and(root, rel, indlist);
pathlist = lappend(pathlist, bitmapqual);
}
return result;
}
+/*
+ * drop_indexable_join_clauses
+ * Remove any indexable join clauses from the list.
+ *
+ * This is a helper for generate_bitmap_or_paths(). We leave OR clauses
+ * in the list whether they are joins or not, since we might be able to
+ * extract a restriction item from an OR list. It's safe to leave such
+ * clauses in the list because match_clauses_to_index() will ignore them,
+ * so there's no harm in passing them to build_paths_for_OR().
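+ *
+ * For example (hypothetical): given OR-arm clauses {a.x = b.y, a.z = 1,
+ * (a.q = b.r OR a.s = 2)}, only the join clause a.x = b.y is dropped;
+ * the plain restriction clause and the OR clause are both kept.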
+ */
+static List *
+drop_indexable_join_clauses(RelOptInfo *rel, List *clauses)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ Assert(IsA(rinfo, RestrictInfo));
+ if (restriction_is_or_clause(rinfo) ||
+ bms_is_subset(rinfo->clause_relids, rel->relids))
+ result = lappend(result, rinfo);
+ }
+ return result;
+}
+
/*
* choose_bitmap_and
* combining multiple inputs.
*/
static Path *
-choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel)
+choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths)
{
int npaths = list_length(paths);
PathClauseUsage **pathinfoarray;
* reduces the total cost. Perhaps someday that code will be smarter and
* we can remove this limitation. (But note that this also defends
* against flat-out duplicate input paths, which can happen because
- * best_inner_indexscan will find the same OR join clauses that
+ * match_join_clauses_to_index will find the same OR join clauses that
* create_or_index_quals has pulled OR restriction clauses out of.)
*
* For the same reason, we reject AND combinations in which an index
pathinfo = pathinfoarray[i];
paths = list_make1(pathinfo->path);
- costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path, outer_rel);
+ costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path);
qualsofar = list_concat(list_copy(pathinfo->quals),
list_copy(pathinfo->preds));
clauseidsofar = bms_copy(pathinfo->clauseids);
}
/* tentatively add new path to paths, so we can estimate cost */
paths = lappend(paths, pathinfo->path);
- newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
+ newcost = bitmap_and_cost_est(root, rel, paths);
if (newcost < costsofar)
{
/* keep new path in paths, update subsidiary variables */
* index path (no BitmapAnd, at least not at this level).
*/
static Cost
-bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
- Path *ipath, RelOptInfo *outer_rel)
+bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel, Path *ipath)
{
- Path bpath;
+ BitmapHeapPath bpath;
+
+ /* Set up a dummy BitmapHeapPath */
+ bpath.path.type = T_BitmapHeapPath;
+ bpath.path.pathtype = T_BitmapHeapScan;
+ bpath.path.parent = rel;
+ bpath.path.pathkeys = NIL;
+ bpath.path.required_outer = ipath->required_outer;
+ bpath.path.param_clauses = ipath->param_clauses;
+ bpath.bitmapqual = ipath;
- cost_bitmap_heap_scan(&bpath, root, rel, ipath, outer_rel);
+ cost_bitmap_heap_scan((Path *) &bpath, root, rel, ipath,
+ get_loop_count(root, bpath.path.required_outer));
- return bpath.total_cost;
+ return bpath.path.total_cost;
}
/*
* inputs.
*/
static Cost
-bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
- List *paths, RelOptInfo *outer_rel)
+bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, List *paths)
{
- BitmapAndPath apath;
- Path bpath;
+ BitmapAndPath *apath;
+ BitmapHeapPath bpath;
+
+ /*
+ * Create a temporary BitmapAndPath. (Because it needs realistic
+ * required_outer and param_clauses values, making a dummy one would
+ * take more code than it's worth.)
+ */
+ apath = create_bitmap_and_path(root, rel, paths);
- /* Set up a dummy BitmapAndPath */
- apath.path.type = T_BitmapAndPath;
- apath.path.parent = rel;
- apath.bitmapquals = paths;
- cost_bitmap_and_node(&apath, root);
+ /* Set up a dummy BitmapHeapPath */
+ bpath.path.type = T_BitmapHeapPath;
+ bpath.path.pathtype = T_BitmapHeapScan;
+ bpath.path.parent = rel;
+ bpath.path.pathkeys = NIL;
+ bpath.path.required_outer = apath->path.required_outer;
+ bpath.path.param_clauses = apath->path.param_clauses;
+ bpath.bitmapqual = (Path *) apath;
/* Now we can do cost_bitmap_heap_scan */
- cost_bitmap_heap_scan(&bpath, root, rel, (Path *) &apath, outer_rel);
+ cost_bitmap_heap_scan((Path *) &bpath, root, rel, (Path *) apath,
+ get_loop_count(root, bpath.path.required_outer));
- return bpath.total_cost;
+ return bpath.path.total_cost;
}
return result;
}
+/*
+ * get_loop_count
+ * Choose the loop count estimate to use for costing a parameterized path
+ * with the given set of outer relids.
+ *
+ * Since we produce parameterized paths before we've begun to generate join
+ * relations, it's impossible to predict exactly how many times a parameterized
+ * path will be iterated; we don't know the size of the relation that will be
+ * on the outside of the nestloop. However, we should try to account for
+ * multiple iterations somehow in costing the path. The heuristic embodied
+ * here is to use the rowcount of the smallest other base relation needed in
+ * the join clauses used by the path. (We could alternatively consider the
+ * largest one, but that seems too optimistic.) This is of course the right
+ * answer for single-other-relation cases, and it seems like a reasonable
+ * zero-order approximation for multiway-join cases.
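+ *
+ * For example (hypothetical numbers): if the chosen clauses reference two
+ * other base rels, estimated at 100 and 10000 rows respectively, we cost
+ * the scan as though it will be repeated 100 times.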
+ *
+ * Note: for this to work, allpaths.c must establish all baserel size
+ * estimates before it begins to compute paths, or at least before it
+ * calls create_index_paths().
+ */
+static double
+get_loop_count(PlannerInfo *root, Relids outer_relids)
+{
+ double result = 1.0;
+
+ /* For a non-parameterized path, just return 1.0 quickly */
+ if (outer_relids != NULL)
+ {
+ int relid;
+
+ /* Need a working copy since bms_first_member is destructive */
+ outer_relids = bms_copy(outer_relids);
+ while ((relid = bms_first_member(outer_relids)) >= 0)
+ {
+ RelOptInfo *outer_rel;
+
+ /* Paranoia: ignore bogus relid indexes */
+ if (relid >= root->simple_rel_array_size)
+ continue;
+ outer_rel = root->simple_rel_array[relid];
+ if (outer_rel == NULL)
+ continue;
+ Assert(outer_rel->relid == relid); /* sanity check on array */
+
+ /* Other relation could be proven empty, if so ignore */
+ if (IS_DUMMY_REL(outer_rel))
+ continue;
+
+ /* Otherwise, rel's rows estimate should be valid by now */
+ Assert(outer_rel->rows > 0);
+
+ /* Remember smallest row count estimate among the outer rels */
+ if (result == 1.0 || result > outer_rel->rows)
+ result = outer_rel->rows;
+ }
+ bms_free(outer_relids);
+ }
+ return result;
+}
+
+
+/****************************************************************************
+ * ---- ROUTINES TO CHECK QUERY CLAUSES ----
+ ****************************************************************************/
+
+/*
+ * match_restriction_clauses_to_index
+ * Identify restriction clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ */
+static void
+match_restriction_clauses_to_index(RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ match_clauses_to_index(index, rel->baserestrictinfo, clauseset);
+}
+
+/*
+ * match_join_clauses_to_index
+ * Identify join clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ * Also, add any potentially usable join OR clauses to *joinorclauses.
+ */
+static void
+match_join_clauses_to_index(PlannerInfo *root,
+ RelOptInfo *rel, IndexOptInfo *index,
+ IndexClauseSet *clauseset,
+ List **joinorclauses)
+{
+ Relids inner_baserels;
+ ListCell *lc;
+
+ /*
+ * There is no value in considering join clauses for outer joins in which
+ * the indexed relation is on the outside, since there'd be no way to
+ * perform such a join with a parameterized nestloop. So first, identify
+ * all baserels that are on the inside of such joins.
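+ *
+ * For example (hypothetical): in "A LEFT JOIN B ON a.x = b.y", when
+ * matching clauses to A's indexes we must skip a.x = b.y, since A has to
+ * be on the outside of that join; B ends up in inner_baserels.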
+ */
+ inner_baserels = NULL;
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc);
+
+ if (bms_overlap(rel->relids, sjinfo->min_lefthand))
+ inner_baserels = bms_add_members(inner_baserels,
+ sjinfo->min_righthand);
+ /* full joins constrain both sides symmetrically */
+ if (sjinfo->jointype == JOIN_FULL &&
+ bms_overlap(rel->relids, sjinfo->min_righthand))
+ inner_baserels = bms_add_members(inner_baserels,
+ sjinfo->min_lefthand);
+ }
+
+ /* Now scan the rel's join clauses */
+ foreach(lc, rel->joininfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ /* Ignore if it mentions anything from wrong side of an outer join */
+ if (bms_overlap(rinfo->clause_relids, inner_baserels))
+ continue;
+
+ /*
+ * Note that we ignore required_relids; that's okay because we are
+ * intentionally ignoring the normal rules for placing evaluation of
+ * join clauses. The whole point here is to evaluate join clauses
+ * below their join, even if they would normally be delayed by
+ * outer join rules.
+ *
+ * Instead of considering required_relids, we ignore clauses for which
+ * any referenced rel is in nullable_relids; that means there's an
+ * outer join below the clause and so it can't be checked at the
+ * relation scan level.
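+ * (For example, hypothetically, in "(A LEFT JOIN B) JOIN C ON b.y = c.z",
+ * B is in the clause's nullable_relids: b.y may be nulled by the lower
+ * outer join, so the clause cannot be checked at B's scan.)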
+ *
+ * Note: unlike create_or_index_quals(), we can accept clauses that
+ * are marked !is_pushed_down (ie they are themselves outer-join
+ * clauses). This is OK because any path generated with these clauses
+ * could only be used in the inside of a nestloop join, which will be
+ * the nullable side.
+ */
+ if (bms_overlap(rinfo->clause_relids, rinfo->nullable_relids))
+ continue;
+
+ /* Potentially usable, so see if it matches the index or is an OR */
+ if (restriction_is_or_clause(rinfo))
+ *joinorclauses = lappend(*joinorclauses, rinfo);
+ else
+ match_clause_to_index(index, rinfo, clauseset);
+ }
+}
+
+/*
+ * match_eclass_clauses_to_index
+ * Identify EquivalenceClass join clauses for the rel that match the index.
+ * Matching clauses are added to *clauseset.
+ */
+static void
+match_eclass_clauses_to_index(PlannerInfo *root, IndexOptInfo *index,
+ IndexClauseSet *clauseset)
+{
+ int indexcol;
+
+ /* No work if rel is not in any such ECs */
+ if (!index->rel->has_eclass_joins)
+ return;
+
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ List *clauses;
-/****************************************************************************
- * ---- ROUTINES TO CHECK RESTRICTIONS ----
- ****************************************************************************/
+ clauses = generate_implied_equalities_for_indexcol(root,
+ index,
+ indexcol);
+ /*
+ * We have to check whether the results actually do match the index,
+ * since for non-btree indexes the EC's equality operators might not
+ * be in the index opclass (cf eclass_member_matches_indexcol).
+ */
+ match_clauses_to_index(index, clauses, clauseset);
+ }
+}
/*
* match_clauses_to_index
- * Find restriction clauses that can be used with an index.
- *
- * Returns a list of RestrictInfo nodes for clauses that can be used with
- * this index, along with an integer list of the index column numbers
- * (zero based) that each clause would be used with. The clauses are
- * ordered by index key, so that the column numbers form a nondecreasing
- * sequence. (This order is depended on by btree and possibly other places.)
- * NIL lists are returned if there are no matching clauses.
- *
- * We can use clauses from either the current clauses or outer_clauses lists,
- * but *found_clause is set TRUE only if we used at least one clause from
- * the "current clauses" list. See find_usable_indexes() for motivation.
- *
- * outer_relids determines what Vars will be allowed on the other side
- * of a possible index qual; see match_clause_to_indexcol().
- *
- * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used.
- * When it's SAOP_REQUIRE, *found_clause is set TRUE only if we used at least
- * one ScalarArrayOpExpr from the current clauses list.
- *
- * If the index has amoptionalkey = false, we give up and return NIL when
- * there are no restriction clauses matching the first index key. Otherwise,
- * we return NIL only if there are no restriction clauses matching any index
- * key. There could be unused index keys after the first one in any case.
- *
- * Note: in some circumstances we may find the same RestrictInfos coming
- * from multiple places. Defend against redundant outputs by refusing to
- * match an already-used clause (pointer equality should be a good enough
- * check for this). This also keeps us from matching the same clause to
- * multiple columns of a badly-defined index, which is unlikely to be helpful
- * and is likely to give us an inflated idea of the index's selectivity.
+ * Perform match_clause_to_index() for each clause in a list.
+ * Matching clauses are added to *clauseset.
*/
static void
match_clauses_to_index(IndexOptInfo *index,
- List *clauses, List *outer_clauses,
- Relids outer_relids,
- SaOpControl saop_control,
- List **index_clauses_p,
- List **clause_columns_p,
- bool *found_clause)
+ List *clauses,
+ IndexClauseSet *clauseset)
{
- List *index_clauses = NIL;
- List *clause_columns = NIL;
- int indexcol;
-
- *index_clauses_p = NIL; /* set default results */
- *clause_columns_p = NIL;
- *found_clause = false;
-
- if (clauses == NIL && outer_clauses == NIL)
- return; /* cannot succeed */
+ ListCell *lc;
- for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ foreach(lc, clauses)
{
- ListCell *l;
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
- /* check the current clauses */
- foreach(l, clauses)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ Assert(IsA(rinfo, RestrictInfo));
+ match_clause_to_index(index, rinfo, clauseset);
+ }
+}
- Assert(IsA(rinfo, RestrictInfo));
- if (list_member_ptr(index_clauses, rinfo))
- continue;
- if (match_clause_to_indexcol(index,
- indexcol,
- rinfo,
- outer_relids,
- saop_control))
- {
- index_clauses = lappend(index_clauses, rinfo);
- clause_columns = lappend_int(clause_columns, indexcol);
- if (saop_control != SAOP_REQUIRE ||
- IsA(rinfo->clause, ScalarArrayOpExpr))
- *found_clause = true;
- }
- }
+/*
+ * match_clause_to_index
+ * Test whether a qual clause can be used with an index.
+ *
+ * If the clause is usable, add it to the appropriate list in *clauseset.
+ * *clauseset must be initialized to zeroes before first call.
+ *
+ * Note: in some circumstances we may find the same RestrictInfos coming from
+ * multiple places. Defend against redundant outputs by refusing to add a
+ * clause twice (pointer equality should be a good enough check for this).
+ *
+ * Note: it's possible that a badly-defined index could have multiple matching
+ * columns. We always select the first match if so; this avoids scenarios
+ * wherein we get an inflated idea of the index's selectivity by using the
+ * same clause multiple times with different index columns.
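+ * (For example, a hypothetical index declared on (x, x) would match a
+ * clause on x only to its first column.)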
+ */
+static void
+match_clause_to_index(IndexOptInfo *index,
+ RestrictInfo *rinfo,
+ IndexClauseSet *clauseset)
+{
+ int indexcol;
- /* check the outer clauses */
- foreach(l, outer_clauses)
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ if (match_clause_to_indexcol(index,
+ indexcol,
+ rinfo))
{
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-
- Assert(IsA(rinfo, RestrictInfo));
- if (list_member_ptr(index_clauses, rinfo))
- continue;
- if (match_clause_to_indexcol(index,
- indexcol,
- rinfo,
- outer_relids,
- saop_control))
- {
- index_clauses = lappend(index_clauses, rinfo);
- clause_columns = lappend_int(clause_columns, indexcol);
- }
- }
-
- /*
- * If no clauses match this key, check for amoptionalkey restriction.
- */
- if (index_clauses == NIL && !index->amoptionalkey)
+ clauseset->indexclauses[indexcol] =
+ list_append_unique_ptr(clauseset->indexclauses[indexcol],
+ rinfo);
+ clauseset->nonempty = true;
return;
+ }
}
-
- *index_clauses_p = index_clauses;
- *clause_columns_p = clause_columns;
}
-
/*
* match_clause_to_indexcol()
* Determines whether a restriction clause matches a column of an index.
*
- * To match a normal index, the clause:
+ * To match an index normally, the clause:
*
* (1) must be in the form (indexkey op const) or (const op indexkey);
* and
* and
* (3) must match the collation of the index, if collation is relevant.
*
- * Our definition of "const" is pretty liberal: we allow Vars belonging
- * to the caller-specified outer_relids relations (which had better not
- * include the relation whose index is being tested). outer_relids should
- * be NULL when checking simple restriction clauses, and the outer side
- * of the join when building a join inner scan. Other than that, the
- * only thing we don't like is volatile functions.
+ * Our definition of "const" is exceedingly liberal: we allow anything that
+ * doesn't involve a volatile function or a Var of the index's relation.
+ * In particular, Vars belonging to other relations of the query are
+ * accepted here, since a clause of that form can be used in a
+ * parameterized indexscan. It's the responsibility of higher code levels
+ * to manage restriction and join clauses appropriately.
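+ *
+ * For example (hypothetical), "a.f1 = b.f2" can match an index on a.f1,
+ * with b.f2 supplying the parameter value on each repetition of a
+ * parameterized indexscan over A.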
*
- * Note: in most cases we already know that the clause as a whole uses
- * vars from the interesting set of relations. The reason for the
- * outer_relids test is to reject clauses like (a.f1 OP (b.f2 OP a.f3));
- * that's not processable by an indexscan nestloop join on A, whereas
- * (a.f1 OP (b.f2 OP c.f3)) is.
+ * Note: we do need to check for Vars of the index's relation on the
+ * "const" side of the clause, since clauses like (a.f1 OP (b.f2 OP a.f3))
+ * are not processable by a parameterized indexscan on a.f1, whereas
+ * something like (a.f1 OP (b.f2 OP c.f3)) is.
*
* Presently, the executor can only deal with indexquals that have the
* indexkey on the left, so we can only use clauses that have the indexkey
* adjust_rowcompare_for_index().
*
* It is also possible to match ScalarArrayOpExpr clauses to indexes, when
- * the clause is of the form "indexkey op ANY (arrayconst)". Since not
- * all indexes handle these natively, and the executor implements them
- * only in the context of bitmap index scans, our caller specifies whether
- * to allow these or not.
+ * the clause is of the form "indexkey op ANY (arrayconst)".
*
* For boolean indexes, it is also possible to match the clause directly
* to the indexkey; or perhaps the clause is (NOT indexkey).
* 'index' is the index of interest.
* 'indexcol' is a column number of 'index' (counting from 0).
* 'rinfo' is the clause to be tested (as a RestrictInfo node).
- * 'outer_relids' lists rels whose Vars can be considered pseudoconstant.
- * 'saop_control' indicates whether ScalarArrayOpExpr clauses can be used.
*
* Returns true if the clause can be used with this index key.
*
static bool
match_clause_to_indexcol(IndexOptInfo *index,
int indexcol,
- RestrictInfo *rinfo,
- Relids outer_relids,
- SaOpControl saop_control)
+ RestrictInfo *rinfo)
{
Expr *clause = rinfo->clause;
+ Index index_relid = index->rel->relid;
Oid opfamily = index->opfamily[indexcol];
Oid idxcollation = index->indexcollations[indexcol];
Node *leftop,
expr_coll = ((OpExpr *) clause)->inputcollid;
plain_op = true;
}
- else if (clause && IsA(clause, ScalarArrayOpExpr) &&
- (index->amsearcharray || saop_control != SAOP_PER_AM))
+ else if (clause && IsA(clause, ScalarArrayOpExpr))
{
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
{
return match_rowcompare_to_indexcol(index, indexcol,
opfamily, idxcollation,
- (RowCompareExpr *) clause,
- outer_relids);
+ (RowCompareExpr *) clause);
}
else if (index->amsearchnulls && IsA(clause, NullTest))
{
* (constant operator indexkey). See above notes about const-ness.
*/
if (match_index_to_operand(leftop, indexcol, index) &&
- bms_is_subset(right_relids, outer_relids) &&
+ !bms_is_member(index_relid, right_relids) &&
!contain_volatile_functions(rightop))
{
if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
* is a "special" indexable operator.
*/
if (plain_op &&
- match_special_index_operator(clause, opfamily, idxcollation, true))
+ match_special_index_operator(clause, opfamily,
+ idxcollation, true))
return true;
return false;
}
if (plain_op &&
match_index_to_operand(rightop, indexcol, index) &&
- bms_is_subset(left_relids, outer_relids) &&
+ !bms_is_member(index_relid, left_relids) &&
!contain_volatile_functions(leftop))
{
if (IndexCollMatchesExprColl(idxcollation, expr_coll) &&
* If we didn't find a member of the index's opfamily, see whether it
* is a "special" indexable operator.
*/
- if (match_special_index_operator(clause, opfamily, idxcollation, false))
+ if (match_special_index_operator(clause, opfamily,
+ idxcollation, false))
return true;
return false;
}
int indexcol,
Oid opfamily,
Oid idxcollation,
- RowCompareExpr *clause,
- Relids outer_relids)
+ RowCompareExpr *clause)
{
+ Index index_relid = index->rel->relid;
Node *leftop,
*rightop;
Oid expr_op;
* These syntactic tests are the same as in match_clause_to_indexcol()
*/
if (match_index_to_operand(leftop, indexcol, index) &&
- bms_is_subset(pull_varnos(rightop), outer_relids) &&
+ !bms_is_member(index_relid, pull_varnos(rightop)) &&
!contain_volatile_functions(rightop))
{
/* OK, indexkey is on left */
}
else if (match_index_to_operand(rightop, indexcol, index) &&
- bms_is_subset(pull_varnos(leftop), outer_relids) &&
+ !bms_is_member(index_relid, pull_varnos(leftop)) &&
!contain_volatile_functions(leftop))
{
/* indexkey is on right, so commute the operator */
}
/****************************************************************************
- * ---- ROUTINES TO CHECK JOIN CLAUSES ----
+ * ---- ROUTINES TO CHECK EXTERNALLY-VISIBLE CONDITIONS ----
****************************************************************************/
/*
- * indexable_outerrelids
- * Finds all other relids that participate in any indexable join clause
- * for the specified table. Returns a set of relids.
- */
-static Relids
-indexable_outerrelids(PlannerInfo *root, RelOptInfo *rel)
-{
- Relids outer_relids = NULL;
- bool is_child_rel = (rel->reloptkind == RELOPT_OTHER_MEMBER_REL);
- ListCell *lc1;
-
- /*
- * Examine each joinclause in the joininfo list to see if it matches any
- * key of any index. If so, add the clause's other rels to the result.
- */
- foreach(lc1, rel->joininfo)
- {
- RestrictInfo *joininfo = (RestrictInfo *) lfirst(lc1);
- Relids other_rels;
-
- other_rels = bms_difference(joininfo->required_relids, rel->relids);
- if (matches_any_index(joininfo, rel, other_rels))
- outer_relids = bms_join(outer_relids, other_rels);
- else
- bms_free(other_rels);
- }
-
- /*
- * We also have to look through the query's EquivalenceClasses to see if
- * any of them could generate indexable join conditions for this rel.
- */
- if (rel->has_eclass_joins)
- {
- foreach(lc1, root->eq_classes)
- {
- EquivalenceClass *cur_ec = (EquivalenceClass *) lfirst(lc1);
- Relids other_rels = NULL;
- bool found_index = false;
- ListCell *lc2;
-
- /*
- * Won't generate joinclauses if const or single-member (the
- * latter test covers the volatile case too)
- */
- if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
- continue;
-
- /*
- * Note we don't test ec_broken; if we did, we'd need a separate
- * code path to look through ec_sources. Checking the members
- * anyway is OK as a possibly-overoptimistic heuristic.
- */
-
- /*
- * No point in searching if rel not mentioned in eclass (but we
- * can't tell that for a child rel).
- */
- if (!is_child_rel &&
- !bms_is_subset(rel->relids, cur_ec->ec_relids))
- continue;
-
- /*
- * Scan members, looking for both an index match and join
- * candidates
- */
- foreach(lc2, cur_ec->ec_members)
- {
- EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
-
- /* Join candidate? */
- if (!cur_em->em_is_child &&
- !bms_overlap(cur_em->em_relids, rel->relids))
- {
- other_rels = bms_add_members(other_rels,
- cur_em->em_relids);
- continue;
- }
-
- /* Check for index match (only need one) */
- if (!found_index &&
- bms_equal(cur_em->em_relids, rel->relids) &&
- eclass_matches_any_index(cur_ec, cur_em, rel))
- found_index = true;
- }
-
- if (found_index)
- outer_relids = bms_join(outer_relids, other_rels);
- else
- bms_free(other_rels);
- }
- }
-
- return outer_relids;
-}
-
-/*
- * matches_any_index
- * Workhorse for indexable_outerrelids: see if a joinclause can be
- * matched to any index of the given rel.
- */
-static bool
-matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids)
-{
- ListCell *l;
-
- Assert(IsA(rinfo, RestrictInfo));
-
- if (restriction_is_or_clause(rinfo))
- {
- foreach(l, ((BoolExpr *) rinfo->orclause)->args)
- {
- Node *orarg = (Node *) lfirst(l);
-
- /* OR arguments should be ANDs or sub-RestrictInfos */
- if (and_clause(orarg))
- {
- ListCell *j;
-
- /* Recurse to examine AND items and sub-ORs */
- foreach(j, ((BoolExpr *) orarg)->args)
- {
- RestrictInfo *arinfo = (RestrictInfo *) lfirst(j);
-
- if (matches_any_index(arinfo, rel, outer_relids))
- return true;
- }
- }
- else
- {
- /* Recurse to examine simple clause */
- Assert(IsA(orarg, RestrictInfo));
- Assert(!restriction_is_or_clause((RestrictInfo *) orarg));
- if (matches_any_index((RestrictInfo *) orarg, rel,
- outer_relids))
- return true;
- }
- }
-
- return false;
- }
-
- /* Normal case for a simple restriction clause */
- foreach(l, rel->indexlist)
- {
- IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
- int indexcol;
-
- for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
- {
- if (match_clause_to_indexcol(index,
- indexcol,
- rinfo,
- outer_relids,
- SAOP_ALLOW))
- return true;
- }
- }
-
- return false;
-}
-
-/*
- * eclass_matches_any_index
- * Workhorse for indexable_outerrelids: see if an EquivalenceClass member
- * can be matched to any index column of the given rel.
+ * eclass_member_matches_indexcol
+ * Test whether an EquivalenceClass member matches an index column.
*
- * This is also exported for use by find_eclass_clauses_for_index_join.
+ * This is exported for use by generate_implied_equalities_for_indexcol.
*/
bool
-eclass_matches_any_index(EquivalenceClass *ec, EquivalenceMember *em,
- RelOptInfo *rel)
-{
- ListCell *l;
-
- foreach(l, rel->indexlist)
- {
- IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
- int indexcol;
-
- for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
- {
- Oid curFamily = index->opfamily[indexcol];
- Oid curCollation = index->indexcollations[indexcol];
-
- /*
- * If it's a btree index, we can reject it if its opfamily isn't
- * compatible with the EC, since no clause generated from the EC
- * could be used with the index. For non-btree indexes, we can't
- * easily tell whether clauses generated from the EC could be used
- * with the index, so only check for expression match. This might
- * mean we return "true" for a useless index, but that will just
- * cause some wasted planner cycles; it's better than ignoring
- * useful indexes.
- *
- * We insist on collation match for all index types, though.
- */
- if ((index->relam != BTREE_AM_OID ||
- list_member_oid(ec->ec_opfamilies, curFamily)) &&
- IndexCollMatchesExprColl(curCollation, ec->ec_collation) &&
- match_index_to_operand((Node *) em->em_expr, indexcol, index))
- return true;
- }
- }
-
- return false;
-}
-
-
-/*
- * best_inner_indexscan
- * Finds the best available inner indexscans for a nestloop join
- * with the given rel on the inside and the given outer_rel outside.
- *
- * *cheapest_startup gets the path with least startup cost
- * *cheapest_total gets the path with least total cost (often the same path)
- * Both are set to NULL if there are no possible inner indexscans.
- *
- * We ignore ordering considerations, since a nestloop's inner scan's order
- * is uninteresting. Hence startup cost and total cost are the only figures
- * of merit to consider.
- *
- * Note: create_index_paths() must have been run previously for this rel,
- * else the results will always be NULL.
- */
-void
-best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype,
- Path **cheapest_startup, Path **cheapest_total)
-{
- Relids outer_relids;
- bool isouterjoin;
- List *clause_list;
- List *indexpaths;
- List *bitindexpaths;
- List *allindexpaths;
- ListCell *l;
- InnerIndexscanInfo *info;
- MemoryContext oldcontext;
-
- /* Initialize results for failure returns */
- *cheapest_startup = *cheapest_total = NULL;
-
- /*
- * Nestloop only supports inner, left, semi, and anti joins.
- */
- switch (jointype)
- {
- case JOIN_INNER:
- case JOIN_SEMI:
- isouterjoin = false;
- break;
- case JOIN_LEFT:
- case JOIN_ANTI:
- isouterjoin = true;
- break;
- default:
- return;
- }
-
- /*
- * If there are no indexable joinclauses for this rel, exit quickly.
- */
- if (bms_is_empty(rel->index_outer_relids))
- return;
-
- /*
- * Otherwise, we have to do path selection in the main planning context,
- * so that any created path can be safely attached to the rel's cache of
- * best inner paths. (This is not currently an issue for normal planning,
- * but it is an issue for GEQO planning.)
- */
- oldcontext = MemoryContextSwitchTo(root->planner_cxt);
-
- /*
- * Intersect the given outer relids with index_outer_relids to find the
- * set of outer relids actually relevant for this rel. If there are none,
- * again we can fail immediately.
- */
- outer_relids = bms_intersect(rel->index_outer_relids, outer_rel->relids);
- if (bms_is_empty(outer_relids))
- {
- bms_free(outer_relids);
- MemoryContextSwitchTo(oldcontext);
- return;
- }
-
- /*
- * Look to see if we already computed the result for this set of relevant
- * outerrels. (We include the isouterjoin status in the cache lookup key
- * for safety. In practice I suspect this is not necessary because it
- * should always be the same for a given combination of rels.)
- *
- * NOTE: because we cache on outer_relids rather than outer_rel->relids,
- * we will report the same paths and hence path cost for joins with
- * different sets of irrelevant rels on the outside. Now that cost_index
- * is sensitive to outer_rel->rows, this is not really right. However the
- * error is probably not large. Is it worth establishing a separate cache
- * entry for each distinct outer_rel->relids set to get this right?
- */
- foreach(l, rel->index_inner_paths)
- {
- info = (InnerIndexscanInfo *) lfirst(l);
- if (bms_equal(info->other_relids, outer_relids) &&
- info->isouterjoin == isouterjoin)
- {
- bms_free(outer_relids);
- MemoryContextSwitchTo(oldcontext);
- *cheapest_startup = info->cheapest_startup_innerpath;
- *cheapest_total = info->cheapest_total_innerpath;
- return;
- }
- }
-
- /*
- * Find all the relevant restriction and join clauses.
- *
- * Note: because we include restriction clauses, we will find indexscans
- * that could be plain indexscans, ie, they don't require the join context
- * at all. This may seem redundant, but we need to include those scans in
- * the input given to choose_bitmap_and() to be sure we find optimal AND
- * combinations of join and non-join scans. Also, even if the "best inner
- * indexscan" is just a plain indexscan, it will have a different cost
- * estimate because of cache effects.
- */
- clause_list = find_clauses_for_join(root, rel, outer_relids, isouterjoin);
-
- /*
- * Find all the index paths that are usable for this join, except for
- * stuff involving OR and executor-managed ScalarArrayOpExpr clauses.
- */
- allindexpaths = find_usable_indexes(root, rel,
- clause_list, NIL,
- false, outer_rel,
- SAOP_PER_AM,
- ST_ANYSCAN);
-
- /*
- * Include the ones that are usable as plain indexscans in indexpaths, and
- * include the ones that are usable as bitmap scans in bitindexpaths.
- */
- indexpaths = bitindexpaths = NIL;
- foreach(l, allindexpaths)
- {
- IndexPath *ipath = (IndexPath *) lfirst(l);
-
- if (ipath->indexinfo->amhasgettuple)
- indexpaths = lappend(indexpaths, ipath);
-
- if (ipath->indexinfo->amhasgetbitmap)
- bitindexpaths = lappend(bitindexpaths, ipath);
- }
-
- /*
- * Generate BitmapOrPaths for any suitable OR-clauses present in the
- * clause list.
- */
- bitindexpaths = list_concat(bitindexpaths,
- generate_bitmap_or_paths(root, rel,
- clause_list, NIL,
- outer_rel));
-
- /*
- * Likewise, generate paths using executor-managed ScalarArrayOpExpr
- * clauses; these can't be simple indexscans but they can be used in
- * bitmap scans.
- */
- bitindexpaths = list_concat(bitindexpaths,
- find_saop_paths(root, rel,
- clause_list, NIL,
- false, outer_rel));
-
- /*
- * If we found anything usable, generate a BitmapHeapPath for the most
- * promising combination of bitmap index paths.
- */
- if (bitindexpaths != NIL)
- {
- Path *bitmapqual;
- BitmapHeapPath *bpath;
-
- bitmapqual = choose_bitmap_and(root, rel, bitindexpaths, outer_rel);
- bpath = create_bitmap_heap_path(root, rel, bitmapqual, outer_rel);
- indexpaths = lappend(indexpaths, bpath);
- }
-
- /*
- * Now choose the cheapest members of indexpaths.
- */
- if (indexpaths != NIL)
- {
- *cheapest_startup = *cheapest_total = (Path *) linitial(indexpaths);
-
- for_each_cell(l, lnext(list_head(indexpaths)))
- {
- Path *path = (Path *) lfirst(l);
-
- if (compare_path_costs(path, *cheapest_startup, STARTUP_COST) < 0)
- *cheapest_startup = path;
- if (compare_path_costs(path, *cheapest_total, TOTAL_COST) < 0)
- *cheapest_total = path;
- }
- }
-
- /* Cache the results --- whether positive or negative */
- info = makeNode(InnerIndexscanInfo);
- info->other_relids = outer_relids;
- info->isouterjoin = isouterjoin;
- info->cheapest_startup_innerpath = *cheapest_startup;
- info->cheapest_total_innerpath = *cheapest_total;
- rel->index_inner_paths = lcons(info, rel->index_inner_paths);
-
- MemoryContextSwitchTo(oldcontext);
-}
-
-/*
- * find_clauses_for_join
- * Generate a list of clauses that are potentially useful for
- * scanning rel as the inner side of a nestloop join.
- *
- * We consider both join and restriction clauses. Any joinclause that uses
- * only otherrels in the specified outer_relids is fair game. But there must
- * be at least one such joinclause in the final list, otherwise we return NIL
- * indicating that there isn't any potential win here.
- */
-static List *
-find_clauses_for_join(PlannerInfo *root, RelOptInfo *rel,
- Relids outer_relids, bool isouterjoin)
+eclass_member_matches_indexcol(EquivalenceClass *ec, EquivalenceMember *em,
+ IndexOptInfo *index, int indexcol)
{
- List *clause_list = NIL;
- Relids join_relids;
- ListCell *l;
+ Oid curFamily = index->opfamily[indexcol];
+ Oid curCollation = index->indexcollations[indexcol];
/*
- * Look for joinclauses that are usable with given outer_relids. Note
- * we'll take anything that's applicable to the join whether it has
- * anything to do with an index or not; since we're only building a list,
- * it's not worth filtering more finely here.
+ * If it's a btree index, we can reject it if its opfamily isn't
+ * compatible with the EC, since no clause generated from the EC could be
+ * used with the index. For non-btree indexes, we can't easily tell
+ * whether clauses generated from the EC could be used with the index,
+ * so don't check the opfamily. This might mean we return "true" for a
+ * useless EC, so we have to recheck the results of
+ * generate_implied_equalities_for_indexcol; see
+ * match_eclass_clauses_to_index.
*/
- join_relids = bms_union(rel->relids, outer_relids);
-
- foreach(l, rel->joininfo)
- {
- RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
-
- /* Can't use pushed-down join clauses in outer join */
- if (isouterjoin && rinfo->is_pushed_down)
- continue;
- if (!bms_is_subset(rinfo->required_relids, join_relids))
- continue;
-
- clause_list = lappend(clause_list, rinfo);
- }
-
- bms_free(join_relids);
-
- /*
- * Also check to see if any EquivalenceClasses can produce a relevant
- * joinclause. Since all such clauses are effectively pushed-down, this
- * doesn't apply to outer joins.
- */
- if (!isouterjoin && rel->has_eclass_joins)
- clause_list = list_concat(clause_list,
- find_eclass_clauses_for_index_join(root,
- rel,
- outer_relids));
-
- /* If no join clause was matched then forget it, per comments above */
- if (clause_list == NIL)
- return NIL;
+ if (index->relam == BTREE_AM_OID &&
+ !list_member_oid(ec->ec_opfamilies, curFamily))
+ return false;
- /* We can also use any plain restriction clauses for the rel */
- clause_list = list_concat(list_copy(rel->baserestrictinfo), clause_list);
+ /* We insist on collation match for all index types, though */
+ if (!IndexCollMatchesExprColl(curCollation, ec->ec_collation))
+ return false;
- return clause_list;
+ return match_index_to_operand((Node *) em->em_expr, indexcol, index);
}
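
The collation test just above relies on the IndexCollMatchesExprColl() macro, which is not shown in this excerpt. A minimal sketch of the semantics the check depends on (an assumption inferred from the surrounding code, not a quote of the real header) is:

/* Sketch only: a collation-less index column matches any expression
 * collation; otherwise the two collations must be identical. */
#define IndexCollMatchesExprColl(idxcollation, exprcollation) \
    ((idxcollation) == InvalidOid || (idxcollation) == (exprcollation))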
/*
*
* Note that we aren't interested in collations here; the caller must check
* for a collation match, if it's dealing with an operator where that matters.
+ *
+ * This is exported for use in selfuncs.c.
*/
bool
match_index_to_operand(Node *operand,
* ---- ROUTINES FOR "SPECIAL" INDEXABLE OPERATORS ----
****************************************************************************/
-/*----------
+/*
* These routines handle special optimization of operators that can be
* used with index scans even though they are not known to the executor's
* indexscan machinery. The key idea is that these operators allow us
* the index's opfamily this transformation is a no-op, but clauses recognized
* by match_special_index_operator() or match_boolean_index_clause() must be
* converted into one or more "regular" indexqual conditions.
- *----------
*/
/*
/*
* See how many of the remaining columns match some index column in the
- * same way. A note about rel membership tests: we assume that the clause
- * as a whole is already known to use only Vars from the indexed relation
- * and possibly some acceptable outer relations. So the "other" side of
+ * same way. As in match_clause_to_indexcol(), the "other" side of
* any potential index condition is OK as long as it doesn't use Vars from
* the indexed relation.
*/
static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
- JoinType jointype, SpecialJoinInfo *sjinfo);
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ Relids param_source_rels);
static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
- JoinType jointype, SpecialJoinInfo *sjinfo);
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels);
static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist,
- JoinType jointype, SpecialJoinInfo *sjinfo);
-static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype);
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels);
static List *select_mergejoin_clauses(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
{
List *mergeclause_list = NIL;
bool mergejoin_allowed = true;
+ SemiAntiJoinFactors semifactors;
+ Relids param_source_rels = NULL;
+ ListCell *lc;
/*
* Find potential mergejoin clauses. We can skip this if we are not
jointype,
&mergejoin_allowed);
+ /*
+ * If it's SEMI or ANTI join, compute correction factors for cost
+ * estimation. These will be the same for all paths.
+ */
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+ compute_semi_anti_join_factors(root, outerrel, innerrel,
+ jointype, sjinfo, restrictlist,
+ &semifactors);
+
+ /*
+ * Decide whether it's sensible to generate parameterized paths for this
+ * joinrel, and if so, which relations such paths should require. There
+ * is no need to create a parameterized result path unless there is a join
+ * order restriction that prevents joining one of our input rels directly
+ * to the parameter source rel instead of joining to the other input rel.
+ * This restriction reduces the number of parameterized paths we have to
+ * deal with at higher join levels, without compromising the quality of
+ * the resulting plan. We express the restriction as a Relids set that
+ * must overlap the parameterization of any proposed join path.
+ */
+ foreach(lc, root->join_info_list)
+ {
+ /* named sjinfo2 to avoid shadowing this function's sjinfo parameter */
+ SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
+
+ /*
+ * SJ is relevant to this join if we have some part of its RHS
+ * (possibly not all of it), and haven't yet joined to its LHS. (This
+ * test is pretty simplistic, but should be sufficient considering the
+ * join has already been proven legal.) If the SJ is relevant, it
+ * presents constraints for joining to anything not in its RHS.
+ */
+ if (bms_overlap(joinrel->relids, sjinfo2->min_righthand) &&
+ !bms_overlap(joinrel->relids, sjinfo2->min_lefthand))
+ param_source_rels = bms_join(param_source_rels,
+ bms_difference(root->all_baserels,
+ sjinfo2->min_righthand));
+
+ /* full joins constrain both sides symmetrically */
+ if (sjinfo2->jointype == JOIN_FULL &&
+ bms_overlap(joinrel->relids, sjinfo2->min_lefthand) &&
+ !bms_overlap(joinrel->relids, sjinfo2->min_righthand))
+ param_source_rels = bms_join(param_source_rels,
+ bms_difference(root->all_baserels,
+ sjinfo2->min_lefthand));
+ }
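
To make the restriction concrete, consider a hypothetical query of the form a LEFT JOIN (b JOIN c ON b.y = c.y) ON a.x = b.x. When this function is called for the joinrel {b, c}, we hold the special join's entire RHS {b, c} and none of its LHS {a}, so param_source_rels comes out as everything outside the RHS, i.e. {a}. That is precisely the useful case: the outer join's order restriction forbids joining b directly to a first, so the only way to exploit an index on b.x in a nestloop with a on the outside is a {b, c} join path parameterized by a.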
+
/*
* 1. Consider mergejoin paths where both relations must be explicitly
* sorted. Skip this if we can't mergejoin.
*/
if (mergejoin_allowed)
sort_inner_and_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ restrictlist, mergeclause_list, jointype,
+ sjinfo, param_source_rels);
/*
* 2. Consider paths where the outer relation need not be explicitly
*/
if (mergejoin_allowed)
match_unsorted_outer(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ restrictlist, mergeclause_list, jointype,
+ sjinfo, &semifactors, param_source_rels);
#ifdef NOT_USED
*/
if (mergejoin_allowed)
match_unsorted_inner(root, joinrel, outerrel, innerrel,
- restrictlist, mergeclause_list, jointype, sjinfo);
+ restrictlist, mergeclause_list, jointype,
+ sjinfo, &semifactors, param_source_rels);
#endif
/*
*/
if (enable_hashjoin || jointype == JOIN_FULL)
hash_inner_and_outer(root, joinrel, outerrel, innerrel,
- restrictlist, jointype, sjinfo);
+ restrictlist, jointype,
+ sjinfo, &semifactors, param_source_rels);
+}
+
+/*
+ * try_nestloop_path
+ * Consider a nestloop join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_nestloop_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels,
+ Path *outer_path,
+ Path *inner_path,
+ List *restrict_clauses,
+ List *pathkeys)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if
+ * the parameterization wouldn't be sensible.
+ */
+ required_outer = calc_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * Do a precheck to quickly eliminate obviously-inferior paths. We
+ * calculate a cheap lower bound on the path's cost and then use
+ * add_path_precheck() to see if the path is clearly going to be dominated
+ * by some existing path for the joinrel. If not, do the full pushup with
+ * creating a fully valid path structure and submitting it to add_path().
+ * The latter two steps are expensive enough to make this two-phase
+ * methodology worthwhile.
+ */
+ initial_cost_nestloop(root, &workspace, jointype,
+ outer_path, inner_path,
+ sjinfo, semifactors);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ pathkeys, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_nestloop_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ sjinfo,
+ semifactors,
+ outer_path,
+ inner_path,
+ restrict_clauses,
+ pathkeys,
+ required_outer));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
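
calc_nestloop_required_outer() itself is not part of this excerpt. The behavior the check above depends on is roughly the following sketch (an assumption about the pathnode.c helper, not a quote of it): the nestloop's requirement is the union of its inputs' requirements, minus whatever the outer input itself supplies, normalized to NULL when empty.

/*
 * Sketch of the expected semantics.  For a nestloop, the inner path may
 * be parameterized by the outer path's relation; such requirements are
 * satisfied inside the join and must not propagate upward.
 */
Relids
calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
{
    Relids      required_outer;

    /* inner_path can require rels from outer_path, but not vice versa */
    Assert(!bms_overlap(outer_path->required_outer,
                        inner_path->parent->relids));
    /* easy case if inner path is not parameterized */
    if (!inner_path->required_outer)
        return bms_copy(outer_path->required_outer);
    /* else, form the union of the requirements ... */
    required_outer = bms_union(outer_path->required_outer,
                               inner_path->required_outer);
    /* ... and remove any mention of now-satisfied outer rels */
    required_outer = bms_del_members(required_outer,
                                     outer_path->parent->relids);
    /* maintain invariant that required_outer is exactly NULL if empty */
    if (bms_is_empty(required_outer))
    {
        bms_free(required_outer);
        required_outer = NULL;
    }
    return required_outer;
}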
+
+/*
+ * try_mergejoin_path
+ * Consider a merge join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_mergejoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ Relids param_source_rels,
+ Path *outer_path,
+ Path *inner_path,
+ List *restrict_clauses,
+ List *pathkeys,
+ List *mergeclauses,
+ List *outersortkeys,
+ List *innersortkeys)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if
+ * the parameterization wouldn't be sensible.
+ */
+ required_outer = calc_non_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * If the given paths are already well enough ordered, we can skip doing
+ * an explicit sort.
+ */
+ if (outersortkeys &&
+ pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
+ outersortkeys = NIL;
+ if (innersortkeys &&
+ pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
+ innersortkeys = NIL;
+
+ /*
+ * See comments in try_nestloop_path().
+ */
+ initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
+ outer_path, inner_path,
+ outersortkeys, innersortkeys,
+ sjinfo);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ pathkeys, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_mergejoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ sjinfo,
+ outer_path,
+ inner_path,
+ restrict_clauses,
+ pathkeys,
+ required_outer,
+ mergeclauses,
+ outersortkeys,
+ innersortkeys));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
+
+/*
+ * try_hashjoin_path
+ * Consider a hash join path; if it appears useful, push it into
+ * the joinrel's pathlist via add_path().
+ */
+static void
+try_hashjoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels,
+ Path *outer_path,
+ Path *inner_path,
+ List *restrict_clauses,
+ List *hashclauses)
+{
+ Relids required_outer;
+ JoinCostWorkspace workspace;
+
+ /*
+ * Check to see if proposed path is still parameterized, and reject if
+ * the parameterization wouldn't be sensible.
+ */
+ required_outer = calc_non_nestloop_required_outer(outer_path,
+ inner_path);
+ if (required_outer &&
+ !bms_overlap(required_outer, param_source_rels))
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ return;
+ }
+
+ /*
+ * See comments in try_nestloop_path(). Also note that hashjoin paths
+ * never have any output pathkeys, per comments in create_hashjoin_path.
+ */
+ initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
+ outer_path, inner_path,
+ sjinfo, semifactors);
+
+ if (add_path_precheck(joinrel,
+ workspace.startup_cost, workspace.total_cost,
+ NIL, required_outer))
+ {
+ add_path(joinrel, (Path *)
+ create_hashjoin_path(root,
+ joinrel,
+ jointype,
+ &workspace,
+ sjinfo,
+ semifactors,
+ outer_path,
+ inner_path,
+ restrict_clauses,
+ required_outer,
+ hashclauses));
+ }
+ else
+ {
+ /* Waste no memory when we reject a path here */
+ bms_free(required_outer);
+ }
+}
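
Its sibling calc_non_nestloop_required_outer() is likewise not shown. Since neither input of a merge or hash join can supply parameter values to the other, a sketch of the expected behavior (again an assumption, not the real function) is simply the union of the two inputs' requirements:

/*
 * Sketch only.  Neither input of a merge or hash join may be
 * parameterized by the other's relation, so the join just requires
 * everything both inputs require.
 */
Relids
calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
{
    Relids      required_outer;

    /* neither path can require rels from the other */
    Assert(!bms_overlap(outer_path->required_outer,
                        inner_path->parent->relids));
    Assert(!bms_overlap(inner_path->required_outer,
                        outer_path->parent->relids));
    /* bms_union returns NULL when both inputs are NULL, as required */
    required_outer = bms_union(outer_path->required_outer,
                               inner_path->required_outer);
    return required_outer;
}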
/*
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
sort_inner_and_outer(PlannerInfo *root,
List *restrictlist,
List *mergeclause_list,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ Relids param_source_rels)
{
Path *outer_path;
Path *inner_path;
* cheapest-startup-cost input paths later, and only if they don't need a
* sort.
*
+ * This function intentionally does not consider parameterized input paths
+ * (implicit in the fact that it only looks at cheapest_total_path, which
+ * is always unparameterized). If we did so, we'd have a combinatorial
+ * explosion of mergejoin paths of dubious value. This interacts with
+ * decisions elsewhere that also discriminate against mergejoins with
+ * parameterized inputs; see comments in src/backend/optimizer/README.
+ *
* If unique-ification is requested, do it and then handle as a plain
* inner join.
*/
* And now we can make the path.
*
* Note: it's possible that the cheapest paths will already be sorted
- * properly. create_mergejoin_path will detect that case and suppress
+ * properly. try_mergejoin_path will detect that case and suppress
* an explicit sort step, so we needn't do so here.
*/
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outer_path,
- inner_path,
- restrictlist,
- merge_pathkeys,
- cur_mergeclauses,
- outerkeys,
- innerkeys));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outer_path,
+ inner_path,
+ restrictlist,
+ merge_pathkeys,
+ cur_mergeclauses,
+ outerkeys,
+ innerkeys);
}
}
* mergejoin clauses in this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
match_unsorted_outer(PlannerInfo *root,
List *restrictlist,
List *mergeclause_list,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels)
{
JoinType save_jointype = jointype;
bool nestjoinOK;
bool useallclauses;
- Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL;
- Path *index_cheapest_startup = NULL;
- Path *index_cheapest_total = NULL;
- ListCell *l;
+ ListCell *lc1;
/*
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
/*
* If we need to unique-ify the inner path, we will consider only the
- * cheapest inner.
+ * cheapest-total inner.
*/
if (save_jointype == JOIN_UNIQUE_INNER)
{
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
Assert(inner_cheapest_total);
- inner_cheapest_startup = inner_cheapest_total;
}
else if (nestjoinOK)
{
!ExecMaterializesOutput(inner_cheapest_total->pathtype))
matpath = (Path *)
create_material_path(innerrel, inner_cheapest_total);
-
- /*
- * Get the best innerjoin indexpaths (if any) for this outer rel.
- * They're the same for all outer paths.
- */
- if (innerrel->reloptkind != RELOPT_JOINREL)
- {
- if (IsA(inner_cheapest_total, AppendPath))
- index_cheapest_total = best_appendrel_indexscan(root,
- innerrel,
- outerrel,
- jointype);
- else if (innerrel->rtekind == RTE_RELATION)
- best_inner_indexscan(root, innerrel, outerrel, jointype,
- &index_cheapest_startup,
- &index_cheapest_total);
- }
}
- foreach(l, outerrel->pathlist)
+ foreach(lc1, outerrel->pathlist)
{
- Path *outerpath = (Path *) lfirst(l);
+ Path *outerpath = (Path *) lfirst(lc1);
List *merge_pathkeys;
List *mergeclauses;
List *innersortkeys;
int num_sortkeys;
int sortkeycnt;
+ /*
+ * We cannot use an outer path that is parameterized by the inner rel.
+ */
+ if (bms_overlap(outerpath->required_outer, innerrel->relids))
+ continue;
+
/*
* If we need to unique-ify the outer path, it's pointless to consider
- * any but the cheapest outer.
+ * any but the cheapest outer. (XXX we don't consider parameterized
+ * outers, nor inners, for unique-ified cases. Should we?)
*/
if (save_jointype == JOIN_UNIQUE_OUTER)
{
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerpath->pathkeys);
- if (nestjoinOK)
+ if (save_jointype == JOIN_UNIQUE_INNER)
+ {
+ /*
+ * Consider nestloop join, but only with the unique-ified cheapest
+ * inner path
+ */
+ try_nestloop_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ inner_cheapest_total,
+ restrictlist,
+ merge_pathkeys);
+ }
+ else if (nestjoinOK)
{
/*
- * Always consider a nestloop join with this outer and
- * cheapest-total-cost inner. When appropriate, also consider
- * using the materialized form of the cheapest inner, the
- * cheapest-startup-cost inner path, and the cheapest innerjoin
- * indexpaths.
+ * Consider nestloop joins using this outer path and various
+ * available paths for the inner relation. We consider the
+ * cheapest-total paths for each available parameterization of
+ * the inner relation, including the unparameterized case.
*/
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- inner_cheapest_total,
- restrictlist,
- merge_pathkeys));
+ ListCell *lc2;
+
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
+
+ try_nestloop_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ merge_pathkeys);
+ }
+
+ /* Also consider materialized form of the cheapest inner path */
if (matpath != NULL)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- matpath,
- restrictlist,
- merge_pathkeys));
- if (inner_cheapest_startup != inner_cheapest_total)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- inner_cheapest_startup,
- restrictlist,
- merge_pathkeys));
- if (index_cheapest_total != NULL)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- index_cheapest_total,
- restrictlist,
- merge_pathkeys));
- if (index_cheapest_startup != NULL &&
- index_cheapest_startup != index_cheapest_total)
- add_path(joinrel, (Path *)
- create_nestloop_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- index_cheapest_startup,
- restrictlist,
- merge_pathkeys));
+ try_nestloop_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ matpath,
+ restrictlist,
+ merge_pathkeys);
}
/* Can't do anything else if outer path needs to be unique'd */
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
* Since a sort will be needed, only cheapest total cost matters. (But
- * create_mergejoin_path will do the right thing if
+ * try_mergejoin_path will do the right thing if
* inner_cheapest_total is already correctly sorted.)
*/
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- inner_cheapest_total,
- restrictlist,
- merge_pathkeys,
- mergeclauses,
- NIL,
- innersortkeys));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outerpath,
+ inner_cheapest_total,
+ restrictlist,
+ merge_pathkeys,
+ mergeclauses,
+ NIL,
+ innersortkeys);
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
* mergejoin using a subset of the merge clauses. Here, we consider
* both cheap startup cost and cheap total cost.
*
+ * Currently we do not consider parameterized inner paths here.
+ * This interacts with decisions elsewhere that also discriminate
+ * against mergejoins with parameterized inputs; see comments in
+ * src/backend/optimizer/README.
+ *
* As we shorten the sortkey list, we should consider only paths that
* are strictly cheaper than (in particular, not the same as) any path
* found in an earlier iteration. Otherwise we'd be intentionally
trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
+ NULL,
TOTAL_COST);
if (innerpath != NULL &&
(cheapest_total_inner == NULL ||
}
else
newclauses = mergeclauses;
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- innerpath,
- restrictlist,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL);
cheapest_total_inner = innerpath;
}
/* Same on the basis of cheapest startup cost ... */
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
+ NULL,
STARTUP_COST);
if (innerpath != NULL &&
(cheapest_startup_inner == NULL ||
else
newclauses = mergeclauses;
}
- add_path(joinrel, (Path *)
- create_mergejoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- outerpath,
- innerpath,
- restrictlist,
- merge_pathkeys,
- newclauses,
- NIL,
- NIL));
+ try_mergejoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ merge_pathkeys,
+ newclauses,
+ NIL,
+ NIL);
}
cheapest_startup_inner = innerpath;
}
* clauses that apply to this join
* 'jointype' is the type of join to do
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
+ * 'param_source_rels' are OK targets for parameterization of result paths
*/
static void
hash_inner_and_outer(PlannerInfo *root,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype,
- SpecialJoinInfo *sjinfo)
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
+ Relids param_source_rels)
{
bool isouterjoin = IS_OUTER_JOIN(jointype);
List *hashclauses;
ListCell *l;
/*
- * We need to build only one hashpath for any given pair of outer and
- * inner relations; all of the hashable clauses will be used as keys.
+ * We need to build only one hashclauses list for any given pair of outer
+ * and inner relations; all of the hashable clauses will be used as keys.
*
* Scan the join's restrictinfo list to find hashjoinable clauses that are
* usable with this pair of sub-relations.
hashclauses = lappend(hashclauses, restrictinfo);
}
- /* If we found any usable hashclauses, make a path */
+ /* If we found any usable hashclauses, make paths */
if (hashclauses)
{
/*
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
- /* Unique-ify if need be */
+ /* Unique-ify if need be; we ignore parameterized possibilities */
if (jointype == JOIN_UNIQUE_OUTER)
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel,
cheapest_total_outer, sjinfo);
Assert(cheapest_total_outer);
- cheapest_startup_outer = cheapest_total_outer;
jointype = JOIN_INNER;
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_total_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
+ /* no possibility of cheap startup here */
}
else if (jointype == JOIN_UNIQUE_INNER)
{
cheapest_total_inner, sjinfo);
Assert(cheapest_total_inner);
jointype = JOIN_INNER;
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_total_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
+ if (cheapest_startup_outer != cheapest_total_outer)
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_startup_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
}
+ else
+ {
+ /*
+ * For other jointypes, we consider the cheapest startup outer
+ * together with the cheapest total inner, and then consider
+ * pairings of cheapest-total paths including parameterized ones.
+ * There is no use in generating parameterized paths on the basis
+ * of possibly cheap startup cost, so this is sufficient.
+ */
+ ListCell *lc1;
+ ListCell *lc2;
+
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ cheapest_startup_outer,
+ cheapest_total_inner,
+ restrictlist,
+ hashclauses);
+
+ foreach(lc1, outerrel->cheapest_parameterized_paths)
+ {
+ Path *outerpath = (Path *) lfirst(lc1);
- add_path(joinrel, (Path *)
- create_hashjoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- cheapest_total_outer,
- cheapest_total_inner,
- restrictlist,
- hashclauses));
- if (cheapest_startup_outer != cheapest_total_outer)
- add_path(joinrel, (Path *)
- create_hashjoin_path(root,
- joinrel,
- jointype,
- sjinfo,
- cheapest_startup_outer,
- cheapest_total_inner,
- restrictlist,
- hashclauses));
- }
-}
-
-/*
- * best_appendrel_indexscan
- * Finds the best available set of inner indexscans for a nestloop join
- * with the given append relation on the inside and the given outer_rel
- * outside. Returns an AppendPath comprising the best inner scans, or
- * NULL if there are no possible inner indexscans.
- *
- * Note that we currently consider only cheapest-total-cost. It's not
- * very clear what cheapest-startup-cost might mean for an AppendPath.
- */
-static Path *
-best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype)
-{
- int parentRTindex = rel->relid;
- List *append_paths = NIL;
- bool found_indexscan = false;
- ListCell *l;
-
- foreach(l, root->append_rel_list)
- {
- AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
- int childRTindex;
- RelOptInfo *childrel;
- Path *index_cheapest_startup;
- Path *index_cheapest_total;
-
- /* append_rel_list contains all append rels; ignore others */
- if (appinfo->parent_relid != parentRTindex)
- continue;
-
- childRTindex = appinfo->child_relid;
- childrel = find_base_rel(root, childRTindex);
- Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
+ /*
+ * We cannot use an outer path that is parameterized by the
+ * inner rel.
+ */
+ if (bms_overlap(outerpath->required_outer, innerrel->relids))
+ continue;
- /*
- * Check to see if child was rejected by constraint exclusion. If so,
- * it will have a cheapest_total_path that's a "dummy" path.
- */
- if (IS_DUMMY_PATH(childrel->cheapest_total_path))
- continue; /* OK, we can ignore it */
+ foreach(lc2, innerrel->cheapest_parameterized_paths)
+ {
+ Path *innerpath = (Path *) lfirst(lc2);
- /*
- * Get the best innerjoin indexpaths (if any) for this child rel.
- */
- best_inner_indexscan(root, childrel, outer_rel, jointype,
- &index_cheapest_startup, &index_cheapest_total);
+ /*
+ * We cannot use an inner path that is parameterized by
+ * the outer rel, either.
+ */
+ if (bms_overlap(innerpath->required_outer,
+ outerrel->relids))
+ continue;
- /*
- * If no luck on an indexpath for this rel, we'll still consider an
- * Append substituting the cheapest-total inner path. However we must
- * find at least one indexpath, else there's not going to be any
- * improvement over the base path for the appendrel.
- */
- if (index_cheapest_total)
- found_indexscan = true;
- else
- index_cheapest_total = childrel->cheapest_total_path;
+ if (outerpath == cheapest_startup_outer &&
+ innerpath == cheapest_total_inner)
+ continue; /* already tried it */
- append_paths = lappend(append_paths, index_cheapest_total);
+ try_hashjoin_path(root,
+ joinrel,
+ jointype,
+ sjinfo,
+ semifactors,
+ param_source_rels,
+ outerpath,
+ innerpath,
+ restrictlist,
+ hashclauses);
+ }
+ }
+ }
}
-
- if (!found_indexscan)
- return NULL;
-
- /* Form and return the completed Append path. */
- return (Path *) create_append_path(rel, append_paths);
}
/*
/*
* is_dummy_rel --- has relation been proven empty?
- *
- * If so, it will have a single path that is dummy.
*/
static bool
is_dummy_rel(RelOptInfo *rel)
{
- return (rel->cheapest_total_path != NULL &&
- IS_DUMMY_PATH(rel->cheapest_total_path));
+ return IS_DUMMY_REL(rel);
}
/*
/* Set up the dummy path */
add_path(rel, (Path *) create_append_path(rel, NIL));
- /* Set or update cheapest_total_path */
+ /* Set or update cheapest_total_path and related fields */
set_cheapest(rel);
MemoryContextSwitchTo(oldcontext);
orig_selec;
ListCell *i;
+ /* Skip the whole mess if no indexes */
+ if (rel->indexlist == NIL)
+ return false;
+
/*
* Find potentially interesting OR joinclauses.
*
* Use the generate_bitmap_or_paths() machinery to estimate the
* value of each OR clause. We can use regular restriction
* clauses along with the OR clause contents to generate
- * indexquals. We pass outer_rel = NULL so that sub-clauses that
- * are actually joins will be ignored.
+ * indexquals. We pass restriction_only = true so that any
+ * sub-clauses that are actually joins will be ignored.
*/
List *orpaths;
ListCell *k;
orpaths = generate_bitmap_or_paths(root, rel,
list_make1(rinfo),
rel->baserestrictinfo,
- NULL);
+ true);
/* Locate the cheapest OR path */
foreach(k, orpaths)
/*
* get_cheapest_path_for_pathkeys
* Find the cheapest path (according to the specified criterion) that
- * satisfies the given pathkeys. Return NULL if no such path.
+ * satisfies the given pathkeys and parameterization.
+ * Return NULL if no such path.
*
* 'paths' is a list of possible paths that all generate the same relation
* 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'required_outer' denotes allowable outer relations for parameterized paths
* 'cost_criterion' is STARTUP_COST or TOTAL_COST
*/
Path *
get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+ Relids required_outer,
CostSelector cost_criterion)
{
Path *matched_path = NULL;
compare_path_costs(matched_path, path, cost_criterion) <= 0)
continue;
- if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ bms_is_subset(path->required_outer, required_outer))
matched_path = path;
}
return matched_path;
/*
* get_cheapest_fractional_path_for_pathkeys
* Find the cheapest path (for retrieving a specified fraction of all
- * the tuples) that satisfies the given pathkeys.
+ * the tuples) that satisfies the given pathkeys and parameterization.
* Return NULL if no such path.
*
* See compare_fractional_path_costs() for the interpretation of the fraction
*
* 'paths' is a list of possible paths that all generate the same relation
* 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'required_outer' denotes allowable outer relations for parameterized paths
* 'fraction' is the fraction of the total tuples expected to be retrieved
*/
Path *
get_cheapest_fractional_path_for_pathkeys(List *paths,
List *pathkeys,
+ Relids required_outer,
double fraction)
{
Path *matched_path = NULL;
/*
* Since cost comparison is a lot cheaper than pathkey comparison, do
- * that first.
+ * that first. (XXX is that still true?)
*/
if (matched_path != NULL &&
compare_fractional_path_costs(matched_path, path, fraction) <= 0)
continue;
- if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ bms_is_subset(path->required_outer, required_outer))
matched_path = path;
}
return matched_path;
long numGroups;
Oid *groupOperators;
- numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
+ numGroups = (long) Min(best_path->path.rows, (double) LONG_MAX);
/*
* Get the hashable equality operators for the Agg node to use.
}
/* Adjust output size estimate (other fields should be OK already) */
- plan->plan_rows = best_path->rows;
+ plan->plan_rows = best_path->path.rows;
return plan;
}
fixed_indexorderbys = fix_indexorderby_references(root, best_path);
/*
- * If this is an innerjoin scan, the indexclauses will contain join
+ * If this is a parameterized scan, the indexclauses will contain join
* clauses that are not present in scan_clauses (since the passed-in value
* is just the rel's baserestrictinfo list). We must add these clauses to
* scan_clauses to ensure they get checked. In most cases we will remove
* Note: pointer comparison should be enough to determine RestrictInfo
* matches.
*/
- if (best_path->isjoininner)
+ if (best_path->path.required_outer)
scan_clauses = list_union_ptr(scan_clauses, best_path->indexclauses);
/*
* it'd break the comparisons to predicates above ... (or would it? Those
* wouldn't have outer refs)
*/
- if (best_path->isjoininner)
+ if (best_path->path.required_outer)
{
stripped_indexquals = (List *)
replace_nestloop_params(root, (Node *) stripped_indexquals);
best_path->indexscandir);
copy_path_costsize(&scan_plan->plan, &best_path->path);
- /* use the indexscan-specific rows estimate, not the parent rel's */
- scan_plan->plan.plan_rows = best_path->rows;
return scan_plan;
}
scan_clauses = extract_actual_clauses(scan_clauses, false);
/*
- * If this is a innerjoin scan, the indexclauses will contain join clauses
+ * If this is a parameterized scan, the indexclauses will contain join clauses
* that are not present in scan_clauses (since the passed-in value is just
* the rel's baserestrictinfo list). We must add these clauses to
* scan_clauses to ensure they get checked. In most cases we will remove
* the join clauses again below, but if a join clause contains a special
* operator, we need to make sure it gets into the scan_clauses.
*/
- if (best_path->isjoininner)
+ if (best_path->path.required_outer)
{
scan_clauses = list_concat_unique(scan_clauses, bitmapqualorig);
}
baserelid);
copy_path_costsize(&scan_plan->scan.plan, &best_path->path);
- /* use the indexscan-specific rows estimate, not the parent rel's */
- scan_plan->scan.plan.plan_rows = best_path->rows;
return scan_plan;
}
* Replace outer-relation variables with nestloop params, but only
* after doing the above comparisons to index predicates.
*/
- if (ipath->isjoininner)
+ if (ipath->path.required_outer)
{
*qual = (List *)
replace_nestloop_params(root, (Node *) *qual);
ListCell *next;
/*
- * If the inner path is a nestloop inner indexscan, it might be using some
- * of the join quals as index quals, in which case we don't have to check
- * them again at the join node. Remove any join quals that are redundant.
+ * If the inner path is parameterized, it might have already used some of
+ * the join quals, in which case we don't have to check them again at the
+ * join node. Remove any join quals that are redundant.
*/
joinrestrictclauses =
- select_nonredundant_join_clauses(root,
- joinrestrictclauses,
- best_path->innerjoinpath);
+ select_nonredundant_join_clauses(joinrestrictclauses,
+ best_path->innerjoinpath->param_clauses);
/* Sort join qual clauses into best execution order */
joinrestrictclauses = order_qual_clauses(root, joinrestrictclauses);
/*
* We assume the materialize will not spill to disk, and therefore
* charge just cpu_operator_cost per tuple. (Keep this estimate in
- * sync with cost_mergejoin.)
+ * sync with final_cost_mergejoin.)
*/
copy_plan_costsize(matplan, inner_plan);
matplan->total_cost += cpu_operator_cost * matplan->plan_rows;
{
dest->startup_cost = src->startup_cost;
dest->total_cost = src->total_cost;
- dest->plan_rows = src->parent->rows;
+ dest->plan_rows = src->rows;
dest->plan_width = src->parent->width;
}
else
sortedpath =
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
root->query_pathkeys,
+ NULL,
tuple_fraction);
/* Don't return same path in both guises; just wastes effort */
/* Estimate the cost of index scan */
indexScanPath = create_index_path(root, indexInfo,
NIL, NIL, NIL, NIL, NIL,
- ForwardScanDirection, false, NULL);
+ ForwardScanDirection, false,
+ NULL, 1.0);
return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
}
#include "utils/selfuncs.h"
+typedef enum
+{
+ COSTS_EQUAL, /* path costs are fuzzily equal */
+ COSTS_BETTER1, /* first path is cheaper than second */
+ COSTS_BETTER2, /* second path is cheaper than first */
+ COSTS_DIFFERENT /* neither path dominates the other on cost */
+} PathCostComparison;
+
+static void add_parameterized_path(RelOptInfo *parent_rel, Path *new_path);
static List *translate_sub_tlist(List *tlist, int relid);
static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
return 0;
}
-/*
- * compare_fuzzy_path_costs
- * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
- * or more expensive than path2 for the specified criterion.
- *
- * This differs from compare_path_costs in that we consider the costs the
- * same if they agree to within a "fuzz factor". This is used by add_path
- * to avoid keeping both of a pair of paths that really have insignificantly
- * different cost.
- */
-static int
-compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
-{
- /*
- * We use a fuzz factor of 1% of the smaller cost.
- *
- * XXX does this percentage need to be user-configurable?
- */
- if (criterion == STARTUP_COST)
- {
- if (path1->startup_cost > path2->startup_cost * 1.01)
- return +1;
- if (path2->startup_cost > path1->startup_cost * 1.01)
- return -1;
-
- /*
- * If paths have the same startup cost (not at all unlikely), order
- * them by total cost.
- */
- if (path1->total_cost > path2->total_cost * 1.01)
- return +1;
- if (path2->total_cost > path1->total_cost * 1.01)
- return -1;
- }
- else
- {
- if (path1->total_cost > path2->total_cost * 1.01)
- return +1;
- if (path2->total_cost > path1->total_cost * 1.01)
- return -1;
-
- /*
- * If paths have the same total cost, order them by startup cost.
- */
- if (path1->startup_cost > path2->startup_cost * 1.01)
- return +1;
- if (path2->startup_cost > path1->startup_cost * 1.01)
- return -1;
- }
- return 0;
-}
-
/*
* compare_path_fractional_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
return 0;
}
+/*
+ * compare_path_costs_fuzzily
+ * Compare the costs of two paths to see if either can be said to
+ * dominate the other.
+ *
+ * We use fuzzy comparisons so that add_path() can avoid keeping both of
+ * a pair of paths that really have insignificantly different cost.
+ * The fuzz factor is 1% of the smaller cost. (XXX does this percentage
+ * need to be user-configurable?)
+ *
+ * The two paths are said to have "equal" costs if both startup and total
+ * costs are fuzzily the same. Path1 is said to be better than path2 if
+ * it has fuzzily better startup cost and fuzzily no worse total cost,
+ * or if it has fuzzily better total cost and fuzzily no worse startup cost.
+ * Path2 is better than path1 if the reverse holds. Finally, if one path
+ * is fuzzily better than the other on startup cost and fuzzily worse on
+ * total cost, we just say that their costs are "different", since neither
+ * dominates the other across the whole performance spectrum.
+ */
+static PathCostComparison
+compare_path_costs_fuzzily(Path *path1, Path *path2)
+{
+ /*
+ * Check total cost first since it's more likely to be different; many
+ * paths have zero startup cost.
+ */
+ if (path1->total_cost > path2->total_cost * 1.01)
+ {
+ /* path1 fuzzily worse on total cost */
+ if (path2->startup_cost > path1->startup_cost * 1.01)
+ {
+ /* ... but path2 fuzzily worse on startup, so DIFFERENT */
+ return COSTS_DIFFERENT;
+ }
+ /* else path2 dominates */
+ return COSTS_BETTER2;
+ }
+ if (path2->total_cost > path1->total_cost * 1.01)
+ {
+ /* path2 fuzzily worse on total cost */
+ if (path1->startup_cost > path2->startup_cost * 1.01)
+ {
+ /* ... but path1 fuzzily worse on startup, so DIFFERENT */
+ return COSTS_DIFFERENT;
+ }
+ /* else path1 dominates */
+ return COSTS_BETTER1;
+ }
+ /* fuzzily the same on total cost */
+ if (path1->startup_cost > path2->startup_cost * 1.01)
+ {
+ /* ... but path1 fuzzily worse on startup, so path2 wins */
+ return COSTS_BETTER2;
+ }
+ if (path2->startup_cost > path1->startup_cost * 1.01)
+ {
+ /* ... but path2 fuzzily worse on startup, so path1 wins */
+ return COSTS_BETTER1;
+ }
+ /* fuzzily the same on both costs */
+ return COSTS_EQUAL;
+}
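
To see how the 1% fuzz factor plays out, here is a small self-contained illustration with made-up costs (hypothetical, not part of the patch); fuzzy_cmp() mirrors the decision structure above:

/* Hypothetical standalone illustration of the fuzzy-comparison rules. */
#include <stdio.h>

struct cost { double startup, total; };

static const char *
fuzzy_cmp(struct cost a, struct cost b)
{
    if (a.total > b.total * 1.01)
        return (b.startup > a.startup * 1.01) ? "DIFFERENT" : "BETTER2";
    if (b.total > a.total * 1.01)
        return (a.startup > b.startup * 1.01) ? "DIFFERENT" : "BETTER1";
    if (a.startup > b.startup * 1.01)
        return "BETTER2";
    if (b.startup > a.startup * 1.01)
        return "BETTER1";
    return "EQUAL";
}

int
main(void)
{
    struct cost seqscan = {0.0, 100.0};
    struct cost indexscan = {5.0, 100.5};  /* within 1% on total cost */
    struct cost sorted = {90.0, 95.0};     /* cheap total, costly startup */

    /* totals are fuzzily equal, so startup decides: prints BETTER1 */
    printf("%s\n", fuzzy_cmp(seqscan, indexscan));
    /* seqscan wins startup, sorted wins total: prints DIFFERENT */
    printf("%s\n", fuzzy_cmp(seqscan, sorted));
    return 0;
}

The second case is the one that motivates COSTS_DIFFERENT: neither path dominates, so add_path() keeps both.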
+
/*
* set_cheapest
* Find the minimum-cost paths from among a relation's paths,
* and save them in the rel's cheapest-path fields.
*
+ * Only unparameterized paths are considered candidates for cheapest_startup
+ * and cheapest_total. The cheapest_parameterized_paths list collects paths
+ * that are cheapest-total for their parameterization (i.e., there is no
+ * cheaper path with the same or weaker parameterization). This list always
+ * includes the unparameterized cheapest-total path, too.
+ *
* This is normally called only after we've finished constructing the path
* list for the rel node.
- *
- * If we find two paths of identical costs, try to keep the better-sorted one.
- * The paths might have unrelated sort orderings, in which case we can only
- * guess which might be better to keep, but if one is superior then we
- * definitely should keep it.
*/
void
set_cheapest(RelOptInfo *parent_rel)
{
- List *pathlist = parent_rel->pathlist;
- ListCell *p;
Path *cheapest_startup_path;
Path *cheapest_total_path;
+ bool have_parameterized_paths;
+ ListCell *p;
Assert(IsA(parent_rel, RelOptInfo));
- if (pathlist == NIL)
- elog(ERROR, "could not devise a query plan for the given query");
-
- cheapest_startup_path = cheapest_total_path = (Path *) linitial(pathlist);
+ cheapest_startup_path = cheapest_total_path = NULL;
+ have_parameterized_paths = false;
- for_each_cell(p, lnext(list_head(pathlist)))
+ foreach(p, parent_rel->pathlist)
{
Path *path = (Path *) lfirst(p);
int cmp;
+ /* We only consider unparameterized paths in this step */
+ if (path->required_outer)
+ {
+ have_parameterized_paths = true;
+ continue;
+ }
+
+ if (cheapest_total_path == NULL)
+ {
+ cheapest_startup_path = cheapest_total_path = path;
+ continue;
+ }
+
+ /*
+ * If we find two paths of identical costs, try to keep the
+ * better-sorted one. The paths might have unrelated sort orderings,
+ * in which case we can only guess which might be better to keep, but
+ * if one is superior then we definitely should keep that one.
+ */
cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
if (cmp > 0 ||
(cmp == 0 &&
cheapest_total_path = path;
}
+ if (cheapest_total_path == NULL)
+ elog(ERROR, "could not devise a query plan for the given query");
+
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
+
+ /* Seed the parameterized-paths list with the cheapest total */
+ parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path);
+
+ /* And, if there are any parameterized paths, add them in one at a time */
+ if (have_parameterized_paths)
+ {
+ foreach(p, parent_rel->pathlist)
+ {
+ Path *path = (Path *) lfirst(p);
+
+ if (path->required_outer)
+ add_parameterized_path(parent_rel, path);
+ }
+ }
}
/*
* Consider a potential implementation path for the specified parent rel,
* and add it to the rel's pathlist if it is worthy of consideration.
* A path is worthy if it has either a better sort order (better pathkeys)
- * or cheaper cost (on either dimension) than any of the existing old paths.
+ * or cheaper cost (on either dimension) than any of the existing old paths
+ * that have the same or superset required_outer rels.
*
* We also remove from the rel's pathlist any old paths that are dominated
- * by new_path --- that is, new_path is both cheaper and at least as well
- * ordered.
+ * by new_path --- that is, new_path is cheaper, at least as well ordered,
+ * and requires no outer rels not required by old path.
+ *
+ * There is one policy decision embedded in this function, along with its
+ * sibling add_path_precheck: we treat all parameterized paths as having
+ * NIL pathkeys, so that they compete only on cost. This is to reduce
+ * the number of parameterized paths that are kept. See discussion in
+ * src/backend/optimizer/README.
*
- * The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
- * at the front. No code depends on that for correctness; it's simply
- * a speed hack within this routine. Doing it that way makes it more
- * likely that we will reject an inferior path after a few comparisons,
- * rather than many comparisons.
+ * The pathlist is kept sorted by total_cost, with cheaper paths
+ * at the front. Within this routine, that's simply a speed hack:
+ * doing it that way makes it more likely that we will reject an inferior
+ * path after a few comparisons, rather than many comparisons.
+ * However, add_path_precheck relies on this ordering to exit early
+ * when possible.
*
* NOTE: discarded Path objects are immediately pfree'd to reduce planner
* memory consumption. We dare not try to free the substructure of a Path,
{
bool accept_new = true; /* unless we find a superior old path */
ListCell *insert_after = NULL; /* where to insert new item */
+ List *new_path_pathkeys;
ListCell *p1;
ListCell *p1_prev;
ListCell *p1_next;
*/
CHECK_FOR_INTERRUPTS();
+ /* Pretend parameterized paths have no pathkeys, per comment above */
+ new_path_pathkeys = new_path->required_outer ? NIL : new_path->pathkeys;
+
/*
* Loop to check proposed new path against old paths. Note it is possible
* for more than one old path to be tossed out because new_path dominates
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
- int costcmp;
+ PathCostComparison costcmp;
+ PathKeysComparison keyscmp;
+ BMS_Comparison outercmp;
p1_next = lnext(p1);
- /*
- * As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
- * cycles keeping paths that are really not significantly different in
- * cost.
- */
- costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
+ costcmp = compare_path_costs_fuzzily(new_path, old_path);
/*
* If the two paths compare differently for startup and total cost,
- * then we want to keep both, and we can skip the (much slower)
- * comparison of pathkeys. If they compare the same, proceed with the
- * pathkeys comparison. Note: this test relies on the fact that
- * compare_fuzzy_path_costs will only return 0 if both costs are
- * effectively equal (and, therefore, there's no need to call it twice
- * in that case).
+ * then we want to keep both, and we can skip comparing pathkeys and
+ * required_outer rels. If they compare the same, proceed with the
+ * other comparisons. (We make the tests in this order because the
+ * cost comparison is most likely to turn out "different", and the
+ * pathkeys comparison next most likely.)
*/
- if (costcmp == 0 ||
- costcmp == compare_fuzzy_path_costs(new_path, old_path,
- STARTUP_COST))
+ if (costcmp != COSTS_DIFFERENT)
{
- switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
+ /* Similarly check to see if either dominates on pathkeys */
+ List *old_path_pathkeys;
+
+ old_path_pathkeys = old_path->required_outer ? NIL : old_path->pathkeys;
+ keyscmp = compare_pathkeys(new_path_pathkeys,
+ old_path_pathkeys);
+ if (keyscmp != PATHKEYS_DIFFERENT)
{
- case PATHKEYS_EQUAL:
- if (costcmp < 0)
- remove_old = true; /* new dominates old */
- else if (costcmp > 0)
- accept_new = false; /* old dominates new */
- else
- {
+ switch (costcmp)
+ {
+ case COSTS_EQUAL:
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (keyscmp == PATHKEYS_BETTER1)
+ {
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ }
+ else if (keyscmp == PATHKEYS_BETTER2)
+ {
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ }
+ else /* keyscmp == PATHKEYS_EQUAL */
+ {
+ if (outercmp == BMS_EQUAL)
+ {
+ /*
+ * Same pathkeys and outer rels, and fuzzily
+ * the same cost, so keep just one --- but
+ * we'll do an exact cost comparison to decide
+ * which.
+ */
+ if (compare_path_costs(new_path, old_path,
+ TOTAL_COST) < 0)
+ remove_old = true; /* new dominates old */
+ else
+ accept_new = false; /* old equals or dominates new */
+ }
+ else if (outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ else if (outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ /* else different parameterizations, keep both */
+ }
+ break;
+ case COSTS_BETTER1:
+ if (keyscmp != PATHKEYS_BETTER2)
+ {
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ }
+ break;
+ case COSTS_BETTER2:
+ if (keyscmp != PATHKEYS_BETTER1)
+ {
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ }
+ break;
+ case COSTS_DIFFERENT:
/*
- * Same pathkeys, and fuzzily the same cost, so keep
- * just one --- but we'll do an exact cost comparison
- * to decide which.
+ * can't get here, but keep this case to keep compiler
+ * quiet
*/
- if (compare_path_costs(new_path, old_path,
- TOTAL_COST) < 0)
- remove_old = true; /* new dominates old */
- else
- accept_new = false; /* old equals or dominates new */
- }
- break;
- case PATHKEYS_BETTER1:
- if (costcmp <= 0)
- remove_old = true; /* new dominates old */
- break;
- case PATHKEYS_BETTER2:
- if (costcmp >= 0)
- accept_new = false; /* old dominates new */
- break;
- case PATHKEYS_DIFFERENT:
- /* keep both paths, since they have different ordering */
- break;
+ break;
+ }
}
}
else
{
/* new belongs after this old path if it has cost >= old's */
- if (costcmp >= 0)
+ if (new_path->total_cost >= old_path->total_cost)
insert_after = p1;
/* p1_prev advances */
p1_prev = p1;
}
}
+/*
+ * add_path_precheck
+ * Check whether a proposed new path could possibly get accepted.
+ * We assume we know the path's pathkeys and parameterization accurately,
+ * and have lower bounds for its costs.
+ *
+ * At the time this is called, we haven't actually built a Path structure,
+ * so the required information has to be passed piecemeal.
+ */
+bool
+add_path_precheck(RelOptInfo *parent_rel,
+ Cost startup_cost, Cost total_cost,
+ List *pathkeys, Relids required_outer)
+{
+ List *new_path_pathkeys;
+ ListCell *p1;
+
+ /* Pretend parameterized paths have no pathkeys, per add_path's policy comment */
+ new_path_pathkeys = required_outer ? NIL : pathkeys;
+
+ foreach(p1, parent_rel->pathlist)
+ {
+ Path *old_path = (Path *) lfirst(p1);
+ PathKeysComparison keyscmp;
+ BMS_Comparison outercmp;
+
+ /*
+ * We are looking for an old_path that dominates the new path across
+ * all four metrics (startup cost, total cost, pathkeys, and
+ * parameterization). If we find one, we can reject the new path.
+ *
+ * For speed, we make exact rather than fuzzy cost comparisons.
+ * If an old path dominates the new path exactly on both costs, it
+ * will surely do so fuzzily.
+ */
+ if (total_cost >= old_path->total_cost)
+ {
+ if (startup_cost >= old_path->startup_cost)
+ {
+ List *old_path_pathkeys;
+
+ old_path_pathkeys = old_path->required_outer ? NIL : old_path->pathkeys;
+ keyscmp = compare_pathkeys(new_path_pathkeys,
+ old_path_pathkeys);
+ if (keyscmp == PATHKEYS_EQUAL ||
+ keyscmp == PATHKEYS_BETTER2)
+ {
+ outercmp = bms_subset_compare(required_outer,
+ old_path->required_outer);
+ if (outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2)
+ return false;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Since the pathlist is sorted by total_cost, we can stop
+ * looking once we reach a path with a total_cost larger
+ * than the new path's.
+ */
+ break;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * add_parameterized_path
+ * Consider a parameterized implementation path for the specified rel,
+ * and add it to the rel's cheapest_parameterized_paths list if it
+ * belongs there, removing any old entries that it dominates.
+ *
+ * This is essentially a cut-down form of add_path(): we do not care about
+ * startup cost or sort ordering, only total cost and parameterization.
+ * Also, we must not recycle rejected paths, since they will still be
+ * present in the rel's pathlist.
+ *
+ * 'parent_rel' is the relation entry to which the path corresponds.
+ * 'new_path' is a parameterized path for parent_rel.
+ *
+ * Returns nothing, but modifies parent_rel->cheapest_parameterized_paths.
+ */
+static void
+add_parameterized_path(RelOptInfo *parent_rel, Path *new_path)
+{
+ bool accept_new = true; /* unless we find a superior old path */
+ ListCell *insert_after = NULL; /* where to insert new item */
+ ListCell *p1;
+ ListCell *p1_prev;
+ ListCell *p1_next;
+
+ /*
+ * Loop to check proposed new path against old paths. Note it is possible
+ * for more than one old path to be tossed out because new_path dominates
+ * it.
+ *
+ * We can't use foreach here because the loop body may delete the current
+ * list cell.
+ */
+ p1_prev = NULL;
+ for (p1 = list_head(parent_rel->cheapest_parameterized_paths);
+ p1 != NULL; p1 = p1_next)
+ {
+ Path *old_path = (Path *) lfirst(p1);
+ bool remove_old = false; /* unless new proves superior */
+ int costcmp;
+ BMS_Comparison outercmp;
+
+ p1_next = lnext(p1);
+
+ costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
+ outercmp = bms_subset_compare(new_path->required_outer,
+ old_path->required_outer);
+ if (outercmp != BMS_DIFFERENT)
+ {
+ if (costcmp < 0)
+ {
+ if (outercmp != BMS_SUBSET2)
+ remove_old = true; /* new dominates old */
+ }
+ else if (costcmp > 0)
+ {
+ if (outercmp != BMS_SUBSET1)
+ accept_new = false; /* old dominates new */
+ }
+ else if (outercmp == BMS_SUBSET1)
+ remove_old = true; /* new dominates old */
+ else if (outercmp == BMS_SUBSET2)
+ accept_new = false; /* old dominates new */
+ else
+ {
+ /* Same cost and outer rels, arbitrarily keep the old */
+ accept_new = false; /* old equals or dominates new */
+ }
+ }
+
+ /*
+ * Remove current element from cheapest_parameterized_paths if
+ * dominated by new.
+ */
+ if (remove_old)
+ {
+ parent_rel->cheapest_parameterized_paths =
+ list_delete_cell(parent_rel->cheapest_parameterized_paths,
+ p1, p1_prev);
+ /* p1_prev does not advance */
+ }
+ else
+ {
+ /* new belongs after this old path if it has cost >= old's */
+ if (costcmp >= 0)
+ insert_after = p1;
+ /* p1_prev advances */
+ p1_prev = p1;
+ }
+
+ /*
+ * If we found an old path that dominates new_path, we can quit
+ * scanning the list; we will not add new_path, and we assume
+ * new_path cannot dominate any other elements of the list.
+ */
+ if (!accept_new)
+ break;
+ }
+
+ if (accept_new)
+ {
+ /* Accept the new path: insert it at proper place in list */
+ if (insert_after)
+ lappend_cell(parent_rel->cheapest_parameterized_paths,
+ insert_after, new_path);
+ else
+ parent_rel->cheapest_parameterized_paths =
+ lcons(new_path, parent_rel->cheapest_parameterized_paths);
+ }
+}
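
The dominance rule above can be illustrated with a deliberately simplified standalone sketch (hypothetical paths; bitmasks stand in for Relids, and exact cost ties are glossed over, which the real code resolves by keeping the old path):

/* Hypothetical illustration: keep a path only if no rival is both
 * no-more-parameterized and no more expensive. */
#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned params; double total_cost; } MiniPath;

static bool
dominated(MiniPath a, MiniPath b)
{
    /* b's parameterization is a subset (or equal) of a's ... */
    bool b_subset = (b.params & ~a.params) == 0;

    /* ... and b costs no more, so a is dominated */
    return b_subset && b.total_cost <= a.total_cost;
}

int
main(void)
{
    MiniPath p1 = {0x1, 10.0};  /* requires rel A */
    MiniPath p2 = {0x3, 8.0};   /* requires rels A and B, but cheaper */
    MiniPath p3 = {0x1, 12.0};  /* requires rel A, more expensive */

    printf("p2 dominated by p1? %d\n", dominated(p2, p1)); /* 0: keep both */
    printf("p3 dominated by p1? %d\n", dominated(p3, p1)); /* 1: discard p3 */
    return 0;
}

So a more heavily parameterized path survives only by being strictly cheaper, which is exactly why cheapest_parameterized_paths stays short.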
+
/*****************************************************************************
* PATH NODE CREATION ROUTINES
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* seqscan has unordered result */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_seqscan(pathnode, root, rel);
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
* 'indexonly' is true if an index-only scan is wanted.
- * 'outer_rel' is the outer relation if this is a join inner indexscan path.
- * (pathkeys and indexscandir are ignored if so.) NULL if not.
+ * 'required_outer' is the set of outer relids referenced in indexclauses.
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior.
*
* Returns the new path node.
*/
List *pathkeys,
ScanDirection indexscandir,
bool indexonly,
- RelOptInfo *outer_rel)
+ Relids required_outer,
+ double loop_count)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
List *indexquals,
*indexqualcols;
- /*
- * For a join inner scan, there's no point in marking the path with any
- * pathkeys, since it will only ever be used as the inner path of a
- * nestloop, and so its ordering does not matter. For the same reason we
- * don't really care what order it's scanned in. (We could expect the
- * caller to supply the correct values, but it's easier to force it here.)
- */
- if (outer_rel != NULL)
- {
- pathkeys = NIL;
- indexscandir = NoMovementScanDirection;
- }
-
pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
+ pathnode->path.required_outer = required_outer;
+ if (required_outer)
+ {
+ /* Identify index clauses that are join clauses */
+ List *jclauses = NIL;
+ ListCell *lc;
+
+ foreach(lc, indexclauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (!bms_is_subset(rinfo->clause_relids, rel->relids))
+ jclauses = lappend(jclauses, rinfo);
+ }
+ pathnode->path.param_clauses = jclauses;
+ }
+ else
+ pathnode->path.param_clauses = NIL;
/* Convert clauses to indexquals the executor can handle */
expand_indexqual_conditions(index, indexclauses, indexclausecols,
pathnode->indexqualcols = indexqualcols;
pathnode->indexorderbys = indexorderbys;
pathnode->indexorderbycols = indexorderbycols;
-
- pathnode->isjoininner = (outer_rel != NULL);
pathnode->indexscandir = indexscandir;
- if (outer_rel != NULL)
- {
- /*
- * We must compute the estimated number of output rows for the
- * indexscan. This is less than rel->rows because of the additional
- * selectivity of the join clauses. Since indexclauses may contain
- * both restriction and join clauses, we have to do a set union to get
- * the full set of clauses that must be considered to compute the
- * correct selectivity. (Without the union operation, we might have
- * some restriction clauses appearing twice, which'd mislead
- * clauselist_selectivity into double-counting their selectivity.
- * However, since RestrictInfo nodes aren't copied when linking them
- * into different lists, it should be sufficient to use pointer
- * comparison to remove duplicates.)
- *
- * Note that we force the clauses to be treated as non-join clauses
- * during selectivity estimation.
- */
- List *allclauses;
-
- allclauses = list_union_ptr(rel->baserestrictinfo, indexclauses);
- pathnode->rows = rel->tuples *
- clauselist_selectivity(root,
- allclauses,
- rel->relid, /* do not use 0! */
- JOIN_INNER,
- NULL);
- /* Like costsize.c, force estimate to be at least one row */
- pathnode->rows = clamp_row_est(pathnode->rows);
- }
- else
- {
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a join inner indexscan.
- */
- pathnode->rows = rel->rows;
- }
-
- cost_index(pathnode, root, outer_rel);
+ cost_index(pathnode, root, loop_count);
return pathnode;
}
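As a rough illustration of the loop_count contract (the returned costs are still per-scan, but caching is amortized across repetitions), here is a sketch along the lines of what genericcostestimate() does with it. The function name and the cost formula are simplified assumptions, not the patch's exact code:

/*
 * Illustrative sketch, not part of the patch.  index_pages_fetched()
 * applies the Mackert-Lohman formula, so fetching tuples_per_scan
 * tuples in each of loop_count scans touches fewer new pages per scan
 * than loop_count independent scans would.  The total is then averaged
 * back to a single scan, per the API contract.  (The real code also
 * charges CPU costs and uses per-tablespace page costs.)
 */
#include "postgres.h"
#include "optimizer/cost.h"

static Cost
per_scan_page_cost_sketch(PlannerInfo *root, double tuples_per_scan,
                          BlockNumber pages, double index_pages,
                          double loop_count)
{
    double      pages_fetched;

    /* total page fetches over all repetitions, with cache effects */
    pages_fetched = index_pages_fetched(tuples_per_scan * loop_count,
                                        pages, index_pages, root);

    /* report the average cost of just one scan */
    return (pages_fetched * random_page_cost) / loop_count;
}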
* Creates a path node for a bitmap scan.
*
* 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior.
*
- * If this is a join inner indexscan path, 'outer_rel' is the outer relation,
- * and all the component IndexPaths should have been costed accordingly.
+ * loop_count should match the value used when creating the component
+ * IndexPaths.
*/
BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
- RelOptInfo *outer_rel)
+ double loop_count)
{
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
pathnode->path.pathtype = T_BitmapHeapScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.required_outer = bitmapqual->required_outer;
+ pathnode->path.param_clauses = bitmapqual->param_clauses;
pathnode->bitmapqual = bitmapqual;
- pathnode->isjoininner = (outer_rel != NULL);
-
- if (pathnode->isjoininner)
- {
- /*
- * We must compute the estimated number of output rows for the
- * indexscan. This is less than rel->rows because of the additional
- * selectivity of the join clauses. We make use of the selectivity
- * estimated for the bitmap to do this; this isn't really quite right
- * since there may be restriction conditions not included in the
- * bitmap ...
- */
- Cost indexTotalCost;
- Selectivity indexSelectivity;
-
- cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
- pathnode->rows = rel->tuples * indexSelectivity;
- if (pathnode->rows > rel->rows)
- pathnode->rows = rel->rows;
- /* Like costsize.c, force estimate to be at least one row */
- pathnode->rows = clamp_row_est(pathnode->rows);
- }
- else
- {
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a join inner indexscan.
- */
- pathnode->rows = rel->rows;
- }
- cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
+ cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, loop_count);
return pathnode;
}
List *bitmapquals)
{
BitmapAndPath *pathnode = makeNode(BitmapAndPath);
+ ListCell *lc;
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->bitmapquals = bitmapquals;
+ /* required_outer and param_clauses are the union of the inputs' values */
+ foreach(lc, bitmapquals)
+ {
+ Path *bpath = (Path *) lfirst(lc);
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ bpath->required_outer);
+ pathnode->path.param_clauses =
+ list_concat(pathnode->path.param_clauses,
+ list_copy(bpath->param_clauses));
+ }
+
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_and_node(pathnode, root);
List *bitmapquals)
{
BitmapOrPath *pathnode = makeNode(BitmapOrPath);
+ ListCell *lc;
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->bitmapquals = bitmapquals;
+ /* required_outer and param_clauses are the union of the inputs' values */
+ foreach(lc, bitmapquals)
+ {
+ Path *bpath = (Path *) lfirst(lc);
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ bpath->required_outer);
+ pathnode->path.param_clauses =
+ list_concat(pathnode->path.param_clauses,
+ list_copy(bpath->param_clauses));
+ }
+
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_or_node(pathnode, root);
pathnode->path.pathtype = T_TidScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL;
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->tidquals = tidquals;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
+ pathnode->path.required_outer = NULL; /* updated below */
+ pathnode->path.param_clauses = NIL; /* XXX see below */
pathnode->subpaths = subpaths;
/*
- * Compute cost as sum of subplan costs. We charge nothing extra for the
- * Append itself, which perhaps is too optimistic, but since it doesn't do
- * any selection or projection, it is a pretty cheap node.
+ * We don't bother to invent a cost_append(); we just compute the costs here.
+ *
+ * Compute rows and costs as sums of subplan rows and costs. We charge
+ * nothing extra for the Append itself, which perhaps is too optimistic,
+ * but since it doesn't do any selection or projection, it is a pretty
+ * cheap node. If you change this, see also make_append().
*
- * If you change this, see also make_append().
+ * We also compute the correct required_outer set, namely the union of
+ * the input paths' requirements.
+ *
+ * XXX We should also compute a proper param_clauses list, but that
+ * will require identifying which joinclauses are enforced by all the
+ * subplans, as well as locating the original parent RestrictInfo from
+ * which they were generated. For the moment we punt and leave the list
+ * as NIL. This will result in uselessly rechecking such joinclauses
+ * at the parameter-supplying nestloop join, which is slightly annoying,
+ * as well as overestimating the sizes of any intermediate joins, which
+ * is significantly more annoying.
*/
+ pathnode->path.rows = 0;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ pathnode->path.rows += subpath->rows;
+
if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ subpath->required_outer);
}
return pathnode;
pathnode->path.pathtype = T_MergeAppend;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
+ pathnode->path.required_outer = NULL; /* updated below */
+ pathnode->path.param_clauses = NIL; /* XXX see below */
pathnode->subpaths = subpaths;
/*
}
}
- /* Add up all the costs of the input paths */
+ /*
+ * Add up the sizes and costs of the input paths, and also compute the
+ * real required_outer value.
+ *
+ * XXX as in create_append_path(), we should compute param_clauses but
+ * it will require more work.
+ */
+ pathnode->path.rows = 0;
input_startup_cost = 0;
input_total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ pathnode->path.rows += subpath->rows;
+
if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
{
/* Subpath is adequately ordered, we won't need to sort it */
input_startup_cost += sort_path.startup_cost;
input_total_cost += sort_path.total_cost;
}
+
+ pathnode->path.required_outer =
+ bms_add_members(pathnode->path.required_outer,
+ subpath->required_outer);
}
/* Now we can compute total costs of the MergeAppend */
pathnode->path.pathtype = T_Result;
pathnode->path.parent = NULL;
pathnode->path.pathkeys = NIL;
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
pathnode->quals = quals;
- /* Ideally should define cost_result(), but I'm too lazy */
+ /* Hardly worth defining a cost_result() function ... just do it */
+ pathnode->path.rows = 1;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = cpu_tuple_cost;
pathnode->path.pathtype = T_Material;
pathnode->path.parent = rel;
-
pathnode->path.pathkeys = subpath->pathkeys;
+ pathnode->path.required_outer = subpath->required_outer;
+ pathnode->path.param_clauses = subpath->param_clauses;
pathnode->subpath = subpath;
cost_material(&pathnode->path,
subpath->startup_cost,
subpath->total_cost,
- rel->rows,
+ subpath->rows,
rel->width);
return pathnode;
/*
* We must ensure path struct and subsidiary data are allocated in main
* planning context; otherwise GEQO memory management causes trouble.
- * (Compare best_inner_indexscan().)
*/
oldcontext = MemoryContextSwitchTo(root->planner_cxt);
* to represent it. (This might get overridden below.)
*/
pathnode->path.pathkeys = NIL;
+ pathnode->path.required_outer = subpath->required_outer;
+ pathnode->path.param_clauses = subpath->param_clauses;
pathnode->subpath = subpath;
pathnode->in_operators = in_operators;
uniq_exprs, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
- pathnode->rows = rel->rows;
+ pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
sub_tlist_colnos, in_operators))
{
pathnode->umethod = UNIQUE_PATH_NOOP;
- pathnode->rows = rel->rows;
+ pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.pathkeys = subpath->pathkeys;
}
/* Estimate number of output rows */
- pathnode->rows = estimate_num_groups(root, uniq_exprs, rel->rows);
+ pathnode->path.rows = estimate_num_groups(root, uniq_exprs, rel->rows);
numCols = list_length(uniq_exprs);
if (all_btree)
*/
int hashentrysize = rel->width + 64;
- if (hashentrysize * pathnode->rows > work_mem * 1024L)
+ if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
all_hash = false; /* don't try to hash */
else
cost_agg(&agg_path, root,
AGG_HASHED, NULL,
- numCols, pathnode->rows,
+ numCols, pathnode->path.rows,
subpath->startup_cost,
subpath->total_cost,
rel->rows);
pathnode->pathtype = T_SubqueryScan;
pathnode->parent = rel;
pathnode->pathkeys = pathkeys;
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_subqueryscan(pathnode, rel);
pathnode->pathtype = T_FunctionScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* for now, assume unordered result */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_functionscan(pathnode, root, rel);
pathnode->pathtype = T_ValuesScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_valuesscan(pathnode, root, rel);
pathnode->pathtype = T_CteScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
cost_ctescan(pathnode, root, rel);
pathnode->pathtype = T_WorkTableScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
+ pathnode->required_outer = NULL;
+ pathnode->param_clauses = NIL;
/* Cost is the same as for a regular CTE scan */
cost_ctescan(pathnode, root, rel);
pathnode->path.pathtype = T_ForeignScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* result is always unordered */
+ pathnode->path.required_outer = NULL;
+ pathnode->path.param_clauses = NIL;
/* Get FDW's callback info */
rte = planner_rt_fetch(rel->relid, root);
pathnode->fdwplan = fdwplan;
/* use costs estimated by FDW */
+ pathnode->path.rows = rel->rows;
pathnode->path.startup_cost = fdwplan->startup_cost;
pathnode->path.total_cost = fdwplan->total_cost;
return pathnode;
}
+/*
+ * calc_nestloop_required_outer
+ * Compute the required_outer set for a nestloop join path
+ *
+ * Note: result must not share storage with either input
+ */
+Relids
+calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
+{
+ Relids required_outer;
+
+ /* inner_path can require rels from outer path, but not vice versa */
+ Assert(!bms_overlap(outer_path->required_outer,
+ inner_path->parent->relids));
+ /* easy case if inner path is not parameterized */
+ if (!inner_path->required_outer)
+ return bms_copy(outer_path->required_outer);
+ /* else, form the union ... */
+ required_outer = bms_union(outer_path->required_outer,
+ inner_path->required_outer);
+ /* ... and remove any mention of now-satisfied outer rels */
+ required_outer = bms_del_members(required_outer,
+ outer_path->parent->relids);
+ /* maintain invariant that required_outer is exactly NULL if empty */
+ if (bms_is_empty(required_outer))
+ {
+ bms_free(required_outer);
+ required_outer = NULL;
+ }
+ return required_outer;
+}
+
+/*
+ * calc_non_nestloop_required_outer
+ * Compute the required_outer set for a merge or hash join path
+ *
+ * Note: result must not share storage with either input
+ */
+Relids
+calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
+{
+ Relids required_outer;
+
+ /* neither path can require rels from the other */
+ Assert(!bms_overlap(outer_path->required_outer,
+ inner_path->parent->relids));
+ Assert(!bms_overlap(inner_path->required_outer,
+ outer_path->parent->relids));
+ /* form the union ... */
+ required_outer = bms_union(outer_path->required_outer,
+ inner_path->required_outer);
+ /* we do not need an explicit test for empty; bms_union gets it right */
+ return required_outer;
+}
+
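A worked example of the nestloop set algebra above, as a sketch with hypothetical relids (not part of the patch):

/*
 * Illustrative sketch, not part of the patch.  Suppose the outer path
 * scans rel {1} and is itself parameterized by rel {3}, while the inner
 * path scans rel {2} and is parameterized by rels {1,3}.  The join
 * supplies rel 1 itself, so only rel 3 remains required from outside,
 * matching what calc_nestloop_required_outer computes.
 */
#include "postgres.h"
#include "nodes/relation.h"

static void
nestloop_required_outer_demo(void)
{
    Relids      outer_parent = bms_make_singleton(1);
    Relids      outer_req = bms_make_singleton(3);
    Relids      inner_req = bms_add_member(bms_make_singleton(3), 1);
    Relids      required_outer;

    required_outer = bms_union(outer_req, inner_req);                /* {1,3} */
    required_outer = bms_del_members(required_outer, outer_parent);  /* {3} */

    Assert(bms_equal(required_outer, bms_make_singleton(3)));
}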
/*
* create_nestloop_path
* Creates a pathnode corresponding to a nestloop join between two
*
* 'joinrel' is the join relation.
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_nestloop
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
+ * 'required_outer' is the set of required outer rels
*
* Returns the resulting path node.
*/
create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
- List *pathkeys)
+ List *pathkeys,
+ Relids required_outer)
{
NestPath *pathnode = makeNode(NestPath);
pathnode->path.pathtype = T_NestLoop;
pathnode->path.parent = joinrel;
+ pathnode->path.pathkeys = pathkeys;
+ pathnode->path.required_outer = required_outer;
+ if (pathnode->path.required_outer)
+ {
+ /* Identify parameter clauses not yet applied here */
+ List *jclauses;
+ ListCell *lc;
+
+ /* LHS clauses could not be satisfied here */
+ jclauses = list_copy(outer_path->param_clauses);
+ foreach(lc, inner_path->param_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (!bms_is_subset(rinfo->clause_relids, joinrel->relids))
+ jclauses = lappend(jclauses, rinfo);
+ }
+ pathnode->path.param_clauses = jclauses;
+ }
+ else
+ pathnode->path.param_clauses = NIL;
pathnode->jointype = jointype;
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
- pathnode->path.pathkeys = pathkeys;
- cost_nestloop(pathnode, root, sjinfo);
+ final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_mergejoin
* 'sjinfo' is extra info about the join for selectivity estimation
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
+ * 'required_outer' is the set of required outer rels
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
* (this should be a subset of the restrict_clauses list)
* 'outersortkeys' are the sort varkeys for the outer relation
create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
+ Relids required_outer,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys)
{
MergePath *pathnode = makeNode(MergePath);
- /*
- * If the given paths are already well enough ordered, we can skip doing
- * an explicit sort.
- */
- if (outersortkeys &&
- pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
- outersortkeys = NIL;
- if (innersortkeys &&
- pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
- innersortkeys = NIL;
-
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
+ pathnode->jpath.path.pathkeys = pathkeys;
+ pathnode->jpath.path.required_outer = required_outer;
+ pathnode->jpath.path.param_clauses =
+ list_concat(list_copy(outer_path->param_clauses),
+ inner_path->param_clauses);
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
- pathnode->jpath.path.pathkeys = pathkeys;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
- /* pathnode->materialize_inner will be set by cost_mergejoin */
- cost_mergejoin(pathnode, root, sjinfo);
+ /* pathnode->materialize_inner will be set by final_cost_mergejoin */
+ final_cost_mergejoin(root, pathnode, workspace, sjinfo);
return pathnode;
}
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_hashjoin
* 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
+ * 'required_outer' is the set of required outer rels
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
*/
create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
+ Relids required_outer,
List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
- pathnode->jpath.jointype = jointype;
- pathnode->jpath.outerjoinpath = outer_path;
- pathnode->jpath.innerjoinpath = inner_path;
- pathnode->jpath.joinrestrictinfo = restrict_clauses;
/*
* A hashjoin never has pathkeys, since its output ordering is
* outer rel than it does now.)
*/
pathnode->jpath.path.pathkeys = NIL;
+ pathnode->jpath.path.required_outer = required_outer;
+ pathnode->jpath.path.param_clauses =
+ list_concat(list_copy(outer_path->param_clauses),
+ inner_path->param_clauses);
+ pathnode->jpath.jointype = jointype;
+ pathnode->jpath.outerjoinpath = outer_path;
+ pathnode->jpath.innerjoinpath = inner_path;
+ pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->path_hashclauses = hashclauses;
- /* cost_hashjoin will fill in pathnode->num_batches */
- cost_hashjoin(pathnode, root, sjinfo);
+ /* final_cost_hashjoin will fill in pathnode->num_batches */
+ final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}
rel->cheapest_startup_path = NULL;
rel->cheapest_total_path = NULL;
rel->cheapest_unique_path = NULL;
+ rel->cheapest_parameterized_paths = NIL;
rel->relid = relid;
rel->rtekind = rte->rtekind;
/* min_attr, max_attr, attr_needed, attr_widths are set below */
rel->baserestrictcost.per_tuple = 0;
rel->joininfo = NIL;
rel->has_eclass_joins = false;
- rel->index_outer_relids = NULL;
- rel->index_inner_paths = NIL;
/* Check type of rtable entry */
switch (rte->rtekind)
joinrel->cheapest_startup_path = NULL;
joinrel->cheapest_total_path = NULL;
joinrel->cheapest_unique_path = NULL;
+ joinrel->cheapest_parameterized_paths = NIL;
joinrel->relid = 0; /* indicates not a baserel */
joinrel->rtekind = RTE_JOIN;
joinrel->min_attr = 0;
joinrel->baserestrictcost.per_tuple = 0;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
- joinrel->index_outer_relids = NULL;
- joinrel->index_inner_paths = NIL;
/*
* Create a new tlist containing just the vars that need to be output from
bool pseudoconstant,
Relids required_relids,
Relids nullable_relids);
-static List *select_nonredundant_join_list(List *restrictinfo_list,
- List *reference_list);
static bool join_clause_is_redundant(RestrictInfo *rinfo,
List *reference_list);
/*
* select_nonredundant_join_clauses
+ * Select the members of restrictinfo_list that are not redundant with
+ * any member of reference_list.
*
* Given a list of RestrictInfo clauses that are to be applied in a join,
- * select the ones that are not redundant with any clause that's enforced
- * by the inner_path. This is used for nestloop joins, wherein any clause
- * being used in an inner indexscan need not be checked again at the join.
+ * select the ones that are not redundant with any clause that's listed in
+ * reference_list. This is used, for example, to avoid checking joinclauses
+ * again at a nestloop join when they've already been enforced by a
+ * parameterized inner path.
*
* "Redundant" means either equal() or derived from the same EquivalenceClass.
* We have to check the latter because indxpath.c may select different derived
* restrictinfo_list; that should have been handled elsewhere.
*/
List *
-select_nonredundant_join_clauses(PlannerInfo *root,
- List *restrictinfo_list,
- Path *inner_path)
-{
- if (IsA(inner_path, IndexPath))
- {
- /*
- * Check the index quals to see if any of them are join clauses.
- *
- * We can skip this if the index path is an ordinary indexpath and not
- * a special innerjoin path, since it then wouldn't be using any join
- * clauses.
- */
- IndexPath *innerpath = (IndexPath *) inner_path;
-
- if (innerpath->isjoininner)
- restrictinfo_list =
- select_nonredundant_join_list(restrictinfo_list,
- innerpath->indexclauses);
- }
- else if (IsA(inner_path, BitmapHeapPath))
- {
- /*
- * Same deal for bitmapped index scans.
- *
- * Note: both here and above, we ignore any implicit index
- * restrictions associated with the use of partial indexes. This is
- * OK because we're only trying to prove we can dispense with some
- * join quals; failing to prove that doesn't result in an incorrect
- * plan. It's quite unlikely that a join qual could be proven
- * redundant by an index predicate anyway. (Also, if we did manage to
- * prove it, we'd have to have a special case for update targets; see
- * notes about EvalPlanQual testing in create_indexscan_plan().)
- */
- BitmapHeapPath *innerpath = (BitmapHeapPath *) inner_path;
-
- if (innerpath->isjoininner)
- {
- List *bitmapclauses;
-
- bitmapclauses =
- make_restrictinfo_from_bitmapqual(innerpath->bitmapqual,
- true,
- false);
- restrictinfo_list =
- select_nonredundant_join_list(restrictinfo_list,
- bitmapclauses);
- }
- }
-
- /*
- * XXX the inner path of a nestloop could also be an append relation whose
- * elements use join quals. However, they might each use different quals;
- * we could only remove join quals that are enforced by all the appendrel
- * members. For the moment we don't bother to try.
- */
-
- return restrictinfo_list;
-}
-
-/*
- * select_nonredundant_join_list
- * Select the members of restrictinfo_list that are not redundant with
- * any member of reference_list. See above for more info.
- */
-static List *
-select_nonredundant_join_list(List *restrictinfo_list,
- List *reference_list)
+select_nonredundant_join_clauses(List *restrictinfo_list,
+ List *reference_list)
{
List *result = NIL;
ListCell *item;
+ /* Quick out if nothing could be removed */
+ if (reference_list == NIL)
+ return restrictinfo_list;
+
foreach(item, restrictinfo_list)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(item);
static void
genericcostestimate(PlannerInfo *root,
IndexPath *path,
- RelOptInfo *outer_rel,
+ double loop_count,
double numIndexTuples,
Cost *indexStartupCost,
Cost *indexTotalCost,
* Note that we are counting pages not tuples anymore, so we take N = T =
* index size, as if there were one "tuple" per page.
*/
- if (outer_rel != NULL && outer_rel->rows > 1)
- {
- num_outer_scans = outer_rel->rows;
- num_scans = num_sa_scans * num_outer_scans;
- }
- else
- {
- num_outer_scans = 1;
- num_scans = num_sa_scans;
- }
+ num_outer_scans = loop_count;
+ num_scans = num_sa_scans * num_outer_scans;
if (num_scans > 1)
{
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
numIndexTuples = rint(numIndexTuples / num_sa_scans);
}
- genericcostestimate(root, path, outer_rel,
+ genericcostestimate(root, path, loop_count,
numIndexTuples,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
- genericcostestimate(root, path, outer_rel, 0.0,
+ genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
- genericcostestimate(root, path, outer_rel, 0.0,
+ genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
double *indexCorrelation = (double *) PG_GETARG_POINTER(6);
- genericcostestimate(root, path, outer_rel, 0.0,
+ genericcostestimate(root, path, loop_count, 0.0,
indexStartupCost, indexTotalCost,
indexSelectivity, indexCorrelation);
{
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
IndexPath *path = (IndexPath *) PG_GETARG_POINTER(1);
- RelOptInfo *outer_rel = (RelOptInfo *) PG_GETARG_POINTER(2);
+ double loop_count = PG_GETARG_FLOAT8(2);
Cost *indexStartupCost = (Cost *) PG_GETARG_POINTER(3);
Cost *indexTotalCost = (Cost *) PG_GETARG_POINTER(4);
Selectivity *indexSelectivity = (Selectivity *) PG_GETARG_POINTER(5);
}
/* Will we have more than one iteration of a nestloop scan? */
- if (outer_rel != NULL && outer_rel->rows > 1)
- outer_scans = outer_rel->rows;
- else
- outer_scans = 1;
+ outer_scans = loop_count;
/*
* Compute cost to begin scan, first of all, pay attention to pending list.
} Bitmapset; /* VARIABLE LENGTH STRUCT */
+/* result of bms_subset_compare */
+typedef enum
+{
+ BMS_EQUAL, /* sets are equal */
+ BMS_SUBSET1, /* first set is a subset of the second */
+ BMS_SUBSET2, /* second set is a subset of the first */
+ BMS_DIFFERENT /* neither set is a subset of the other */
+} BMS_Comparison;
+
/* result of bms_membership */
typedef enum
{
extern Bitmapset *bms_intersect(const Bitmapset *a, const Bitmapset *b);
extern Bitmapset *bms_difference(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b);
+extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b);
extern bool bms_is_member(int x, const Bitmapset *a);
extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b);
extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b);
T_EquivalenceMember,
T_PathKey,
T_RestrictInfo,
- T_InnerIndexscanInfo,
T_PlaceHolderVar,
T_SpecialJoinInfo,
T_AppendRelInfo,
*/
RangeTblEntry **simple_rte_array; /* rangetable as an array */
+ /*
+ * all_baserels is a Relids set of all base relids (but not "other"
+ * relids) in the query; that is, the Relids identifier of the final
+ * join we need to form.
+ */
+ Relids all_baserels;
+
/*
* join_rel_list is a list of all join-relation RelOptInfos we have
* considered in this planning run. For small problems we just scan the
* pathlist - List of Path nodes, one for each potentially useful
* method of generating the relation
* cheapest_startup_path - the pathlist member with lowest startup cost
- * (regardless of its ordering)
+ * (regardless of its ordering; but must be
+ * unparameterized)
* cheapest_total_path - the pathlist member with lowest total cost
- * (regardless of its ordering)
+ * (regardless of its ordering; but must be
+ * unparameterized)
* cheapest_unique_path - for caching cheapest path to produce unique
* (no duplicates) output from relation
+ * cheapest_parameterized_paths - paths with cheapest total costs for
+ * their parameterizations; always includes
+ * cheapest_total_path
*
* If the relation is a base relation it will have these fields set:
*
* note this excludes clauses that might be derivable from
* EquivalenceClasses)
* has_eclass_joins - flag that EquivalenceClass joins are possible
- * index_outer_relids - only used for base rels; set of outer relids
- * that participate in indexable joinclauses for this rel
- * index_inner_paths - only used for base rels; list of InnerIndexscanInfo
- * nodes showing best indexpaths for various subsets of
- * index_outer_relids.
*
* Note: Keeping a restrictinfo list in the RelOptInfo is useful only for
* base rels, because for a join rel the set of clauses that are treated as
struct Path *cheapest_startup_path;
struct Path *cheapest_total_path;
struct Path *cheapest_unique_path;
+ List *cheapest_parameterized_paths;
/* information about a base rel (not set for join rels!) */
Index relid;
List *joininfo; /* RestrictInfo structures for join clauses
* involving this rel */
bool has_eclass_joins; /* T means joininfo is incomplete */
-
- /* cached info about inner indexscan paths for relation: */
- Relids index_outer_relids; /* other relids in indexable join
- * clauses */
- List *index_inner_paths; /* InnerIndexscanInfo nodes */
-
- /*
- * Inner indexscans are not in the main pathlist because they are not
- * usable except in specific join contexts. We use the index_inner_paths
- * list just to avoid recomputing the best inner indexscan repeatedly for
- * similar outer relations. See comments for InnerIndexscanInfo.
- */
} RelOptInfo;
/*
* BTGreaterStrategyNumber (for DESC). We assume that all ordering-capable
* index types will use btree-compatible strategy numbers.
*/
-
typedef struct PathKey
{
NodeTag type;
* simple plan types that we don't need any extra information in the path for.
* For other path types it is the first component of a larger struct.
*
- * Note: "pathtype" is the NodeTag of the Plan node we could build from this
- * Path. It is partially redundant with the Path's NodeTag, but allows us
- * to use the same Path type for multiple Plan types where there is no need
- * to distinguish the Plan type during path processing.
+ * "pathtype" is the NodeTag of the Plan node we could build from this Path.
+ * It is partially redundant with the Path's NodeTag, but allows us to use
+ * the same Path type for multiple Plan types when there is no need to
+ * distinguish the Plan type during path processing.
+ *
+ * "rows" is the same as parent->rows in simple paths, but in parameterized
+ * paths and UniquePaths it can be less than parent->rows, reflecting the
+ * fact that we've filtered by extra join conditions or removed duplicates.
+ *
+ * "pathkeys" is a List of PathKey nodes (see above), describing the sort
+ * ordering of the path's output rows.
+ *
+ * "required_outer", if not NULL, contains the relids of one or more relations
+ * that must provide parameter values to each scan of this path, because the
+ * path relies on join clauses using those rels. That means this path can only
+ * be joined to those rels by means of nestloop joins with this path on the
+ * inside. Note: for a normal unparameterized path, required_outer must be
+ * NULL, not an empty-but-not-null Bitmapset.
+ *
+ * "param_clauses" is a List of RestrictInfo nodes, containing the join
+ * clauses used by a parameterized path. Ideally param_clauses should be NIL
+ * if and only if required_outer is NULL. XXX for the moment, however, we do
+ * not compute param_clauses for Append and MergeAppend paths, so the list
+ * is inaccurate in those paths and possibly paths above them.
*/
-
typedef struct Path
{
NodeTag type;
RelOptInfo *parent; /* the relation this path can build */
- /* estimated execution costs for path (see costsize.c for more info) */
+ /* estimated size/costs for path (see costsize.c for more info) */
+ double rows; /* estimated number of result tuples */
Cost startup_cost; /* cost expended before fetching any tuples */
Cost total_cost; /* total cost (assuming all tuples fetched) */
List *pathkeys; /* sort ordering of path's output */
- /* pathkeys is a List of PathKey nodes; see above */
+
+ Relids required_outer; /* rels supplying parameters used by path */
+ List *param_clauses; /* join clauses that use such parameters */
} Path;
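The invariants stated above can be restated as a hypothetical sanity check; this is a sketch, not part of the patch:

/*
 * Illustrative sketch, not part of the patch.  The converse check,
 * that required_outer != NULL implies a non-NIL param_clauses, would
 * not hold yet; see the XXX above about Append and MergeAppend.
 */
#include "postgres.h"
#include "nodes/relation.h"

static void
check_path_parameterization(const Path *path)
{
    if (path->required_outer == NULL)
        Assert(path->param_clauses == NIL);             /* unparameterized */
    else
        Assert(!bms_is_empty(path->required_outer));    /* never empty-but-not-NULL */
}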
/*----------
* ORDER BY expression is meant to be used with. (There is no restriction
* on which index column each ORDER BY can be used with.)
*
- * 'isjoininner' is TRUE if the path is a nestloop inner scan (that is,
- * some of the index conditions are join rather than restriction clauses).
- * Note that the path costs will be calculated differently from a plain
- * indexscan in this case, and in addition there's a special 'rows' value
- * different from the parent RelOptInfo's (see below).
- *
* 'indexscandir' is one of:
* ForwardScanDirection: forward scan of an ordered index
* BackwardScanDirection: backward scan of an ordered index
* we need not recompute them when considering using the same index in a
* bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath
* itself represent the costs of an IndexScan or IndexOnlyScan plan type.
- *
- * 'rows' is the estimated result tuple count for the indexscan. This
- * is the same as path.parent->rows for a simple indexscan, but it is
- * different for a nestloop inner scan, because the additional indexquals
- * coming from join clauses make the scan more selective than the parent
- * rel's restrict clauses alone would do.
*----------
*/
typedef struct IndexPath
List *indexqualcols;
List *indexorderbys;
List *indexorderbycols;
- bool isjoininner;
ScanDirection indexscandir;
Cost indextotalcost;
Selectivity indexselectivity;
- double rows; /* estimated number of result tuples */
} IndexPath;
/*
* always represent the costs to use it as a regular (or index-only)
* IndexScan. The costs of a BitmapIndexScan can be computed using the
* IndexPath's indextotalcost and indexselectivity.
- *
- * BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and
- * rows fields serve the same purpose as for plain IndexPaths.
*/
typedef struct BitmapHeapPath
{
Path path;
Path *bitmapqual; /* IndexPath, BitmapAndPath, BitmapOrPath */
- bool isjoininner; /* T if it's a nestloop inner scan */
- double rows; /* estimated number of result tuples */
} BitmapHeapPath;
/*
#define IS_DUMMY_PATH(p) \
(IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL)
+/* A relation that's been proven empty will have one path that is dummy */
+#define IS_DUMMY_REL(r) \
+ ((r)->cheapest_total_path != NULL && \
+ IS_DUMMY_PATH((r)->cheapest_total_path))
+
/*
* MergeAppendPath represents a MergeAppend plan, ie, the merging of sorted
* results from several member plans to produce similarly-sorted output.
UniquePathMethod umethod;
List *in_operators; /* equality operators of the IN clause */
List *uniq_exprs; /* expressions to be made unique */
- double rows; /* estimated number of result tuples */
} UniquePath;
/*
Selectivity rightendsel; /* last-join fraction for clause right side */
} MergeScanSelCache;
-/*
- * Inner indexscan info.
- *
- * An inner indexscan is one that uses one or more joinclauses as index
- * conditions (perhaps in addition to plain restriction clauses). So it
- * can only be used as the inner path of a nestloop join where the outer
- * relation includes all other relids appearing in those joinclauses.
- * The set of usable joinclauses, and thus the best inner indexscan,
- * thus varies depending on which outer relation we consider; so we have
- * to recompute the best such paths for every join. To avoid lots of
- * redundant computation, we cache the results of such searches. For
- * each relation we compute the set of possible otherrelids (all relids
- * appearing in joinquals that could become indexquals for this table).
- * Two outer relations whose relids have the same intersection with this
- * set will have the same set of available joinclauses and thus the same
- * best inner indexscans for the inner relation. By taking the intersection
- * before scanning the cache, we avoid recomputing when considering
- * join rels that differ only by the inclusion of irrelevant other rels.
- *
- * The search key also includes a bool showing whether the join being
- * considered is an outer join. Since we constrain the join order for
- * outer joins, I believe that this bool can only have one possible value
- * for any particular lookup key; but store it anyway to avoid confusion.
- */
-
-typedef struct InnerIndexscanInfo
-{
- NodeTag type;
- /* The lookup key: */
- Relids other_relids; /* a set of relevant other relids */
- bool isouterjoin; /* true if join is outer */
- /* Best paths for this lookup key (NULL if no available indexscans): */
- Path *cheapest_startup_innerpath; /* cheapest startup cost */
- Path *cheapest_total_innerpath; /* cheapest total cost */
-} InnerIndexscanInfo;
-
/*
* Placeholder node for an expression to be evaluated below the top level
* of a plan tree. This is used during planning to represent the contained
Index abslevel; /* its absolute query level */
} PlannerParamItem;
+/*
+ * When making cost estimates for a SEMI or ANTI join, there are some
+ * correction factors that are needed in both nestloop and hash joins
+ * to account for the fact that the executor can stop scanning inner rows
+ * as soon as it finds a match to the current outer row. These numbers
+ * depend only on the selected outer and inner join relations, not on the
+ * particular paths used for them, so it's worthwhile to calculate them
+ * just once per relation pair not once per considered path. This struct
+ * is filled by compute_semi_anti_join_factors and must be passed along
+ * to the join cost estimation functions.
+ *
+ * outer_match_frac is the fraction of the outer tuples that are
+ * expected to have at least one match.
+ * match_count is the average number of matches expected for
+ * outer tuples that have at least one match.
+ */
+typedef struct SemiAntiJoinFactors
+{
+ Selectivity outer_match_frac;
+ Selectivity match_count;
+} SemiAntiJoinFactors;
+
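To show how these factors enter a cost estimate, here is a simplified sketch of the matched-rows term, mirroring the inner_scan_frac computation in this patch's nestloop cost code; the function name is hypothetical and the unmatched-rows term is omitted:

/*
 * Illustrative sketch, not part of the patch.  For SEMI/ANTI joins the
 * executor stops the inner scan at the first match, so matched outer
 * rows are charged only a fraction of an inner scan.  With match_count
 * matches spread uniformly, the patch uses the fraction
 * 2 / (match_count + 1); unmatched outer rows still pay for a whole
 * (failed) inner scan, not shown here.
 */
#include "postgres.h"
#include "nodes/relation.h"

static Cost
matched_rows_inner_cost(double outer_rows, Cost inner_run_cost,
                        const SemiAntiJoinFactors *semifactors)
{
    double      outer_matched_rows =
        outer_rows * semifactors->outer_match_frac;
    Selectivity inner_scan_frac =
        2.0 / (semifactors->match_count + 1.0);

    return outer_matched_rows * inner_run_cost * inner_scan_frac;
}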
+/*
+ * For speed reasons, cost estimation for join paths is performed in two
+ * phases: the first phase tries to quickly derive a lower bound for the
+ * join cost, and then we check if that's sufficient to reject the path.
+ * If not, we come back for a more refined cost estimate. The first phase
+ * fills a JoinCostWorkspace struct with its preliminary cost estimates
+ * and possibly additional intermediate values. The second phase takes
+ * these values as inputs to avoid repeating work.
+ *
+ * (Ideally we'd declare this in cost.h, but it's also needed in pathnode.h,
+ * so seems best to put it here.)
+ */
+typedef struct JoinCostWorkspace
+{
+ /* Preliminary cost estimates --- must not be larger than final ones! */
+ Cost startup_cost; /* cost expended before fetching any tuples */
+ Cost total_cost; /* total cost (assuming all tuples fetched) */
+
+ /* Fields below here should be treated as private to costsize.c */
+ Cost run_cost; /* non-startup cost components */
+
+ /* private for cost_nestloop code */
+ Cost inner_rescan_run_cost;
+ double outer_matched_rows;
+ Selectivity inner_scan_frac;
+
+ /* private for cost_mergejoin code */
+ Cost inner_run_cost;
+ double outer_rows;
+ double inner_rows;
+ double outer_skip_rows;
+ double inner_skip_rows;
+
+ /* private for cost_hashjoin code */
+ int numbuckets;
+ int numbatches;
+} JoinCostWorkspace;
+
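To show how JoinCostWorkspace ties the pieces together, here is a sketch of the intended two-phase calling protocol. The wrapper function is hypothetical, but each call matches a declaration added elsewhere in this patch:

/*
 * Illustrative sketch, not part of the patch: phase 1 computes a cheap
 * lower bound; add_path_precheck rejects hopeless candidates before we
 * spend effort building the path and refining its cost in phase 2.
 */
#include "postgres.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"

static void
try_nestloop_path_sketch(PlannerInfo *root, RelOptInfo *joinrel,
                         JoinType jointype, SpecialJoinInfo *sjinfo,
                         SemiAntiJoinFactors *semifactors,
                         Path *outer_path, Path *inner_path,
                         List *restrict_clauses, List *pathkeys)
{
    Relids      required_outer;
    JoinCostWorkspace workspace;

    required_outer = calc_nestloop_required_outer(outer_path, inner_path);

    /* Phase 1: quick lower-bound estimate */
    initial_cost_nestloop(root, &workspace, jointype,
                          outer_path, inner_path,
                          sjinfo, semifactors);

    /* Build the path only if the lower bound might survive add_path */
    if (add_path_precheck(joinrel,
                          workspace.startup_cost, workspace.total_cost,
                          pathkeys, required_outer))
    {
        /* Phase 2: final_cost_nestloop refines the estimate inside here */
        add_path(joinrel, (Path *)
                 create_nestloop_path(root, joinrel, jointype,
                                      &workspace, sjinfo, semifactors,
                                      outer_path, inner_path,
                                      restrict_clauses, pathkeys,
                                      required_outer));
    }
    else
        bms_free(required_outer);   /* waste no memory on rejected paths */
}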
#endif /* RELATION_H */
double index_pages, PlannerInfo *root);
extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel);
extern void cost_index(IndexPath *path, PlannerInfo *root,
- RelOptInfo *outer_rel);
+ double loop_count);
extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
- Path *bitmapqual, RelOptInfo *outer_rel);
+ Path *bitmapqual, double loop_count);
extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
int numGroupCols, double numGroups,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples);
-extern void cost_nestloop(NestPath *path, PlannerInfo *root,
- SpecialJoinInfo *sjinfo);
-extern void cost_mergejoin(MergePath *path, PlannerInfo *root,
- SpecialJoinInfo *sjinfo);
-extern void cost_hashjoin(HashPath *path, PlannerInfo *root,
- SpecialJoinInfo *sjinfo);
+extern void initial_cost_nestloop(PlannerInfo *root,
+ JoinCostWorkspace *workspace,
+ JoinType jointype,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
+extern void final_cost_nestloop(PlannerInfo *root, NestPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
+extern void initial_cost_mergejoin(PlannerInfo *root,
+ JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *mergeclauses,
+ Path *outer_path, Path *inner_path,
+ List *outersortkeys, List *innersortkeys,
+ SpecialJoinInfo *sjinfo);
+extern void final_cost_mergejoin(PlannerInfo *root, MergePath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo);
+extern void initial_cost_hashjoin(PlannerInfo *root,
+ JoinCostWorkspace *workspace,
+ JoinType jointype,
+ List *hashclauses,
+ Path *outer_path, Path *inner_path,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
+extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors);
extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
+extern void compute_semi_anti_join_factors(PlannerInfo *root,
+ RelOptInfo *outerrel,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo,
+ List *restrictlist,
+ SemiAntiJoinFactors *semifactors);
extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel);
extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *outer_rel,
double fraction);
extern void set_cheapest(RelOptInfo *parent_rel);
extern void add_path(RelOptInfo *parent_rel, Path *new_path);
+extern bool add_path_precheck(RelOptInfo *parent_rel,
+ Cost startup_cost, Cost total_cost,
+ List *pathkeys, Relids required_outer);
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel);
extern IndexPath *create_index_path(PlannerInfo *root,
List *pathkeys,
ScanDirection indexscandir,
bool indexonly,
- RelOptInfo *outer_rel);
+ Relids required_outer,
+ double loop_count);
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
- RelOptInfo *outer_rel);
+ double loop_count);
extern BitmapAndPath *create_bitmap_and_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals);
extern Path *create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel);
extern ForeignPath *create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel);
+extern Relids calc_nestloop_required_outer(Path *outer_path, Path *inner_path);
+extern Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path);
+
extern NestPath *create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
- List *pathkeys);
+ List *pathkeys,
+ Relids required_outer);
extern MergePath *create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
+ Relids required_outer,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys);
extern HashPath *create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
+ Relids required_outer,
List *hashclauses);
/*
*/
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *outer_clauses,
- RelOptInfo *outer_rel);
-extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
- RelOptInfo *outer_rel, JoinType jointype,
- Path **cheapest_startup, Path **cheapest_total);
+ List *clauses, List *other_clauses,
+ bool restriction_only);
extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
List *exprlist, List *oprlist);
-extern bool eclass_matches_any_index(EquivalenceClass *ec,
- EquivalenceMember *em,
- RelOptInfo *rel);
+extern bool eclass_member_matches_indexcol(EquivalenceClass *ec,
+ EquivalenceMember *em,
+ IndexOptInfo *index, int indexcol);
extern bool match_index_to_operand(Node *operand, int indexcol,
IndexOptInfo *index);
extern void expand_indexqual_conditions(IndexOptInfo *index,
extern void mutate_eclass_expressions(PlannerInfo *root,
Node *(*mutator) (),
void *context);
-extern List *find_eclass_clauses_for_index_join(PlannerInfo *root,
- RelOptInfo *rel,
- Relids outer_relids);
+extern List *generate_implied_equalities_for_indexcol(PlannerInfo *root,
+ IndexOptInfo *index,
+ int indexcol);
extern bool have_relevant_eclass_joinclause(PlannerInfo *root,
RelOptInfo *rel1, RelOptInfo *rel2);
extern bool has_relevant_eclass_joinclause(PlannerInfo *root,
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
+ Relids required_outer,
CostSelector cost_criterion);
extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
List *pathkeys,
+ Relids required_outer,
double fraction);
extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index,
ScanDirection scandir);
extern void extract_actual_join_clauses(List *restrictinfo_list,
List **joinquals,
List **otherquals);
-extern List *select_nonredundant_join_clauses(PlannerInfo *root,
- List *restrictinfo_list,
- Path *inner_path);
+extern List *select_nonredundant_join_clauses(List *restrictinfo_list,
+ List *reference_list);
#endif /* RESTRICTINFO_H */