* pathnode.c
* Routines to manipulate pathlists and create path nodes
*
- * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.142 2008/01/01 19:45:50 momjian Exp $
+ * src/backend/optimizer/util/pathnode.c
*
*-------------------------------------------------------------------------
*/
#include <math.h>
-#include "catalog/pg_operator.h"
-#include "executor/executor.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
-#include "optimizer/tlist.h"
-#include "parser/parse_expr.h"
-#include "parser/parse_oper.h"
+#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
+#include "optimizer/var.h"
#include "parser/parsetree.h"
-#include "utils/selfuncs.h"
#include "utils/lsyscache.h"
-#include "utils/syscache.h"
+#include "utils/selfuncs.h"
+/* Possible outcomes of a fuzzy cost comparison (see compare_path_costs_fuzzily) */
+typedef enum
+{
+ COSTS_EQUAL, /* path costs are fuzzily equal */
+ COSTS_BETTER1, /* first path is cheaper than second */
+ COSTS_BETTER2, /* second path is cheaper than first */
+ COSTS_DIFFERENT /* neither path dominates the other on cost */
+} PathCostComparison;
+
static List *translate_sub_tlist(List *tlist, int relid);
-static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
-static Oid distinct_col_search(int colno, List *colnos, List *opids);
-static bool hash_safe_operators(List *opids);
/*****************************************************************************
return 0;
}
-/*
- * compare_fuzzy_path_costs
- * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
- * or more expensive than path2 for the specified criterion.
- *
- * This differs from compare_path_costs in that we consider the costs the
- * same if they agree to within a "fuzz factor". This is used by add_path
- * to avoid keeping both of a pair of paths that really have insignificantly
- * different cost.
- */
-static int
-compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
-{
- /*
- * We use a fuzz factor of 1% of the smaller cost.
- *
- * XXX does this percentage need to be user-configurable?
- */
- if (criterion == STARTUP_COST)
- {
- if (path1->startup_cost > path2->startup_cost * 1.01)
- return +1;
- if (path2->startup_cost > path1->startup_cost * 1.01)
- return -1;
-
- /*
- * If paths have the same startup cost (not at all unlikely), order
- * them by total cost.
- */
- if (path1->total_cost > path2->total_cost * 1.01)
- return +1;
- if (path2->total_cost > path1->total_cost * 1.01)
- return -1;
- }
- else
- {
- if (path1->total_cost > path2->total_cost * 1.01)
- return +1;
- if (path2->total_cost > path1->total_cost * 1.01)
- return -1;
-
- /*
- * If paths have the same total cost, order them by startup cost.
- */
- if (path1->startup_cost > path2->startup_cost * 1.01)
- return +1;
- if (path2->startup_cost > path1->startup_cost * 1.01)
- return -1;
- }
- return 0;
-}
-
/*
* compare_fractional_path_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
return 0;
}
+/*
+ * compare_path_costs_fuzzily
+ * Compare the costs of two paths to see if either can be said to
+ * dominate the other.
+ *
+ * We use fuzzy comparisons so that add_path() can avoid keeping both of
+ * a pair of paths that really have insignificantly different cost.
+ *
+ * The fuzz_factor argument must be 1.0 plus delta, where delta is the
+ * fraction of the smaller cost that is considered to be a significant
+ * difference. For example, fuzz_factor = 1.01 makes the fuzziness limit
+ * be 1% of the smaller cost.
+ *
+ * The two paths are said to have "equal" costs if both startup and total
+ * costs are fuzzily the same. Path1 is said to be better than path2 if
+ * it has fuzzily better startup cost and fuzzily no worse total cost,
+ * or if it has fuzzily better total cost and fuzzily no worse startup cost.
+ * Path2 is better than path1 if the reverse holds. Finally, if one path
+ * is fuzzily better than the other on startup cost and fuzzily worse on
+ * total cost, we just say that their costs are "different", since neither
+ * dominates the other across the whole performance spectrum.
+ *
+ * If consider_startup is false, then we don't care about keeping paths with
+ * good startup cost, so we'll never return COSTS_DIFFERENT.
+ *
+ * This function also includes special hacks to support a policy enforced
+ * by its sole caller, add_path(): paths that have any parameterization
+ * cannot win comparisons on the grounds of having cheaper startup cost,
+ * since we deem only total cost to be of interest for a parameterized path.
+ * (Unparameterized paths are more common, so we check for this case last.)
+ */
+static PathCostComparison
+compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor,
+ bool consider_startup)
+{
+ /*
+ * Check total cost first since it's more likely to be different; many
+ * paths have zero startup cost.
+ */
+ if (path1->total_cost > path2->total_cost * fuzz_factor)
+ {
+ /* path1 fuzzily worse on total cost */
+ if (consider_startup &&
+ path2->startup_cost > path1->startup_cost * fuzz_factor &&
+ path1->param_info == NULL)
+ {
+ /* ... but path2 fuzzily worse on startup, so DIFFERENT */
+ return COSTS_DIFFERENT;
+ }
+ /* else path2 dominates */
+ return COSTS_BETTER2;
+ }
+ if (path2->total_cost > path1->total_cost * fuzz_factor)
+ {
+ /* path2 fuzzily worse on total cost */
+ if (consider_startup &&
+ path1->startup_cost > path2->startup_cost * fuzz_factor &&
+ path2->param_info == NULL)
+ {
+ /* ... but path1 fuzzily worse on startup, so DIFFERENT */
+ return COSTS_DIFFERENT;
+ }
+ /* else path1 dominates */
+ return COSTS_BETTER1;
+ }
+ /* fuzzily the same on total cost */
+ /* (so we may as well compare startup cost, even if !consider_startup) */
+ /* (per the policy above, only an unparameterized path may win on startup) */
+ if (path1->startup_cost > path2->startup_cost * fuzz_factor &&
+ path2->param_info == NULL)
+ {
+ /* ... but path1 fuzzily worse on startup, so path2 wins */
+ return COSTS_BETTER2;
+ }
+ if (path2->startup_cost > path1->startup_cost * fuzz_factor &&
+ path1->param_info == NULL)
+ {
+ /* ... but path2 fuzzily worse on startup, so path1 wins */
+ return COSTS_BETTER1;
+ }
+ /* fuzzily the same on both costs */
+ return COSTS_EQUAL;
+}
+
/*
* set_cheapest
* Find the minimum-cost paths from among a relation's paths,
* and save them in the rel's cheapest-path fields.
*
+ * cheapest_total_path is normally the cheapest-total-cost unparameterized
+ * path; but if there are no unparameterized paths, we assign it to be the
+ * best (cheapest least-parameterized) parameterized path. However, only
+ * unparameterized paths are considered candidates for cheapest_startup_path,
+ * so that will be NULL if there are no unparameterized paths.
+ *
+ * The cheapest_parameterized_paths list collects all parameterized paths
+ * that have survived the add_path() tournament for this relation. (Since
+ * add_path ignores pathkeys and startup cost for a parameterized path,
+ * these will be paths that have best total cost or best row count for their
+ * parameterization.) cheapest_parameterized_paths always includes the
+ * cheapest-total unparameterized path, too, if there is one; the users of
+ * that list find it more convenient if that's included.
+ *
* This is normally called only after we've finished constructing the path
* list for the rel node.
- *
- * If we find two paths of identical costs, try to keep the better-sorted one.
- * The paths might have unrelated sort orderings, in which case we can only
- * guess which might be better to keep, but if one is superior then we
- * definitely should keep it.
*/
void
set_cheapest(RelOptInfo *parent_rel)
{
- List *pathlist = parent_rel->pathlist;
- ListCell *p;
Path *cheapest_startup_path;
Path *cheapest_total_path;
+ Path *best_param_path;
+ List *parameterized_paths;
+ ListCell *p;
Assert(IsA(parent_rel, RelOptInfo));
- if (pathlist == NIL)
+ if (parent_rel->pathlist == NIL)
elog(ERROR, "could not devise a query plan for the given query");
- cheapest_startup_path = cheapest_total_path = (Path *) linitial(pathlist);
+ cheapest_startup_path = cheapest_total_path = best_param_path = NULL;
+ parameterized_paths = NIL;
- for_each_cell(p, lnext(list_head(pathlist)))
+ foreach(p, parent_rel->pathlist)
{
Path *path = (Path *) lfirst(p);
int cmp;
- cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
- if (cmp > 0 ||
- (cmp == 0 &&
- compare_pathkeys(cheapest_startup_path->pathkeys,
- path->pathkeys) == PATHKEYS_BETTER2))
- cheapest_startup_path = path;
-
- cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
- if (cmp > 0 ||
- (cmp == 0 &&
- compare_pathkeys(cheapest_total_path->pathkeys,
- path->pathkeys) == PATHKEYS_BETTER2))
- cheapest_total_path = path;
+ if (path->param_info)
+ {
+ /* Parameterized path, so add it to parameterized_paths */
+ parameterized_paths = lappend(parameterized_paths, path);
+
+ /*
+ * If we have an unparameterized cheapest-total, we no longer care
+ * about finding the best parameterized path, so move on.
+ */
+ if (cheapest_total_path)
+ continue;
+
+ /*
+ * Otherwise, track the best parameterized path, which is the one
+ * with least total cost among those of the minimum
+ * parameterization.
+ */
+ if (best_param_path == NULL)
+ best_param_path = path;
+ else
+ {
+ switch (bms_subset_compare(PATH_REQ_OUTER(path),
+ PATH_REQ_OUTER(best_param_path)))
+ {
+ case BMS_EQUAL:
+ /* keep the cheaper one */
+ if (compare_path_costs(path, best_param_path,
+ TOTAL_COST) < 0)
+ best_param_path = path;
+ break;
+ case BMS_SUBSET1:
+ /* new path is less-parameterized */
+ best_param_path = path;
+ break;
+ case BMS_SUBSET2:
+ /* old path is less-parameterized, keep it */
+ break;
+ case BMS_DIFFERENT:
+
+ /*
+ * This means that neither path has the least possible
+ * parameterization for the rel. We'll sit on the old
+ * path until something better comes along.
+ */
+ break;
+ }
+ }
+ }
+ else
+ {
+ /* Unparameterized path, so consider it for cheapest slots */
+ if (cheapest_total_path == NULL)
+ {
+ cheapest_startup_path = cheapest_total_path = path;
+ continue;
+ }
+
+ /*
+ * If we find two paths of identical costs, try to keep the
+ * better-sorted one. The paths might have unrelated sort
+ * orderings, in which case we can only guess which might be
+ * better to keep, but if one is superior then we definitely
+ * should keep that one.
+ */
+ cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
+ if (cmp > 0 ||
+ (cmp == 0 &&
+ compare_pathkeys(cheapest_startup_path->pathkeys,
+ path->pathkeys) == PATHKEYS_BETTER2))
+ cheapest_startup_path = path;
+
+ cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
+ if (cmp > 0 ||
+ (cmp == 0 &&
+ compare_pathkeys(cheapest_total_path->pathkeys,
+ path->pathkeys) == PATHKEYS_BETTER2))
+ cheapest_total_path = path;
+ }
}
+ /* Add cheapest unparameterized path, if any, to parameterized_paths */
+ if (cheapest_total_path)
+ parameterized_paths = lcons(cheapest_total_path, parameterized_paths);
+
+ /*
+ * If there is no unparameterized path, use the best parameterized path as
+ * cheapest_total_path (but not as cheapest_startup_path).
+ */
+ if (cheapest_total_path == NULL)
+ cheapest_total_path = best_param_path;
+ Assert(cheapest_total_path != NULL);
+
+ /* NB: cheapest_startup_path stays NULL if no unparameterized path exists */
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
+ parent_rel->cheapest_parameterized_paths = parameterized_paths;
}
/*
* add_path
* Consider a potential implementation path for the specified parent rel,
* and add it to the rel's pathlist if it is worthy of consideration.
- * A path is worthy if it has either a better sort order (better pathkeys)
- * or cheaper cost (on either dimension) than any of the existing old paths.
+ * A path is worthy if it has a better sort order (better pathkeys) or
+ * cheaper cost (on either dimension), or generates fewer rows, than any
+ * existing path that has the same or superset parameterization rels.
*
* We also remove from the rel's pathlist any old paths that are dominated
- * by new_path --- that is, new_path is both cheaper and at least as well
- * ordered.
+ * by new_path --- that is, new_path is cheaper, at least as well ordered,
+ * generates no more rows, and requires no outer rels not required by the
+ * old path.
+ *
+ * In most cases, a path with a superset parameterization will generate
+ * fewer rows (since it has more join clauses to apply), so that those two
+ * figures of merit move in opposite directions; this means that a path of
+ * one parameterization can seldom dominate a path of another. But such
+ * cases do arise, so we make the full set of checks anyway.
+ *
+ * There are two policy decisions embedded in this function, along with
+ * its sibling add_path_precheck: we treat all parameterized paths as
+ * having NIL pathkeys, and we ignore their startup costs, so that they
+ * compete only on parameterization, total cost and rowcount. This is to
+ * reduce the number of parameterized paths that are kept. See discussion
+ * in src/backend/optimizer/README.
*
- * The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
- * at the front. No code depends on that for correctness; it's simply
- * a speed hack within this routine. Doing it that way makes it more
- * likely that we will reject an inferior path after a few comparisons,
- * rather than many comparisons.
+ * Another policy that is enforced here is that we only consider cheap
+ * startup cost to be interesting if parent_rel->consider_startup is true.
+ *
+ * The pathlist is kept sorted by total_cost, with cheaper paths
+ * at the front. Within this routine, that's simply a speed hack:
+ * doing it that way makes it more likely that we will reject an inferior
+ * path after a few comparisons, rather than many comparisons.
+ * However, add_path_precheck relies on this ordering to exit early
+ * when possible.
*
* NOTE: discarded Path objects are immediately pfree'd to reduce planner
* memory consumption. We dare not try to free the substructure of a Path,
{
bool accept_new = true; /* unless we find a superior old path */
ListCell *insert_after = NULL; /* where to insert new item */
- ListCell *p1_prev = NULL;
+ List *new_path_pathkeys;
ListCell *p1;
+ ListCell *p1_prev;
+ ListCell *p1_next;
/*
* This is a convenient place to check for query cancel --- no part of the
*/
CHECK_FOR_INTERRUPTS();
+ /* Pretend parameterized paths have no pathkeys, per comment above */
+ new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys;
+
/*
* Loop to check proposed new path against old paths. Note it is possible
* for more than one old path to be tossed out because new_path dominates
* it.
+ *
+ * We can't use foreach here because the loop body may delete the current
+ * list cell.
*/
- p1 = list_head(parent_rel->pathlist); /* cannot use foreach here */
- while (p1 != NULL)
+ p1_prev = NULL;
+ for (p1 = list_head(parent_rel->pathlist); p1 != NULL; p1 = p1_next)
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
- int costcmp;
+ PathCostComparison costcmp;
+ PathKeysComparison keyscmp;
+ BMS_Comparison outercmp;
+
+ p1_next = lnext(p1);
/*
- * As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
- * cycles keeping paths that are really not significantly different in
- * cost.
+ * Do a fuzzy cost comparison with 1% fuzziness limit. (XXX does this
+ * percentage need to be user-configurable?)
*/
- costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
+ costcmp = compare_path_costs_fuzzily(new_path, old_path, 1.01,
+ parent_rel->consider_startup);
/*
* If the two paths compare differently for startup and total cost,
- * then we want to keep both, and we can skip the (much slower)
- * comparison of pathkeys. If they compare the same, proceed with the
- * pathkeys comparison. Note: this test relies on the fact that
- * compare_fuzzy_path_costs will only return 0 if both costs are
- * effectively equal (and, therefore, there's no need to call it twice
- * in that case).
+ * then we want to keep both, and we can skip comparing pathkeys and
+ * required_outer rels. If they compare the same, proceed with the
+ * other comparisons. Row count is checked last. (We make the tests
+ * in this order because the cost comparison is most likely to turn
+ * out "different", and the pathkeys comparison next most likely. As
+ * explained above, row count very seldom makes a difference, so even
+ * though it's cheap to compare there's not much point in checking it
+ * earlier.)
*/
- if (costcmp == 0 ||
- costcmp == compare_fuzzy_path_costs(new_path, old_path,
- STARTUP_COST))
+ if (costcmp != COSTS_DIFFERENT)
{
- switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
+ /* Similarly check to see if either dominates on pathkeys */
+ List *old_path_pathkeys;
+
+ old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
+ keyscmp = compare_pathkeys(new_path_pathkeys,
+ old_path_pathkeys);
+ if (keyscmp != PATHKEYS_DIFFERENT)
{
- case PATHKEYS_EQUAL:
- if (costcmp < 0)
- remove_old = true; /* new dominates old */
- else if (costcmp > 0)
- accept_new = false; /* old dominates new */
- else
- {
+ switch (costcmp)
+ {
+ case COSTS_EQUAL:
+ outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
+ PATH_REQ_OUTER(old_path));
+ if (keyscmp == PATHKEYS_BETTER1)
+ {
+ if ((outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET1) &&
+ new_path->rows <= old_path->rows)
+ remove_old = true; /* new dominates old */
+ }
+ else if (keyscmp == PATHKEYS_BETTER2)
+ {
+ if ((outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2) &&
+ new_path->rows >= old_path->rows)
+ accept_new = false; /* old dominates new */
+ }
+ else /* keyscmp == PATHKEYS_EQUAL */
+ {
+ if (outercmp == BMS_EQUAL)
+ {
+ /*
+ * Same pathkeys and outer rels, and fuzzily
+ * the same cost, so keep just one; to decide
+ * which, first check rows and then do a fuzzy
+ * cost comparison with very small fuzz limit.
+ * (We used to do an exact cost comparison,
+ * but that results in annoying
+ * platform-specific plan variations due to
+ * roundoff in the cost estimates.) If things
+ * are still tied, arbitrarily keep only the
+ * old path. Notice that we will keep only
+ * the old path even if the less-fuzzy
+ * comparison decides the startup and total
+ * costs compare differently.
+ */
+ if (new_path->rows < old_path->rows)
+ remove_old = true; /* new dominates old */
+ else if (new_path->rows > old_path->rows)
+ accept_new = false; /* old dominates new */
+ else if (compare_path_costs_fuzzily(new_path,
+ old_path,
+ 1.0000000001,
+ parent_rel->consider_startup) == COSTS_BETTER1)
+ remove_old = true; /* new dominates old */
+ else
+ accept_new = false; /* old equals or
+ * dominates new */
+ }
+ else if (outercmp == BMS_SUBSET1 &&
+ new_path->rows <= old_path->rows)
+ remove_old = true; /* new dominates old */
+ else if (outercmp == BMS_SUBSET2 &&
+ new_path->rows >= old_path->rows)
+ accept_new = false; /* old dominates new */
+ /* else different parameterizations, keep both */
+ }
+ break;
+ case COSTS_BETTER1:
+ if (keyscmp != PATHKEYS_BETTER2)
+ {
+ outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
+ PATH_REQ_OUTER(old_path));
+ if ((outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET1) &&
+ new_path->rows <= old_path->rows)
+ remove_old = true; /* new dominates old */
+ }
+ break;
+ case COSTS_BETTER2:
+ if (keyscmp != PATHKEYS_BETTER1)
+ {
+ outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
+ PATH_REQ_OUTER(old_path));
+ if ((outercmp == BMS_EQUAL ||
+ outercmp == BMS_SUBSET2) &&
+ new_path->rows >= old_path->rows)
+ accept_new = false; /* old dominates new */
+ }
+ break;
+ case COSTS_DIFFERENT:
+
/*
- * Same pathkeys, and fuzzily the same cost, so keep
- * just one --- but we'll do an exact cost comparison
- * to decide which.
+ * can't get here, but keep this case to keep compiler
+ * quiet
*/
- if (compare_path_costs(new_path, old_path,
- TOTAL_COST) < 0)
- remove_old = true; /* new dominates old */
- else
- accept_new = false; /* old equals or dominates new */
- }
- break;
- case PATHKEYS_BETTER1:
- if (costcmp <= 0)
- remove_old = true; /* new dominates old */
- break;
- case PATHKEYS_BETTER2:
- if (costcmp >= 0)
- accept_new = false; /* old dominates new */
- break;
- case PATHKEYS_DIFFERENT:
- /* keep both paths, since they have different ordering */
- break;
+ break;
+ }
}
}
*/
if (!IsA(old_path, IndexPath))
pfree(old_path);
- /* Advance list pointer */
- if (p1_prev)
- p1 = lnext(p1_prev);
- else
- p1 = list_head(parent_rel->pathlist);
+ /* p1_prev does not advance */
}
else
{
/* new belongs after this old path if it has cost >= old's */
- if (costcmp >= 0)
+ if (new_path->total_cost >= old_path->total_cost)
insert_after = p1;
- /* Advance list pointers */
+ /* p1_prev advances */
p1_prev = p1;
- p1 = lnext(p1);
}
/*
}
}
+/*
+ * add_path_precheck
+ * Check whether a proposed new path could possibly get accepted.
+ * We assume we know the path's pathkeys and parameterization accurately,
+ * and have lower bounds for its costs.
+ *
+ * Note that we do not know the path's rowcount, since getting an estimate for
+ * that is too expensive to do before prechecking. We assume here that paths
+ * of a superset parameterization will generate fewer rows; if that holds,
+ * then paths with different parameterizations cannot dominate each other
+ * and so we can simply ignore existing paths of another parameterization.
+ * (In the infrequent cases where that rule of thumb fails, add_path will
+ * get rid of the inferior path.)
+ *
+ * At the time this is called, we haven't actually built a Path structure,
+ * so the required information has to be passed piecemeal.
+ */
+bool
+add_path_precheck(RelOptInfo *parent_rel,
+ Cost startup_cost, Cost total_cost,
+ List *pathkeys, Relids required_outer)
+{
+ List *new_path_pathkeys;
+ ListCell *p1;
+
+ /* Pretend parameterized paths have no pathkeys, per add_path policy */
+ new_path_pathkeys = required_outer ? NIL : pathkeys;
+
+ foreach(p1, parent_rel->pathlist)
+ {
+ Path *old_path = (Path *) lfirst(p1);
+ PathKeysComparison keyscmp;
+
+ /*
+ * We are looking for an old_path with the same parameterization (and
+ * by assumption the same rowcount) that dominates the new path on
+ * pathkeys as well as both cost metrics. If we find one, we can
+ * reject the new path.
+ *
+ * For speed, we make exact rather than fuzzy cost comparisons. If an
+ * old path dominates the new path exactly on both costs, it will
+ * surely do so fuzzily.
+ */
+ if (total_cost >= old_path->total_cost)
+ {
+ /* can win on startup cost only if unparameterized */
+ if (startup_cost >= old_path->startup_cost || required_outer)
+ {
+ /* new path does not win on cost, so check pathkeys... */
+ List *old_path_pathkeys;
+
+ old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
+ keyscmp = compare_pathkeys(new_path_pathkeys,
+ old_path_pathkeys);
+ if (keyscmp == PATHKEYS_EQUAL ||
+ keyscmp == PATHKEYS_BETTER2)
+ {
+ /* new path does not win on pathkeys... */
+ if (bms_equal(required_outer, PATH_REQ_OUTER(old_path)))
+ {
+ /* Found an old path that dominates the new one */
+ return false;
+ }
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Since the pathlist is sorted by total_cost, we can stop looking
+ * once we reach a path with a total_cost larger than the new
+ * path's.
+ */
+ break;
+ }
+ }
+
+ /* No existing path dominates the proposed one; it's worth building */
+ return true;
+}
+
/*****************************************************************************
* PATH NODE CREATION ROUTINES
* pathnode.
*/
Path *
-create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
+create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
+ /* compute parameterization info for the given required outer rels */
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
pathnode->pathkeys = NIL; /* seqscan has unordered result */
- cost_seqscan(pathnode, root, rel);
+ cost_seqscan(pathnode, root, rel, pathnode->param_info);
+
+ return pathnode;
+}
+
+/*
+ * create_samplescan_path
+ * Like seqscan but uses sampling function while scanning.
+ */
+Path *
+create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
+{
+ Path *pathnode = makeNode(Path);
+
+ pathnode->pathtype = T_SampleScan;
+ pathnode->parent = rel;
+ /* as in create_seqscan_path, compute any parameterization info */
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = NIL; /* samplescan has unordered result */
+
+ cost_samplescan(pathnode, root, rel);
return pathnode;
}
* Creates a path node for an index scan.
*
* 'index' is a usable index.
- * 'clause_groups' is a list of lists of RestrictInfo nodes
+ * 'indexclauses' is a list of RestrictInfo nodes representing clauses
* to be used as index qual conditions in the scan.
+ * 'indexclausecols' is an integer list of index column numbers (zero based)
+ * the indexclauses can be used with.
+ * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
+ * to be used as index ordering operators in the scan.
+ * 'indexorderbycols' is an integer list of index column numbers (zero based)
+ * the ordering operators can be used with.
* 'pathkeys' describes the ordering of the path.
* 'indexscandir' is ForwardScanDirection or BackwardScanDirection
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
- * 'outer_rel' is the outer relation if this is a join inner indexscan path.
- * (pathkeys and indexscandir are ignored if so.) NULL if not.
+ * 'indexonly' is true if an index-only scan is wanted.
+ * 'required_outer' is the set of outer relids for a parameterized path.
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior.
*
* Returns the new path node.
*/
IndexPath *
create_index_path(PlannerInfo *root,
IndexOptInfo *index,
- List *clause_groups,
+ List *indexclauses,
+ List *indexclausecols,
+ List *indexorderbys,
+ List *indexorderbycols,
List *pathkeys,
ScanDirection indexscandir,
- RelOptInfo *outer_rel)
+ bool indexonly,
+ Relids required_outer,
+ double loop_count)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
List *indexquals,
- *allclauses;
+ *indexqualcols;
- /*
- * For a join inner scan, there's no point in marking the path with any
- * pathkeys, since it will only ever be used as the inner path of a
- * nestloop, and so its ordering does not matter. For the same reason we
- * don't really care what order it's scanned in. (We could expect the
- * caller to supply the correct values, but it's easier to force it here.)
- */
- if (outer_rel != NULL)
- {
- pathkeys = NIL;
- indexscandir = NoMovementScanDirection;
- }
-
- pathnode->path.pathtype = T_IndexScan;
+ pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
pathnode->path.parent = rel;
+ pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
pathnode->path.pathkeys = pathkeys;
/* Convert clauses to indexquals the executor can handle */
- indexquals = expand_indexqual_conditions(index, clause_groups);
-
- /* Flatten the clause-groups list to produce indexclauses list */
- allclauses = flatten_clausegroups_list(clause_groups);
+ expand_indexqual_conditions(index, indexclauses, indexclausecols,
+ &indexquals, &indexqualcols);
/* Fill in the pathnode */
pathnode->indexinfo = index;
- pathnode->indexclauses = allclauses;
+ pathnode->indexclauses = indexclauses;
pathnode->indexquals = indexquals;
-
- pathnode->isjoininner = (outer_rel != NULL);
+ pathnode->indexqualcols = indexqualcols;
+ pathnode->indexorderbys = indexorderbys;
+ pathnode->indexorderbycols = indexorderbycols;
pathnode->indexscandir = indexscandir;
- if (outer_rel != NULL)
- {
- /*
- * We must compute the estimated number of output rows for the
- * indexscan. This is less than rel->rows because of the additional
- * selectivity of the join clauses. Since clause_groups may contain
- * both restriction and join clauses, we have to do a set union to get
- * the full set of clauses that must be considered to compute the
- * correct selectivity. (Without the union operation, we might have
- * some restriction clauses appearing twice, which'd mislead
- * clauselist_selectivity into double-counting their selectivity.
- * However, since RestrictInfo nodes aren't copied when linking them
- * into different lists, it should be sufficient to use pointer
- * comparison to remove duplicates.)
- *
- * Always assume the join type is JOIN_INNER; even if some of the join
- * clauses come from other contexts, that's not our problem.
- */
- allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
- pathnode->rows = rel->tuples *
- clauselist_selectivity(root,
- allclauses,
- rel->relid, /* do not use 0! */
- JOIN_INNER);
- /* Like costsize.c, force estimate to be at least one row */
- pathnode->rows = clamp_row_est(pathnode->rows);
- }
- else
- {
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a join inner indexscan.
- */
- pathnode->rows = rel->rows;
- }
-
- cost_index(pathnode, root, index, indexquals, outer_rel);
+ /* loop_count factors repeated-scan caching into the cost estimate */
+ cost_index(pathnode, root, loop_count);
return pathnode;
}
* Creates a path node for a bitmap scan.
*
* 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
+ * 'required_outer' is the set of outer relids for a parameterized path.
+ * 'loop_count' is the number of repetitions of the indexscan to factor into
+ * estimates of caching behavior.
*
- * If this is a join inner indexscan path, 'outer_rel' is the outer relation,
- * and all the component IndexPaths should have been costed accordingly.
+ * loop_count should match the value used when creating the component
+ * IndexPaths.
*/
BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
- RelOptInfo *outer_rel)
+ Relids required_outer,
+ double loop_count)
{
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
pathnode->path.pathtype = T_BitmapHeapScan;
pathnode->path.parent = rel;
+ pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapqual = bitmapqual;
- pathnode->isjoininner = (outer_rel != NULL);
-
- if (pathnode->isjoininner)
- {
- /*
- * We must compute the estimated number of output rows for the
- * indexscan. This is less than rel->rows because of the additional
- * selectivity of the join clauses. We make use of the selectivity
- * estimated for the bitmap to do this; this isn't really quite right
- * since there may be restriction conditions not included in the
- * bitmap ...
- */
- Cost indexTotalCost;
- Selectivity indexSelectivity;
-
- cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
- pathnode->rows = rel->tuples * indexSelectivity;
- if (pathnode->rows > rel->rows)
- pathnode->rows = rel->rows;
- /* Like costsize.c, force estimate to be at least one row */
- pathnode->rows = clamp_row_est(pathnode->rows);
- }
- else
- {
- /*
- * The number of rows is the same as the parent rel's estimate, since
- * this isn't a join inner indexscan.
- */
- pathnode->rows = rel->rows;
- }
- cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
+ /* loop_count should match the value used for the component IndexPaths */
+ cost_bitmap_heap_scan(&pathnode->path, root, rel,
+ pathnode->path.param_info,
+ bitmapqual, loop_count);
return pathnode;
}
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
+ pathnode->path.param_info = NULL; /* not used in bitmap trees */
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
+ pathnode->path.param_info = NULL; /* not used in bitmap trees */
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
* Creates a path corresponding to a scan by TID, returning the pathnode.
*/
TidPath *
-create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals)
+create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
+ Relids required_outer)
{
TidPath *pathnode = makeNode(TidPath);
pathnode->path.pathtype = T_TidScan;
pathnode->path.parent = rel;
- pathnode->path.pathkeys = NIL;
+ pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->tidquals = tidquals;
- cost_tidscan(&pathnode->path, root, rel, tidquals);
+ cost_tidscan(&pathnode->path, root, rel, tidquals,
+ pathnode->path.param_info);
return pathnode;
}
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
+ *
+ * Note that we must handle subpaths = NIL, representing a dummy access path.
*/
AppendPath *
-create_append_path(RelOptInfo *rel, List *subpaths)
+create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
+ pathnode->path.param_info = get_appendrel_parampathinfo(rel,
+ required_outer);
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
pathnode->subpaths = subpaths;
+ /*
+ * We don't bother with inventing a cost_append(), but just do it here.
+ *
+ * Compute rows and costs as sums of subplan rows and costs. We charge
+ * nothing extra for the Append itself, which perhaps is too optimistic,
+ * but since it doesn't do any selection or projection, it is a pretty
+ * cheap node. If you change this, see also make_append().
+ */
+ pathnode->path.rows = 0;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = 0;
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
+ pathnode->path.rows += subpath->rows;
+
if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
+
+ /* All child paths must have the same parameterization */
+ Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
}
return pathnode;
}
+/*
+ * create_merge_append_path
+ * Creates a path corresponding to a MergeAppend plan, returning the
+ * pathnode.
+ */
+MergeAppendPath *
+create_merge_append_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ List *subpaths,
+ List *pathkeys,
+ Relids required_outer)
+{
+ MergeAppendPath *pathnode = makeNode(MergeAppendPath);
+ Cost input_startup_cost;
+ Cost input_total_cost;
+ ListCell *l;
+
+ pathnode->path.pathtype = T_MergeAppend;
+ pathnode->path.parent = rel;
+ pathnode->path.param_info = get_appendrel_parampathinfo(rel,
+ required_outer);
+ pathnode->path.pathkeys = pathkeys;
+ pathnode->subpaths = subpaths;
+
+ /*
+ * Apply query-wide LIMIT if known and path is for sole base relation.
+ * (Handling this at this low level is a bit klugy.)
+ */
+ if (bms_equal(rel->relids, root->all_baserels))
+ pathnode->limit_tuples = root->limit_tuples;
+ else
+ pathnode->limit_tuples = -1.0;
+
+ /*
+ * Add up the sizes and costs of the input paths.
+ */
+ pathnode->path.rows = 0;
+ input_startup_cost = 0;
+ input_total_cost = 0;
+ foreach(l, subpaths)
+ {
+ Path *subpath = (Path *) lfirst(l);
+
+ pathnode->path.rows += subpath->rows;
+
+ if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ {
+ /* Subpath is adequately ordered, we won't need to sort it */
+ input_startup_cost += subpath->startup_cost;
+ input_total_cost += subpath->total_cost;
+ }
+ else
+ {
+ /* We'll need to insert a Sort node, so include cost for that */
+ Path sort_path; /* dummy for result of cost_sort */
+
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ subpath->total_cost,
+ subpath->parent->tuples,
+ subpath->parent->width,
+ 0.0,
+ work_mem,
+ pathnode->limit_tuples);
+ input_startup_cost += sort_path.startup_cost;
+ input_total_cost += sort_path.total_cost;
+ }
+
+ /* All child paths must have the same parameterization */
+ Assert(bms_equal(PATH_REQ_OUTER(subpath), required_outer));
+ }
+
+ /* Now we can compute total costs of the MergeAppend */
+ cost_merge_append(&pathnode->path, root,
+ pathkeys, list_length(subpaths),
+ input_startup_cost, input_total_cost,
+ rel->tuples);
+
+ return pathnode;
+}
+
/*
* create_result_path
* Creates a path representing a Result-and-nothing-else plan.
pathnode->path.pathtype = T_Result;
pathnode->path.parent = NULL;
+ pathnode->path.param_info = NULL; /* there are no other rels... */
pathnode->path.pathkeys = NIL;
pathnode->quals = quals;
- /* Ideally should define cost_result(), but I'm too lazy */
+ /* Hardly worth defining a cost_result() function ... just do it */
+ pathnode->path.rows = 1;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = cpu_tuple_cost;
{
MaterialPath *pathnode = makeNode(MaterialPath);
+ Assert(subpath->parent == rel);
+
pathnode->path.pathtype = T_Material;
pathnode->path.parent = rel;
-
+ pathnode->path.param_info = subpath->param_info;
pathnode->path.pathkeys = subpath->pathkeys;
pathnode->subpath = subpath;
cost_material(&pathnode->path,
+ subpath->startup_cost,
subpath->total_cost,
- rel->rows,
+ subpath->rows,
rel->width);
return pathnode;
/*
* create_unique_path
* Creates a path representing elimination of distinct rows from the
- * input data.
+ * input data. Distinct-ness is defined according to the needs of the
+ * semijoin represented by sjinfo. If it is not possible to identify
+ * how to make the data unique, NULL is returned.
*
* If used at all, this is likely to be called repeatedly on the same rel;
* and the input subpath should always be the same (the cheapest_total path
* for the rel). So we cache the result.
*/
UniquePath *
-create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
+create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
+ SpecialJoinInfo *sjinfo)
{
UniquePath *pathnode;
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
MemoryContext oldcontext;
- List *sub_targetlist;
- List *in_operators;
- ListCell *l;
int numCols;
- /* Caller made a mistake if subpath isn't cheapest_total */
+ /* Caller made a mistake if subpath isn't cheapest_total ... */
Assert(subpath == rel->cheapest_total_path);
+ Assert(subpath->parent == rel);
+ /* ... or if SpecialJoinInfo is the wrong one */
+ Assert(sjinfo->jointype == JOIN_SEMI);
+ Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
/* If result already cached, return it */
if (rel->cheapest_unique_path)
return (UniquePath *) rel->cheapest_unique_path;
+ /* If it's not possible to unique-ify, return NULL */
+ if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
+ return NULL;
+
/*
- * We must ensure path struct is allocated in main planning context;
- * otherwise GEQO memory management causes trouble. (Compare
- * best_inner_indexscan().)
+ * We must ensure path struct and subsidiary data are allocated in main
+ * planning context; otherwise GEQO memory management causes trouble.
*/
oldcontext = MemoryContextSwitchTo(root->planner_cxt);
pathnode = makeNode(UniquePath);
- /* There is no substructure to allocate, so can switch back right away */
- MemoryContextSwitchTo(oldcontext);
-
pathnode->path.pathtype = T_Unique;
pathnode->path.parent = rel;
+ pathnode->path.param_info = subpath->param_info;
/*
- * Treat the output as always unsorted, since we don't necessarily have
- * pathkeys to represent it.
+ * Assume the output is unsorted, since we don't necessarily have pathkeys
+ * to represent it. (This might get overridden below.)
*/
pathnode->path.pathkeys = NIL;
pathnode->subpath = subpath;
+ pathnode->in_operators = sjinfo->semi_operators;
+ pathnode->uniq_exprs = sjinfo->semi_rhs_exprs;
/*
- * Try to identify the targetlist that will actually be unique-ified. In
- * current usage, this routine is only used for sub-selects of IN clauses,
- * so we should be able to find the tlist in in_info_list. Get the IN
- * clause's operators, too, because they determine what "unique" means.
+ * If the input is a relation and it has a unique index that proves the
+ * semi_rhs_exprs are unique, then we don't need to do anything. Note
+ * that relation_has_unique_index_for automatically considers restriction
+ * clauses for the rel, as well.
*/
- sub_targetlist = NIL;
- in_operators = NIL;
- foreach(l, root->in_info_list)
+ if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
+ relation_has_unique_index_for(root, rel, NIL,
+ sjinfo->semi_rhs_exprs,
+ sjinfo->semi_operators))
{
- InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+ pathnode->umethod = UNIQUE_PATH_NOOP;
+ pathnode->path.rows = rel->rows;
+ pathnode->path.startup_cost = subpath->startup_cost;
+ pathnode->path.total_cost = subpath->total_cost;
+ pathnode->path.pathkeys = subpath->pathkeys;
- if (bms_equal(ininfo->righthand, rel->relids))
- {
- sub_targetlist = ininfo->sub_targetlist;
- in_operators = ininfo->in_operators;
- break;
- }
+ rel->cheapest_unique_path = (Path *) pathnode;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return pathnode;
}
/*
* don't need to do anything. The test for uniqueness has to consider
* exactly which columns we are extracting; for example "SELECT DISTINCT
* x,y" doesn't guarantee that x alone is distinct. So we cannot check for
- * this optimization unless we found our own targetlist above, and it
- * consists only of simple Vars referencing subquery outputs. (Possibly
- * we could do something with expressions in the subquery outputs, too,
- * but for now keep it simple.)
+ * this optimization unless semi_rhs_exprs consists only of simple Vars
+ * referencing subquery outputs. (Possibly we could do something with
+ * expressions in the subquery outputs, too, but for now keep it simple.)
*/
- if (sub_targetlist && rel->rtekind == RTE_SUBQUERY)
+ if (rel->rtekind == RTE_SUBQUERY)
{
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
- List *sub_tlist_colnos;
-
- sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
- if (sub_tlist_colnos &&
- query_is_distinct_for(rte->subquery,
- sub_tlist_colnos, in_operators))
+ if (query_supports_distinctness(rte->subquery))
{
- pathnode->umethod = UNIQUE_PATH_NOOP;
- pathnode->rows = rel->rows;
- pathnode->path.startup_cost = subpath->startup_cost;
- pathnode->path.total_cost = subpath->total_cost;
- pathnode->path.pathkeys = subpath->pathkeys;
+ List *sub_tlist_colnos;
- rel->cheapest_unique_path = (Path *) pathnode;
+ sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
+ rel->relid);
- return pathnode;
+ if (sub_tlist_colnos &&
+ query_is_distinct_for(rte->subquery,
+ sub_tlist_colnos,
+ sjinfo->semi_operators))
+ {
+ pathnode->umethod = UNIQUE_PATH_NOOP;
+ pathnode->path.rows = rel->rows;
+ pathnode->path.startup_cost = subpath->startup_cost;
+ pathnode->path.total_cost = subpath->total_cost;
+ pathnode->path.pathkeys = subpath->pathkeys;
+
+ rel->cheapest_unique_path = (Path *) pathnode;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return pathnode;
+ }
}
}
- /*
- * If we know the targetlist, try to estimate number of result rows;
- * otherwise punt.
- */
- if (sub_targetlist)
- {
- pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows);
- numCols = list_length(sub_targetlist);
- }
- else
- {
- pathnode->rows = rel->rows;
- numCols = list_length(rel->reltargetlist);
- }
+ /* Estimate number of output rows */
+ pathnode->path.rows = estimate_num_groups(root,
+ sjinfo->semi_rhs_exprs,
+ rel->rows);
+ numCols = list_length(sjinfo->semi_rhs_exprs);
- /*
- * Estimate cost for sort+unique implementation
- */
- cost_sort(&sort_path, root, NIL,
- subpath->total_cost,
- rel->rows,
- rel->width,
- -1.0);
+ if (sjinfo->semi_can_btree)
+ {
+ /*
+ * Estimate cost for sort+unique implementation
+ */
+ cost_sort(&sort_path, root, NIL,
+ subpath->total_cost,
+ rel->rows,
+ rel->width,
+ 0.0,
+ work_mem,
+ -1.0);
- /*
- * Charge one cpu_operator_cost per comparison per input tuple. We assume
- * all columns get compared at most of the tuples. (XXX probably this is
- * an overestimate.) This should agree with make_unique.
- */
- sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
+ /*
+ * Charge one cpu_operator_cost per comparison per input tuple. We
+ * assume all columns get compared for most of the tuples. (XXX
+ * probably this is an overestimate.) This should agree with
+ * make_unique.
+ */
+ sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
+ }
- /*
- * Is it safe to use a hashed implementation? If so, estimate and compare
- * costs. We only try this if we know the IN operators, else we can't
- * check their hashability.
- */
- pathnode->umethod = UNIQUE_PATH_SORT;
- if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
+ if (sjinfo->semi_can_hash)
{
/*
* Estimate the overhead per hashtable entry at 64 bytes (same as in
*/
int hashentrysize = rel->width + 64;
- if (hashentrysize * pathnode->rows <= work_mem * 1024L)
+ if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
{
+ /*
+ * We should not try to hash. Hack the SpecialJoinInfo to
+ * remember this, in case we come through here again.
+ */
+ sjinfo->semi_can_hash = false;
+ }
+ else
cost_agg(&agg_path, root,
- AGG_HASHED, 0,
- numCols, pathnode->rows,
+ AGG_HASHED, NULL,
+ numCols, pathnode->path.rows,
subpath->startup_cost,
subpath->total_cost,
rel->rows);
- if (agg_path.total_cost < sort_path.total_cost)
- pathnode->umethod = UNIQUE_PATH_HASH;
- }
+ }
+
+ if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
+ {
+ if (agg_path.total_cost < sort_path.total_cost)
+ pathnode->umethod = UNIQUE_PATH_HASH;
+ else
+ pathnode->umethod = UNIQUE_PATH_SORT;
+ }
+ else if (sjinfo->semi_can_btree)
+ pathnode->umethod = UNIQUE_PATH_SORT;
+ else if (sjinfo->semi_can_hash)
+ pathnode->umethod = UNIQUE_PATH_HASH;
+ else
+ {
+ /* we can get here only if we abandoned hashing above */
+ MemoryContextSwitchTo(oldcontext);
+ return NULL;
}
if (pathnode->umethod == UNIQUE_PATH_HASH)
rel->cheapest_unique_path = (Path *) pathnode;
+ MemoryContextSwitchTo(oldcontext);
+
return pathnode;
}
/*
* translate_sub_tlist - get subquery column numbers represented by tlist
*
- * The given targetlist should contain only Vars referencing the given relid.
+ * The given targetlist usually contains only Vars referencing the given relid.
* Extract their varattnos (ie, the column numbers of the subquery) and return
* as an integer List.
*
}
/*
- * query_is_distinct_for - does query never return duplicates of the
- * specified columns?
- *
- * colnos is an integer list of output column numbers (resno's). We are
- * interested in whether rows consisting of just these columns are certain
- * to be distinct. "Distinctness" is defined according to whether the
- * corresponding upper-level equality operators listed in opids would think
- * the values are distinct. (Note: the opids entries could be cross-type
- * operators, and thus not exactly the equality operators that the subquery
- * would use itself. We assume that the subquery is compatible if these
- * operators appear in the same btree opfamily as the ones the subquery uses.)
+ * create_subqueryscan_path
+ * Creates a path corresponding to a sequential scan of a subquery,
+ * returning the pathnode.
*/
-static bool
-query_is_distinct_for(Query *query, List *colnos, List *opids)
+Path *
+create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
+ List *pathkeys, Relids required_outer)
{
- ListCell *l;
- Oid opid;
-
- Assert(list_length(colnos) == list_length(opids));
-
- /*
- * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
- * columns in the DISTINCT clause appear in colnos and operator semantics
- * match.
- */
- if (query->distinctClause)
- {
- foreach(l, query->distinctClause)
- {
- SortClause *scl = (SortClause *) lfirst(l);
- TargetEntry *tle = get_sortgroupclause_tle(scl,
- query->targetList);
-
- opid = distinct_col_search(tle->resno, colnos, opids);
- if (!OidIsValid(opid) ||
- !ops_in_same_btree_opfamily(opid, scl->sortop))
- break; /* exit early if no match */
- }
- if (l == NULL) /* had matches for all? */
- return true;
- }
-
- /*
- * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
- * appear in colnos and operator semantics match.
- */
- if (query->groupClause)
- {
- foreach(l, query->groupClause)
- {
- GroupClause *grpcl = (GroupClause *) lfirst(l);
- TargetEntry *tle = get_sortgroupclause_tle(grpcl,
- query->targetList);
-
- opid = distinct_col_search(tle->resno, colnos, opids);
- if (!OidIsValid(opid) ||
- !ops_in_same_btree_opfamily(opid, grpcl->sortop))
- break; /* exit early if no match */
- }
- if (l == NULL) /* had matches for all? */
- return true;
- }
- else
- {
- /*
- * If we have no GROUP BY, but do have aggregates or HAVING, then the
- * result is at most one row so it's surely unique, for any operators.
- */
- if (query->hasAggs || query->havingQual)
- return true;
- }
-
- /*
- * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
- * except with ALL.
- *
- * XXX this code knows that prepunion.c will adopt the default ordering
- * operator for each column datatype as the sortop. It'd probably be
- * better if these operators were chosen at parse time and stored into the
- * parsetree, instead of leaving bits of the planner to decide semantics.
- */
- if (query->setOperations)
- {
- SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
-
- Assert(IsA(topop, SetOperationStmt));
- Assert(topop->op != SETOP_NONE);
-
- if (!topop->all)
- {
- /* We're good if all the nonjunk output columns are in colnos */
- foreach(l, query->targetList)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(l);
-
- if (tle->resjunk)
- continue; /* ignore resjunk columns */
+ Path *pathnode = makeNode(Path);
- opid = distinct_col_search(tle->resno, colnos, opids);
- if (!OidIsValid(opid) ||
- !ops_in_same_btree_opfamily(opid,
- ordering_oper_opid(exprType((Node *) tle->expr))))
- break; /* exit early if no match */
- }
- if (l == NULL) /* had matches for all? */
- return true;
- }
- }
+ pathnode->pathtype = T_SubqueryScan;
+ pathnode->parent = rel;
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = pathkeys;
- /*
- * XXX Are there any other cases in which we can easily see the result
- * must be distinct?
- */
+ cost_subqueryscan(pathnode, root, rel, pathnode->param_info);
- return false;
+ return pathnode;
}
/*
- * distinct_col_search - subroutine for query_is_distinct_for
- *
- * If colno is in colnos, return the corresponding element of opids,
- * else return InvalidOid. (We expect colnos does not contain duplicates,
- * so the result is well-defined.)
+ * create_functionscan_path
+ * Creates a path corresponding to a sequential scan of a function,
+ * returning the pathnode.
*/
-static Oid
-distinct_col_search(int colno, List *colnos, List *opids)
+Path *
+create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
+ List *pathkeys, Relids required_outer)
{
- ListCell *lc1,
- *lc2;
+ Path *pathnode = makeNode(Path);
- forboth(lc1, colnos, lc2, opids)
- {
- if (colno == lfirst_int(lc1))
- return lfirst_oid(lc2);
- }
- return InvalidOid;
-}
+ pathnode->pathtype = T_FunctionScan;
+ pathnode->parent = rel;
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = pathkeys;
-/*
- * hash_safe_operators - can all the specified IN operators be hashed?
- *
- * We assume hashed aggregation will work if each IN operator is marked
- * hashjoinable. If the IN operators are cross-type, this could conceivably
- * fail: the aggregation will need a hashable equality operator for the RHS
- * datatype --- but it's pretty hard to conceive of a hash opfamily that has
- * cross-type hashing without support for hashing the individual types, so
- * we don't expend cycles here to support the case. We could check
- * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
- * former is a significantly more expensive test.
- */
-static bool
-hash_safe_operators(List *opids)
-{
- ListCell *lc;
+ cost_functionscan(pathnode, root, rel, pathnode->param_info);
- foreach(lc, opids)
- {
- if (!op_hashjoinable(lfirst_oid(lc)))
- return false;
- }
- return true;
+ return pathnode;
}
/*
- * create_subqueryscan_path
- * Creates a path corresponding to a sequential scan of a subquery,
+ * create_valuesscan_path
+ * Creates a path corresponding to a scan of a VALUES list,
* returning the pathnode.
*/
Path *
-create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
+create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
+ Relids required_outer)
{
Path *pathnode = makeNode(Path);
- pathnode->pathtype = T_SubqueryScan;
+ pathnode->pathtype = T_ValuesScan;
pathnode->parent = rel;
- pathnode->pathkeys = pathkeys;
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = NIL; /* result is always unordered */
- cost_subqueryscan(pathnode, rel);
+ cost_valuesscan(pathnode, root, rel, pathnode->param_info);
return pathnode;
}
/*
- * create_functionscan_path
- * Creates a path corresponding to a sequential scan of a function,
+ * create_ctescan_path
+ * Creates a path corresponding to a scan of a non-self-reference CTE,
* returning the pathnode.
*/
Path *
-create_functionscan_path(PlannerInfo *root, RelOptInfo *rel)
+create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
{
Path *pathnode = makeNode(Path);
- pathnode->pathtype = T_FunctionScan;
+ pathnode->pathtype = T_CteScan;
pathnode->parent = rel;
- pathnode->pathkeys = NIL; /* for now, assume unordered result */
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->pathkeys = NIL; /* XXX for now, result is always unordered */
- cost_functionscan(pathnode, root, rel);
+ cost_ctescan(pathnode, root, rel, pathnode->param_info);
return pathnode;
}
/*
- * create_valuesscan_path
- * Creates a path corresponding to a scan of a VALUES list,
+ * create_worktablescan_path
+ * Creates a path corresponding to a scan of a self-reference CTE,
* returning the pathnode.
*/
Path *
-create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
+create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel,
+ Relids required_outer)
{
Path *pathnode = makeNode(Path);
- pathnode->pathtype = T_ValuesScan;
+ pathnode->pathtype = T_WorkTableScan;
pathnode->parent = rel;
+ pathnode->param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
pathnode->pathkeys = NIL; /* result is always unordered */
- cost_valuesscan(pathnode, root, rel);
+ /* Cost is the same as for a regular CTE scan */
+ cost_ctescan(pathnode, root, rel, pathnode->param_info);
+
+ return pathnode;
+}
+
+/*
+ * create_foreignscan_path
+ * Creates a path corresponding to a scan of a foreign table,
+ * returning the pathnode.
+ *
+ * This function is never called from core Postgres; rather, it's expected
+ * to be called by the GetForeignPaths function of a foreign data wrapper.
+ * We make the FDW supply all fields of the path, since we do not have any
+ * way to calculate them in core.
+ */
+ForeignPath *
+create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
+ double rows, Cost startup_cost, Cost total_cost,
+ List *pathkeys,
+ Relids required_outer,
+ List *fdw_private)
+{
+ ForeignPath *pathnode = makeNode(ForeignPath);
+
+ pathnode->path.pathtype = T_ForeignScan;
+ pathnode->path.parent = rel;
+ pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->path.rows = rows;
+ pathnode->path.startup_cost = startup_cost;
+ pathnode->path.total_cost = total_cost;
+ pathnode->path.pathkeys = pathkeys;
+
+ pathnode->fdw_private = fdw_private;
return pathnode;
}
+/*
+ * calc_nestloop_required_outer
+ * Compute the required_outer set for a nestloop join path
+ *
+ * Note: result must not share storage with either input
+ */
+Relids
+calc_nestloop_required_outer(Path *outer_path, Path *inner_path)
+{
+ Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
+ Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
+ Relids required_outer;
+
+ /* inner_path can require rels from outer path, but not vice versa */
+ Assert(!bms_overlap(outer_paramrels, inner_path->parent->relids));
+ /* easy case if inner path is not parameterized */
+ if (!inner_paramrels)
+ return bms_copy(outer_paramrels);
+ /* else, form the union ... */
+ required_outer = bms_union(outer_paramrels, inner_paramrels);
+ /* ... and remove any mention of now-satisfied outer rels */
+ required_outer = bms_del_members(required_outer,
+ outer_path->parent->relids);
+ /* maintain invariant that required_outer is exactly NULL if empty */
+ if (bms_is_empty(required_outer))
+ {
+ bms_free(required_outer);
+ required_outer = NULL;
+ }
+ return required_outer;
+}
+
+/*
+ * calc_non_nestloop_required_outer
+ * Compute the required_outer set for a merge or hash join path
+ *
+ * Note: result must not share storage with either input
+ */
+Relids
+calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path)
+{
+ Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
+ Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
+ Relids required_outer;
+
+ /* neither path can require rels from the other */
+ Assert(!bms_overlap(outer_paramrels, inner_path->parent->relids));
+ Assert(!bms_overlap(inner_paramrels, outer_path->parent->relids));
+ /* form the union ... */
+ required_outer = bms_union(outer_paramrels, inner_paramrels);
+ /* we do not need an explicit test for empty; bms_union gets it right */
+ return required_outer;
+}
+
/*
* create_nestloop_path
* Creates a pathnode corresponding to a nestloop join between two
*
* 'joinrel' is the join relation.
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_nestloop
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
+ * 'required_outer' is the set of required outer rels
*
* Returns the resulting path node.
*/
create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
- List *pathkeys)
+ List *pathkeys,
+ Relids required_outer)
{
NestPath *pathnode = makeNode(NestPath);
+ Relids inner_req_outer = PATH_REQ_OUTER(inner_path);
+
+ /*
+ * If the inner path is parameterized by the outer, we must drop any
+ * restrict_clauses that are due to be moved into the inner path. We have
+ * to do this now, rather than postpone the work till createplan time,
+ * because the restrict_clauses list can affect the size and cost
+ * estimates for this path.
+ */
+ if (bms_overlap(inner_req_outer, outer_path->parent->relids))
+ {
+ Relids inner_and_outer = bms_union(inner_path->parent->relids,
+ inner_req_outer);
+ List *jclauses = NIL;
+ ListCell *lc;
+
+ foreach(lc, restrict_clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (!join_clause_is_movable_into(rinfo,
+ inner_path->parent->relids,
+ inner_and_outer))
+ jclauses = lappend(jclauses, rinfo);
+ }
+ restrict_clauses = jclauses;
+ }
pathnode->path.pathtype = T_NestLoop;
pathnode->path.parent = joinrel;
+ pathnode->path.param_info =
+ get_joinrel_parampathinfo(root,
+ joinrel,
+ outer_path,
+ inner_path,
+ sjinfo,
+ required_outer,
+ &restrict_clauses);
+ pathnode->path.pathkeys = pathkeys;
pathnode->jointype = jointype;
pathnode->outerjoinpath = outer_path;
pathnode->innerjoinpath = inner_path;
pathnode->joinrestrictinfo = restrict_clauses;
- pathnode->path.pathkeys = pathkeys;
- cost_nestloop(pathnode, root);
+ final_cost_nestloop(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_mergejoin
+ * 'sjinfo' is extra info about the join for selectivity estimation
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
+ * 'required_outer' is the set of required outer rels
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
* (this should be a subset of the restrict_clauses list)
* 'outersortkeys' are the sort varkeys for the outer relation
create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
+ Relids required_outer,
List *mergeclauses,
List *outersortkeys,
List *innersortkeys)
{
MergePath *pathnode = makeNode(MergePath);
- /*
- * If the given paths are already well enough ordered, we can skip doing
- * an explicit sort.
- */
- if (outersortkeys &&
- pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
- outersortkeys = NIL;
- if (innersortkeys &&
- pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
- innersortkeys = NIL;
-
- /*
- * If we are not sorting the inner path, we may need a materialize node to
- * ensure it can be marked/restored. (Sort does support mark/restore, so
- * no materialize is needed in that case.)
- *
- * Since the inner side must be ordered, and only Sorts and IndexScans can
- * create order to begin with, you might think there's no problem --- but
- * you'd be wrong. Nestloop and merge joins can *preserve* the order of
- * their inputs, so they can be selected as the input of a mergejoin, and
- * they don't support mark/restore at present.
- */
- if (innersortkeys == NIL &&
- !ExecSupportsMarkRestore(inner_path->pathtype))
- inner_path = (Path *)
- create_material_path(inner_path->parent, inner_path);
-
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
+ pathnode->jpath.path.param_info =
+ get_joinrel_parampathinfo(root,
+ joinrel,
+ outer_path,
+ inner_path,
+ sjinfo,
+ required_outer,
+ &restrict_clauses);
+ pathnode->jpath.path.pathkeys = pathkeys;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
- pathnode->jpath.path.pathkeys = pathkeys;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
+ /* pathnode->materialize_inner will be set by final_cost_mergejoin */
- cost_mergejoin(pathnode, root);
+ final_cost_mergejoin(root, pathnode, workspace, sjinfo);
return pathnode;
}
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
+ * 'workspace' is the result from initial_cost_hashjoin
+ * 'sjinfo' is extra info about the join for selectivity estimation
+ * 'semifactors' contains valid data if jointype is SEMI or ANTI
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
+ * 'required_outer' is the set of required outer rels
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
*/
create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
+ JoinCostWorkspace *workspace,
+ SpecialJoinInfo *sjinfo,
+ SemiAntiJoinFactors *semifactors,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
+ Relids required_outer,
List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
+ /*
+ * NOTE(review): restrict_clauses is passed by address, so
+ * get_joinrel_parampathinfo can presumably modify the clause list when
+ * the path is parameterized (required_outer nonempty) -- confirm against
+ * its definition before relying on the original list afterwards. The
+ * possibly-updated list is what gets stored in joinrestrictinfo below.
+ */
+ pathnode->jpath.path.param_info =
+ get_joinrel_parampathinfo(root,
+ joinrel,
+ outer_path,
+ inner_path,
+ sjinfo,
+ required_outer,
+ &restrict_clauses);
+
+ /*
+ * A hashjoin never has pathkeys, since its output ordering is
+ * unpredictable due to possible batching. XXX If the inner relation is
+ * small enough, we could instruct the executor that it must not batch,
+ * and then we could assume that the output inherits the outer relation's
+ * ordering, which might save a sort step. However there is considerable
+ * downside if our estimate of the inner relation size is badly off. For
+ * the moment we don't risk it. (Note also that if we wanted to take this
+ * seriously, joinpath.c would have to consider many more paths for the
+ * outer rel than it does now.)
+ */
+ pathnode->jpath.path.pathkeys = NIL;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
- /* A hashjoin never has pathkeys, since its ordering is unpredictable */
- pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses;
+ /* final_cost_hashjoin will fill in pathnode->num_batches */
- cost_hashjoin(pathnode, root);
+ final_cost_hashjoin(root, pathnode, workspace, sjinfo, semifactors);
return pathnode;
}
+
+/*
+ * reparameterize_path
+ * Attempt to modify a Path to have greater parameterization
+ *
+ * We use this to attempt to bring all child paths of an appendrel to the
+ * same parameterization level, ensuring that they all enforce the same set
+ * of join quals (and thus that that parameterization can be attributed to
+ * an append path built from such paths). Currently, only a few path types
+ * are supported here, though more could be added at need. We return NULL
+ * if we can't reparameterize the given path.
+ *
+ * 'required_outer' is the desired (superset) parameterization; 'loop_count'
+ * is forwarded to the costing routines (cost_index,
+ * create_bitmap_heap_path) for the cases where the rebuilt path's cost
+ * must be re-estimated here.
+ *
+ * Note: we intentionally do not pass created paths to add_path(); it would
+ * possibly try to delete them on the grounds of being cost-inferior to the
+ * paths they were made from, and we don't want that. Paths made here are
+ * not necessarily of general-purpose usefulness, but they can be useful
+ * as members of an append path.
+ */
+Path *
+reparameterize_path(PlannerInfo *root, Path *path,
+ Relids required_outer,
+ double loop_count)
+{
+ RelOptInfo *rel = path->parent;
+
+ /* Can only increase, not decrease, path's parameterization */
+ if (!bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+ return NULL;
+ switch (path->pathtype)
+ {
+ case T_SeqScan:
+ /* trivial: rebuild from scratch with the new parameterization */
+ return create_seqscan_path(root, rel, required_outer);
+ case T_IndexScan:
+ case T_IndexOnlyScan:
+ {
+ IndexPath *ipath = (IndexPath *) path;
+ IndexPath *newpath = makeNode(IndexPath);
+
+ /*
+ * We can't use create_index_path directly, and would not want
+ * to because it would re-compute the indexqual conditions
+ * which is wasted effort. Instead we hack things a bit:
+ * flat-copy the path node, revise its param_info, and redo
+ * the cost estimate.
+ */
+ memcpy(newpath, ipath, sizeof(IndexPath));
+ newpath->path.param_info =
+ get_baserel_parampathinfo(root, rel, required_outer);
+ cost_index(newpath, root, loop_count);
+ return (Path *) newpath;
+ }
+ case T_BitmapHeapScan:
+ {
+ BitmapHeapPath *bpath = (BitmapHeapPath *) path;
+
+ /* rebuild around the same bitmapqual tree */
+ return (Path *) create_bitmap_heap_path(root,
+ rel,
+ bpath->bitmapqual,
+ required_outer,
+ loop_count);
+ }
+ case T_SubqueryScan:
+ /* the path's existing pathkeys are retained unchanged */
+ return create_subqueryscan_path(root, rel, path->pathkeys,
+ required_outer);
+ case T_SampleScan:
+ return (Path *) create_samplescan_path(root, rel, required_outer);
+ default:
+ /* unsupported path type; fall through to return NULL */
+ break;
+ }
+ return NULL;
+}