granicus.if.org Git - postgresql/blobdiff - src/backend/optimizer/util/pathnode.c
Refactor planner's pathkeys data structure to create a separate, explicit
[postgresql] / src / backend / optimizer / util / pathnode.c
index 407c132b4f7a6388b093806fd3eb01286906e084..5832d145ef09d151b3a1b7a6e0429740f106b6ff 100644 (file)
@@ -3,12 +3,12 @@
  * pathnode.c
  *       Routines to manipulate pathlists and create path nodes
  *
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.137 2007/01/20 20:45:39 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include <math.h>
 
-#include "nodes/plannodes.h"
+#include "catalog/pg_operator.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
-#include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
+#include "parser/parsetree.h"
+#include "utils/selfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+
+static List *translate_sub_tlist(List *tlist, int relid);
+static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
+static Oid     distinct_col_search(int colno, List *colnos, List *opids);
+static bool hash_safe_operators(List *opids);
 
 
 /*****************************************************************************
@@ -43,8 +57,8 @@ compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
                        return +1;
 
                /*
-                * If paths have the same startup cost (not at all unlikely),
-                * order them by total cost.
+                * If paths have the same startup cost (not at all unlikely), order
+                * them by total cost.
                 */
                if (path1->total_cost < path2->total_cost)
                        return -1;
@@ -69,6 +83,58 @@ compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
        return 0;
 }
 
+/*
+ * compare_fuzzy_path_costs
+ *       Return -1, 0, or +1 according as path1 is cheaper, the same cost,
+ *       or more expensive than path2 for the specified criterion.
+ *
+ * This differs from compare_path_costs in that we consider the costs the
+ * same if they agree to within a "fuzz factor".  This is used by add_path
+ * to avoid keeping both of a pair of paths that really have insignificantly
+ * different cost.
+ */
+static int
+compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
+{
+       /*
+        * We use a fuzz factor of 1% of the smaller cost.
+        *
+        * XXX does this percentage need to be user-configurable?
+        */
+       if (criterion == STARTUP_COST)
+       {
+               if (path1->startup_cost > path2->startup_cost * 1.01)
+                       return +1;
+               if (path2->startup_cost > path1->startup_cost * 1.01)
+                       return -1;
+
+               /*
+                * If paths have the same startup cost (not at all unlikely), order
+                * them by total cost.
+                */
+               if (path1->total_cost > path2->total_cost * 1.01)
+                       return +1;
+               if (path2->total_cost > path1->total_cost * 1.01)
+                       return -1;
+       }
+       else
+       {
+               if (path1->total_cost > path2->total_cost * 1.01)
+                       return +1;
+               if (path2->total_cost > path1->total_cost * 1.01)
+                       return -1;
+
+               /*
+                * If paths have the same total cost, order them by startup cost.
+                */
+               if (path1->startup_cost > path2->startup_cost * 1.01)
+                       return +1;
+               if (path2->startup_cost > path1->startup_cost * 1.01)
+                       return -1;
+       }
+       return 0;
+}
+
 /*
  * compare_fractional_path_costs
  *       Return -1, 0, or +1 according as path1 is cheaper, the same cost,
@@ -115,18 +181,18 @@ void
 set_cheapest(RelOptInfo *parent_rel)
 {
        List       *pathlist = parent_rel->pathlist;
-       List       *p;
+       ListCell   *p;
        Path       *cheapest_startup_path;
        Path       *cheapest_total_path;
 
        Assert(IsA(parent_rel, RelOptInfo));
 
        if (pathlist == NIL)
-               elog(ERROR, "Unable to devise a query plan for the given query");
+               elog(ERROR, "could not devise a query plan for the given query");
 
-       cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist);
+       cheapest_startup_path = cheapest_total_path = (Path *) linitial(pathlist);
 
-       foreach(p, lnext(pathlist))
+       for_each_cell(p, lnext(list_head(pathlist)))
        {
                Path       *path = (Path *) lfirst(p);
                int                     cmp;
@@ -148,6 +214,7 @@ set_cheapest(RelOptInfo *parent_rel)
 
        parent_rel->cheapest_startup_path = cheapest_startup_path;
        parent_rel->cheapest_total_path = cheapest_total_path;
+       parent_rel->cheapest_unique_path = NULL;        /* computed only if needed */
 }
 
 /*
@@ -157,9 +224,9 @@ set_cheapest(RelOptInfo *parent_rel)
  *       A path is worthy if it has either a better sort order (better pathkeys)
  *       or cheaper cost (on either dimension) than any of the existing old paths.
  *
- *       Unless parent_rel->pruneable is false, we also remove from the rel's
- *       pathlist any old paths that are dominated by new_path --- that is,
- *       new_path is both cheaper and at least as well ordered.
+ *       We also remove from the rel's pathlist any old paths that are dominated
+ *       by new_path --- that is, new_path is both cheaper and at least as well
+ *       ordered.
  *
  *       The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
  *       at the front.  No code depends on that for correctness; it's simply
@@ -173,6 +240,9 @@ set_cheapest(RelOptInfo *parent_rel)
  *       but just recycling discarded Path nodes is a very useful savings in
  *       a large join tree.  We can recycle the List nodes of pathlist, too.
  *
+ *       BUT: we do not pfree IndexPath objects, since they may be referenced as
+ *       children of BitmapHeapPaths as well as being paths in their own right.
+ *
  * 'parent_rel' is the relation entry to which the path corresponds.
  * 'new_path' is a potential path for parent_rel.
  *
@@ -181,47 +251,69 @@ set_cheapest(RelOptInfo *parent_rel)
 void
 add_path(RelOptInfo *parent_rel, Path *new_path)
 {
-       bool            accept_new = true;              /* unless we find a superior old
-                                                                                * path */
-       List       *insert_after = NIL;         /* where to insert new item */
-       List       *p1_prev = NIL;
-       List       *p1;
+       bool            accept_new = true;              /* unless we find a superior old path */
+       ListCell   *insert_after = NULL;        /* where to insert new item */
+       ListCell   *p1_prev = NULL;
+       ListCell   *p1;
 
        /*
-        * Loop to check proposed new path against old paths.  Note it is
-        * possible for more than one old path to be tossed out because
-        * new_path dominates it.
+        * This is a convenient place to check for query cancel --- no part of the
+        * planner goes very long without calling add_path().
         */
-       p1 = parent_rel->pathlist;      /* cannot use foreach here */
-       while (p1 != NIL)
+       CHECK_FOR_INTERRUPTS();
+
+       /*
+        * Loop to check proposed new path against old paths.  Note it is possible
+        * for more than one old path to be tossed out because new_path dominates
+        * it.
+        */
+       p1 = list_head(parent_rel->pathlist);           /* cannot use foreach here */
+       while (p1 != NULL)
        {
                Path       *old_path = (Path *) lfirst(p1);
                bool            remove_old = false; /* unless new proves superior */
                int                     costcmp;
 
-               costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
+               /*
+                * As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
+                * cycles keeping paths that are really not significantly different in
+                * cost.
+                */
+               costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
 
                /*
-                * If the two paths compare differently for startup and total
-                * cost, then we want to keep both, and we can skip the (much
-                * slower) comparison of pathkeys.      If they compare the same,
-                * proceed with the pathkeys comparison.  Note: this test relies
-                * on the fact that compare_path_costs will only return 0 if both
-                * costs are equal (and, therefore, there's no need to call it
-                * twice in that case).
+                * If the two paths compare differently for startup and total cost,
+                * then we want to keep both, and we can skip the (much slower)
+                * comparison of pathkeys.      If they compare the same, proceed with the
+                * pathkeys comparison.  Note: this test relies on the fact that
+                * compare_fuzzy_path_costs will only return 0 if both costs are
+                * effectively equal (and, therefore, there's no need to call it twice
+                * in that case).
                 */
                if (costcmp == 0 ||
-                       costcmp == compare_path_costs(new_path, old_path,
-                                                                                 STARTUP_COST))
+                       costcmp == compare_fuzzy_path_costs(new_path, old_path,
+                                                                                               STARTUP_COST))
                {
                        switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
                        {
                                case PATHKEYS_EQUAL:
                                        if (costcmp < 0)
                                                remove_old = true;              /* new dominates old */
+                                       else if (costcmp > 0)
+                                               accept_new = false;             /* old dominates new */
                                        else
-                                               accept_new = false;             /* old equals or dominates
-                                                                                                * new */
+                                       {
+                                               /*
+                                                * Same pathkeys, and fuzzily the same cost, so keep
+                                                * just one --- but we'll do an exact cost comparison
+                                                * to decide which.
+                                                */
+                                               if (compare_path_costs(new_path, old_path,
+                                                                                          TOTAL_COST) < 0)
+                                                       remove_old = true;      /* new dominates old */
+                                               else
+                                                       accept_new = false; /* old equals or dominates new */
+                                       }
                                        break;
                                case PATHKEYS_BETTER1:
                                        if (costcmp <= 0)
@@ -238,26 +330,30 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
                }
 
                /*
-                * Remove current element from pathlist if dominated by new,
-                * unless xfunc told us not to remove any paths.
+                * Remove current element from pathlist if dominated by new.
                 */
-               if (remove_old && parent_rel->pruneable)
+               if (remove_old)
                {
-                       List       *p1_next = lnext(p1);
-
+                       parent_rel->pathlist = list_delete_cell(parent_rel->pathlist,
+                                                                                                       p1, p1_prev);
+
+                       /*
+                        * Delete the data pointed-to by the deleted cell, if possible
+                        */
+                       if (!IsA(old_path, IndexPath))
+                               pfree(old_path);
+                       /* Advance list pointer */
                        if (p1_prev)
-                               lnext(p1_prev) = p1_next;
+                               p1 = lnext(p1_prev);
                        else
-                               parent_rel->pathlist = p1_next;
-                       pfree(old_path);
-                       pfree(p1);                      /* this is why we can't use foreach */
-                       p1 = p1_next;
+                               p1 = list_head(parent_rel->pathlist);
                }
                else
                {
                        /* new belongs after this old path if it has cost >= old's */
                        if (costcmp >= 0)
                                insert_after = p1;
+                       /* Advance list pointers */
                        p1_prev = p1;
                        p1 = lnext(p1);
                }
@@ -275,14 +371,15 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
        {
                /* Accept the new path: insert it at proper place in pathlist */
                if (insert_after)
-                       lnext(insert_after) = lcons(new_path, lnext(insert_after));
+                       lappend_cell(parent_rel->pathlist, insert_after, new_path);
                else
                        parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
        }
        else
        {
                /* Reject and recycle the new path */
-               pfree(new_path);
+               if (!IsA(new_path, IndexPath))
+                       pfree(new_path);
        }
 }
 
@@ -297,7 +394,7 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
  *       pathnode.
  */
 Path *
-create_seqscan_path(RelOptInfo *rel)
+create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
 {
        Path       *pathnode = makeNode(Path);
 
@@ -305,7 +402,7 @@ create_seqscan_path(RelOptInfo *rel)
        pathnode->parent = rel;
        pathnode->pathkeys = NIL;       /* seqscan has unordered result */
 
-       cost_seqscan(pathnode, rel);
+       cost_seqscan(pathnode, root, rel);
 
        return pathnode;
 }
@@ -314,83 +411,223 @@ create_seqscan_path(RelOptInfo *rel)
  * create_index_path
  *       Creates a path node for an index scan.
  *
- * 'rel' is the parent rel
- * 'index' is an index on 'rel'
- * 'restriction_clauses' is a list of RestrictInfo nodes
+ * 'index' is a usable index.
+ * 'clause_groups' is a list of lists of RestrictInfo nodes
  *                     to be used as index qual conditions in the scan.
  * 'pathkeys' describes the ordering of the path.
  * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
  *                     for an ordered index, or NoMovementScanDirection for
  *                     an unordered index.
+ * 'outer_rel' is the outer relation if this is a join inner indexscan path.
+ *                     (pathkeys and indexscandir are ignored if so.)  NULL if not.
  *
  * Returns the new path node.
  */
-IndexPath  *
-create_index_path(Query *root,
-                                 RelOptInfo *rel,
+IndexPath *
+create_index_path(PlannerInfo *root,
                                  IndexOptInfo *index,
-                                 List *restriction_clauses,
+                                 List *clause_groups,
                                  List *pathkeys,
-                                 ScanDirection indexscandir)
+                                 ScanDirection indexscandir,
+                                 RelOptInfo *outer_rel)
 {
        IndexPath  *pathnode = makeNode(IndexPath);
-       List       *indexquals;
+       RelOptInfo *rel = index->rel;
+       List       *indexquals,
+                          *allclauses;
+
+       /*
+        * For a join inner scan, there's no point in marking the path with any
+        * pathkeys, since it will only ever be used as the inner path of a
+        * nestloop, and so its ordering does not matter.  For the same reason we
+        * don't really care what order it's scanned in.  (We could expect the
+        * caller to supply the correct values, but it's easier to force it here.)
+        */
+       if (outer_rel != NULL)
+       {
+               pathkeys = NIL;
+               indexscandir = NoMovementScanDirection;
+       }
 
        pathnode->path.pathtype = T_IndexScan;
        pathnode->path.parent = rel;
        pathnode->path.pathkeys = pathkeys;
 
-       indexquals = get_actual_clauses(restriction_clauses);
-       /* expand special operators to indexquals the executor can handle */
-       indexquals = expand_indexqual_conditions(indexquals);
+       /* Convert clauses to indexquals the executor can handle */
+       indexquals = expand_indexqual_conditions(index, clause_groups);
 
-       /*
-        * We are making a pathnode for a single-scan indexscan; therefore,
-        * both indexid and indexqual should be single-element lists.
-        */
-       pathnode->indexid = makeListi1(index->indexoid);
-       pathnode->indexqual = makeList1(indexquals);
+       /* Flatten the clause-groups list to produce indexclauses list */
+       allclauses = flatten_clausegroups_list(clause_groups);
+
+       /* Fill in the pathnode */
+       pathnode->indexinfo = index;
+       pathnode->indexclauses = allclauses;
+       pathnode->indexquals = indexquals;
 
+       pathnode->isjoininner = (outer_rel != NULL);
        pathnode->indexscandir = indexscandir;
 
-       /*
-        * This routine is only used to generate "standalone" indexpaths, not
-        * nestloop inner indexpaths.  So joinrelids is always NIL and the
-        * number of rows is the same as the parent rel's estimate.
-        */
-       pathnode->joinrelids = NIL; /* no join clauses here */
-       pathnode->alljoinquals = false;
-       pathnode->rows = rel->rows;
+       if (outer_rel != NULL)
+       {
+               /*
+                * We must compute the estimated number of output rows for the
+                * indexscan.  This is less than rel->rows because of the additional
+                * selectivity of the join clauses.  Since clause_groups may contain
+                * both restriction and join clauses, we have to do a set union to get
+                * the full set of clauses that must be considered to compute the
+                * correct selectivity.  (Without the union operation, we might have
+                * some restriction clauses appearing twice, which'd mislead
+                * clauselist_selectivity into double-counting their selectivity.
+                * However, since RestrictInfo nodes aren't copied when linking them
+                * into different lists, it should be sufficient to use pointer
+                * comparison to remove duplicates.)
+                *
+                * Always assume the join type is JOIN_INNER; even if some of the join
+                * clauses come from other contexts, that's not our problem.
+                */
+               allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
+               pathnode->rows = rel->tuples *
+                       clauselist_selectivity(root,
+                                                                  allclauses,
+                                                                  rel->relid,  /* do not use 0! */
+                                                                  JOIN_INNER);
+               /* Like costsize.c, force estimate to be at least one row */
+               pathnode->rows = clamp_row_est(pathnode->rows);
+       }
+       else
+       {
+               /*
+                * The number of rows is the same as the parent rel's estimate, since
+                * this isn't a join inner indexscan.
+                */
+               pathnode->rows = rel->rows;
+       }
 
-       cost_index(&pathnode->path, root, rel, index, indexquals, false);
+       cost_index(pathnode, root, index, indexquals, outer_rel);
 
        return pathnode;
 }
 
 /*
- * create_tidscan_path
- *       Creates a path corresponding to a tid_direct scan, returning the
- *       pathnode.
+ * create_bitmap_heap_path
+ *       Creates a path node for a bitmap scan.
  *
+ * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
+ *
+ * If this is a join inner indexscan path, 'outer_rel' is the outer relation,
+ * and all the component IndexPaths should have been costed accordingly.
+ */
+BitmapHeapPath *
+create_bitmap_heap_path(PlannerInfo *root,
+                                               RelOptInfo *rel,
+                                               Path *bitmapqual,
+                                               RelOptInfo *outer_rel)
+{
+       BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
+
+       pathnode->path.pathtype = T_BitmapHeapScan;
+       pathnode->path.parent = rel;
+       pathnode->path.pathkeys = NIL;          /* always unordered */
+
+       pathnode->bitmapqual = bitmapqual;
+       pathnode->isjoininner = (outer_rel != NULL);
+
+       if (pathnode->isjoininner)
+       {
+               /*
+                * We must compute the estimated number of output rows for the
+                * indexscan.  This is less than rel->rows because of the additional
+                * selectivity of the join clauses.  We make use of the selectivity
+                * estimated for the bitmap to do this; this isn't really quite right
+                * since there may be restriction conditions not included in the
+                * bitmap ...
+                */
+               Cost            indexTotalCost;
+               Selectivity indexSelectivity;
+
+               cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
+               pathnode->rows = rel->tuples * indexSelectivity;
+               if (pathnode->rows > rel->rows)
+                       pathnode->rows = rel->rows;
+               /* Like costsize.c, force estimate to be at least one row */
+               pathnode->rows = clamp_row_est(pathnode->rows);
+       }
+       else
+       {
+               /*
+                * The number of rows is the same as the parent rel's estimate, since
+                * this isn't a join inner indexscan.
+                */
+               pathnode->rows = rel->rows;
+       }
+
+       cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
+
+       return pathnode;
+}
+
+/*
+ * create_bitmap_and_path
+ *       Creates a path node representing a BitmapAnd.
+ */
+BitmapAndPath *
+create_bitmap_and_path(PlannerInfo *root,
+                                          RelOptInfo *rel,
+                                          List *bitmapquals)
+{
+       BitmapAndPath *pathnode = makeNode(BitmapAndPath);
+
+       pathnode->path.pathtype = T_BitmapAnd;
+       pathnode->path.parent = rel;
+       pathnode->path.pathkeys = NIL;          /* always unordered */
+
+       pathnode->bitmapquals = bitmapquals;
+
+       /* this sets bitmapselectivity as well as the regular cost fields: */
+       cost_bitmap_and_node(pathnode, root);
+
+       return pathnode;
+}
+
+/*
+ * create_bitmap_or_path
+ *       Creates a path node representing a BitmapOr.
+ */
+BitmapOrPath *
+create_bitmap_or_path(PlannerInfo *root,
+                                         RelOptInfo *rel,
+                                         List *bitmapquals)
+{
+       BitmapOrPath *pathnode = makeNode(BitmapOrPath);
+
+       pathnode->path.pathtype = T_BitmapOr;
+       pathnode->path.parent = rel;
+       pathnode->path.pathkeys = NIL;          /* always unordered */
+
+       pathnode->bitmapquals = bitmapquals;
+
+       /* this sets bitmapselectivity as well as the regular cost fields: */
+       cost_bitmap_or_node(pathnode, root);
+
+       return pathnode;
+}
+
+/*
+ * create_tidscan_path
+ *       Creates a path corresponding to a scan by TID, returning the pathnode.
  */
-TidPath    *
-create_tidscan_path(RelOptInfo *rel, List *tideval)
+TidPath *
+create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals)
 {
        TidPath    *pathnode = makeNode(TidPath);
 
        pathnode->path.pathtype = T_TidScan;
        pathnode->path.parent = rel;
        pathnode->path.pathkeys = NIL;
-       pathnode->tideval = copyObject(tideval);        /* is copy really
-                                                                                                * necessary? */
-       pathnode->unjoined_relids = NIL;
 
-       cost_tidscan(&pathnode->path, rel, tideval);
+       pathnode->tidquals = tidquals;
 
-       /*
-        * divide selectivity for each clause to get an equal selectivity as
-        * IndexScan does OK ?
-        */
+       cost_tidscan(&pathnode->path, root, rel, tidquals);
 
        return pathnode;
 }
@@ -399,13 +636,12 @@ create_tidscan_path(RelOptInfo *rel, List *tideval)
  * create_append_path
  *       Creates a path corresponding to an Append plan, returning the
  *       pathnode.
- *
  */
 AppendPath *
 create_append_path(RelOptInfo *rel, List *subpaths)
 {
        AppendPath *pathnode = makeNode(AppendPath);
-       List       *l;
+       ListCell   *l;
 
        pathnode->path.pathtype = T_Append;
        pathnode->path.parent = rel;
@@ -419,7 +655,7 @@ create_append_path(RelOptInfo *rel, List *subpaths)
        {
                Path       *subpath = (Path *) lfirst(l);
 
-               if (l == subpaths)              /* first node? */
+               if (l == list_head(subpaths))   /* first node? */
                        pathnode->path.startup_cost = subpath->startup_cost;
                pathnode->path.total_cost += subpath->total_cost;
        }
@@ -427,23 +663,485 @@ create_append_path(RelOptInfo *rel, List *subpaths)
        return pathnode;
 }
 
+/*
+ * create_result_path
+ *       Creates a path representing a Result-and-nothing-else plan.
+ *       This is only used for the case of a query with an empty jointree.
+ */
+ResultPath *
+create_result_path(List *quals)
+{
+       ResultPath *pathnode = makeNode(ResultPath);
+
+       pathnode->path.pathtype = T_Result;
+       pathnode->path.parent = NULL;
+       pathnode->path.pathkeys = NIL;
+       pathnode->quals = quals;
+
+       /* Ideally should define cost_result(), but I'm too lazy */
+       pathnode->path.startup_cost = 0;
+       pathnode->path.total_cost = cpu_tuple_cost;
+
+       /*
+        * In theory we should include the qual eval cost as well, but at present
+        * that doesn't accomplish much except duplicate work that will be done
+        * again in make_result; since this is only used for degenerate cases,
+        * nothing interesting will be done with the path cost values...
+        */
+
+       return pathnode;
+}
+
+/*
+ * create_material_path
+ *       Creates a path corresponding to a Material plan, returning the
+ *       pathnode.
+ */
+MaterialPath *
+create_material_path(RelOptInfo *rel, Path *subpath)
+{
+       MaterialPath *pathnode = makeNode(MaterialPath);
+
+       pathnode->path.pathtype = T_Material;
+       pathnode->path.parent = rel;
+
+       pathnode->path.pathkeys = subpath->pathkeys;
+
+       pathnode->subpath = subpath;
+
+       cost_material(&pathnode->path,
+                                 subpath->total_cost,
+                                 rel->rows,
+                                 rel->width);
+
+       return pathnode;
+}
+
/*
 * create_unique_path
 *	  Creates a path representing elimination of distinct rows from the
 *	  input data.
 *
 * If used at all, this is likely to be called repeatedly on the same rel;
 * and the input subpath should always be the same (the cheapest_total path
 * for the rel).  So we cache the result.
 *
 * Returns the cached or newly built UniquePath; as a side effect, stores it
 * in rel->cheapest_unique_path.
 */
UniquePath *
create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
{
	UniquePath *pathnode;
	Path		sort_path;		/* dummy for result of cost_sort */
	Path		agg_path;		/* dummy for result of cost_agg */
	MemoryContext oldcontext;
	List	   *sub_targetlist;
	List	   *in_operators;
	ListCell   *l;
	int			numCols;

	/* Caller made a mistake if subpath isn't cheapest_total */
	Assert(subpath == rel->cheapest_total_path);

	/* If result already cached, return it */
	if (rel->cheapest_unique_path)
		return (UniquePath *) rel->cheapest_unique_path;

	/*
	 * We must ensure path struct is allocated in main planning context;
	 * otherwise GEQO memory management causes trouble.  (Compare
	 * best_inner_indexscan().)
	 */
	oldcontext = MemoryContextSwitchTo(root->planner_cxt);

	pathnode = makeNode(UniquePath);

	/* There is no substructure to allocate, so can switch back right away */
	MemoryContextSwitchTo(oldcontext);

	pathnode->path.pathtype = T_Unique;
	pathnode->path.parent = rel;

	/*
	 * Treat the output as always unsorted, since we don't necessarily have
	 * pathkeys to represent it.
	 */
	pathnode->path.pathkeys = NIL;

	pathnode->subpath = subpath;

	/*
	 * Try to identify the targetlist that will actually be unique-ified. In
	 * current usage, this routine is only used for sub-selects of IN clauses,
	 * so we should be able to find the tlist in in_info_list.  Get the IN
	 * clause's operators, too, because they determine what "unique" means.
	 */
	sub_targetlist = NIL;
	in_operators = NIL;
	foreach(l, root->in_info_list)
	{
		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);

		if (bms_equal(ininfo->righthand, rel->relids))
		{
			sub_targetlist = ininfo->sub_targetlist;
			in_operators = ininfo->in_operators;
			break;
		}
	}

	/*
	 * If the input is a subquery whose output must be unique already, then we
	 * don't need to do anything.  The test for uniqueness has to consider
	 * exactly which columns we are extracting; for example "SELECT DISTINCT
	 * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
	 * this optimization unless we found our own targetlist above, and it
	 * consists only of simple Vars referencing subquery outputs.  (Possibly
	 * we could do something with expressions in the subquery outputs, too,
	 * but for now keep it simple.)
	 */
	if (sub_targetlist && rel->rtekind == RTE_SUBQUERY)
	{
		RangeTblEntry *rte = rt_fetch(rel->relid, root->parse->rtable);
		List	   *sub_tlist_colnos;

		/* translate_sub_tlist returns NIL if any tlist item isn't a Var */
		sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);

		if (sub_tlist_colnos &&
			query_is_distinct_for(rte->subquery,
								  sub_tlist_colnos, in_operators))
		{
			/* No work needed: pass the subpath through unmodified */
			pathnode->umethod = UNIQUE_PATH_NOOP;
			pathnode->rows = rel->rows;
			pathnode->path.startup_cost = subpath->startup_cost;
			pathnode->path.total_cost = subpath->total_cost;
			pathnode->path.pathkeys = subpath->pathkeys;

			/* cache before the early return, same as the normal exit path */
			rel->cheapest_unique_path = (Path *) pathnode;

			return pathnode;
		}
	}

	/*
	 * If we know the targetlist, try to estimate number of result rows;
	 * otherwise punt.
	 */
	if (sub_targetlist)
	{
		pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows);
		numCols = list_length(sub_targetlist);
	}
	else
	{
		pathnode->rows = rel->rows;
		numCols = list_length(rel->reltargetlist);
	}

	/*
	 * Estimate cost for sort+unique implementation
	 */
	cost_sort(&sort_path, root, NIL,
			  subpath->total_cost,
			  rel->rows,
			  rel->width);

	/*
	 * Charge one cpu_operator_cost per comparison per input tuple. We assume
	 * all columns get compared at most of the tuples.      (XXX probably this is
	 * an overestimate.)  This should agree with make_unique.
	 */
	sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;

	/*
	 * Is it safe to use a hashed implementation?  If so, estimate and compare
	 * costs.  We only try this if we know the IN operators, else we can't
	 * check their hashability.
	 */
	pathnode->umethod = UNIQUE_PATH_SORT;
	if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
	{
		/*
		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
		 * planner.c).
		 */
		int			hashentrysize = rel->width + 64;

		/* work_mem is in kilobytes, hence the 1024L scaling */
		if (hashentrysize * pathnode->rows <= work_mem * 1024L)
		{
			cost_agg(&agg_path, root,
					 AGG_HASHED, 0,
					 numCols, pathnode->rows,
					 subpath->startup_cost,
					 subpath->total_cost,
					 rel->rows);
			/* pick hashing only when it actually beats sort+unique */
			if (agg_path.total_cost < sort_path.total_cost)
				pathnode->umethod = UNIQUE_PATH_HASH;
		}
	}

	/* Copy the chosen implementation's cost estimates into the path */
	if (pathnode->umethod == UNIQUE_PATH_HASH)
	{
		pathnode->path.startup_cost = agg_path.startup_cost;
		pathnode->path.total_cost = agg_path.total_cost;
	}
	else
	{
		pathnode->path.startup_cost = sort_path.startup_cost;
		pathnode->path.total_cost = sort_path.total_cost;
	}

	rel->cheapest_unique_path = (Path *) pathnode;

	return pathnode;
}
+
+/*
+ * translate_sub_tlist - get subquery column numbers represented by tlist
+ *
+ * The given targetlist should contain only Vars referencing the given relid.
+ * Extract their varattnos (ie, the column numbers of the subquery) and return
+ * as an integer List.
+ *
+ * If any of the tlist items is not a simple Var, we cannot determine whether
+ * the subquery's uniqueness condition (if any) matches ours, so punt and
+ * return NIL.
+ */
+static List *
+translate_sub_tlist(List *tlist, int relid)
+{
+       List       *result = NIL;
+       ListCell   *l;
+
+       foreach(l, tlist)
+       {
+               Var                *var = (Var *) lfirst(l);
+
+               if (!var || !IsA(var, Var) ||
+                       var->varno != relid)
+                       return NIL;                     /* punt */
+
+               result = lappend_int(result, var->varattno);
+       }
+       return result;
+}
+
/*
 * query_is_distinct_for - does query never return duplicates of the
 *		specified columns?
 *
 * colnos is an integer list of output column numbers (resno's).  We are
 * interested in whether rows consisting of just these columns are certain
 * to be distinct.  "Distinctness" is defined according to whether the
 * corresponding upper-level equality operators listed in opids would think
 * the values are distinct.  (Note: the opids entries could be cross-type
 * operators, and thus not exactly the equality operators that the subquery
 * would use itself.  We assume that the subquery is compatible if these
 * operators appear in the same btree opfamily as the ones the subquery uses.)
 *
 * Returns true only when one of the checks below can prove distinctness;
 * false means "cannot prove it", not "definitely has duplicates".
 */
static bool
query_is_distinct_for(Query *query, List *colnos, List *opids)
{
	ListCell   *l;
	Oid			opid;

	/* the two lists are parallel: one operator per column */
	Assert(list_length(colnos) == list_length(opids));

	/*
	 * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
	 * columns in the DISTINCT clause appear in colnos and operator
	 * semantics match.
	 */
	if (query->distinctClause)
	{
		foreach(l, query->distinctClause)
		{
			SortClause *scl = (SortClause *) lfirst(l);
			TargetEntry *tle = get_sortgroupclause_tle(scl,
													   query->targetList);

			opid = distinct_col_search(tle->resno, colnos, opids);
			if (!OidIsValid(opid) ||
				!ops_in_same_btree_opfamily(opid, scl->sortop))
				break;			/* exit early if no match */
		}
		/* l is NULL only if the foreach ran to completion without break */
		if (l == NULL)			/* had matches for all? */
			return true;
	}

	/*
	 * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
	 * appear in colnos and operator semantics match.
	 */
	if (query->groupClause)
	{
		foreach(l, query->groupClause)
		{
			GroupClause *grpcl = (GroupClause *) lfirst(l);
			TargetEntry *tle = get_sortgroupclause_tle(grpcl,
													   query->targetList);

			opid = distinct_col_search(tle->resno, colnos, opids);
			if (!OidIsValid(opid) ||
				!ops_in_same_btree_opfamily(opid, grpcl->sortop))
				break;			/* exit early if no match */
		}
		/* again, NULL means every grouped column matched */
		if (l == NULL)			/* had matches for all? */
			return true;
	}
	else
	{
		/*
		 * If we have no GROUP BY, but do have aggregates or HAVING, then the
		 * result is at most one row so it's surely unique, for any operators.
		 */
		if (query->hasAggs || query->havingQual)
			return true;
	}

	/*
	 * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
	 * except with ALL.
	 *
	 * XXX this code knows that prepunion.c will adopt the default ordering
	 * operator for each column datatype as the sortop.  It'd probably be
	 * better if these operators were chosen at parse time and stored into
	 * the parsetree, instead of leaving bits of the planner to decide
	 * semantics.
	 */
	if (query->setOperations)
	{
		SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;

		Assert(IsA(topop, SetOperationStmt));
		Assert(topop->op != SETOP_NONE);

		if (!topop->all)
		{
			/* We're good if all the nonjunk output columns are in colnos */
			foreach(l, query->targetList)
			{
				TargetEntry *tle = (TargetEntry *) lfirst(l);

				if (tle->resjunk)
					continue;	/* ignore resjunk columns */

				opid = distinct_col_search(tle->resno, colnos, opids);
				if (!OidIsValid(opid) ||
					!ops_in_same_btree_opfamily(opid,
												ordering_oper_opid(exprType((Node *) tle->expr))))
					break;		/* exit early if no match */
			}
			if (l == NULL)		/* had matches for all? */
				return true;
		}
	}

	/*
	 * XXX Are there any other cases in which we can easily see the result
	 * must be distinct?
	 */

	return false;
}
+
+/*
+ * distinct_col_search - subroutine for query_is_distinct_for
+ *
+ * If colno is in colnos, return the corresponding element of opids,
+ * else return InvalidOid.  (We expect colnos does not contain duplicates,
+ * so the result is well-defined.)
+ */
+static Oid
+distinct_col_search(int colno, List *colnos, List *opids)
+{
+       ListCell   *lc1,
+                          *lc2;
+
+       forboth(lc1, colnos, lc2, opids)
+       {
+               if (colno == lfirst_int(lc1))
+                       return lfirst_oid(lc2);
+       }
+       return InvalidOid;
+}
+
+/*
+ * hash_safe_operators - can all the specified IN operators be hashed?
+ *
+ * We assume hashed aggregation will work if each IN operator is marked
+ * hashjoinable.  If the IN operators are cross-type, this could conceivably
+ * fail: the aggregation will need a hashable equality operator for the RHS
+ * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * cross-type hashing without support for hashing the individual types, so
+ * we don't expend cycles here to support the case.  We could check
+ * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
+ * former is a significantly more expensive test.
+ */
+static bool
+hash_safe_operators(List *opids)
+{
+       ListCell   *lc;
+
+       foreach(lc, opids)
+       {
+               if (!op_hashjoinable(lfirst_oid(lc)))
+                       return false;
+       }
+       return true;
+}
+
 /*
  * create_subqueryscan_path
  *       Creates a path corresponding to a sequential scan of a subquery,
  *       returning the pathnode.
  */
 Path *
-create_subqueryscan_path(RelOptInfo *rel)
+create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
 {
        Path       *pathnode = makeNode(Path);
 
        pathnode->pathtype = T_SubqueryScan;
        pathnode->parent = rel;
+       pathnode->pathkeys = pathkeys;
+
+       cost_subqueryscan(pathnode, rel);
+
+       return pathnode;
+}
+
+/*
+ * create_functionscan_path
+ *       Creates a path corresponding to a sequential scan of a function,
+ *       returning the pathnode.
+ */
+Path *
+create_functionscan_path(PlannerInfo *root, RelOptInfo *rel)
+{
+       Path       *pathnode = makeNode(Path);
+
+       pathnode->pathtype = T_FunctionScan;
+       pathnode->parent = rel;
        pathnode->pathkeys = NIL;       /* for now, assume unordered result */
 
-       /* just copy the subplan's cost estimates */
-       pathnode->startup_cost = rel->subplan->startup_cost;
-       pathnode->total_cost = rel->subplan->total_cost;
+       cost_functionscan(pathnode, root, rel);
+
+       return pathnode;
+}
+
+/*
+ * create_valuesscan_path
+ *       Creates a path corresponding to a scan of a VALUES list,
+ *       returning the pathnode.
+ */
+Path *
+create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
+{
+       Path       *pathnode = makeNode(Path);
+
+       pathnode->pathtype = T_ValuesScan;
+       pathnode->parent = rel;
+       pathnode->pathkeys = NIL;       /* result is always unordered */
+
+       cost_valuesscan(pathnode, root, rel);
 
        return pathnode;
 }
@@ -461,10 +1159,10 @@ create_subqueryscan_path(RelOptInfo *rel)
  * 'pathkeys' are the path keys of the new join path
  *
  * Returns the resulting path node.
- *
  */
-NestPath   *
-create_nestloop_path(RelOptInfo *joinrel,
+NestPath *
+create_nestloop_path(PlannerInfo *root,
+                                        RelOptInfo *joinrel,
                                         JoinType jointype,
                                         Path *outer_path,
                                         Path *inner_path,
@@ -481,7 +1179,7 @@ create_nestloop_path(RelOptInfo *joinrel,
        pathnode->joinrestrictinfo = restrict_clauses;
        pathnode->path.pathkeys = pathkeys;
 
-       cost_nestloop(&pathnode->path, outer_path, inner_path, restrict_clauses);
+       cost_nestloop(pathnode, root);
 
        return pathnode;
 }
@@ -501,10 +1199,10 @@ create_nestloop_path(RelOptInfo *joinrel,
  *             (this should be a subset of the restrict_clauses list)
  * 'outersortkeys' are the sort varkeys for the outer relation
  * 'innersortkeys' are the sort varkeys for the inner relation
- *
  */
-MergePath  *
-create_mergejoin_path(RelOptInfo *joinrel,
+MergePath *
+create_mergejoin_path(PlannerInfo *root,
+                                         RelOptInfo *joinrel,
                                          JoinType jointype,
                                          Path *outer_path,
                                          Path *inner_path,
@@ -517,8 +1215,8 @@ create_mergejoin_path(RelOptInfo *joinrel,
        MergePath  *pathnode = makeNode(MergePath);
 
        /*
-        * If the given paths are already well enough ordered, we can skip
-        * doing an explicit sort.
+        * If the given paths are already well enough ordered, we can skip doing
+        * an explicit sort.
         */
        if (outersortkeys &&
                pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
@@ -527,6 +1225,22 @@ create_mergejoin_path(RelOptInfo *joinrel,
                pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
                innersortkeys = NIL;
 
+       /*
+        * If we are not sorting the inner path, we may need a materialize node to
+        * ensure it can be marked/restored.  (Sort does support mark/restore, so
+        * no materialize is needed in that case.)
+        *
+        * Since the inner side must be ordered, and only Sorts and IndexScans can
+        * create order to begin with, you might think there's no problem --- but
+        * you'd be wrong.  Nestloop and merge joins can *preserve* the order of
+        * their inputs, so they can be selected as the input of a mergejoin, and
+        * they don't support mark/restore at present.
+        */
+       if (innersortkeys == NIL &&
+               !ExecSupportsMarkRestore(inner_path->pathtype))
+               inner_path = (Path *)
+                       create_material_path(inner_path->parent, inner_path);
+
        pathnode->jpath.path.pathtype = T_MergeJoin;
        pathnode->jpath.path.parent = joinrel;
        pathnode->jpath.jointype = jointype;
@@ -538,12 +1252,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
        pathnode->outersortkeys = outersortkeys;
        pathnode->innersortkeys = innersortkeys;
 
-       cost_mergejoin(&pathnode->jpath.path,
-                                  outer_path,
-                                  inner_path,
-                                  restrict_clauses,
-                                  outersortkeys,
-                                  innersortkeys);
+       cost_mergejoin(pathnode, root);
 
        return pathnode;
 }
@@ -557,19 +1266,17 @@ create_mergejoin_path(RelOptInfo *joinrel,
  * 'outer_path' is the cheapest outer path
  * 'inner_path' is the cheapest inner path
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
- * 'hashclauses' is a list of the hash join clause (always a 1-element list)
+ * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
  *             (this should be a subset of the restrict_clauses list)
- * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
- *
  */
-HashPath   *
-create_hashjoin_path(RelOptInfo *joinrel,
+HashPath *
+create_hashjoin_path(PlannerInfo *root,
+                                        RelOptInfo *joinrel,
                                         JoinType jointype,
                                         Path *outer_path,
                                         Path *inner_path,
                                         List *restrict_clauses,
-                                        List *hashclauses,
-                                        Selectivity innerbucketsize)
+                                        List *hashclauses)
 {
        HashPath   *pathnode = makeNode(HashPath);
 
@@ -583,11 +1290,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
        pathnode->jpath.path.pathkeys = NIL;
        pathnode->path_hashclauses = hashclauses;
 
-       cost_hashjoin(&pathnode->jpath.path,
-                                 outer_path,
-                                 inner_path,
-                                 restrict_clauses,
-                                 innerbucketsize);
+       cost_hashjoin(pathnode, root);
 
        return pathnode;
 }