* pathnode.c
* Routines to manipulate pathlists and create path nodes
*
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.137 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include <math.h>
-#include "nodes/plannodes.h"
+#include "catalog/pg_operator.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
-#include "optimizer/restrictinfo.h"
+#include "optimizer/tlist.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
+#include "parser/parsetree.h"
+#include "utils/selfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+
+static List *translate_sub_tlist(List *tlist, int relid);
+static bool query_is_distinct_for(Query *query, List *colnos, List *opids);
+static Oid distinct_col_search(int colno, List *colnos, List *opids);
+static bool hash_safe_operators(List *opids);
/*****************************************************************************
return +1;
/*
- * If paths have the same startup cost (not at all unlikely),
- * order them by total cost.
+ * If paths have the same startup cost (not at all unlikely), order
+ * them by total cost.
*/
if (path1->total_cost < path2->total_cost)
return -1;
return 0;
}
+/*
+ * compare_fuzzy_path_costs
+ * Return -1, 0, or +1 according as path1 is cheaper, the same cost,
+ * or more expensive than path2 for the specified criterion.
+ *
+ * This differs from compare_path_costs in that we consider the costs the
+ * same if they agree to within a "fuzz factor". This is used by add_path
+ * to avoid keeping both of a pair of paths that really have insignificantly
+ * different cost.
+ *
+ * (The fuzz factor is hard-wired at 1% below: a cost counts as "greater"
+ * only when it exceeds the other cost by more than 1%.)
+ */
+static int
+compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
+{
+ /*
+ * We use a fuzz factor of 1% of the smaller cost.
+ *
+ * XXX does this percentage need to be user-configurable?
+ */
+ if (criterion == STARTUP_COST)
+ {
+ if (path1->startup_cost > path2->startup_cost * 1.01)
+ return +1;
+ if (path2->startup_cost > path1->startup_cost * 1.01)
+ return -1;
+
+ /*
+ * If paths have the same startup cost (not at all unlikely), order
+ * them by total cost.
+ */
+ if (path1->total_cost > path2->total_cost * 1.01)
+ return +1;
+ if (path2->total_cost > path1->total_cost * 1.01)
+ return -1;
+ }
+ else
+ {
+ if (path1->total_cost > path2->total_cost * 1.01)
+ return +1;
+ if (path2->total_cost > path1->total_cost * 1.01)
+ return -1;
+
+ /*
+ * If paths have the same total cost, order them by startup cost.
+ */
+ if (path1->startup_cost > path2->startup_cost * 1.01)
+ return +1;
+ if (path2->startup_cost > path1->startup_cost * 1.01)
+ return -1;
+ }
+ return 0;
+}
+
/*
* compare_path_fractional_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
set_cheapest(RelOptInfo *parent_rel)
{
List *pathlist = parent_rel->pathlist;
- List *p;
+ ListCell *p;
Path *cheapest_startup_path;
Path *cheapest_total_path;
Assert(IsA(parent_rel, RelOptInfo));
if (pathlist == NIL)
- elog(ERROR, "Unable to devise a query plan for the given query");
+ elog(ERROR, "could not devise a query plan for the given query");
- cheapest_startup_path = cheapest_total_path = (Path *) lfirst(pathlist);
+ cheapest_startup_path = cheapest_total_path = (Path *) linitial(pathlist);
- foreach(p, lnext(pathlist))
+ for_each_cell(p, lnext(list_head(pathlist)))
{
Path *path = (Path *) lfirst(p);
int cmp;
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
+ parent_rel->cheapest_unique_path = NULL; /* computed only if needed */
}
/*
* A path is worthy if it has either a better sort order (better pathkeys)
* or cheaper cost (on either dimension) than any of the existing old paths.
*
- * Unless parent_rel->pruneable is false, we also remove from the rel's
- * pathlist any old paths that are dominated by new_path --- that is,
- * new_path is both cheaper and at least as well ordered.
+ * We also remove from the rel's pathlist any old paths that are dominated
+ * by new_path --- that is, new_path is both cheaper and at least as well
+ * ordered.
*
* The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
* at the front. No code depends on that for correctness; it's simply
* but just recycling discarded Path nodes is a very useful savings in
* a large join tree. We can recycle the List nodes of pathlist, too.
*
+ * BUT: we do not pfree IndexPath objects, since they may be referenced as
+ * children of BitmapHeapPaths as well as being paths in their own right.
+ *
* 'parent_rel' is the relation entry to which the path corresponds.
* 'new_path' is a potential path for parent_rel.
*
void
add_path(RelOptInfo *parent_rel, Path *new_path)
{
- bool accept_new = true; /* unless we find a superior old
- * path */
- List *insert_after = NIL; /* where to insert new item */
- List *p1_prev = NIL;
- List *p1;
+ bool accept_new = true; /* unless we find a superior old path */
+ ListCell *insert_after = NULL; /* where to insert new item */
+ ListCell *p1_prev = NULL;
+ ListCell *p1;
/*
- * Loop to check proposed new path against old paths. Note it is
- * possible for more than one old path to be tossed out because
- * new_path dominates it.
+ * This is a convenient place to check for query cancel --- no part of the
+ * planner goes very long without calling add_path().
*/
- p1 = parent_rel->pathlist; /* cannot use foreach here */
- while (p1 != NIL)
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * Loop to check proposed new path against old paths. Note it is possible
+ * for more than one old path to be tossed out because new_path dominates
+ * it.
+ */
+ p1 = list_head(parent_rel->pathlist); /* cannot use foreach here */
+ while (p1 != NULL)
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
int costcmp;
- costcmp = compare_path_costs(new_path, old_path, TOTAL_COST);
+ /*
+ * As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
+ * cycles keeping paths that are really not significantly different in
+ * cost.
+ */
+ costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
/*
- * If the two paths compare differently for startup and total
- * cost, then we want to keep both, and we can skip the (much
- * slower) comparison of pathkeys. If they compare the same,
- * proceed with the pathkeys comparison. Note: this test relies
- * on the fact that compare_path_costs will only return 0 if both
- * costs are equal (and, therefore, there's no need to call it
- * twice in that case).
+ * If the two paths compare differently for startup and total cost,
+ * then we want to keep both, and we can skip the (much slower)
+ * comparison of pathkeys. If they compare the same, proceed with the
+ * pathkeys comparison. Note: this test relies on the fact that
+ * compare_fuzzy_path_costs will only return 0 if both costs are
+ * effectively equal (and, therefore, there's no need to call it twice
+ * in that case).
*/
if (costcmp == 0 ||
- costcmp == compare_path_costs(new_path, old_path,
- STARTUP_COST))
+ costcmp == compare_fuzzy_path_costs(new_path, old_path,
+ STARTUP_COST))
{
switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
{
case PATHKEYS_EQUAL:
if (costcmp < 0)
remove_old = true; /* new dominates old */
+ else if (costcmp > 0)
+ accept_new = false; /* old dominates new */
else
- accept_new = false; /* old equals or dominates
- * new */
+ {
+ /*
+ * Same pathkeys, and fuzzily the same cost, so keep
+ * just one --- but we'll do an exact cost comparison
+ * to decide which.
+ */
+ if (compare_path_costs(new_path, old_path,
+ TOTAL_COST) < 0)
+ remove_old = true; /* new dominates old */
+ else
+ accept_new = false; /* old equals or dominates new */
+ }
break;
case PATHKEYS_BETTER1:
if (costcmp <= 0)
}
/*
- * Remove current element from pathlist if dominated by new,
- * unless xfunc told us not to remove any paths.
+ * Remove current element from pathlist if dominated by new.
*/
- if (remove_old && parent_rel->pruneable)
+ if (remove_old)
{
- List *p1_next = lnext(p1);
-
+ parent_rel->pathlist = list_delete_cell(parent_rel->pathlist,
+ p1, p1_prev);
+
+ /*
+ * Delete the data pointed-to by the deleted cell, if possible
+ */
+ if (!IsA(old_path, IndexPath))
+ pfree(old_path);
+ /* Advance list pointer */
if (p1_prev)
- lnext(p1_prev) = p1_next;
+ p1 = lnext(p1_prev);
else
- parent_rel->pathlist = p1_next;
- pfree(old_path);
- pfree(p1); /* this is why we can't use foreach */
- p1 = p1_next;
+ p1 = list_head(parent_rel->pathlist);
}
else
{
/* new belongs after this old path if it has cost >= old's */
if (costcmp >= 0)
insert_after = p1;
+ /* Advance list pointers */
p1_prev = p1;
p1 = lnext(p1);
}
{
/* Accept the new path: insert it at proper place in pathlist */
if (insert_after)
- lnext(insert_after) = lcons(new_path, lnext(insert_after));
+ lappend_cell(parent_rel->pathlist, insert_after, new_path);
else
parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
}
else
{
/* Reject and recycle the new path */
- pfree(new_path);
+ if (!IsA(new_path, IndexPath))
+ pfree(new_path);
}
}
* pathnode.
*/
Path *
-create_seqscan_path(RelOptInfo *rel)
+create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
{
 Path *pathnode = makeNode(Path);
 pathnode->parent = rel;
 pathnode->pathkeys = NIL; /* seqscan has unordered result */
- cost_seqscan(pathnode, rel);
+ /* NB: cost_seqscan now takes the PlannerInfo as well */
+ cost_seqscan(pathnode, root, rel);
 return pathnode;
}
* create_index_path
* Creates a path node for an index scan.
*
- * 'rel' is the parent rel
- * 'index' is an index on 'rel'
- * 'restriction_clauses' is a list of RestrictInfo nodes
+ * 'index' is a usable index.
+ * 'clause_groups' is a list of lists of RestrictInfo nodes
* to be used as index qual conditions in the scan.
* 'pathkeys' describes the ordering of the path.
* 'indexscandir' is ForwardScanDirection or BackwardScanDirection
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
+ * 'outer_rel' is the outer relation if this is a join inner indexscan path.
+ * (pathkeys and indexscandir are ignored if so.) NULL if not.
*
* Returns the new path node.
*/
-IndexPath *
-create_index_path(Query *root,
- RelOptInfo *rel,
+IndexPath *
+create_index_path(PlannerInfo *root,
 IndexOptInfo *index,
- List *restriction_clauses,
+ List *clause_groups,
 List *pathkeys,
- ScanDirection indexscandir)
+ ScanDirection indexscandir,
+ RelOptInfo *outer_rel)
{
 IndexPath *pathnode = makeNode(IndexPath);
- List *indexquals;
+ /* the parent relation is now implied by the index, not passed in */
+ RelOptInfo *rel = index->rel;
+ List *indexquals,
+ *allclauses;
+
+ /*
+ * For a join inner scan, there's no point in marking the path with any
+ * pathkeys, since it will only ever be used as the inner path of a
+ * nestloop, and so its ordering does not matter. For the same reason we
+ * don't really care what order it's scanned in. (We could expect the
+ * caller to supply the correct values, but it's easier to force it here.)
+ */
+ if (outer_rel != NULL)
+ {
+ pathkeys = NIL;
+ indexscandir = NoMovementScanDirection;
+ }
 pathnode->path.pathtype = T_IndexScan;
 pathnode->path.parent = rel;
 pathnode->path.pathkeys = pathkeys;
- indexquals = get_actual_clauses(restriction_clauses);
- /* expand special operators to indexquals the executor can handle */
- indexquals = expand_indexqual_conditions(indexquals);
+ /* Convert clauses to indexquals the executor can handle */
+ indexquals = expand_indexqual_conditions(index, clause_groups);
- /*
- * We are making a pathnode for a single-scan indexscan; therefore,
- * both indexid and indexqual should be single-element lists.
- */
- pathnode->indexid = makeListi1(index->indexoid);
- pathnode->indexqual = makeList1(indexquals);
+ /* Flatten the clause-groups list to produce indexclauses list */
+ allclauses = flatten_clausegroups_list(clause_groups);
+
+ /* Fill in the pathnode */
+ pathnode->indexinfo = index;
+ pathnode->indexclauses = allclauses;
+ pathnode->indexquals = indexquals;
+ pathnode->isjoininner = (outer_rel != NULL);
 pathnode->indexscandir = indexscandir;
- /*
- * This routine is only used to generate "standalone" indexpaths, not
- * nestloop inner indexpaths. So joinrelids is always NIL and the
- * number of rows is the same as the parent rel's estimate.
- */
- pathnode->joinrelids = NIL; /* no join clauses here */
- pathnode->alljoinquals = false;
- pathnode->rows = rel->rows;
+ if (outer_rel != NULL)
+ {
+ /*
+ * We must compute the estimated number of output rows for the
+ * indexscan. This is less than rel->rows because of the additional
+ * selectivity of the join clauses. Since clause_groups may contain
+ * both restriction and join clauses, we have to do a set union to get
+ * the full set of clauses that must be considered to compute the
+ * correct selectivity. (Without the union operation, we might have
+ * some restriction clauses appearing twice, which'd mislead
+ * clauselist_selectivity into double-counting their selectivity.
+ * However, since RestrictInfo nodes aren't copied when linking them
+ * into different lists, it should be sufficient to use pointer
+ * comparison to remove duplicates.)
+ *
+ * Always assume the join type is JOIN_INNER; even if some of the join
+ * clauses come from other contexts, that's not our problem.
+ */
+ allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
+ pathnode->rows = rel->tuples *
+ clauselist_selectivity(root,
+ allclauses,
+ rel->relid, /* do not use 0! */
+ JOIN_INNER);
+ /* Like costsize.c, force estimate to be at least one row */
+ pathnode->rows = clamp_row_est(pathnode->rows);
+ }
+ else
+ {
+ /*
+ * The number of rows is the same as the parent rel's estimate, since
+ * this isn't a join inner indexscan.
+ */
+ pathnode->rows = rel->rows;
+ }
- cost_index(&pathnode->path, root, rel, index, indexquals, false);
+ cost_index(pathnode, root, index, indexquals, outer_rel);
 return pathnode;
}
/*
- * create_tidscan_path
- * Creates a path corresponding to a tid_direct scan, returning the
- * pathnode.
+ * create_bitmap_heap_path
+ * Creates a path node for a bitmap scan.
*
+ * 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
+ *
+ * If this is a join inner indexscan path, 'outer_rel' is the outer relation,
+ * and all the component IndexPaths should have been costed accordingly.
+ */
+BitmapHeapPath *
+create_bitmap_heap_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ Path *bitmapqual,
+ RelOptInfo *outer_rel)
+{
+ BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
+
+ pathnode->path.pathtype = T_BitmapHeapScan;
+ pathnode->path.parent = rel;
+ pathnode->path.pathkeys = NIL; /* always unordered */
+
+ pathnode->bitmapqual = bitmapqual;
+ pathnode->isjoininner = (outer_rel != NULL);
+
+ if (pathnode->isjoininner)
+ {
+ /*
+ * We must compute the estimated number of output rows for the
+ * indexscan. This is less than rel->rows because of the additional
+ * selectivity of the join clauses. We make use of the selectivity
+ * estimated for the bitmap to do this; this isn't really quite right
+ * since there may be restriction conditions not included in the
+ * bitmap ...
+ */
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+
+ cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
+ pathnode->rows = rel->tuples * indexSelectivity;
+ /* clamp: the estimate should not exceed the rel's own row estimate */
+ if (pathnode->rows > rel->rows)
+ pathnode->rows = rel->rows;
+ /* Like costsize.c, force estimate to be at least one row */
+ pathnode->rows = clamp_row_est(pathnode->rows);
+ }
+ else
+ {
+ /*
+ * The number of rows is the same as the parent rel's estimate, since
+ * this isn't a join inner indexscan.
+ */
+ pathnode->rows = rel->rows;
+ }
+
+ cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
+
+ return pathnode;
+}
+
+/*
+ * create_bitmap_and_path
+ * Creates a path node representing a BitmapAnd.
+ *
+ * 'bitmapquals' is a list of component bitmap paths to be ANDed together.
+ */
+BitmapAndPath *
+create_bitmap_and_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ List *bitmapquals)
+{
+ BitmapAndPath *pathnode = makeNode(BitmapAndPath);
+
+ pathnode->path.pathtype = T_BitmapAnd;
+ pathnode->path.parent = rel;
+ pathnode->path.pathkeys = NIL; /* always unordered */
+
+ pathnode->bitmapquals = bitmapquals;
+
+ /* this sets bitmapselectivity as well as the regular cost fields: */
+ cost_bitmap_and_node(pathnode, root);
+
+ return pathnode;
+}
+
+/*
+ * create_bitmap_or_path
+ * Creates a path node representing a BitmapOr.
+ *
+ * 'bitmapquals' is a list of component bitmap paths to be ORed together.
+ */
+BitmapOrPath *
+create_bitmap_or_path(PlannerInfo *root,
+ RelOptInfo *rel,
+ List *bitmapquals)
+{
+ BitmapOrPath *pathnode = makeNode(BitmapOrPath);
+
+ pathnode->path.pathtype = T_BitmapOr;
+ pathnode->path.parent = rel;
+ pathnode->path.pathkeys = NIL; /* always unordered */
+
+ pathnode->bitmapquals = bitmapquals;
+
+ /* this sets bitmapselectivity as well as the regular cost fields: */
+ cost_bitmap_or_node(pathnode, root);
+
+ return pathnode;
+}
+
+/*
+ * create_tidscan_path
+ * Creates a path corresponding to a scan by TID, returning the pathnode.
+ */
*/
-TidPath *
-create_tidscan_path(RelOptInfo *rel, List *tideval)
+TidPath *
+create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals)
{
 TidPath *pathnode = makeNode(TidPath);
 pathnode->path.pathtype = T_TidScan;
 pathnode->path.parent = rel;
 pathnode->path.pathkeys = NIL;
- pathnode->tideval = copyObject(tideval); /* is copy really
- * necessary? */
- pathnode->unjoined_relids = NIL;
- cost_tidscan(&pathnode->path, rel, tideval);
+ pathnode->tidquals = tidquals;
+ /*
+ * NOTE(review): tidquals is now stored without copyObject -- assumes the
+ * caller does not modify the list afterward; confirm at call sites.
+ */
- /*
- * divide selectivity for each clause to get an equal selectivity as
- * IndexScan does OK ?
- */
+ cost_tidscan(&pathnode->path, root, rel, tidquals);
 return pathnode;
}
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
- *
*/
AppendPath *
create_append_path(RelOptInfo *rel, List *subpaths)
{
AppendPath *pathnode = makeNode(AppendPath);
- List *l;
+ ListCell *l;
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
{
Path *subpath = (Path *) lfirst(l);
- if (l == subpaths) /* first node? */
+ if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
}
return pathnode;
}
+/*
+ * create_result_path
+ * Creates a path representing a Result-and-nothing-else plan.
+ * This is only used for the case of a query with an empty jointree.
+ */
+ResultPath *
+create_result_path(List *quals)
+{
+ ResultPath *pathnode = makeNode(ResultPath);
+
+ pathnode->path.pathtype = T_Result;
+ pathnode->path.parent = NULL; /* not attached to any particular rel */
+ pathnode->path.pathkeys = NIL;
+ pathnode->quals = quals;
+
+ /* Ideally should define cost_result(), but I'm too lazy */
+ pathnode->path.startup_cost = 0;
+ pathnode->path.total_cost = cpu_tuple_cost;
+
+ /*
+ * In theory we should include the qual eval cost as well, but at present
+ * that doesn't accomplish much except duplicate work that will be done
+ * again in make_result; since this is only used for degenerate cases,
+ * nothing interesting will be done with the path cost values...
+ */
+
+ return pathnode;
+}
+
+/*
+ * create_material_path
+ * Creates a path corresponding to a Material plan, returning the
+ * pathnode.
+ */
+MaterialPath *
+create_material_path(RelOptInfo *rel, Path *subpath)
+{
+ MaterialPath *pathnode = makeNode(MaterialPath);
+
+ pathnode->path.pathtype = T_Material;
+ pathnode->path.parent = rel;
+
+ /* materializing preserves whatever ordering the subpath delivers */
+ pathnode->path.pathkeys = subpath->pathkeys;
+
+ pathnode->subpath = subpath;
+
+ cost_material(&pathnode->path,
+ subpath->total_cost,
+ rel->rows,
+ rel->width);
+
+ return pathnode;
+}
+
+/*
+ * create_unique_path
+ * Creates a path representing elimination of distinct rows from the
+ * input data.
+ *
+ * If used at all, this is likely to be called repeatedly on the same rel;
+ * and the input subpath should always be the same (the cheapest_total path
+ * for the rel). So we cache the result.
+ */
+UniquePath *
+create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
+{
+ UniquePath *pathnode;
+ Path sort_path; /* dummy for result of cost_sort */
+ Path agg_path; /* dummy for result of cost_agg */
+ MemoryContext oldcontext;
+ List *sub_targetlist;
+ List *in_operators;
+ ListCell *l;
+ int numCols;
+
+ /* Caller made a mistake if subpath isn't cheapest_total */
+ Assert(subpath == rel->cheapest_total_path);
+
+ /* If result already cached, return it */
+ if (rel->cheapest_unique_path)
+ return (UniquePath *) rel->cheapest_unique_path;
+
+ /*
+ * We must ensure path struct is allocated in main planning context;
+ * otherwise GEQO memory management causes trouble. (Compare
+ * best_inner_indexscan().)
+ */
+ oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+ pathnode = makeNode(UniquePath);
+
+ /* There is no substructure to allocate, so can switch back right away */
+ MemoryContextSwitchTo(oldcontext);
+
+ pathnode->path.pathtype = T_Unique;
+ pathnode->path.parent = rel;
+
+ /*
+ * Treat the output as always unsorted, since we don't necessarily have
+ * pathkeys to represent it.
+ */
+ pathnode->path.pathkeys = NIL;
+
+ pathnode->subpath = subpath;
+
+ /*
+ * Try to identify the targetlist that will actually be unique-ified. In
+ * current usage, this routine is only used for sub-selects of IN clauses,
+ * so we should be able to find the tlist in in_info_list. Get the IN
+ * clause's operators, too, because they determine what "unique" means.
+ */
+ sub_targetlist = NIL;
+ in_operators = NIL;
+ foreach(l, root->in_info_list)
+ {
+ InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
+
+ if (bms_equal(ininfo->righthand, rel->relids))
+ {
+ sub_targetlist = ininfo->sub_targetlist;
+ in_operators = ininfo->in_operators;
+ break;
+ }
+ }
+
+ /*
+ * If the input is a subquery whose output must be unique already, then we
+ * don't need to do anything. The test for uniqueness has to consider
+ * exactly which columns we are extracting; for example "SELECT DISTINCT
+ * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
+ * this optimization unless we found our own targetlist above, and it
+ * consists only of simple Vars referencing subquery outputs. (Possibly
+ * we could do something with expressions in the subquery outputs, too,
+ * but for now keep it simple.)
+ */
+ if (sub_targetlist && rel->rtekind == RTE_SUBQUERY)
+ {
+ RangeTblEntry *rte = rt_fetch(rel->relid, root->parse->rtable);
+ List *sub_tlist_colnos;
+
+ sub_tlist_colnos = translate_sub_tlist(sub_targetlist, rel->relid);
+
+ if (sub_tlist_colnos &&
+ query_is_distinct_for(rte->subquery,
+ sub_tlist_colnos, in_operators))
+ {
+ pathnode->umethod = UNIQUE_PATH_NOOP;
+ pathnode->rows = rel->rows;
+ pathnode->path.startup_cost = subpath->startup_cost;
+ pathnode->path.total_cost = subpath->total_cost;
+ pathnode->path.pathkeys = subpath->pathkeys;
+
+ rel->cheapest_unique_path = (Path *) pathnode;
+
+ return pathnode;
+ }
+ }
+
+ /*
+ * If we know the targetlist, try to estimate number of result rows;
+ * otherwise punt.
+ */
+ if (sub_targetlist)
+ {
+ pathnode->rows = estimate_num_groups(root, sub_targetlist, rel->rows);
+ numCols = list_length(sub_targetlist);
+ }
+ else
+ {
+ pathnode->rows = rel->rows;
+ numCols = list_length(rel->reltargetlist);
+ }
+
+ /*
+ * Estimate cost for sort+unique implementation
+ */
+ cost_sort(&sort_path, root, NIL,
+ subpath->total_cost,
+ rel->rows,
+ rel->width);
+
+ /*
+ * Charge one cpu_operator_cost per comparison per input tuple. We assume
+ * all columns get compared at most of the tuples. (XXX probably this is
+ * an overestimate.) This should agree with make_unique.
+ */
+ sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
+
+ /*
+ * Is it safe to use a hashed implementation? If so, estimate and compare
+ * costs. We only try this if we know the IN operators, else we can't
+ * check their hashability.
+ */
+ pathnode->umethod = UNIQUE_PATH_SORT;
+ if (enable_hashagg && in_operators && hash_safe_operators(in_operators))
+ {
+ /*
+ * Estimate the overhead per hashtable entry at 64 bytes (same as in
+ * planner.c).
+ */
+ int hashentrysize = rel->width + 64;
+
+ /* work_mem is measured in kB, hence the 1024L multiplier */
+ if (hashentrysize * pathnode->rows <= work_mem * 1024L)
+ {
+ cost_agg(&agg_path, root,
+ AGG_HASHED, 0,
+ numCols, pathnode->rows,
+ subpath->startup_cost,
+ subpath->total_cost,
+ rel->rows);
+ if (agg_path.total_cost < sort_path.total_cost)
+ pathnode->umethod = UNIQUE_PATH_HASH;
+ }
+ }
+
+ if (pathnode->umethod == UNIQUE_PATH_HASH)
+ {
+ pathnode->path.startup_cost = agg_path.startup_cost;
+ pathnode->path.total_cost = agg_path.total_cost;
+ }
+ else
+ {
+ pathnode->path.startup_cost = sort_path.startup_cost;
+ pathnode->path.total_cost = sort_path.total_cost;
+ }
+
+ rel->cheapest_unique_path = (Path *) pathnode;
+
+ return pathnode;
+}
+
+/*
+ * translate_sub_tlist - get subquery column numbers represented by tlist
+ *
+ * The given targetlist should contain only Vars referencing the given relid.
+ * Extract their varattnos (ie, the column numbers of the subquery) and return
+ * as an integer List.
+ *
+ * If any of the tlist items is not a simple Var, we cannot determine whether
+ * the subquery's uniqueness condition (if any) matches ours, so punt and
+ * return NIL.
+ */
+static List *
+translate_sub_tlist(List *tlist, int relid)
+{
+ List *result = NIL;
+ ListCell *l;
+
+ foreach(l, tlist)
+ {
+ Var *var = (Var *) lfirst(l);
+
+ if (!var || !IsA(var, Var) ||
+ var->varno != relid)
+ return NIL; /* punt */
+
+ /* the result list preserves the order of the tlist entries */
+ result = lappend_int(result, var->varattno);
+ }
+ return result;
+}
+
+/*
+ * query_is_distinct_for - does query never return duplicates of the
+ * specified columns?
+ *
+ * colnos is an integer list of output column numbers (resno's). We are
+ * interested in whether rows consisting of just these columns are certain
+ * to be distinct. "Distinctness" is defined according to whether the
+ * corresponding upper-level equality operators listed in opids would think
+ * the values are distinct. (Note: the opids entries could be cross-type
+ * operators, and thus not exactly the equality operators that the subquery
+ * would use itself. We assume that the subquery is compatible if these
+ * operators appear in the same btree opfamily as the ones the subquery uses.)
+ */
+static bool
+query_is_distinct_for(Query *query, List *colnos, List *opids)
+{
+ ListCell *l;
+ Oid opid;
+
+ Assert(list_length(colnos) == list_length(opids));
+
+ /*
+ * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
+ * columns in the DISTINCT clause appear in colnos and operator
+ * semantics match.
+ */
+ if (query->distinctClause)
+ {
+ foreach(l, query->distinctClause)
+ {
+ SortClause *scl = (SortClause *) lfirst(l);
+ TargetEntry *tle = get_sortgroupclause_tle(scl,
+ query->targetList);
+
+ opid = distinct_col_search(tle->resno, colnos, opids);
+ if (!OidIsValid(opid) ||
+ !ops_in_same_btree_opfamily(opid, scl->sortop))
+ break; /* exit early if no match */
+ }
+ /* foreach leaves l NULL only when the loop ran to completion */
+ if (l == NULL) /* had matches for all? */
+ return true;
+ }
+
+ /*
+ * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
+ * appear in colnos and operator semantics match.
+ */
+ if (query->groupClause)
+ {
+ foreach(l, query->groupClause)
+ {
+ GroupClause *grpcl = (GroupClause *) lfirst(l);
+ TargetEntry *tle = get_sortgroupclause_tle(grpcl,
+ query->targetList);
+
+ opid = distinct_col_search(tle->resno, colnos, opids);
+ if (!OidIsValid(opid) ||
+ !ops_in_same_btree_opfamily(opid, grpcl->sortop))
+ break; /* exit early if no match */
+ }
+ if (l == NULL) /* had matches for all? */
+ return true;
+ }
+ else
+ {
+ /*
+ * If we have no GROUP BY, but do have aggregates or HAVING, then the
+ * result is at most one row so it's surely unique, for any operators.
+ */
+ if (query->hasAggs || query->havingQual)
+ return true;
+ }
+
+ /*
+ * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
+ * except with ALL.
+ *
+ * XXX this code knows that prepunion.c will adopt the default ordering
+ * operator for each column datatype as the sortop. It'd probably be
+ * better if these operators were chosen at parse time and stored into
+ * the parsetree, instead of leaving bits of the planner to decide
+ * semantics.
+ */
+ if (query->setOperations)
+ {
+ SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
+
+ Assert(IsA(topop, SetOperationStmt));
+ Assert(topop->op != SETOP_NONE);
+
+ if (!topop->all)
+ {
+ /* We're good if all the nonjunk output columns are in colnos */
+ foreach(l, query->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (tle->resjunk)
+ continue; /* ignore resjunk columns */
+
+ opid = distinct_col_search(tle->resno, colnos, opids);
+ if (!OidIsValid(opid) ||
+ !ops_in_same_btree_opfamily(opid,
+ ordering_oper_opid(exprType((Node *) tle->expr))))
+ break; /* exit early if no match */
+ }
+ if (l == NULL) /* had matches for all? */
+ return true;
+ }
+ }
+
+ /*
+ * XXX Are there any other cases in which we can easily see the result
+ * must be distinct?
+ */
+
+ return false;
+}
+
+/*
+ * distinct_col_search - subroutine for query_is_distinct_for
+ *
+ * If colno is in colnos, return the corresponding element of opids,
+ * else return InvalidOid. (We expect colnos does not contain duplicates,
+ * so the result is well-defined.)
+ */
+static Oid
+distinct_col_search(int colno, List *colnos, List *opids)
+{
+ ListCell *lc1,
+ *lc2;
+
+ /* simple linear scan; the lists are expected to be short */
+ forboth(lc1, colnos, lc2, opids)
+ {
+ if (colno == lfirst_int(lc1))
+ return lfirst_oid(lc2);
+ }
+ return InvalidOid;
+}
+
+/*
+ * hash_safe_operators - can all the specified IN operators be hashed?
+ *
+ * We assume hashed aggregation will work if each IN operator is marked
+ * hashjoinable. If the IN operators are cross-type, this could conceivably
+ * fail: the aggregation will need a hashable equality operator for the RHS
+ * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * cross-type hashing without support for hashing the individual types, so
+ * we don't expend cycles here to support the case. We could check
+ * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
+ * former is a significantly more expensive test.
+ */
+static bool
+hash_safe_operators(List *opids)
+{
+ ListCell *lc;
+
+ foreach(lc, opids)
+ {
+ /* reject at the first operator not marked hashjoinable */
+ if (!op_hashjoinable(lfirst_oid(lc)))
+ return false;
+ }
+ return true;
+}
+
/*
 * create_subqueryscan_path
 * Creates a path corresponding to a sequential scan of a subquery,
 * returning the pathnode.
 */
Path *
-create_subqueryscan_path(RelOptInfo *rel)
+create_subqueryscan_path(RelOptInfo *rel, List *pathkeys)
{
 Path *pathnode = makeNode(Path);
 pathnode->pathtype = T_SubqueryScan;
 pathnode->parent = rel;
+ /* pathkeys as supplied by caller (may be NIL for unordered output) */
+ pathnode->pathkeys = pathkeys;
+
+ cost_subqueryscan(pathnode, rel);
+
+ return pathnode;
+}
+
+/*
+ * create_functionscan_path
+ * Creates a path corresponding to a sequential scan of a function,
+ * returning the pathnode.
+ */
+Path *
+create_functionscan_path(PlannerInfo *root, RelOptInfo *rel)
+{
+ Path *pathnode = makeNode(Path);
+
+ pathnode->pathtype = T_FunctionScan;
+ pathnode->parent = rel;
 pathnode->pathkeys = NIL; /* for now, assume unordered result */
- /* just copy the subplan's cost estimates */
- pathnode->startup_cost = rel->subplan->startup_cost;
- pathnode->total_cost = rel->subplan->total_cost;
+ /* costs are now computed by cost_functionscan rather than copied */
+ cost_functionscan(pathnode, root, rel);
+
+ return pathnode;
+}
+
+/*
+ * create_valuesscan_path
+ * Creates a path corresponding to a scan of a VALUES list,
+ * returning the pathnode.
+ */
+Path *
+create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel)
+{
+ Path *pathnode = makeNode(Path);
+
+ pathnode->pathtype = T_ValuesScan;
+ pathnode->parent = rel;
+ pathnode->pathkeys = NIL; /* result is always unordered */
+
+ /* costs are computed by cost_valuesscan */
+ cost_valuesscan(pathnode, root, rel);
 return pathnode;
}
* 'pathkeys' are the path keys of the new join path
*
* Returns the resulting path node.
- *
*/
-NestPath *
-create_nestloop_path(RelOptInfo *joinrel,
+NestPath *
+create_nestloop_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
JoinType jointype,
Path *outer_path,
Path *inner_path,
pathnode->joinrestrictinfo = restrict_clauses;
pathnode->path.pathkeys = pathkeys;
- cost_nestloop(&pathnode->path, outer_path, inner_path, restrict_clauses);
+ cost_nestloop(pathnode, root);
return pathnode;
}
* (this should be a subset of the restrict_clauses list)
* 'outersortkeys' are the sort varkeys for the outer relation
* 'innersortkeys' are the sort varkeys for the inner relation
- *
*/
-MergePath *
-create_mergejoin_path(RelOptInfo *joinrel,
+MergePath *
+create_mergejoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
JoinType jointype,
Path *outer_path,
Path *inner_path,
MergePath *pathnode = makeNode(MergePath);
/*
- * If the given paths are already well enough ordered, we can skip
- * doing an explicit sort.
+ * If the given paths are already well enough ordered, we can skip doing
+ * an explicit sort.
*/
if (outersortkeys &&
pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innersortkeys = NIL;
+ /*
+ * If we are not sorting the inner path, we may need a materialize node to
+ * ensure it can be marked/restored. (Sort does support mark/restore, so
+ * no materialize is needed in that case.)
+ *
+ * Since the inner side must be ordered, and only Sorts and IndexScans can
+ * create order to begin with, you might think there's no problem --- but
+ * you'd be wrong. Nestloop and merge joins can *preserve* the order of
+ * their inputs, so they can be selected as the input of a mergejoin, and
+ * they don't support mark/restore at present.
+ */
+ if (innersortkeys == NIL &&
+ !ExecSupportsMarkRestore(inner_path->pathtype))
+ inner_path = (Path *)
+ create_material_path(inner_path->parent, inner_path);
+
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.jointype = jointype;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
- cost_mergejoin(&pathnode->jpath.path,
- outer_path,
- inner_path,
- restrict_clauses,
- outersortkeys,
- innersortkeys);
+ cost_mergejoin(pathnode, root);
return pathnode;
}
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
- * 'hashclauses' is a list of the hash join clause (always a 1-element list)
+ * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
- * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
- *
*/
-HashPath *
-create_hashjoin_path(RelOptInfo *joinrel,
+HashPath *
+create_hashjoin_path(PlannerInfo *root,
+ RelOptInfo *joinrel,
JoinType jointype,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
- List *hashclauses,
- Selectivity innerbucketsize)
+ List *hashclauses)
{
HashPath *pathnode = makeNode(HashPath);
pathnode->jpath.path.pathkeys = NIL;
pathnode->path_hashclauses = hashclauses;
- cost_hashjoin(&pathnode->jpath.path,
- outer_path,
- inner_path,
- restrict_clauses,
- innerbucketsize);
+ cost_hashjoin(pathnode, root);
return pathnode;
}