/*-------------------------------------------------------------------------
*
- * planmain.c--
+ * planmain.c
* Routines to plan a single query
*
- * Copyright (c) 1994, Regents of the University of California
+ * What's in a name, anyway? The top-level entry point of the planner/
+ * optimizer is over in planner.c, not here as you might think from the
+ * file name. But this is the main code for planning a basic join operation,
+ * shorn of features like subselects, inheritance, aggregates, grouping,
+ * and so on. (Those are the things planner.c deals with.)
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.19 1998/02/13 03:36:57 vadim Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.75 2003/03/10 03:53:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-#include <sys/types.h>
-
#include "postgres.h"
-#include "nodes/pg_list.h"
-#include "nodes/plannodes.h"
-#include "nodes/parsenodes.h"
-#include "nodes/relation.h"
-#include "nodes/makefuncs.h"
-
-#include "optimizer/planmain.h"
-#include "optimizer/subselect.h"
-#include "optimizer/internal.h"
-#include "optimizer/prep.h"
-#include "optimizer/paths.h"
#include "optimizer/clauses.h"
-#include "optimizer/keys.h"
-#include "optimizer/tlist.h"
-#include "optimizer/var.h"
-#include "optimizer/xfunc.h"
#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/planmain.h"
-#include "tcop/dest.h"
-#include "utils/elog.h"
-#include "utils/palloc.h"
-#include "nodes/memnodes.h"
-#include "utils/mcxt.h"
-#include "utils/lsyscache.h"
-
-static Plan *subplanner(Query *root, List *flat_tlist, List *qual);
-static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
-
-extern Plan *
-make_groupPlan(List **tlist, bool tuplePerGroup,
- List *groupClause, Plan *subplan);
-/*
- * query_planner--
- * Routine to create a query plan. It does so by first creating a
- * subplan for the topmost level of attributes in the query. Then,
- * it modifies all target list and qualifications to consider the next
- * level of nesting and creates a plan for this modified query by
- * recursively calling itself. The two pieces are then merged together
- * by creating a result node that indicates which attributes should
- * be placed where and any relation level qualifications to be
- * satisfied.
+/*--------------------
+ * query_planner
+ * Generate a path (that is, a simplified plan) for a basic query,
+ * which may involve joins but not any fancier features.
+ *
+ * Since query_planner does not handle the toplevel processing (grouping,
+ * sorting, etc) it cannot select the best path by itself. It selects
+ * two paths: the cheapest path that produces all the required tuples,
+ * independent of any ordering considerations, and the cheapest path that
+ * produces the expected fraction of the required tuples in the required
+ * ordering, if there is a path that is cheaper for this than just sorting
+ * the output of the cheapest overall path. The caller (grouping_planner)
+ * will make the final decision about which to use.
+ *
+ * Input parameters:
+ * root is the query to plan
+ * tlist is the target list the query should produce (NOT root->targetList!)
+ * tuple_fraction is the fraction of tuples we expect will be retrieved
*
- * command-type is the query command, e.g., retrieve, delete, etc.
- * tlist is the target list of the query
- * qual is the qualification of the query
+ * Output parameters:
+ * *cheapest_path receives the overall-cheapest path for the query
+ * *sorted_path receives the cheapest presorted path for the query,
+ * if any (NULL if there is no useful presorted path)
*
- * Returns a query plan.
+ * Note: the Query node also includes a query_pathkeys field, which is both
+ * an input and an output of query_planner(). The input value signals
+ * query_planner that the indicated sort order is wanted in the final output
+ * plan. But this value has not yet been "canonicalized", since the needed
+ * info does not get computed until we scan the qual clauses. We canonicalize
+ * it as soon as that task is done. (The main reason query_pathkeys is a
+ * Query field and not a passed parameter is that the low-level routines in
+ * indxpath.c need to see it.)
+ *
+ * tuple_fraction is interpreted as follows:
+ * 0: expect all tuples to be retrieved (normal case)
+ * 0 < tuple_fraction < 1: expect the given fraction of tuples available
+ * from the plan to be retrieved
+ * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
+ * expected to be retrieved (ie, a LIMIT specification)
+ *--------------------
*/
-Plan *
-query_planner(Query *root,
- int command_type,
- List *tlist,
- List *qual)
+void
+query_planner(Query *root, List *tlist, double tuple_fraction,
+ Path **cheapest_path, Path **sorted_path)
{
- List *constant_qual = NIL;
- List *var_only_tlist = NIL;
- List *level_tlist = NIL;
- Plan *subplan = NULL;
-
- if ( PlannerQueryLevel > 1 )
- {
- /* should copy be made ? */
- tlist = (List *) SS_replace_correlation_vars ((Node*)tlist);
- qual = (List *) SS_replace_correlation_vars ((Node*)qual);
- }
- if (root->hasSubLinks)
- qual = (List *) SS_process_sublinks ((Node*) qual);
-
- qual = cnfify((Expr *) qual, true);
-
+ List *constant_quals;
+ RelOptInfo *final_rel;
+ Path *cheapestpath;
+ Path *sortedpath;
+
/*
- * A command without a target list or qualification is an error,
- * except for "delete foo".
+ * If the query has an empty join tree, then it's something easy like
+ * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly.
*/
- if (tlist == NIL && qual == NULL)
+ if (root->jointree->fromlist == NIL)
{
- if (command_type == CMD_DELETE ||
-
- /*
- * Total hack here. I don't know how to handle statements like
- * notify in action bodies. Notify doesn't return anything but
- * scans a system table.
- */
- command_type == CMD_NOTIFY)
- {
- return ((Plan *) make_seqscan(NIL,
- NIL,
- root->resultRelation,
- (Plan *) NULL));
- }
- else
- return ((Plan *) NULL);
+ *cheapest_path = (Path *) create_result_path(NULL, NULL,
+ (List *) root->jointree->quals);
+ *sorted_path = NULL;
+ return;
}
/*
- * Pull out any non-variable qualifications so these can be put in the
- * topmost result node. The opids for the remaining qualifications
- * will be changed to regprocs later.
- */
- qual = pull_constant_clauses(qual, &constant_qual);
- fix_opids(constant_qual);
-
- /*
- * Create a target list that consists solely of (resdom var) target
- * list entries, i.e., contains no arbitrary expressions.
- */
- var_only_tlist = flatten_tlist(tlist);
- if (var_only_tlist)
- level_tlist = var_only_tlist;
- else
- /* from old code. the logic is beyond me. - ay 2/95 */
- level_tlist = tlist;
-
- /*
- * A query may have a non-variable target list and a non-variable
- * qualification only under certain conditions: - the query creates
- * all-new tuples, or - the query is a replace (a scan must still be
- * done in this case).
+ * Pull out any non-variable WHERE clauses so these can be put in a
+ * toplevel "Result" node, where they will gate execution of the whole
+ * plan (the Result will not invoke its descendant plan unless the
+ * quals are true). Note that any *really* non-variable quals will
+ * have been optimized away by eval_const_expressions(). What we're
+ * mostly interested in here is quals that depend only on outer-level
+ * vars, although if the qual reduces to "WHERE FALSE" this path will
+ * also be taken.
*/
- if (var_only_tlist == NULL && qual == NULL)
- {
-
- switch (command_type)
- {
- case CMD_SELECT:
- case CMD_INSERT:
- return ((Plan *) make_result(tlist,
- (Node *) constant_qual,
- (Plan *) NULL));
- break;
-
- case CMD_DELETE:
- case CMD_UPDATE:
- {
- SeqScan *scan = make_seqscan(tlist,
- (List *) NULL,
- root->resultRelation,
- (Plan *) NULL);
-
- if (constant_qual != NULL)
- {
- return ((Plan *) make_result(tlist,
- (Node *) constant_qual,
- (Plan *) scan));
- }
- else
- {
- return ((Plan *) scan);
- }
- }
- break;
-
- default:
- return ((Plan *) NULL);
- }
- }
+ root->jointree->quals = (Node *)
+ pull_constant_clauses((List *) root->jointree->quals,
+ &constant_quals);
/*
- * Find the subplan (access path) and destructively modify the target
- * list of the newly created subplan to contain the appropriate join
- * references.
+ * init planner lists to empty
+ *
+ * NOTE: in_info_list was set up by subquery_planner, do not touch here
*/
- subplan = subplanner(root, level_tlist, qual);
-
- set_tlist_references(subplan);
+ root->base_rel_list = NIL;
+ root->other_rel_list = NIL;
+ root->join_rel_list = NIL;
+ root->equi_key_list = NIL;
/*
- * Build a result node linking the plan if we have constant quals
+ * Construct RelOptInfo nodes for all base relations in query.
*/
- if (constant_qual)
- {
- subplan = (Plan *)make_result((!root->hasAggs && !root->groupClause)
- ? tlist : subplan->targetlist,
- (Node *) constant_qual,
- subplan);
-
- /*
- * Change all varno's of the Result's node target list.
- */
- if (!root->hasAggs && !root->groupClause)
- set_tlist_references(subplan);
+ add_base_rels_to_query(root, (Node *) root->jointree);
- return subplan;
- }
/*
- * fix up the flattened target list of the plan root node so that
- * expressions are evaluated. this forces expression evaluations that
- * may involve expensive function calls to be delayed to the very last
- * stage of query execution. this could be bad. but it is joey's
- * responsibility to optimally push these expressions down the plan
- * tree. -- Wei
+ * Examine the targetlist and qualifications, adding entries to
+ * baserel targetlists for all referenced Vars. Restrict and join
+ * clauses are added to appropriate lists belonging to the mentioned
+ * relations. We also build lists of equijoined keys for pathkey
+ * construction.
*
- * But now nothing to do if there are GroupBy and/or Aggregates: 1.
- * make_groupPlan fixes tlist; 2. flatten_tlist_vars does nothing with
- * aggregates fixing only other entries (i.e. - GroupBy-ed and so
- * fixed by make_groupPlan). - vadim 04/05/97
+ * Note: all subplan nodes will have "flat" (var-only) tlists.
+ * This implies that all expression evaluations are done at the root of
+ * the plan tree. Once upon a time there was code to try to push
+ * expensive function calls down to lower plan nodes, but that's dead
+ * code and has been for a long time...
*/
- else
- {
- if (!root->hasAggs && !root->groupClause)
- subplan->targetlist = flatten_tlist_vars(tlist,
- subplan->targetlist);
- return subplan;
- }
-
-#ifdef NOT_USED
- /*
- * Destructively modify the query plan's targetlist to add fjoin lists
- * to flatten functions that return sets of base types
- */
- subplan->targetlist = generate_fjoin(subplan->targetlist);
-#endif
-
-}
+ build_base_rel_tlists(root, tlist);
-/*
- * subplanner
- *
- * Subplanner creates an entire plan consisting of joins and scans
- * for processing a single level of attributes.
- *
- * flat-tlist is the flattened target list
- * qual is the qualification to be satisfied
- *
- * Returns a subplan.
- *
- */
-static Plan *
-subplanner(Query *root,
- List *flat_tlist,
- List *qual)
-{
- Rel *final_relation;
- List *final_relation_list;
+ (void) distribute_quals_to_rels(root, (Node *) root->jointree);
/*
- * Initialize the targetlist and qualification, adding entries to
- * *query-relation-list* as relation references are found (e.g., in
- * the qualification, the targetlist, etc.)
+ * Use the completed lists of equijoined keys to deduce any implied
+ * but unstated equalities (for example, A=B and B=C imply A=C).
*/
- root->base_relation_list_ = NIL;
- root->join_relation_list_ = NIL;
- initialize_base_rels_list(root, flat_tlist);
- initialize_base_rels_jinfo(root, qual);
- add_missing_vars_to_base_rels(root, flat_tlist);
+ generate_implied_equalities(root);
/*
- * Find all possible scan and join paths. Mark all the clauses and
- * relations that can be processed using special join methods, then do
- * the exhaustive path search.
+ * We should now have all the pathkey equivalence sets built, so it's
+ * now possible to convert the requested query_pathkeys to canonical
+ * form.
*/
- initialize_join_clause_info(root->base_relation_list_);
- final_relation_list = find_paths(root,
- root->base_relation_list_);
-
- if (final_relation_list)
- final_relation = (Rel *) lfirst(final_relation_list);
- else
- final_relation = (Rel *) NIL;
-
-#if 0 /* fix xfunc */
+ root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
/*
- * Perform Predicate Migration on each path, to optimize and correctly
- * assess the cost of each before choosing the cheapest one. -- JMH,
- * 11/16/92
- *
- * Needn't do so if the top rel is pruneable: that means there's no
- * expensive functions left to pull up. -- JMH, 11/22/92
+ * Ready to do the primary planning.
*/
- if (XfuncMode != XFUNC_OFF && XfuncMode != XFUNC_NOPM &&
- XfuncMode != XFUNC_NOPULL && !final_relation->pruneable)
- {
- List *pathnode;
+ final_rel = make_one_rel(root);
- foreach(pathnode, final_relation->pathlist)
- {
- if (xfunc_do_predmig((Path *) lfirst(pathnode)))
- set_cheapest(final_relation, final_relation->pathlist);
- }
- }
-#endif
+ if (!final_rel || !final_rel->cheapest_total_path)
+ elog(ERROR, "query_planner: failed to construct a relation");
/*
- * Determine the cheapest path and create a subplan corresponding to
- * it.
+ * Now that we have an estimate of the final rel's size, we can
+ * convert a tuple_fraction specified as an absolute count (ie, a
+ * LIMIT option) into a fraction of the total tuples.
*/
- if (final_relation)
- {
- return (create_plan((Path *) final_relation->cheapestpath));
- }
- else
- {
- elog(NOTICE, "final relation is nil");
- return (create_plan((Path *) NULL));
- }
-
-}
-
-/*****************************************************************************
- *
- *****************************************************************************/
-
-static Result *
-make_result(List *tlist,
- Node *resconstantqual,
- Plan *subplan)
-{
- Result *node = makeNode(Result);
- Plan *plan = &node->plan;
-
-#ifdef NOT_USED
- tlist = generate_fjoin(tlist);
-#endif
- plan->cost = (subplan ? subplan->cost : 0);
- plan->state = (EState *) NULL;
- plan->targetlist = tlist;
- plan->lefttree = subplan;
- plan->righttree = NULL;
- node->resconstantqual = resconstantqual;
- node->resstate = NULL;
-
- return (node);
-}
-
-/*****************************************************************************
- *
- *****************************************************************************/
-
-Plan *
-make_groupPlan(List **tlist,
- bool tuplePerGroup,
- List *groupClause,
- Plan *subplan)
-{
- List *sort_tlist;
- List *sl,
- *gl;
- List *glc = listCopy(groupClause);
- List *otles = NIL; /* list of removed non-GroupBy entries */
- List *otlvars = NIL; /* list of var in them */
- int otlvcnt;
- Sort *sortplan;
- Group *grpplan;
- int numCols;
- AttrNumber *grpColIdx;
- int last_resno = 1;
-
- numCols = length(groupClause);
- grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-
- sort_tlist = new_unsorted_tlist(*tlist); /* it's copy */
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= final_rel->rows;
/*
- * Make template TL for subplan, Sort & Group: 1. If there are
- * aggregates (tuplePerGroup is true) then take away non-GroupBy
- * entries and re-set resno-s accordantly. 2. Make grpColIdx
+ * Pick out the cheapest-total path and the cheapest presorted path
+ * for the requested pathkeys (if there is one). We should take the
+ * tuple fraction into account when selecting the cheapest presorted
+ * path, but not when selecting the cheapest-total path, since if we
+ * have to sort then we'll have to fetch all the tuples. (But there's
+ * a special case: if query_pathkeys is NIL, meaning order doesn't
+ * matter, then the "cheapest presorted" path will be the cheapest
+ * overall for the tuple fraction.)
*
- * Note: we assume that TLEs in *tlist are ordered in accordance with
- * their resdom->resno.
+ * The cheapest-total path is also the one to use if grouping_planner
+ * decides to use hashed aggregation, so we return it separately even
+ * if this routine thinks the presorted path is the winner.
*/
- foreach(sl, sort_tlist)
- {
- Resdom *resdom = NULL;
- TargetEntry *te = (TargetEntry *) lfirst(sl);
- int keyno = 0;
-
- foreach(gl, groupClause)
- {
- GroupClause *grpcl = (GroupClause *) lfirst(gl);
+ cheapestpath = final_rel->cheapest_total_path;
- keyno++;
- if (grpcl->entry->resdom->resno == te->resdom->resno)
- {
+ sortedpath =
+ get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+ root->query_pathkeys,
+ tuple_fraction);
- resdom = te->resdom;
- resdom->reskey = keyno;
- resdom->reskeyop = get_opcode(grpcl->grpOpoid);
- resdom->resno = last_resno; /* re-set */
- grpColIdx[keyno - 1] = last_resno++;
- glc = lremove(lfirst(gl), glc); /* TLE found for it */
- break;
- }
- }
-
- /*
- * Non-GroupBy entry: remove it from Group/Sort TL if there are
- * aggregates in query - it will be evaluated by Aggregate plan
- */
- if (resdom == NULL)
- {
- if (tuplePerGroup)
- {
- otlvars = nconc(otlvars, pull_var_clause(te->expr));
- otles = lcons(te, otles);
- sort_tlist = lremove(te, sort_tlist);
- }
- else
- te->resdom->resno = last_resno++;
- }
- }
-
- if (length(glc) != 0)
- {
- elog(ERROR, "group attribute disappeared from target list");
- }
+ /* Don't return same path in both guises; just wastes effort */
+ if (sortedpath == cheapestpath)
+ sortedpath = NULL;
/*
- * If non-GroupBy entries were removed from TL - we are to add Vars
- * for them to the end of TL if there are no such Vars in TL already.
+ * Forget about the presorted path if it would be cheaper to sort the
+ * cheapest-total path. Here we need consider only the behavior at
+ * the tuple fraction point.
*/
-
- otlvcnt = length(otlvars);
- foreach(gl, otlvars)
+ if (sortedpath)
{
- Var *v = (Var *) lfirst(gl);
+ Path sort_path; /* dummy for result of cost_sort */
- if (tlist_member(v, sort_tlist) == NULL)
+ if (root->query_pathkeys == NIL ||
+ pathkeys_contained_in(root->query_pathkeys,
+ cheapestpath->pathkeys))
{
- sort_tlist = lappend(sort_tlist,
- create_tl_element(v, last_resno));
- last_resno++;
+ /* No sort needed for cheapest path */
+ sort_path.startup_cost = cheapestpath->startup_cost;
+ sort_path.total_cost = cheapestpath->total_cost;
}
else
-/* already in TL */
- otlvcnt--;
- }
- /* Now otlvcnt is number of Vars added in TL for non-GroupBy entries */
-
- /* Make TL for subplan: substitute Vars from subplan TL into new TL */
- sl = flatten_tlist_vars(sort_tlist, subplan->targetlist);
-
- subplan->targetlist = new_unsorted_tlist(sl); /* there */
-
- /*
- * Make Sort/Group TL : 1. make Var nodes (with varno = 1 and varnoold
- * = -1) for all functions, 'couse they will be evaluated by subplan;
- * 2. for real Vars: set varno = 1 and varattno to its resno in
- * subplan
- */
- foreach(sl, sort_tlist)
- {
- TargetEntry *te = (TargetEntry *) lfirst(sl);
- Resdom *resdom = te->resdom;
- Node *expr = te->expr;
-
- if (IsA(expr, Var))
{
-#if 0 /* subplanVar->resdom->resno expected to
- * be = te->resdom->resno */
- TargetEntry *subplanVar;
-
- subplanVar = match_varid((Var *) expr, subplan->targetlist);
- ((Var *) expr)->varattno = subplanVar->resdom->resno;
-#endif
- ((Var *) expr)->varattno = te->resdom->resno;
- ((Var *) expr)->varno = 1;
+ /* Figure cost for sorting */
+ cost_sort(&sort_path, root, root->query_pathkeys,
+ cheapestpath->total_cost,
+ final_rel->rows, final_rel->width);
}
- else
- te->expr = (Node *) makeVar(1, resdom->resno,
- resdom->restype,
- resdom->restypmod,
- 0, -1, resdom->resno);
- }
-
- sortplan = make_sort(sort_tlist,
- _TEMP_RELATION_ID_,
- subplan,
- numCols);
- sortplan->plan.cost = subplan->cost; /* XXX assume no cost */
- /*
- * make the Group node
- */
- sort_tlist = copyObject(sort_tlist);
- grpplan = make_group(sort_tlist, tuplePerGroup, numCols,
- grpColIdx, sortplan);
-
- /*
- * Make TL for parent: "restore" non-GroupBy entries (if they were
- * removed) and set resno-s of others accordantly.
- */
- sl = sort_tlist;
- sort_tlist = NIL; /* to be new parent TL */
- foreach(gl, *tlist)
- {
- List *temp = NIL;
- TargetEntry *te = (TargetEntry *) lfirst(gl);
-
- foreach(temp, otles) /* Is it removed non-GroupBy entry ? */
+ if (compare_fractional_path_costs(sortedpath, &sort_path,
+ tuple_fraction) > 0)
{
- TargetEntry *ote = (TargetEntry *) lfirst(temp);
-
- if (ote->resdom->resno == te->resdom->resno)
- {
- otles = lremove(ote, otles);
- break;
- }
- }
- if (temp == NIL) /* It's "our" TLE - we're to return */
- { /* it from Sort/Group plans */
- TargetEntry *my = (TargetEntry *) lfirst(sl); /* get it */
-
- sl = sl->next; /* prepare for the next "our" */
- my = copyObject(my);
- my->resdom->resno = te->resdom->resno; /* order of parent TL */
- sort_tlist = lappend(sort_tlist, my);
- continue;
+ /* Presorted path is a loser */
+ sortedpath = NULL;
}
- /* else - it's TLE of an non-GroupBy entry */
- sort_tlist = lappend(sort_tlist, copyObject(te));
}
/*
- * Pure non-GroupBy entries Vars were at the end of Group' TL. They
- * shouldn't appear in parent TL, all others shouldn't disappear.
+ * If we have constant quals, add a toplevel Result step to process them.
*/
- Assert(otlvcnt == length(sl));
- Assert(length(otles) == 0);
-
- *tlist = sort_tlist;
+ if (constant_quals)
+ {
+ cheapestpath = (Path *) create_result_path(final_rel,
+ cheapestpath,
+ constant_quals);
+ if (sortedpath)
+ sortedpath = (Path *) create_result_path(final_rel,
+ sortedpath,
+ constant_quals);
+ }
- return (Plan *) grpplan;
+ *cheapest_path = cheapestpath;
+ *sorted_path = sortedpath;
}