* planmain.c
* Routines to plan a single query
*
- * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
+ * What's in a name, anyway? The top-level entry point of the planner/
+ * optimizer is over in planner.c, not here as you might think from the
+ * file name. But this is the main code for planning a basic join operation,
+ * shorn of features like subselects, inheritance, aggregates, grouping,
+ * and so on. (Those are the things planner.c deals with.)
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.51 2000/02/07 04:41:00 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.75 2003/03/10 03:53:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-#include <sys/types.h>
-
#include "postgres.h"
-
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
-#include "optimizer/prep.h"
-#include "optimizer/subselect.h"
-#include "optimizer/tlist.h"
-#include "utils/lsyscache.h"
-
-
-static Plan *subplanner(Query *root, List *flat_tlist, List *qual);
-/*
+/*--------------------
* query_planner
- * Routine to create a query plan. It does so by first creating a
- * subplan for the topmost level of attributes in the query. Then,
- * it modifies all target list and qualifications to consider the next
- * level of nesting and creates a plan for this modified query by
- * recursively calling itself. The two pieces are then merged together
- * by creating a result node that indicates which attributes should
- * be placed where and any relation level qualifications to be
- * satisfied.
+ * Generate a path (that is, a simplified plan) for a basic query,
+ * which may involve joins but not any fancier features.
*
- * tlist is the target list of the query (do NOT use root->targetList!)
- * qual is the qualification of the query (likewise!)
+ * Since query_planner does not handle the toplevel processing (grouping,
+ * sorting, etc) it cannot select the best path by itself. It selects
+ * two paths: the cheapest path that produces all the required tuples,
+ * independent of any ordering considerations, and the cheapest path that
+ * produces the expected fraction of the required tuples in the required
+ * ordering, if there is a path that is cheaper for this than just sorting
+ * the output of the cheapest overall path. The caller (grouping_planner)
+ * will make the final decision about which to use.
*
- * Note: the Query node now also includes a query_pathkeys field, which
- * is both an input and an output of query_planner(). The input value
- * signals query_planner that the indicated sort order is wanted in the
- * final output plan. The output value is the actual pathkeys of the
- * selected path. This might not be the same as what the caller requested;
- * the caller must do pathkeys_contained_in() to decide whether an
- * explicit sort is still needed. (The main reason query_pathkeys is a
- * Query field and not a passed parameter is that the low-level routines
- * in indxpath.c need to see it.)
+ * Input parameters:
+ * root is the query to plan
+ * tlist is the target list the query should produce (NOT root->targetList!)
+ * tuple_fraction is the fraction of tuples we expect will be retrieved
*
- * Returns a query plan.
+ * Output parameters:
+ * *cheapest_path receives the overall-cheapest path for the query
+ * *sorted_path receives the cheapest presorted path for the query,
+ * if any (NULL if there is no useful presorted path)
+ *
+ * Note: the Query node also includes a query_pathkeys field, which is both
+ * an input and an output of query_planner(). The input value signals
+ * query_planner that the indicated sort order is wanted in the final output
+ * plan. But this value has not yet been "canonicalized", since the needed
+ * info does not get computed until we scan the qual clauses. We canonicalize
+ * it as soon as that task is done. (The main reason query_pathkeys is a
+ * Query field and not a passed parameter is that the low-level routines in
+ * indxpath.c need to see it.)
+ *
+ * tuple_fraction is interpreted as follows:
+ * 0: expect all tuples to be retrieved (normal case)
+ * 0 < tuple_fraction < 1: expect the given fraction of tuples available
+ * from the plan to be retrieved
+ * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
+ * expected to be retrieved (ie, a LIMIT specification)
+ *--------------------
*/
-Plan *
-query_planner(Query *root,
- List *tlist,
- List *qual)
+void
+query_planner(Query *root, List *tlist, double tuple_fraction,
+ Path **cheapest_path, Path **sorted_path)
{
- List *constant_qual = NIL;
- List *var_only_tlist;
- Plan *subplan;
-
- /*
- * Note: union_planner should already have done constant folding
- * in both the tlist and qual, so we don't do it again here
- * (indeed, we may be getting a flattened var-only tlist anyway).
- *
- * Is there any value in re-folding the qual after canonicalize_qual?
- */
-
- /*
- * Canonicalize the qual, and convert it to implicit-AND format.
- */
- qual = canonicalize_qual((Expr *) qual, true);
-#ifdef OPTIMIZER_DEBUG
- printf("After canonicalize_qual()\n");
- pprint(qual);
-#endif
-
- /* Replace uplevel vars with Param nodes */
- if (PlannerQueryLevel > 1)
- {
- tlist = (List *) SS_replace_correlation_vars((Node *) tlist);
- qual = (List *) SS_replace_correlation_vars((Node *) qual);
- }
-
- /* Expand SubLinks to SubPlans */
- if (root->hasSubLinks)
- {
- tlist = (List *) SS_process_sublinks((Node *) tlist);
- qual = (List *) SS_process_sublinks((Node *) qual);
- if (root->groupClause != NIL)
- {
- /*
- * Check for ungrouped variables passed to subplans.
- * Note we do NOT do this for subplans in WHERE; it's legal
- * there because WHERE is evaluated pre-GROUP.
- */
- check_subplans_for_ungrouped_vars((Node *) tlist, root, tlist);
- }
- }
+ List *constant_quals;
+ RelOptInfo *final_rel;
+ Path *cheapestpath;
+ Path *sortedpath;
/*
- * If the query contains no relation references at all, it must be
- * something like "SELECT 2+2;". Build a trivial "Result" plan.
+ * If the query has an empty join tree, then it's something easy like
+ * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly.
*/
- if (root->rtable == NIL)
+ if (root->jointree->fromlist == NIL)
{
- /* If it's not a select, it should have had a target relation... */
- if (root->commandType != CMD_SELECT)
- elog(ERROR, "Empty range table for non-SELECT query");
-
- root->query_pathkeys = NIL; /* signal unordered result */
-
- /* Make childless Result node to evaluate given tlist. */
- return (Plan *) make_result(tlist, (Node *) qual, (Plan *) NULL);
+ *cheapest_path = (Path *) create_result_path(NULL, NULL,
+ (List *) root->jointree->quals);
+ *sorted_path = NULL;
+ return;
}
/*
- * Pull out any non-variable qual clauses so these can be put in a
+ * Pull out any non-variable WHERE clauses so these can be put in a
* toplevel "Result" node, where they will gate execution of the whole
* plan (the Result will not invoke its descendant plan unless the
* quals are true). Note that any *really* non-variable quals will
* vars, although if the qual reduces to "WHERE FALSE" this path will
* also be taken.
*/
- qual = pull_constant_clauses(qual, &constant_qual);
+ root->jointree->quals = (Node *)
+ pull_constant_clauses((List *) root->jointree->quals,
+ &constant_quals);
/*
- * Create a target list that consists solely of (resdom var) target
- * list entries, i.e., contains no arbitrary expressions.
- *
- * All subplan nodes will have "flat" (var-only) tlists.
+ * init planner lists to empty
*
- * This implies that all expression evaluations are done at the root
- * of the plan tree. Once upon a time there was code to try to push
- * expensive function calls down to lower plan nodes, but that's dead
- * code and has been for a long time...
+ * NOTE: in_info_list was set up by subquery_planner, do not touch here
*/
- var_only_tlist = flatten_tlist(tlist);
+ root->base_rel_list = NIL;
+ root->other_rel_list = NIL;
+ root->join_rel_list = NIL;
+ root->equi_key_list = NIL;
/*
- * Choose the best access path and build a plan for it.
+ * Construct RelOptInfo nodes for all base relations in query.
*/
- subplan = subplanner(root, var_only_tlist, qual);
+ add_base_rels_to_query(root, (Node *) root->jointree);
/*
- * Build a result node to control the plan if we have constant quals.
+ * Examine the targetlist and qualifications, adding entries to
+ * baserel targetlists for all referenced Vars. Restrict and join
+ * clauses are added to appropriate lists belonging to the mentioned
+ * relations. We also build lists of equijoined keys for pathkey
+ * construction.
+ *
+ * Note: all subplan nodes will have "flat" (var-only) tlists.
+ * This implies that all expression evaluations are done at the root of
+ * the plan tree. Once upon a time there was code to try to push
+ * expensive function calls down to lower plan nodes, but that's dead
+ * code and has been for a long time...
*/
- if (constant_qual)
- {
- /*
- * The result node will also be responsible for evaluating
- * the originally requested tlist.
- */
- subplan = (Plan *) make_result(tlist,
- (Node *) constant_qual,
- subplan);
- }
- else
- {
- /*
- * Replace the toplevel plan node's flattened target list with the
- * targetlist given by my caller, so that expressions are evaluated.
- */
- subplan->targetlist = tlist;
- }
+ build_base_rel_tlists(root, tlist);
- return subplan;
-
-#ifdef NOT_USED
+ (void) distribute_quals_to_rels(root, (Node *) root->jointree);
/*
- * Destructively modify the query plan's targetlist to add fjoin lists
- * to flatten functions that return sets of base types
+ * Use the completed lists of equijoined keys to deduce any implied
+ * but unstated equalities (for example, A=B and B=C imply A=C).
*/
- subplan->targetlist = generate_fjoin(subplan->targetlist);
-#endif
-
-}
-
-/*
- * subplanner
- *
- * Subplanner creates an entire plan consisting of joins and scans
- * for processing a single level of attributes.
- *
- * flat_tlist is the flattened target list
- * qual is the qualification to be satisfied
- *
- * Returns a subplan.
- *
- */
-static Plan *
-subplanner(Query *root,
- List *flat_tlist,
- List *qual)
-{
- RelOptInfo *final_rel;
- Cost cheapest_cost;
- Path *sortedpath;
+ generate_implied_equalities(root);
/*
- * Initialize the targetlist and qualification, adding entries to
- * base_rel_list as relation references are found (e.g., in the
- * qualification, the targetlist, etc.)
+ * We should now have all the pathkey equivalence sets built, so it's
+ * now possible to convert the requested query_pathkeys to canonical
+ * form.
*/
- root->base_rel_list = NIL;
- root->join_rel_list = NIL;
-
- make_var_only_tlist(root, flat_tlist);
- add_restrict_and_join_to_rels(root, qual);
- add_missing_rels_to_query(root);
+ root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
+ /*
+ * Ready to do the primary planning.
+ */
final_rel = make_one_rel(root);
- if (! final_rel)
- {
- /*
- * We expect to end up here for a trivial INSERT ... VALUES query
- * (which will have a target relation, so it gets past query_planner's
- * check for empty range table; but the target rel is unreferenced
- * and not marked inJoinSet, so we find there is nothing to join).
- *
- * It's also possible to get here if the query was rewritten by the
- * rule processor (creating rangetable entries not marked inJoinSet)
- * but the rules either did nothing or were simplified to nothing
- * by constant-expression folding. So, don't complain.
- */
- root->query_pathkeys = NIL; /* signal unordered result */
-
- /* Make childless Result node to evaluate given tlist. */
- return (Plan *) make_result(flat_tlist, (Node *) qual, (Plan *) NULL);
- }
-
-#ifdef NOT_USED /* fix xfunc */
+ if (!final_rel || !final_rel->cheapest_total_path)
+ elog(ERROR, "query_planner: failed to construct a relation");
/*
- * Perform Predicate Migration on each path, to optimize and correctly
- * assess the cost of each before choosing the cheapest one. -- JMH,
- * 11/16/92
- *
- * Needn't do so if the top rel is pruneable: that means there's no
- * expensive functions left to pull up. -- JMH, 11/22/92
+ * Now that we have an estimate of the final rel's size, we can
+ * convert a tuple_fraction specified as an absolute count (ie, a
+ * LIMIT option) into a fraction of the total tuples.
*/
- if (XfuncMode != XFUNC_OFF && XfuncMode != XFUNC_NOPM &&
- XfuncMode != XFUNC_NOPULL && !final_rel->pruneable)
- {
- List *pathnode;
-
- foreach(pathnode, final_rel->pathlist)
- {
- if (xfunc_do_predmig((Path *) lfirst(pathnode)))
- set_cheapest(final_rel, final_rel->pathlist);
- }
- }
-#endif
+ if (tuple_fraction >= 1.0)
+ tuple_fraction /= final_rel->rows;
/*
- * Determine the cheapest path and create a subplan to execute it.
+ * Pick out the cheapest-total path and the cheapest presorted path
+ * for the requested pathkeys (if there is one). We should take the
+ * tuple fraction into account when selecting the cheapest presorted
+ * path, but not when selecting the cheapest-total path, since if we
+ * have to sort then we'll have to fetch all the tuples. (But there's
+ * a special case: if query_pathkeys is NIL, meaning order doesn't
+ * matter, then the "cheapest presorted" path will be the cheapest
+ * overall for the tuple fraction.)
*
- * If no special sort order is wanted, or if the cheapest path is
- * already appropriately ordered, just use the cheapest path.
+ * The cheapest-total path is also the one to use if grouping_planner
+ * decides to use hashed aggregation, so we return it separately even
+ * if this routine thinks the presorted path is the winner.
*/
- if (root->query_pathkeys == NIL ||
- pathkeys_contained_in(root->query_pathkeys,
- final_rel->cheapestpath->pathkeys))
- {
- root->query_pathkeys = final_rel->cheapestpath->pathkeys;
- return create_plan(root, final_rel->cheapestpath);
- }
+ cheapestpath = final_rel->cheapest_total_path;
+
+ sortedpath =
+ get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
+ root->query_pathkeys,
+ tuple_fraction);
+
+ /* Don't return same path in both guises; just wastes effort */
+ if (sortedpath == cheapestpath)
+ sortedpath = NULL;
/*
- * Otherwise, look to see if we have an already-ordered path that is
- * cheaper than doing an explicit sort on cheapestpath.
+ * Forget about the presorted path if it would be cheaper to sort the
+ * cheapest-total path. Here we need consider only the behavior at
+ * the tuple fraction point.
*/
- cheapest_cost = final_rel->cheapestpath->path_cost +
- cost_sort(root->query_pathkeys, final_rel->rows, final_rel->width);
-
- sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
- root->query_pathkeys,
- false);
if (sortedpath)
{
- if (sortedpath->path_cost <= cheapest_cost)
+ Path sort_path; /* dummy for result of cost_sort */
+
+ if (root->query_pathkeys == NIL ||
+ pathkeys_contained_in(root->query_pathkeys,
+ cheapestpath->pathkeys))
{
- /* Found a better presorted path, use it */
- root->query_pathkeys = sortedpath->pathkeys;
- return create_plan(root, sortedpath);
+ /* No sort needed for cheapest path */
+ sort_path.startup_cost = cheapestpath->startup_cost;
+ sort_path.total_cost = cheapestpath->total_cost;
}
- /* otherwise, doing it the hard way is still cheaper */
- }
- else
- {
- /*
- * If we found no usable presorted path at all, it is possible
- * that the user asked for descending sort order. Check to see
- * if we can satisfy the pathkeys by using a backwards indexscan.
- * To do this, we commute all the operators in the pathkeys and
- * then look for a matching path that is an IndexPath.
- */
- List *commuted_pathkeys = copyObject(root->query_pathkeys);
-
- if (commute_pathkeys(commuted_pathkeys))
+ else
{
- /* pass 'true' to force only IndexPaths to be considered */
- sortedpath = get_cheapest_path_for_pathkeys(final_rel->pathlist,
- commuted_pathkeys,
- true);
- if (sortedpath && sortedpath->path_cost <= cheapest_cost)
- {
- /*
- * Kluge here: since IndexPath has no representation for
- * backwards scan, we have to convert to Plan format and
- * then poke the result.
- */
- Plan *sortedplan = create_plan(root, sortedpath);
- List *sortedpathkeys;
-
- Assert(IsA(sortedplan, IndexScan));
- ((IndexScan *) sortedplan)->indxorderdir = BackwardScanDirection;
- /*
- * Need to generate commuted keys representing the actual
- * sort order. This should succeed, probably, but just in
- * case it does not, use the original root->query_pathkeys
- * as a conservative approximation.
- */
- sortedpathkeys = copyObject(sortedpath->pathkeys);
- if (commute_pathkeys(sortedpathkeys))
- root->query_pathkeys = sortedpathkeys;
+ /* Figure cost for sorting */
+ cost_sort(&sort_path, root, root->query_pathkeys,
+ cheapestpath->total_cost,
+ final_rel->rows, final_rel->width);
+ }
- return sortedplan;
- }
+ if (compare_fractional_path_costs(sortedpath, &sort_path,
+ tuple_fraction) > 0)
+ {
+ /* Presorted path is a loser */
+ sortedpath = NULL;
}
}
/*
- * Nothing for it but to sort the cheapestpath --- but we let the
- * caller do that. union_planner has to be able to add a sort node
- * anyway, so no need for extra code here. (Furthermore, the given
- * pathkeys might involve something we can't compute here, such as
- * an aggregate function...)
+ * If we have constant quals, add a toplevel Result step to process them.
*/
- root->query_pathkeys = final_rel->cheapestpath->pathkeys;
- return create_plan(root, final_rel->cheapestpath);
+ if (constant_quals)
+ {
+ cheapestpath = (Path *) create_result_path(final_rel,
+ cheapestpath,
+ constant_quals);
+ if (sortedpath)
+ sortedpath = (Path *) create_result_path(final_rel,
+ sortedpath,
+ constant_quals);
+ }
+
+ *cheapest_path = cheapestpath;
+ *sorted_path = sortedpath;
}