1 /*-------------------------------------------------------------------------
4 * Routines to plan a single query
6 * What's in a name, anyway? The top-level entry point of the planner/
7 * optimizer is over in planner.c, not here as you might think from the
8 * file name. But this is the main code for planning a basic join operation,
9 * shorn of features like subselects, inheritance, aggregates, grouping,
10 * and so on. (Those are the things planner.c deals with.)
12 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
13 * Portions Copyright (c) 1994, Regents of the University of California
17 * $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.112 2008/10/22 20:17:51 tgl Exp $
19 *-------------------------------------------------------------------------
23 #include "optimizer/cost.h"
24 #include "optimizer/pathnode.h"
25 #include "optimizer/paths.h"
26 #include "optimizer/placeholder.h"
27 #include "optimizer/planmain.h"
28 #include "optimizer/tlist.h"
29 #include "utils/selfuncs.h"
34 * Generate a path (that is, a simplified plan) for a basic query,
35 * which may involve joins but not any fancier features.
37 * Since query_planner does not handle the toplevel processing (grouping,
38 * sorting, etc) it cannot select the best path by itself. It selects
39 * two paths: the cheapest path that produces all the required tuples,
40 * independent of any ordering considerations, and the cheapest path that
41 * produces the expected fraction of the required tuples in the required
42 * ordering, if there is a path that is cheaper for this than just sorting
43 * the output of the cheapest overall path. The caller (grouping_planner)
44 * will make the final decision about which to use.
47 * root describes the query to plan
48 * tlist is the target list the query should produce
49 * (this is NOT necessarily root->parse->targetList!)
50 * tuple_fraction is the fraction of tuples we expect will be retrieved
51 * limit_tuples is a hard limit on number of tuples to retrieve,
55 * *cheapest_path receives the overall-cheapest path for the query
56 * *sorted_path receives the cheapest presorted path for the query,
57 * if any (NULL if there is no useful presorted path)
58 * *num_groups receives the estimated number of groups, or 1 if query
59 * does not use grouping
61 * Note: the PlannerInfo node also includes a query_pathkeys field, which is
62 * both an input and an output of query_planner(). The input value signals
63 * query_planner that the indicated sort order is wanted in the final output
64 * plan. But this value has not yet been "canonicalized", since the needed
65 * info does not get computed until we scan the qual clauses. We canonicalize
66 * it as soon as that task is done. (The main reason query_pathkeys is a
67 * PlannerInfo field and not a passed parameter is that the low-level routines
68 * in indxpath.c need to see it.)
70 * Note: the PlannerInfo node also includes group_pathkeys, distinct_pathkeys,
71 * and sort_pathkeys, which like query_pathkeys need to be canonicalized once
72 * the info is available.
74 * tuple_fraction is interpreted as follows:
75 * 0: expect all tuples to be retrieved (normal case)
76 * 0 < tuple_fraction < 1: expect the given fraction of tuples available
77 * from the plan to be retrieved
78 * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
79 * expected to be retrieved (ie, a LIMIT specification)
80 * Note that a nonzero tuple_fraction could come from outer context; it is
81 * therefore not redundant with limit_tuples. We use limit_tuples to determine
82 * whether a bounded sort can be used at runtime.
85 query_planner(PlannerInfo *root, List *tlist,
86 double tuple_fraction, double limit_tuples,
87 Path **cheapest_path, Path **sorted_path,
90 Query *parse = root->parse;
92 RelOptInfo *final_rel;
99 /* Make tuple_fraction accessible to lower-level routines */
100 root->tuple_fraction = tuple_fraction;
102 *num_groups = 1; /* default result */
105 * If the query has an empty join tree, then it's something easy like
106 * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly.
108 if (parse->jointree->fromlist == NIL)
110 /* We need a trivial path result */
111 *cheapest_path = (Path *)
112 create_result_path((List *) parse->jointree->quals);
116 * We still are required to canonicalize any pathkeys, in case it's
117 * something like "SELECT 2+2 ORDER BY 1".
119 root->canon_pathkeys = NIL;
120 root->query_pathkeys = canonicalize_pathkeys(root,
121 root->query_pathkeys);
122 root->group_pathkeys = canonicalize_pathkeys(root,
123 root->group_pathkeys);
124 root->distinct_pathkeys = canonicalize_pathkeys(root,
125 root->distinct_pathkeys);
126 root->sort_pathkeys = canonicalize_pathkeys(root,
127 root->sort_pathkeys);
132 * Init planner lists to empty, and set up the array to hold RelOptInfos
135 * NOTE: append_rel_list was set up by subquery_planner, so do not touch
136 * here; eq_classes may contain data already, too.
138 root->simple_rel_array_size = list_length(parse->rtable) + 1;
139 root->simple_rel_array = (RelOptInfo **)
140 palloc0(root->simple_rel_array_size * sizeof(RelOptInfo *));
141 root->join_rel_list = NIL;
142 root->join_rel_hash = NULL;
143 root->canon_pathkeys = NIL;
144 root->left_join_clauses = NIL;
145 root->right_join_clauses = NIL;
146 root->full_join_clauses = NIL;
147 root->join_info_list = NIL;
148 root->placeholder_list = NIL;
149 root->initial_rels = NIL;
152 * Make a flattened version of the rangetable for faster access (this is
153 * OK because the rangetable won't change any more).
155 root->simple_rte_array = (RangeTblEntry **)
156 palloc0(root->simple_rel_array_size * sizeof(RangeTblEntry *));
158 foreach(lc, parse->rtable)
160 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
162 root->simple_rte_array[rti++] = rte;
166 * Construct RelOptInfo nodes for all base relations in query, and
167 * indirectly for all appendrel member relations ("other rels"). This
168 * will give us a RelOptInfo for every "simple" (non-join) rel involved in
171 * Note: the reason we find the rels by searching the jointree and
172 * appendrel list, rather than just scanning the rangetable, is that the
173 * rangetable may contain RTEs for rels not actively part of the query,
174 * for example views. We don't want to make RelOptInfos for them.
176 add_base_rels_to_query(root, (Node *) parse->jointree);
179 * We should now have size estimates for every actual table involved in
180 * the query, so we can compute total_table_pages. Note that appendrels
181 * are not double-counted here, even though we don't bother to distinguish
182 * RelOptInfos for appendrel parents, because the parents will still have
185 * XXX if a table is self-joined, we will count it once per appearance,
186 * which perhaps is the wrong thing ... but that's not completely clear,
187 * and detecting self-joins here is difficult, so ignore it for now.
190 for (rti = 1; rti < root->simple_rel_array_size; rti++)
192 RelOptInfo *brel = root->simple_rel_array[rti];
197 Assert(brel->relid == rti); /* sanity check on array */
199 total_pages += (double) brel->pages;
201 root->total_table_pages = total_pages;
204 * Examine the targetlist and qualifications, adding entries to baserel
205 * targetlists for all referenced Vars. Restrict and join clauses are
206 * added to appropriate lists belonging to the mentioned relations. We
207 * also build EquivalenceClasses for provably equivalent expressions, and
208 * form a target joinlist for make_one_rel() to work from.
210 build_base_rel_tlists(root, tlist);
212 joinlist = deconstruct_jointree(root);
215 * Reconsider any postponed outer-join quals now that we have built up
216 * equivalence classes. (This could result in further additions or
217 * mergings of classes.)
219 reconsider_outer_join_clauses(root);
222 * If we formed any equivalence classes, generate additional restriction
223 * clauses as appropriate. (Implied join clauses are formed on-the-fly
226 generate_base_implied_equalities(root);
229 * We have completed merging equivalence sets, so it's now possible to
230 * convert the requested query_pathkeys to canonical form. Also
231 * canonicalize the groupClause, distinctClause and sortClause pathkeys
234 root->query_pathkeys = canonicalize_pathkeys(root, root->query_pathkeys);
235 root->group_pathkeys = canonicalize_pathkeys(root, root->group_pathkeys);
236 root->distinct_pathkeys = canonicalize_pathkeys(root, root->distinct_pathkeys);
237 root->sort_pathkeys = canonicalize_pathkeys(root, root->sort_pathkeys);
240 * Examine any "placeholder" expressions generated during subquery pullup.
241 * Make sure that we know what level to evaluate them at, and that the
242 * Vars they need are marked as needed.
244 fix_placeholder_eval_levels(root);
247 * Ready to do the primary planning.
249 final_rel = make_one_rel(root, joinlist);
251 if (!final_rel || !final_rel->cheapest_total_path)
252 elog(ERROR, "failed to construct the join relation");
255 * If there's grouping going on, estimate the number of result groups. We
256 * couldn't do this any earlier because it depends on relation size
257 * estimates that were set up above.
259 * Then convert tuple_fraction to fractional form if it is absolute, and
260 * adjust it based on the knowledge that grouping_planner will be doing
261 * grouping or aggregation work with our result.
263 * This introduces some undesirable coupling between this code and
264 * grouping_planner, but the alternatives seem even uglier; we couldn't
265 * pass back completed paths without making these decisions here.
267 if (parse->groupClause)
271 groupExprs = get_sortgrouplist_exprs(parse->groupClause,
273 *num_groups = estimate_num_groups(root,
278 * In GROUP BY mode, an absolute LIMIT is relative to the number of
279 * groups not the number of tuples. If the caller gave us a fraction,
280 * keep it as-is. (In both cases, we are effectively assuming that
281 * all the groups are about the same size.)
283 if (tuple_fraction >= 1.0)
284 tuple_fraction /= *num_groups;
287 * If both GROUP BY and ORDER BY are specified, we will need two
288 * levels of sort --- and, therefore, certainly need to read all the
289 * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if
290 * we have both DISTINCT and GROUP BY.
292 if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
293 !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys))
294 tuple_fraction = 0.0;
296 else if (parse->hasAggs || root->hasHavingQual)
299 * Ungrouped aggregate will certainly want to read all the tuples, and
300 * it will deliver a single result row (so leave *num_groups 1).
302 tuple_fraction = 0.0;
304 else if (parse->distinctClause)
307 * Since there was no grouping or aggregation, it's reasonable to
308 * assume the UNIQUE filter has effects comparable to GROUP BY. Return
309 * the estimated number of output rows for use by caller. (If DISTINCT
310 * is used with grouping, we ignore its effects for rowcount
311 * estimation purposes; this amounts to assuming the grouped rows are
316 distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
318 *num_groups = estimate_num_groups(root,
323 * Adjust tuple_fraction the same way as for GROUP BY, too.
325 if (tuple_fraction >= 1.0)
326 tuple_fraction /= *num_groups;
331 * Plain non-grouped, non-aggregated query: an absolute tuple fraction
332 * can be divided by the number of tuples.
334 if (tuple_fraction >= 1.0)
335 tuple_fraction /= final_rel->rows;
339 * Pick out the cheapest-total path and the cheapest presorted path for
340 * the requested pathkeys (if there is one). We should take the tuple
341 * fraction into account when selecting the cheapest presorted path, but
342 * not when selecting the cheapest-total path, since if we have to sort
343 * then we'll have to fetch all the tuples. (But there's a special case:
344 * if query_pathkeys is NIL, meaning order doesn't matter, then the
345 * "cheapest presorted" path will be the cheapest overall for the tuple
348 * The cheapest-total path is also the one to use if grouping_planner
349 * decides to use hashed aggregation, so we return it separately even if
350 * this routine thinks the presorted path is the winner.
352 cheapestpath = final_rel->cheapest_total_path;
355 get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
356 root->query_pathkeys,
359 /* Don't return same path in both guises; just wastes effort */
360 if (sortedpath == cheapestpath)
364 * Forget about the presorted path if it would be cheaper to sort the
365 * cheapest-total path. Here we need consider only the behavior at the
366 * tuple fraction point.
370 Path sort_path; /* dummy for result of cost_sort */
372 if (root->query_pathkeys == NIL ||
373 pathkeys_contained_in(root->query_pathkeys,
374 cheapestpath->pathkeys))
376 /* No sort needed for cheapest path */
377 sort_path.startup_cost = cheapestpath->startup_cost;
378 sort_path.total_cost = cheapestpath->total_cost;
382 /* Figure cost for sorting */
383 cost_sort(&sort_path, root, root->query_pathkeys,
384 cheapestpath->total_cost,
385 final_rel->rows, final_rel->width,
389 if (compare_fractional_path_costs(sortedpath, &sort_path,
392 /* Presorted path is a loser */
397 *cheapest_path = cheapestpath;
398 *sorted_path = sortedpath;