1 /*-------------------------------------------------------------------------
4 * The query optimizer external interface.
6 * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.75 2000/02/15 20:49:18 tgl Exp $
13 *-------------------------------------------------------------------------
15 #include <sys/types.h>
19 #include "access/genam.h"
20 #include "access/heapam.h"
21 #include "catalog/pg_type.h"
22 #include "executor/executor.h"
23 #include "nodes/makefuncs.h"
24 #include "optimizer/clauses.h"
25 #include "optimizer/internal.h"
26 #include "optimizer/paths.h"
27 #include "optimizer/planmain.h"
28 #include "optimizer/planner.h"
29 #include "optimizer/prep.h"
30 #include "optimizer/subselect.h"
31 #include "optimizer/tlist.h"
32 #include "optimizer/var.h"
33 #include "parser/parse_expr.h"
34 #include "parser/parse_oper.h"
35 #include "utils/builtins.h"
36 #include "utils/lsyscache.h"
37 #include "utils/syscache.h"
39 static List *make_subplanTargetList(Query *parse, List *tlist,
40 AttrNumber **groupColIdx);
41 static Plan *make_groupplan(List *group_tlist, bool tuplePerGroup,
42 List *groupClause, AttrNumber *grpColIdx,
43 bool is_presorted, Plan *subplan);
44 static Plan *make_sortplan(List *tlist, List *sortcls, Plan *plannode);
46 /*****************************************************************************
48 * Query optimizer entry point
50 *****************************************************************************/
56 /* Initialize state for subselects */
57 PlannerQueryLevel = 1;
58 PlannerInitPlan = NULL;
59 PlannerParamVar = NULL;
62 transformKeySetQuery(parse);
64 result_plan = union_planner(parse, -1.0 /* default case */);
66 Assert(PlannerQueryLevel == 1);
67 if (PlannerPlanId > 0)
69 result_plan->initPlan = PlannerInitPlan;
70 (void) SS_finalize_plan(result_plan);
72 result_plan->nParamExec = length(PlannerParamVar);
74 set_plan_references(result_plan);
79 /*--------------------
81 * Invokes the planner on union-type queries (both regular UNIONs and
82 * appends produced by inheritance), recursing if necessary to get them
83 * all, then processes normal plans.
85 * parse is the querytree produced by the parser & rewriter.
86 * tuple_fraction is the fraction of tuples we expect will be retrieved
88 * tuple_fraction is interpreted as follows:
89 * < 0: determine fraction by inspection of query (normal case)
90 * 0: expect all tuples to be retrieved
91 * 0 < tuple_fraction < 1: expect the given fraction of tuples available
92 * from the plan to be retrieved
93 * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
94 * expected to be retrieved (ie, a LIMIT specification)
95 * The normal case is to pass -1, but some callers pass values >= 0 to
96 * override this routine's determination of the appropriate fraction.
98 * Returns a query plan.
102 union_planner(Query *parse,
103 double tuple_fraction)
105 List *tlist = parse->targetList;
106 List *rangetable = parse->rtable;
107 Plan *result_plan = (Plan *) NULL;
108 AttrNumber *groupColIdx = NULL;
109 List *current_pathkeys = NIL;
110 List *group_pathkeys;
115 * A HAVING clause without aggregates is equivalent to a WHERE clause
116 * (except it can only refer to grouped fields). If there are no
117 * aggs anywhere in the query, then we don't want to create an Agg
118 * plan node, so merge the HAVING condition into WHERE. (We used to
119 * consider this an error condition, but it seems to be legal SQL.)
121 if (parse->havingQual != NULL && ! parse->hasAggs)
123 if (parse->qual == NULL)
124 parse->qual = parse->havingQual;
126 parse->qual = (Node *) make_andclause(lappend(lcons(parse->qual,
129 parse->havingQual = NULL;
133 * Simplify constant expressions in targetlist and quals.
135 * Note that at this point the qual has not yet been converted to
136 * implicit-AND form, so we can apply eval_const_expressions directly.
137 * Also note that we need to do this before SS_process_sublinks,
138 * because that routine inserts bogus "Const" nodes.
140 tlist = (List *) eval_const_expressions((Node *) tlist);
141 parse->qual = eval_const_expressions(parse->qual);
142 parse->havingQual = eval_const_expressions(parse->havingQual);
145 if (parse->unionClause)
147 result_plan = (Plan *) plan_union_queries(parse);
148 /* XXX do we need to do this? bjm 12/19/97 */
149 tlist = preprocess_targetlist(tlist,
151 parse->resultRelation,
154 * We leave current_pathkeys NIL indicating we do not know sort order.
155 * Actually, for a normal UNION we have done an explicit sort; ought
156 * to change interface to plan_union_queries to pass that info back!
159 /* Calculate pathkeys that represent grouping/ordering requirements */
160 group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
162 sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
165 else if ((rt_index = first_inherit_rt_entry(rangetable)) != -1)
170 * Generate appropriate target list for subplan; may be different
171 * from tlist if grouping or aggregation is needed.
173 sub_tlist = make_subplanTargetList(parse, tlist, &groupColIdx);
176 * Recursively plan the subqueries needed for inheritance
178 result_plan = (Plan *) plan_inherit_queries(parse, sub_tlist,
182 * Fix up outer target list. NOTE: unlike the case for non-inherited
183 * query, we pass the unfixed tlist to subplans, which do their own
184 * fixing. But we still want to fix the outer target list afterwards.
185 * I *think* this is correct --- doing the fix before recursing is
186 * definitely wrong, because preprocess_targetlist() will do the
187 * wrong thing if invoked twice on the same list. Maybe that is a bug?
190 tlist = preprocess_targetlist(tlist,
192 parse->resultRelation,
195 if (parse->rowMark != NULL)
196 elog(ERROR, "SELECT FOR UPDATE is not supported for inherit queries");
198 * We leave current_pathkeys NIL indicating we do not know sort order
199 * of the Append-ed results.
202 /* Calculate pathkeys that represent grouping/ordering requirements */
203 group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
205 sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
212 /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
213 tlist = preprocess_targetlist(tlist,
215 parse->resultRelation,
219 * Add row-mark targets for UPDATE (should this be done in
220 * preprocess_targetlist?)
222 if (parse->rowMark != NULL)
226 foreach(l, parse->rowMark)
228 RowMark *rowmark = (RowMark *) lfirst(l);
234 if (!(rowmark->info & ROW_MARK_FOR_UPDATE))
237 resname = (char *) palloc(32);
238 sprintf(resname, "ctid%u", rowmark->rti);
239 resdom = makeResdom(length(tlist) + 1,
247 var = makeVar(rowmark->rti, -1, TIDOID, -1, 0);
249 ctid = makeTargetEntry(resdom, (Node *) var);
250 tlist = lappend(tlist, ctid);
255 * Generate appropriate target list for subplan; may be different
256 * from tlist if grouping or aggregation is needed.
258 sub_tlist = make_subplanTargetList(parse, tlist, &groupColIdx);
260 /* Calculate pathkeys that represent grouping/ordering requirements */
261 group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
263 sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
267 * Figure out whether we need a sorted result from query_planner.
269 * If we have a GROUP BY clause, then we want a result sorted
270 * properly for grouping. Otherwise, if there is an ORDER BY clause,
271 * we want to sort by the ORDER BY clause. (Note: if we have both,
272 * and ORDER BY is a superset of GROUP BY, it would be tempting to
273 * request sort by ORDER BY --- but that might just leave us failing
274 * to exploit an available sort order at all. Needs more thought...)
276 if (parse->groupClause)
277 parse->query_pathkeys = group_pathkeys;
278 else if (parse->sortClause)
279 parse->query_pathkeys = sort_pathkeys;
281 parse->query_pathkeys = NIL;
284 * Figure out whether we expect to retrieve all the tuples that the
285 * plan can generate, or to stop early due to a LIMIT or other
286 * factors. If the caller passed a value >= 0, believe that value,
287 * else do our own examination of the query context.
289 if (tuple_fraction < 0.0)
291 /* Initial assumption is we need all the tuples */
292 tuple_fraction = 0.0;
296 * For now, we deliberately ignore the OFFSET clause, so that
297 * queries with the same LIMIT and different OFFSETs will get
298 * the same queryplan and therefore generate consistent results
299 * (to the extent the planner can guarantee that, anyway).
300 * XXX Perhaps it would be better to use the OFFSET too, and tell
301 * users to specify ORDER BY if they want consistent results
302 * across different LIMIT queries.
304 if (parse->limitCount != NULL)
306 if (IsA(parse->limitCount, Const))
308 Const *ccount = (Const *) parse->limitCount;
309 tuple_fraction = (double) ((int) (ccount->constvalue));
310 /* the constant can legally be either 0 ("ALL") or a
311 * positive integer; either is consistent with our
312 * conventions for tuple_fraction.
317 /* It's a PARAM ... don't know exactly what the limit
318 * will be, but for lack of a better idea assume 10%
319 * of the plan's result is wanted.
321 tuple_fraction = 0.10;
325 * Check for a retrieve-into-portal, ie DECLARE CURSOR.
327 * We have no real idea how many tuples the user will ultimately
328 * FETCH from a cursor, but it seems a good bet that he doesn't
329 * want 'em all. Optimize for 10% retrieval (you gotta better
333 tuple_fraction = 0.10;
336 * Adjust tuple_fraction if we see that we are going to apply
337 * grouping/aggregation/etc. This is not overridable by the
338 * caller, since it reflects plan actions that this routine
339 * will certainly take, not assumptions about context.
341 if (parse->groupClause)
344 * In GROUP BY mode, we have the little problem that we don't
345 * really know how many input tuples will be needed to make a
346 * group, so we can't translate an output LIMIT count into an
347 * input count. For lack of a better idea, assume 10% of the
348 * input data will be processed if there is any output limit.
350 if (tuple_fraction > 0.0)
351 tuple_fraction = 0.10;
353 * If both GROUP BY and ORDER BY are specified, we will need
354 * two levels of sort --- and, therefore, certainly need to
355 * read all the input tuples --- unless ORDER BY is a subset
356 * of GROUP BY. (Although we are comparing non-canonicalized
357 * pathkeys here, it should be OK since they will both contain
358 * only single-element sublists at this point. See pathkeys.c.)
360 if (parse->groupClause && parse->sortClause &&
361 ! pathkeys_contained_in(sort_pathkeys, group_pathkeys))
362 tuple_fraction = 0.0;
364 else if (parse->hasAggs)
366 /* Ungrouped aggregate will certainly want all the input tuples. */
367 tuple_fraction = 0.0;
369 else if (parse->distinctClause)
372 * SELECT DISTINCT, like GROUP, will absorb an unpredictable
373 * number of input tuples per output tuple. So, fall back to
374 * our same old 10% default...
376 if (tuple_fraction > 0.0)
377 tuple_fraction = 0.10;
380 /* Generate the (sub) plan */
381 result_plan = query_planner(parse,
383 (List *) parse->qual,
386 /* query_planner returns actual sort order (which is not
387 * necessarily what we requested) in query_pathkeys.
389 current_pathkeys = parse->query_pathkeys;
392 /* query_planner returns NULL if it thinks plan is bogus */
394 elog(ERROR, "union_planner: failed to create plan");
397 * We couldn't canonicalize group_pathkeys and sort_pathkeys before
398 * running query_planner(), so do it now.
400 group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);
401 sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
404 * If we have a GROUP BY clause, insert a group node (plus the
405 * appropriate sort node, if necessary).
407 if (parse->groupClause)
414 * Decide how many tuples per group the Group node needs
415 * to return. (Needs only one tuple per group if no aggregate is
416 * present. Otherwise, need every tuple from the group to do the
417 * aggregation.) Note tuplePerGroup is named backwards :-(
419 tuplePerGroup = parse->hasAggs;
422 * If there are aggregates then the Group node should just return
423 * the same set of vars as the subplan did (but we can exclude
424 * any GROUP BY expressions). If there are no aggregates
425 * then the Group node had better compute the final tlist.
428 group_tlist = flatten_tlist(result_plan->targetlist);
433 * Figure out whether the path result is already ordered the way we
434 * need it --- if so, no need for an explicit sort step.
436 if (pathkeys_contained_in(group_pathkeys, current_pathkeys))
438 is_sorted = true; /* no sort needed now */
439 /* current_pathkeys remains unchanged */
443 /* We will need to do an explicit sort by the GROUP BY clause.
444 * make_groupplan will do the work, but set current_pathkeys
445 * to indicate the resulting order.
448 current_pathkeys = group_pathkeys;
451 result_plan = make_groupplan(group_tlist,
460 * If we have a HAVING clause, do the necessary things with it.
461 * This code should parallel query_planner()'s initial processing
462 * of the WHERE clause.
464 if (parse->havingQual)
466 /* Convert the havingQual to implicit-AND normal form */
467 parse->havingQual = (Node *)
468 canonicalize_qual((Expr *) parse->havingQual, true);
470 /* Replace uplevel Vars with Params */
471 if (PlannerQueryLevel > 1)
472 parse->havingQual = SS_replace_correlation_vars(parse->havingQual);
474 if (parse->hasSubLinks)
476 /* Expand SubLinks to SubPlans */
477 parse->havingQual = SS_process_sublinks(parse->havingQual);
478 /* Check for ungrouped variables passed to subplans */
479 check_subplans_for_ungrouped_vars(parse->havingQual,
486 * If aggregate is present, insert the Agg node
488 * HAVING clause, if any, becomes qual of the Agg node
492 result_plan = (Plan *) make_agg(tlist,
493 (List *) parse->havingQual,
495 /* Note: Agg does not affect any existing sort order of the tuples */
499 * If we were not able to make the plan come out in the right order,
500 * add an explicit sort step.
502 if (parse->sortClause)
504 if (! pathkeys_contained_in(sort_pathkeys, current_pathkeys))
506 result_plan = make_sortplan(tlist, parse->sortClause, result_plan);
511 * Finally, if there is a DISTINCT clause, add the UNIQUE node.
513 if (parse->distinctClause)
515 result_plan = (Plan *) make_unique(tlist, result_plan,
516 parse->distinctClause);
523 * make_subplanTargetList
524 * Generate appropriate target list when grouping is required.
526 * When union_planner inserts Aggregate and/or Group plan nodes above
527 * the result of query_planner, we typically want to pass a different
528 * target list to query_planner than the outer plan nodes should have.
529 * This routine generates the correct target list for the subplan.
531 * The initial target list passed from the parser already contains entries
532 * for all ORDER BY and GROUP BY expressions, but it will not have entries
533 * for variables used only in HAVING clauses; so we need to add those
534 * variables to the subplan target list. Also, if we are doing either
535 * grouping or aggregation, we flatten all expressions except GROUP BY items
536 * into their component variables; the other expressions will be computed by
537 * the inserted nodes rather than by the subplan. For example,
539 * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
540 * we want to pass this targetlist to the subplan:
542 * where the a+b target will be used by the Sort/Group steps, and the
543 * other targets will be used for computing the final results. (In the
544 * above example we could theoretically suppress the a and b targets and
545 * use only a+b, but it's not really worth the trouble.)
547 * 'parse' is the query being processed.
548 * 'tlist' is the query's target list.
549 * 'groupColIdx' receives an array of column numbers for the GROUP BY
550 * expressions (if there are any) in the subplan's target list.
552 * The result is the targetlist to be passed to the subplan.
556 make_subplanTargetList(Query *parse,
558 AttrNumber **groupColIdx)
567 * If we're not grouping or aggregating, nothing to do here;
568 * query_planner should receive the unmodified target list.
570 if (!parse->hasAggs && !parse->groupClause && !parse->havingQual)
574 * Otherwise, start with a "flattened" tlist (having just the vars
575 * mentioned in the targetlist and HAVING qual --- but not upper-
576 * level Vars; they will be replaced by Params later on).
578 sub_tlist = flatten_tlist(tlist);
579 extravars = pull_var_clause(parse->havingQual, false);
580 sub_tlist = add_to_flat_tlist(sub_tlist, extravars);
584 * If grouping, create sub_tlist entries for all GROUP BY expressions
585 * (GROUP BY items that are simple Vars should be in the list already),
586 * and make an array showing where the group columns are in the sub_tlist.
588 numCols = length(parse->groupClause);
592 AttrNumber *grpColIdx;
595 grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
596 *groupColIdx = grpColIdx;
598 foreach(gl, parse->groupClause)
600 GroupClause *grpcl = (GroupClause *) lfirst(gl);
601 Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
602 TargetEntry *te = NULL;
605 /* Find or make a matching sub_tlist entry */
606 foreach(sl, sub_tlist)
608 te = (TargetEntry *) lfirst(sl);
609 if (equal(groupexpr, te->expr))
614 te = makeTargetEntry(makeResdom(length(sub_tlist) + 1,
616 exprTypmod(groupexpr),
622 sub_tlist = lappend(sub_tlist, te);
625 /* and save its resno */
626 grpColIdx[keyno++] = te->resdom->resno;
635 * Add a Group node for GROUP BY processing.
636 * If we couldn't make the subplan produce presorted output for grouping,
637 * first add an explicit Sort node.
640 make_groupplan(List *group_tlist,
643 AttrNumber *grpColIdx,
647 int numCols = length(groupClause);
652 * The Sort node always just takes a copy of the subplan's tlist
653 * plus ordering information. (This might seem inefficient if the
654 * subplan contains complex GROUP BY expressions, but in fact Sort
655 * does not evaluate its targetlist --- it only outputs the same
656 * tuples in a new order. So the expressions we might be copying
657 * are just dummies with no extra execution cost.)
659 List *sort_tlist = new_unsorted_tlist(subplan->targetlist);
663 foreach(gl, groupClause)
665 GroupClause *grpcl = (GroupClause *) lfirst(gl);
666 TargetEntry *te = nth(grpColIdx[keyno]-1, sort_tlist);
667 Resdom *resdom = te->resdom;
670 * Check for the possibility of duplicate group-by clauses --- the
671 * parser should have removed 'em, but the Sort executor will get
672 * terribly confused if any get through!
674 if (resdom->reskey == 0)
676 /* OK, insert the ordering info needed by the executor. */
677 resdom->reskey = ++keyno;
678 resdom->reskeyop = get_opcode(grpcl->sortop);
682 subplan = (Plan *) make_sort(sort_tlist,
683 _NONAME_RELATION_ID_,
688 return (Plan *) make_group(group_tlist, tuplePerGroup, numCols,
694 * Add a Sort node to implement an explicit ORDER BY clause.
697 make_sortplan(List *tlist, List *sortcls, Plan *plannode)
704 * First make a copy of the tlist so that we don't corrupt the
708 temp_tlist = new_unsorted_tlist(tlist);
712 SortClause *sortcl = (SortClause *) lfirst(i);
713 TargetEntry *tle = get_sortgroupclause_tle(sortcl, temp_tlist);
714 Resdom *resdom = tle->resdom;
717 * Check for the possibility of duplicate order-by clauses --- the
718 * parser should have removed 'em, but the executor will get terribly
719 * confused if any get through!
721 if (resdom->reskey == 0)
723 /* OK, insert the ordering info needed by the executor. */
724 resdom->reskey = ++keyno;
725 resdom->reskeyop = get_opcode(sortcl->sortop);
729 return (Plan *) make_sort(temp_tlist,
730 _NONAME_RELATION_ID_,
736 * pg_checkretval() -- check return value of a list of sql parse
739 * The return value of a sql function is the value returned by
740 * the final query in the function. We do some ad-hoc define-time
741 * type checking here to be sure that the user is returning the
744 * XXX Why is this function in this module?
747 pg_checkretval(Oid rettype, List *queryTreeList)
760 /* find the final query */
761 parse = (Query *) nth(length(queryTreeList) - 1, queryTreeList);
764 * test 1: if the last query is a utility invocation, then there had
765 * better not be a return value declared.
767 if (parse->commandType == CMD_UTILITY)
769 if (rettype == InvalidOid)
772 elog(ERROR, "return type mismatch in function decl: final query is a catalog utility");
775 /* okay, it's an ordinary query */
776 tlist = parse->targetList;
778 cmd = parse->commandType;
781 * test 2: if the function is declared to return no value, then the
782 * final query had better not be a retrieve.
784 if (rettype == InvalidOid)
786 if (cmd == CMD_SELECT)
788 "function declared with no return type, but final query is a retrieve");
793 /* by here, the function is declared to return some type */
794 if ((typ = typeidType(rettype)) == NULL)
795 elog(ERROR, "can't find return type %u for function\n", rettype);
798 * test 3: if the function is declared to return a value, then the
799 * final query had better be a retrieve.
801 if (cmd != CMD_SELECT)
802 elog(ERROR, "function declared to return type %s, but final query is not a retrieve", typeTypeName(typ));
805 * test 4: for base type returns, the target list should have exactly
806 * one entry, and its type should agree with what the user declared.
809 if (typeTypeRelid(typ) == InvalidOid)
811 if (ExecTargetListLength(tlist) > 1)
812 elog(ERROR, "function declared to return %s returns multiple values in final retrieve", typeTypeName(typ));
814 resnode = (Resdom *) ((TargetEntry *) lfirst(tlist))->resdom;
815 if (resnode->restype != rettype)
816 elog(ERROR, "return type mismatch in function: declared to return %s, returns %s", typeTypeName(typ), typeidTypeName(resnode->restype));
818 /* by here, base return types match */
823 * If the target list is of length 1, and the type of the varnode in
824 * the target list is the same as the declared return type, this is
825 * okay. This can happen, for example, where the body of the function
826 * is 'retrieve (x = func2())', where func2 has the same return type
827 * as the function that's calling it.
829 if (ExecTargetListLength(tlist) == 1)
831 resnode = (Resdom *) ((TargetEntry *) lfirst(tlist))->resdom;
832 if (resnode->restype == rettype)
837 * By here, the procedure returns a (set of) tuples. This part of the
838 * typechecking is a hack. We look up the relation that is the
839 * declared return type, and make sure that attributes 1 .. n in the
840 * target list match the declared types.
842 reln = heap_open(typeTypeRelid(typ), AccessShareLock);
844 relnatts = reln->rd_rel->relnatts;
846 if (ExecTargetListLength(tlist) != relnatts)
847 elog(ERROR, "function declared to return type %s does not retrieve (%s.*)", typeTypeName(typ), typeTypeName(typ));
849 /* expect attributes 1 .. n in order */
850 for (i = 1; i <= relnatts; i++)
852 TargetEntry *tle = lfirst(tlist);
853 Node *thenode = tle->expr;
854 Oid tletype = exprType(thenode);
856 if (tletype != reln->rd_att->attrs[i - 1]->atttypid)
857 elog(ERROR, "function declared to return type %s does not retrieve (%s.all)", typeTypeName(typ), typeTypeName(typ));
858 tlist = lnext(tlist);
861 heap_close(reln, AccessShareLock);