1 /*-------------------------------------------------------------------------
4 * Planning routines for subselects and parameters.
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.134 2008/08/17 01:20:00 tgl Exp $
12 *-------------------------------------------------------------------------
16 #include "catalog/pg_operator.h"
17 #include "catalog/pg_type.h"
18 #include "miscadmin.h"
19 #include "nodes/makefuncs.h"
20 #include "optimizer/clauses.h"
21 #include "optimizer/cost.h"
22 #include "optimizer/planmain.h"
23 #include "optimizer/planner.h"
24 #include "optimizer/prep.h"
25 #include "optimizer/subselect.h"
26 #include "optimizer/var.h"
27 #include "parser/parse_expr.h"
28 #include "parser/parse_relation.h"
29 #include "parser/parsetree.h"
30 #include "rewrite/rewriteManip.h"
31 #include "utils/builtins.h"
32 #include "utils/lsyscache.h"
33 #include "utils/syscache.h"
/*
 * Working state for convert_testexpr_mutator; convert_testexpr fills it in
 * and passes its address to the mutator.
 *
 * subst_nodes is looked up by position: a PARAM_SUBLINK Param with
 * paramid N is replaced by a copy of list element N - 1.
 */
36 typedef struct convert_testexpr_context
39 List *subst_nodes; /* Nodes to substitute for Params */
40 } convert_testexpr_context;
/*
 * Working state for process_sublinks_mutator.
 *
 * NOTE(review): the member declarations are not present in this listing.
 * SS_process_sublinks and process_sublinks_mutator assign the members
 * "root" and "isTopQual" -- confirm their declared types against the
 * complete file.
 */
42 typedef struct process_sublinks_context
46 } process_sublinks_context;
/*
 * Working state handed to finalize_primnode (see its prototype, which takes
 * a pointer to this struct).
 *
 * NOTE(review): only the paramids member is visible in this listing; any
 * other members must be confirmed against the complete file.
 */
48 typedef struct finalize_primnode_context
51 Bitmapset *paramids; /* Non-local PARAM_EXEC paramids found */
52 } finalize_primnode_context;
/*
 * Prototypes for the file-local (static) helpers defined further down.
 *
 * NOTE(review): several multi-line prototypes are missing continuation
 * lines in this listing (a line ending in "," is followed directly by the
 * next prototype).  The full parameter lists are visible at the
 * definitions of generate_subquery_params and generate_subquery_vars.
 */
55 static List *generate_subquery_params(PlannerInfo *root, List *tlist,
57 static List *generate_subquery_vars(PlannerInfo *root, List *tlist,
59 static Node *convert_testexpr(PlannerInfo *root,
62 static Node *convert_testexpr_mutator(Node *node,
63 convert_testexpr_context *context);
64 static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan);
65 static bool hash_ok_operator(OpExpr *expr);
66 static bool simplify_EXISTS_query(Query *query);
67 static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
68 static Node *process_sublinks_mutator(Node *node,
69 process_sublinks_context *context);
70 static Bitmapset *finalize_plan(PlannerInfo *root,
72 Bitmapset *valid_params);
73 static bool finalize_primnode(Node *node, finalize_primnode_context *context);
77 * Generate a Param node to replace the given Var,
78 * which is expected to have varlevelsup > 0 (ie, it is not local).
/*
 * replace_outer_var
 *
 * root: planner state of the query level that contains the reference.
 * var:  the upper-level Var to be replaced; not modified (a copy is stored).
 *
 * Scans root->glob->paramlist for an entry at the same absolute query
 * level whose item is a Var with equal varno, varattno, vartype and
 * vartypmod.  When none is found, a copy of the Var with varlevelsup reset
 * to 0 is appended as a new PlannerParamItem.  The Param built at the end
 * (retval) is PARAM_EXEC and carries the Var's type and typmod.
 *
 * NOTE(review): the lines that maintain the index "i" and assign
 * retval->paramid, and the return statement, are not present in this
 * listing -- confirm against the complete file.
 */
81 replace_outer_var(PlannerInfo *root, Var *var)
85 PlannerParamItem *pitem;
89 Assert(var->varlevelsup > 0 && var->varlevelsup < root->query_level);
/* Absolute level of the query that owns the Var (current level minus hops up) */
90 abslevel = root->query_level - var->varlevelsup;
93 * If there's already a paramlist entry for this same Var, just use it.
94 * NOTE: in sufficiently complex querytrees, it is possible for the same
95 * varno/abslevel to refer to different RTEs in different parts of the
96 * parsetree, so that different fields might end up sharing the same Param
97 * number. As long as we check the vartype as well, I believe that this
98 * sort of aliasing will cause no trouble. The correct field should get
99 * stored into the Param slot at execution in each part of the tree.
101 * We also need to demand a match on vartypmod. This does not matter for
102 * the Param itself, since those are not typmod-dependent, but it does
103 * matter when make_subplan() instantiates a modified copy of the Var for
104 * a subplan's args list.
107 foreach(ppl, root->glob->paramlist)
109 pitem = (PlannerParamItem *) lfirst(ppl);
110 if (pitem->abslevel == abslevel && IsA(pitem->item, Var))
112 Var *pvar = (Var *) pitem->item;
114 if (pvar->varno == var->varno &&
115 pvar->varattno == var->varattno &&
116 pvar->vartype == var->vartype &&
117 pvar->vartypmod == var->vartypmod)
125 /* Nope, so make a new one */
/*
 * Work on a copy so the caller's Var is left alone; the stored copy is
 * expressed relative to its owning level (varlevelsup = 0), which is
 * recorded separately in pitem->abslevel.
 */
126 var = (Var *) copyObject(var);
127 var->varlevelsup = 0;
129 pitem = makeNode(PlannerParamItem);
130 pitem->item = (Node *) var;
131 pitem->abslevel = abslevel;
133 root->glob->paramlist = lappend(root->glob->paramlist, pitem);
134 /* i is already the correct index for the new item */
137 retval = makeNode(Param);
138 retval->paramkind = PARAM_EXEC;
140 retval->paramtype = var->vartype;
141 retval->paramtypmod = var->vartypmod;
147 * Generate a Param node to replace the given Aggref
148 * which is expected to have agglevelsup > 0 (ie, it is not local).
/*
 * replace_outer_agg
 *
 * root: planner state of the query level that contains the reference.
 * agg:  the upper-level Aggref to be replaced; not modified (a copy is
 *       stored).
 *
 * Always appends a new PlannerParamItem holding a level-adjusted copy of
 * the Aggref; no attempt is made to reuse an existing entry.  The Param
 * built at the end (retval) is PARAM_EXEC with the aggregate's result type
 * and typmod -1.
 *
 * NOTE(review): the assignment of retval->paramid and the return statement
 * are not present in this listing; "i" is computed as the index of the
 * entry just appended, presumably for that purpose -- confirm.
 */
151 replace_outer_agg(PlannerInfo *root, Aggref *agg)
154 PlannerParamItem *pitem;
158 Assert(agg->agglevelsup > 0 && agg->agglevelsup < root->query_level);
159 abslevel = root->query_level - agg->agglevelsup;
162 * It does not seem worthwhile to try to match duplicate outer aggs. Just
163 * make a new slot every time.
165 agg = (Aggref *) copyObject(agg);
/*
 * Shift the copied tree by -agglevelsup levels; the Assert that follows
 * checks that the Aggref node itself ended up at level 0.
 */
166 IncrementVarSublevelsUp((Node *) agg, -((int) agg->agglevelsup), 0);
167 Assert(agg->agglevelsup == 0);
169 pitem = makeNode(PlannerParamItem);
170 pitem->item = (Node *) agg;
171 pitem->abslevel = abslevel;
173 root->glob->paramlist = lappend(root->glob->paramlist, pitem);
/* Zero-based position of the entry that was just appended */
174 i = list_length(root->glob->paramlist) - 1;
176 retval = makeNode(Param);
177 retval->paramkind = PARAM_EXEC;
179 retval->paramtype = agg->aggtype;
180 retval->paramtypmod = -1;
186 * Generate a new Param node that will not conflict with any other.
188 * This is used to allocate PARAM_EXEC slots for subplan outputs.
/*
 * generate_new_param
 *
 * root:        planner state; root->glob->paramlist gains one entry.
 * paramtype:   type OID stored in the new Param.
 * paramtypmod: typmod stored in the new Param.
 *
 * Builds a PARAM_EXEC Param and registers it in the global paramlist.  The
 * list entry's item is the Param itself and its abslevel is the current
 * query level.
 *
 * NOTE(review): the return statement is not present in this listing;
 * callers use the result as a Param pointer (e.g. prm->paramid).
 */
191 generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod)
194 PlannerParamItem *pitem;
196 retval = makeNode(Param);
197 retval->paramkind = PARAM_EXEC;
/*
 * Taken before the append below, so the id equals the zero-based position
 * the new paramlist entry will occupy.
 */
198 retval->paramid = list_length(root->glob->paramlist);
199 retval->paramtype = paramtype;
200 retval->paramtypmod = paramtypmod;
202 pitem = makeNode(PlannerParamItem);
203 pitem->item = (Node *) retval;
204 pitem->abslevel = root->query_level;
206 root->glob->paramlist = lappend(root->glob->paramlist, pitem);
212 * Get the datatype of the first column of the plan's output.
214 * This is stored for ARRAY_SUBLINK and for exprType(), which doesn't have any
215 * way to get at the plan associated with a SubPlan node. We really only need
216 * the value for EXPR_SUBLINK and ARRAY_SUBLINK subplans, but for consistency
 * it is set for every SubPlan.
/*
 * get_first_col_type
 *
 * plan: finished plan whose output targetlist is inspected.
 *
 * When the targetlist is non-empty, returns exprType() of the expression
 * in its first entry.
 *
 * NOTE(review): the fallback return for an empty targetlist is not present
 * in this listing; the comment below says VOID is used -- confirm.
 */
220 get_first_col_type(Plan *plan)
222 /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
223 if (plan->targetlist)
225 TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
227 Assert(IsA(tent, TargetEntry));
229 return exprType((Node *) tent->expr);
235 * Convert a SubLink (as created by the parser) into a SubPlan.
237 * We are given the original SubLink and the already-processed testexpr
238 * (use this instead of the SubLink's own field). We are also told if
239 * this expression appears at top level of a WHERE/HAVING qual.
241 * The result is whatever we need to substitute in place of the SubLink
242 * node in the executable expression. This will be either the SubPlan
243 * node (if we have to do the subplan as a subplan), or a Param node
244 * representing the result of an InitPlan, or a row comparison expression
245 * tree containing InitPlan Param nodes.
/*
 * make_subplan
 *
 * root:      planner state of the query level containing the SubLink.
 * slink:     the SubLink being converted (its subselect is copied before
 *            planning, so the original Query is not scribbled on).
 * testexpr:  already-processed test expression to use in place of
 *            slink->testexpr.
 * isTopQual: copied into splan->unknownEqFalse.
 *
 * Outline of the visible steps:
 *   1. copy the sub-Query, simplify it if the sublink is EXISTS, and pick
 *      a tuple_fraction from the sublink type;
 *   2. plan the sub-Query at query_level + 1 and start filling a SubPlan;
 *   3. collect into parParam the external params of the plan that belong
 *      to the current query level;
 *   4. with an empty parParam and an EXISTS/EXPR/ARRAY/ROWCOMPARE sublink,
 *      take the initplan route (result is a Param or converted testexpr);
 *      otherwise keep a SubPlan, possibly hashed or materialized;
 *   5. register the plan and its rtable in the global lists and assign
 *      plan_id.
 *
 * NOTE(review): many lines of this function (braces, else keywords, local
 * declarations, continuation lines of calls, the return) are not present
 * in this listing; the outline covers only what is visible.
 */
248 make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
250 Query *subquery = (Query *) (slink->subselect);
251 double tuple_fraction;
254 PlannerInfo *subroot;
261 * Copy the source Query node. This is a quick and dirty kluge to resolve
262 * the fact that the parser can generate trees with multiple links to the
263 * same sub-Query node, but the planner wants to scribble on the Query.
264 * Try to clean this up when we do querytree redesign...
266 subquery = (Query *) copyObject(subquery);
269 * If it's an EXISTS subplan, we might be able to simplify it.
/* Simplification is optional on this path, so the boolean result is ignored */
271 if (slink->subLinkType == EXISTS_SUBLINK)
272 (void) simplify_EXISTS_query(subquery);
275 * For an EXISTS subplan, tell lower-level planner to expect that only the
276 * first tuple will be retrieved. For ALL and ANY subplans, we will be
277 * able to stop evaluating if the test condition fails, so very often not
278 * all the tuples will be retrieved; for lack of a better idea, specify
279 * 50% retrieval. For EXPR and ROWCOMPARE subplans, use default behavior
280 * (we're only expecting one row out, anyway).
282 * NOTE: if you change these numbers, also change cost_qual_eval_walker()
283 * and get_initplan_cost() in path/costsize.c.
285 * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or
286 * materialize its result below. In that case it would've been better to
287 * specify full retrieval. At present, however, we can only detect
288 * correlation or lack of it after we've made the subplan :-(. Perhaps
289 * detection of correlation should be done as a separate step. Meanwhile,
290 * we don't want to be too optimistic about the percentage of tuples
291 * retrieved, for fear of selecting a plan that's bad for the
292 * materialization case.
294 if (slink->subLinkType == EXISTS_SUBLINK)
295 tuple_fraction = 1.0; /* just like a LIMIT 1 */
296 else if (slink->subLinkType == ALL_SUBLINK ||
297 slink->subLinkType == ANY_SUBLINK)
298 tuple_fraction = 0.5; /* 50% */
300 tuple_fraction = 0.0; /* default behavior */
303 * Generate the plan for the subquery.
305 plan = subquery_planner(root->glob, subquery,
306 root->query_level + 1,
311 * Initialize the SubPlan node. Note plan_id isn't set yet.
313 splan = makeNode(SubPlan);
314 splan->subLinkType = slink->subLinkType;
315 splan->testexpr = NULL;
316 splan->paramIds = NIL;
317 splan->firstColType = get_first_col_type(plan);
318 splan->useHashTable = false;
319 /* At top level of a qual, can treat UNKNOWN the same as FALSE */
320 splan->unknownEqFalse = isTopQual;
321 splan->setParam = NIL;
322 splan->parParam = NIL;
326 * Make parParam list of params that current query level will pass to this
/*
 * The loop below runs on a copy of plan->extParam.
 * NOTE(review): presumably bms_first_member() consumes the member it
 * returns, which is why the plan's own set is not iterated directly --
 * confirm.  Only params whose abslevel equals the current query level are
 * added to parParam.
 */
329 tmpset = bms_copy(plan->extParam);
330 while ((paramid = bms_first_member(tmpset)) >= 0)
332 PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid);
334 if (pitem->abslevel == root->query_level)
335 splan->parParam = lappend_int(splan->parParam, paramid);
340 * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY, or
341 * ROWCOMPARE types can be used as initPlans. For EXISTS, EXPR, or ARRAY,
342 * we just produce a Param referring to the result of evaluating the
343 * initPlan. For ROWCOMPARE, we must modify the testexpr tree to contain
344 * PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted by the
/* Every initplan branch below requires parParam to be empty */
347 if (splan->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK)
351 prm = generate_new_param(root, BOOLOID, -1);
352 splan->setParam = list_make1_int(prm->paramid);
354 result = (Node *) prm;
356 else if (splan->parParam == NIL && slink->subLinkType == EXPR_SUBLINK)
358 TargetEntry *te = linitial(plan->targetlist);
361 Assert(!te->resjunk);
362 prm = generate_new_param(root,
363 exprType((Node *) te->expr),
364 exprTypmod((Node *) te->expr));
365 splan->setParam = list_make1_int(prm->paramid);
367 result = (Node *) prm;
369 else if (splan->parParam == NIL && slink->subLinkType == ARRAY_SUBLINK)
371 TargetEntry *te = linitial(plan->targetlist);
375 Assert(!te->resjunk);
376 arraytype = get_array_type(exprType((Node *) te->expr));
377 if (!OidIsValid(arraytype))
378 elog(ERROR, "could not find array type for datatype %s",
379 format_type_be(exprType((Node *) te->expr)));
380 prm = generate_new_param(root,
382 exprTypmod((Node *) te->expr));
383 splan->setParam = list_make1_int(prm->paramid);
385 result = (Node *) prm;
387 else if (splan->parParam == NIL && slink->subLinkType == ROWCOMPARE_SUBLINK)
389 /* Adjust the Params */
392 params = generate_subquery_params(root,
395 result = convert_testexpr(root,
398 splan->setParam = list_copy(splan->paramIds);
402 * The executable expression is returned to become part of the outer
403 * plan's expression tree; it is not kept in the initplan node.
415 /* Adjust the Params in the testexpr */
416 params = generate_subquery_params(root,
419 splan->testexpr = convert_testexpr(root,
425 * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to
426 * initPlans, even when they are uncorrelated or undirect correlated,
427 * because we need to scan the output of the subplan for each outer
428 * tuple. But if it's an IN (= ANY) test, we might be able to use a
429 * hashtable to avoid comparing all the tuples.
431 if (subplan_is_hashable(slink, splan, plan))
432 splan->useHashTable = true;
435 * Otherwise, we have the option to tack a MATERIAL node onto the top
436 * of the subplan, to reduce the cost of reading it repeatedly. This
437 * is pointless for a direct-correlated subplan, since we'd have to
438 * recompute its results each time anyway. For uncorrelated/undirect
439 * correlated subplans, we add MATERIAL unless the subplan's top plan
440 * node would materialize its output anyway.
442 else if (splan->parParam == NIL)
446 switch (nodeTag(plan))
451 use_material = false;
458 plan = materialize_finished_plan(plan);
462 * Make splan->args from parParam.
465 foreach(l, splan->parParam)
467 PlannerParamItem *pitem = list_nth(root->glob->paramlist,
471 * The Var or Aggref has already been adjusted to have the correct
472 * varlevelsup or agglevelsup. We probably don't even need to
473 * copy it again, but be safe.
475 args = lappend(args, copyObject(pitem->item));
479 result = (Node *) splan;
484 * Add the subplan and its rtable to the global lists.
486 root->glob->subplans = lappend(root->glob->subplans,
488 root->glob->subrtables = lappend(root->glob->subrtables,
489 subroot->parse->rtable);
/* Read after the append, so plan_id is the 1-based position in glob->subplans */
490 splan->plan_id = list_length(root->glob->subplans);
493 root->init_plans = lappend(root->init_plans, splan);
496 * A parameterless subplan (not initplan) should be prepared to handle
497 * REWIND efficiently. If it has direct parameters then there's no point
498 * since it'll be reset on each scan anyway; and if it's an initplan then
499 * there's no point since it won't get re-run without parameter changes
500 * anyway. The input of a hashed subplan doesn't need REWIND either.
502 if (splan->parParam == NIL && !isInitPlan && !splan->useHashTable)
503 root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs,
510 * generate_subquery_params: build a list of Params representing the output
511 * columns of a sublink's sub-select, given the sub-select's targetlist.
513 * We also return an integer list of the paramids of the Params.
/*
 * generate_subquery_params
 *
 * root:     planner state, passed through to generate_new_param.
 * tlist:    the sub-select's targetlist.
 * paramIds: output argument for the integer list of paramids.
 *
 * For each targetlist entry processed, a fresh Param of the entry
 * expression's type and typmod is created; the Param is appended to
 * "result" and its paramid to "ids", so the two lists stay parallel.
 *
 * NOTE(review): the loop header, any filtering of entries, the store into
 * *paramIds and the return are not present in this listing -- confirm.
 */
516 generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds)
525 TargetEntry *tent = (TargetEntry *) lfirst(lc);
531 param = generate_new_param(root,
532 exprType((Node *) tent->expr),
533 exprTypmod((Node *) tent->expr));
534 result = lappend(result, param);
535 ids = lappend_int(ids, param->paramid);
543 * generate_subquery_vars: build a list of Vars representing the output
544 * columns of a sublink's sub-select, given the sub-select's targetlist.
545 * The Vars have the specified varno (RTE index).
/*
 * generate_subquery_vars
 *
 * root:  planner state.
 * tlist: the sub-select's targetlist.
 * varno: range-table index the generated Vars should refer to.
 *
 * For each targetlist entry processed, a Var is built using the entry
 * expression's type and typmod and appended to "result".
 *
 * NOTE(review): the loop header, the Var constructor call's first
 * arguments (including the attribute number), and the return are not
 * present in this listing -- confirm against the complete file.
 */
548 generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno)
556 TargetEntry *tent = (TargetEntry *) lfirst(lc);
564 exprType((Node *) tent->expr),
565 exprTypmod((Node *) tent->expr),
567 result = lappend(result, var);
574 * convert_testexpr: convert the testexpr given by the parser into
575 * actually executable form. This entails replacing PARAM_SUBLINK Params
576 * with Params or Vars representing the results of the sub-select. The
577 * nodes to be substituted are passed in as the List result from
578 * generate_subquery_params or generate_subquery_vars.
580 * The given testexpr has already been recursively processed by
581 * process_sublinks_mutator. Hence it can no longer contain any
582 * PARAM_SUBLINK Params for lower SubLink nodes; we can safely assume that
583 * any we find are for our own level of SubLink.
/*
 * convert_testexpr
 *
 * Entry point for the testexpr conversion: stores the substitution list in
 * a stack-allocated context and returns whatever
 * convert_testexpr_mutator produces for the test expression.
 *
 * NOTE(review): the remaining parameter declarations (callers pass a test
 * expression and a substitution list after root) are not present in this
 * listing.
 */
586 convert_testexpr(PlannerInfo *root,
590 convert_testexpr_context context;
593 context.subst_nodes = subst_nodes;
594 return convert_testexpr_mutator(testexpr, &context);
/*
 * convert_testexpr_mutator
 *
 * node:    expression (sub)tree being rewritten.
 * context: carries subst_nodes, the replacement list.
 *
 * A Param of kind PARAM_SUBLINK is replaced by a copy of the matching
 * subst_nodes element; its paramid is treated as a 1-based position and
 * must lie in 1 .. list_length(subst_nodes), otherwise an ERROR is raised.
 * All other nodes are handled by recursing through
 * expression_tree_mutator with this function as the callback.
 *
 * NOTE(review): any handling of a NULL node is not present in this
 * listing.
 */
598 convert_testexpr_mutator(Node *node,
599 convert_testexpr_context *context)
603 if (IsA(node, Param))
605 Param *param = (Param *) node;
607 if (param->paramkind == PARAM_SUBLINK)
609 if (param->paramid <= 0 ||
610 param->paramid > list_length(context->subst_nodes))
611 elog(ERROR, "unexpected PARAM_SUBLINK ID: %d", param->paramid);
614 * We copy the list item to avoid having doubly-linked
615 * substructure in the modified parse tree. This is probably
616 * unnecessary when it's a Param, but be safe.
/* paramid is 1-based while list_nth is 0-based, hence the "- 1" */
618 return (Node *) copyObject(list_nth(context->subst_nodes,
619 param->paramid - 1));
622 return expression_tree_mutator(node,
623 convert_testexpr_mutator,
628 * subplan_is_hashable: decide whether we can implement a subplan by hashing
630 * Caution: the SubPlan node is not completely filled in yet. We can rely
631 * on its plan and parParam fields, however.
/*
 * subplan_is_hashable
 *
 * slink: the original SubLink (its type and testexpr are examined).
 * node:  the partially built SubPlan (only parParam is read here).
 * plan:  the finished subquery plan (plan_rows and plan_width are read).
 *
 * The visible checks, in order: the sublink must be ANY_SUBLINK; the
 * testexpr must be non-NULL and either an OpExpr or an AND clause;
 * parParam must be empty; the estimated result size must not exceed the
 * work_mem-based limit; every combining operator must pass
 * hash_ok_operator.
 *
 * NOTE(review): the "return false" / "return true" statements that follow
 * each check are not present in this listing.
 */
634 subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan)
636 double subquery_size;
640 * The sublink type must be "= ANY" --- that is, an IN operator. We
641 * expect that the test expression will be either a single OpExpr, or an
642 * AND-clause containing OpExprs. (If it's anything else then the parser
643 * must have determined that the operators have non-equality-like
644 * semantics. In the OpExpr case we can't be sure what the operator's
645 * semantics are like, but the test below for hashability will reject
646 * anything that's not equality.)
648 if (slink->subLinkType != ANY_SUBLINK)
650 if (slink->testexpr == NULL ||
651 (!IsA(slink->testexpr, OpExpr) &&
652 !and_clause(slink->testexpr)))
656 * The subplan must not have any direct correlation vars --- else we'd
657 * have to recompute its output each time, so that the hashtable wouldn't
660 if (node->parParam != NIL)
664 * The estimated size of the subquery result must fit in work_mem. (Note:
665 * we use sizeof(HeapTupleHeaderData) here even though the tuples will
666 * actually be stored as MinimalTuples; this provides some fudge factor
667 * for hashtable overhead.)
669 subquery_size = plan->plan_rows *
670 (MAXALIGN(plan->plan_width) + MAXALIGN(sizeof(HeapTupleHeaderData)));
/* NOTE(review): the 1024L factor suggests work_mem is in kilobytes -- confirm */
671 if (subquery_size > work_mem * 1024L)
675 * The combining operators must be hashable and strict. The need for
676 * hashability is obvious, since we want to use hashing. Without
677 * strictness, behavior in the presence of nulls is too unpredictable. We
678 * actually must assume even more than plain strictness: they can't yield
679 * NULL for non-null inputs, either (see nodeSubplan.c). However, hash
680 * indexes and hash joins assume that too.
682 if (IsA(slink->testexpr, OpExpr))
684 if (!hash_ok_operator((OpExpr *) slink->testexpr))
/* Not a single OpExpr, so (per the check above) an AND clause: test each arm */
689 foreach(l, ((BoolExpr *) slink->testexpr)->args)
691 Node *andarg = (Node *) lfirst(l);
693 if (!IsA(andarg, OpExpr))
694 return false; /* probably can't happen */
695 if (!hash_ok_operator((OpExpr *) andarg))
/*
 * hash_ok_operator
 *
 * expr: operator expression whose operator (expr->opno) is examined.
 *
 * Looks the operator up in the OPEROID syscache, raising an ERROR if the
 * lookup fails.  The operator is rejected when it is not marked
 * oprcanhash or when its underlying function (oprcode) is not strict.
 * The syscache entry is released once inside the rejecting branch and
 * once after it, i.e. on both outcomes.
 *
 * NOTE(review): the return statements are not present in this listing.
 */
704 hash_ok_operator(OpExpr *expr)
706 Oid opid = expr->opno;
708 Form_pg_operator optup;
710 tup = SearchSysCache(OPEROID,
711 ObjectIdGetDatum(opid),
713 if (!HeapTupleIsValid(tup))
714 elog(ERROR, "cache lookup failed for operator %u", opid);
715 optup = (Form_pg_operator) GETSTRUCT(tup);
716 if (!optup->oprcanhash || !func_strict(optup->oprcode))
718 ReleaseSysCache(tup);
721 ReleaseSysCache(tup);
726 * convert_ANY_sublink_to_join: can we convert an ANY SubLink to a join?
728 * The caller has found an ANY SubLink at the top level of one of the query's
729 * qual clauses, but has not checked the properties of the SubLink further.
730 * Decide whether it is appropriate to process this SubLink in join style.
731 * Return TRUE if so, FALSE if the SubLink cannot be converted.
733 * The only non-obvious input parameter is available_rels: this is the set
734 * of query rels that can safely be referenced in the sublink expression.
735 * (We must restrict this to avoid changing the semantics when a sublink
736 * is present in an outer join's ON qual.) The conversion must fail if
737 * the converted qual would reference any but these parent-query relids.
739 * On success, two output parameters are returned:
740 * *new_qual is set to the qual tree that should replace the SubLink in
741 * the parent query's qual tree. The qual clauses are wrapped in a
742 * FlattenedSubLink node to help later processing place them properly.
743 * *fromlist is set to a list of pulled-up jointree item(s) that must be
744 * added at the proper spot in the parent query's jointree.
746 * Side effects of a successful conversion include adding the SubLink's
747 * subselect to the query's rangetable.
/*
 * convert_ANY_sublink_to_join
 *
 * root:           planner state; root->parse->rtable gains one entry on
 *                 success.
 * sublink:        the ANY SubLink under consideration (asserted).
 * available_rels: parent-query rels the converted qual may reference.
 * new_qual:       output; receives the FlattenedSubLink node.
 * fromlist:       output; receives a one-element list holding the
 *                 RangeTblRef for the pulled-up sub-select.
 *
 * Visible rejection tests: the sub-select references level-1 Vars; the
 * testexpr has no Vars of the current query; the testexpr Vars are not a
 * subset of available_rels; the testexpr contains volatile functions.
 *
 * NOTE(review): the "return false" / "return true" statements and some
 * call arguments are not present in this listing.
 */
750 convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
751 Relids available_rels,
752 Node **new_qual, List **fromlist)
754 Query *parse = root->parse;
755 Query *subselect = (Query *) sublink->subselect;
762 FlattenedSubLink *fslink;
764 Assert(sublink->subLinkType == ANY_SUBLINK);
767 * The sub-select must not refer to any Vars of the parent query. (Vars of
768 * higher levels should be okay, though.)
770 if (contain_vars_of_level((Node *) subselect, 1))
774 * The test expression must contain some Vars of the current query,
775 * else it's not gonna be a join. (Note that it won't have Vars
776 * referring to the subquery, rather Params.)
778 left_varnos = pull_varnos(sublink->testexpr);
779 if (bms_is_empty(left_varnos))
783 * However, it can't refer to anything outside available_rels.
785 if (!bms_is_subset(left_varnos, available_rels))
789 * The combining operators and left-hand expressions mustn't be volatile.
791 if (contain_volatile_functions(sublink->testexpr))
795 * Okay, pull up the sub-select into upper range table.
797 * We rely here on the assumption that the outer query has no references
798 * to the inner (necessarily true, other than the Vars that we build
799 * below). Therefore this is a lot easier than what pull_up_subqueries has
802 rte = addRangeTableEntryForSubquery(NULL,
804 makeAlias("ANY_subquery", NIL),
806 parse->rtable = lappend(parse->rtable, rte);
/* Read after the append: the 1-based range-table index of the new RTE */
807 rtindex = list_length(parse->rtable);
810 * Form a RangeTblRef for the pulled-up sub-select. This must be added
811 * to the upper jointree, but it is caller's responsibility to figure
814 rtr = makeNode(RangeTblRef);
815 rtr->rtindex = rtindex;
816 *fromlist = list_make1(rtr);
819 * Build a list of Vars representing the subselect outputs.
821 subquery_vars = generate_subquery_vars(root,
822 subselect->targetList,
826 * Build the replacement qual expression, replacing Params with these Vars.
828 quals = (Expr *) convert_testexpr(root,
833 * And finally, build the FlattenedSubLink node.
/* Semi-join: left side is the parent rels in the testexpr, right side the new RTE */
835 fslink = makeNode(FlattenedSubLink);
836 fslink->jointype = JOIN_SEMI;
837 fslink->lefthand = left_varnos;
838 fslink->righthand = bms_make_singleton(rtindex);
839 fslink->quals = quals;
841 *new_qual = (Node *) fslink;
847 * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
849 * The only thing that matters about an EXISTS query is whether it returns
850 * zero or more than zero rows. Therefore, we can remove certain SQL features
851 * that won't affect that. The only part that is really likely to matter in
852 * typical usage is simplifying the targetlist: it's a common habit to write
853 * "SELECT * FROM" even though there is no need to evaluate any columns.
855 * Note: by suppressing the targetlist we could cause an observable behavioral
856 * change, namely that any errors that might occur in evaluating the tlist
857 * won't occur, nor will other side-effects of volatile functions. This seems
858 * unlikely to bother anyone in practice.
860 * Returns TRUE if it was able to discard the targetlist, else FALSE.
/*
 * simplify_EXISTS_query
 *
 * query: the EXISTS sub-Query; modified in place when simplification
 *        applies.
 *
 * When none of the disqualifying conditions hold and the targetlist has
 * no set-returning expressions, the targetlist, GROUP BY, DISTINCT and
 * ORDER BY lists are cleared and hasDistinctOn is reset.
 *
 * NOTE(review): only part of the disqualifying condition (commandType,
 * setOperations, limitOffset) and none of the return statements are
 * present in this listing; the comment inside the function lists the full
 * set of features that block simplification.
 */
863 simplify_EXISTS_query(Query *query)
866 * We don't try to simplify at all if the query uses set operations,
867 * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
868 * seem likely in normal usage and their possible effects are complex.
870 if (query->commandType != CMD_SELECT ||
872 query->setOperations ||
875 query->limitOffset ||
881 * Mustn't throw away the targetlist if it contains set-returning
882 * functions; those could affect whether zero rows are returned!
884 if (expression_returns_set((Node *) query->targetList))
888 * Otherwise, we can throw away the targetlist, as well as any GROUP,
889 * DISTINCT, and ORDER BY clauses; none of those clauses will change
890 * a nonzero-rows result to zero rows or vice versa. (Furthermore,
891 * since our parsetree representation of these clauses depends on the
892 * targetlist, we'd better throw them away if we drop the targetlist.)
894 query->targetList = NIL;
895 query->groupClause = NIL;
896 query->distinctClause = NIL;
897 query->sortClause = NIL;
898 query->hasDistinctOn = false;
904 * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join?
906 * The API of this function is identical to convert_ANY_sublink_to_join's,
907 * except that we also support the case where the caller has found NOT EXISTS,
908 * so we need an additional input parameter "under_not".
/*
 * convert_EXISTS_sublink_to_join
 *
 * root:           planner state; root->parse->rtable is extended on
 *                 success.
 * sublink:        the EXISTS SubLink under consideration (asserted).
 * available_rels: parent-query rels the converted qual may reference.
 * new_qual:       output; receives the FlattenedSubLink node.
 * fromlist:       output; receives the sub-select's jointree fromlist.
 *
 * "under_not" (used below to choose JOIN_ANTI over JOIN_SEMI) is a
 * parameter whose declaration line is not present in this listing.
 *
 * Visible rejection tests: the sub-select cannot be simplified; what is
 * left after detaching the WHERE clause references level-1 Vars; the
 * WHERE clause has no level-1 Vars, is volatile, or contains subplans;
 * the parent-side rels used are not a subset of available_rels.
 *
 * NOTE(review): the "return false" / "return true" statements are not
 * present in this listing.
 */
911 convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
913 Relids available_rels,
914 Node **new_qual, List **fromlist)
916 Query *parse = root->parse;
917 Query *subselect = (Query *) sublink->subselect;
921 Relids clause_varnos;
924 Relids subselect_varnos;
925 FlattenedSubLink *fslink;
927 Assert(sublink->subLinkType == EXISTS_SUBLINK);
930 * Copy the subquery so we can modify it safely (see comments in
933 subselect = (Query *) copyObject(subselect);
936 * See if the subquery can be simplified based on the knowledge that
937 * it's being used in EXISTS(). If we aren't able to get rid of its
938 * targetlist, we have to fail, because the pullup operation leaves
939 * us with noplace to evaluate the targetlist.
941 if (!simplify_EXISTS_query(subselect))
945 * Separate out the WHERE clause. (We could theoretically also remove
946 * top-level plain JOIN/ON clauses, but it's probably not worth the
/* Detach the quals so the checks below see the sub-select without its WHERE */
949 whereClause = subselect->jointree->quals;
950 subselect->jointree->quals = NULL;
953 * The rest of the sub-select must not refer to any Vars of the parent
954 * query. (Vars of higher levels should be okay, though.)
956 if (contain_vars_of_level((Node *) subselect, 1))
960 * On the other hand, the WHERE clause must contain some Vars of the
961 * parent query, else it's not gonna be a join.
963 if (!contain_vars_of_level(whereClause, 1))
967 * We don't risk optimizing if the WHERE clause is volatile, either.
969 if (contain_volatile_functions(whereClause))
973 * Also disallow SubLinks within the WHERE clause. (XXX this could
974 * probably be supported, but it would complicate the transformation
975 * below, and it doesn't seem worth worrying about in a first pass.)
977 if (contain_subplans(whereClause))
981 * Prepare to pull up the sub-select into top range table.
983 * We rely here on the assumption that the outer query has no references
984 * to the inner (necessarily true). Therefore this is a lot easier than
985 * what pull_up_subqueries has to go through.
987 * In fact, it's even easier than what convert_ANY_sublink_to_join has
988 * to do. The machinations of simplify_EXISTS_query ensured that there
989 * is nothing interesting in the subquery except an rtable and jointree,
990 * and even the jointree FromExpr no longer has quals. So we can just
991 * append the rtable to our own and attach the fromlist to our own.
992 * But first, adjust all level-zero varnos in the subquery to account
993 * for the rtable merger.
/*
 * rtoffset is the parent's current rtable length; the detached WHERE
 * clause is adjusted separately because it is no longer reachable from
 * the subselect tree.
 */
995 rtoffset = list_length(parse->rtable);
996 OffsetVarNodes((Node *) subselect, rtoffset, 0);
997 OffsetVarNodes(whereClause, rtoffset, 0);
1000 * Upper-level vars in subquery will now be one level closer to their
1001 * parent than before; in particular, anything that had been level 1
1002 * becomes level zero.
1004 IncrementVarSublevelsUp((Node *) subselect, -1, 1);
1005 IncrementVarSublevelsUp(whereClause, -1, 1);
1008 * Now that the WHERE clause is adjusted to match the parent query
1009 * environment, we can easily identify all the level-zero rels it uses.
1010 * The ones <= rtoffset are "left rels" of the join we're forming,
1011 * and the ones > rtoffset are "right rels".
1013 clause_varnos = pull_varnos(whereClause);
1014 left_varnos = right_varnos = NULL;
1015 while ((varno = bms_first_member(clause_varnos)) >= 0)
1017 if (varno <= rtoffset)
1018 left_varnos = bms_add_member(left_varnos, varno);
1020 right_varnos = bms_add_member(right_varnos, varno);
1022 bms_free(clause_varnos);
1023 Assert(!bms_is_empty(left_varnos));
1026 * Now that we've got the set of upper-level varnos, we can make the
1027 * last check: only available_rels can be referenced.
1029 if (!bms_is_subset(left_varnos, available_rels))
1032 /* Identify all the rels syntactically within the subselect */
1033 subselect_varnos = get_relids_in_jointree((Node *) subselect->jointree,
1035 Assert(bms_is_subset(right_varnos, subselect_varnos));
1037 /* Now we can attach the modified subquery rtable to the parent */
1038 parse->rtable = list_concat(parse->rtable, subselect->rtable);
1041 * Pass back the subquery fromlist to be attached to upper jointree
1042 * in a suitable place.
1044 *fromlist = subselect->jointree->fromlist;
1047 * And finally, build the FlattenedSubLink node.
/*
 * The right-hand side is every rel inside the subselect's jointree, not
 * just the rels mentioned in the WHERE clause (right_varnos).
 */
1049 fslink = makeNode(FlattenedSubLink);
1050 fslink->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
1051 fslink->lefthand = left_varnos;
1052 fslink->righthand = subselect_varnos;
1053 fslink->quals = (Expr *) whereClause;
1055 *new_qual = (Node *) fslink;
1061 * Replace correlation vars (uplevel vars) with Params.
1063 * Uplevel aggregates are replaced, too.
1065 * Note: it is critical that this runs immediately after SS_process_sublinks.
1066 * Since we do not recurse into the arguments of uplevel aggregates, they will
1067 * get copied to the appropriate subplan args list in the parent query with
1068 * uplevel vars not replaced by Params, but only adjusted in level (see
1069 * replace_outer_agg). That's exactly what we want for the vars of the parent
1070 * level --- but if an aggregate's argument contains any further-up variables,
1071 * they have to be replaced with Params in their turn. That will happen when
1072 * the parent level runs SS_replace_correlation_vars. Therefore it must do
1073 * so after expanding its sublinks to subplans. And we don't want any steps
1074 * in between, else those steps would never get applied to the aggregate
1075 * argument expressions, either in the parent or the child level.
/*
 * SS_replace_correlation_vars
 *
 * root: planner state, passed to the mutator as its context argument.
 * expr: expression tree to process.
 *
 * Thin entry point: returns the result of
 * replace_correlation_vars_mutator applied to expr.
 */
1078 SS_replace_correlation_vars(PlannerInfo *root, Node *expr)
1080 /* No setup needed for tree walk, so away we go */
1081 return replace_correlation_vars_mutator(expr, root);
/*
 * replace_correlation_vars_mutator
 *
 * node: expression (sub)tree being rewritten.
 * root: planner state, forwarded to the replace_outer_* helpers.
 *
 * A Var with varlevelsup > 0 is replaced via replace_outer_var, and an
 * Aggref with agglevelsup > 0 via replace_outer_agg; in both cases the
 * replacement is returned without descending into the node.  Everything
 * else is handled by recursing through expression_tree_mutator.
 *
 * NOTE(review): the NULL check and the IsA(node, Var) test guarding the
 * first branch are not present in this listing -- confirm.
 */
1085 replace_correlation_vars_mutator(Node *node, PlannerInfo *root)
1091 if (((Var *) node)->varlevelsup > 0)
1092 return (Node *) replace_outer_var(root, (Var *) node);
1094 if (IsA(node, Aggref))
1096 if (((Aggref *) node)->agglevelsup > 0)
1097 return (Node *) replace_outer_agg(root, (Aggref *) node);
1099 return expression_tree_mutator(node,
1100 replace_correlation_vars_mutator,
1105 * Expand SubLinks to SubPlans in the given expression.
1107 * The isQual argument tells whether or not this expression is a WHERE/HAVING
1108 * qualifier expression. If it is, any sublinks appearing at top level need
1109 * not distinguish FALSE from UNKNOWN return values.
/*
 * SS_process_sublinks
 *
 * root:   planner state, stored in the mutator context.
 * expr:   expression tree to process.
 * isQual: stored as the context's initial isTopQual flag.
 *
 * Sets up a stack-allocated context and returns the result of
 * process_sublinks_mutator applied to expr.
 */
1112 SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual)
1114 process_sublinks_context context;
1116 context.root = root;
1117 context.isTopQual = isQual;
1118 return process_sublinks_mutator(expr, &context);
/*
 * process_sublinks_mutator
 *
 * node:    expression (sub)tree being rewritten.
 * context: carries the planner root and whether "node" is at qual top
 *          level.
 *
 * A private context (locContext) is used for recursion so that the
 * caller's isTopQual flag is never modified:
 *   - SubLink: its testexpr is processed first with isTopQual = false,
 *     then make_subplan is called with the caller's own isTopQual;
 *   - AND clause: arguments are processed with the caller's isTopQual, and
 *     any argument that comes back as an AND has its args spliced in;
 *   - OR clause: same splicing for nested ORs, but with isTopQual = false;
 *   - anything else: recurse via expression_tree_mutator with
 *     isTopQual = false.
 *
 * NOTE(review): the NULL check, the else keywords, and some call
 * arguments are not present in this listing.
 */
1122 process_sublinks_mutator(Node *node, process_sublinks_context *context)
1124 process_sublinks_context locContext;
1126 locContext.root = context->root;
1130 if (IsA(node, SubLink))
1132 SubLink *sublink = (SubLink *) node;
1136 * First, recursively process the lefthand-side expressions, if any.
1137 * They're not top-level anymore.
1139 locContext.isTopQual = false;
1140 testexpr = process_sublinks_mutator(sublink->testexpr, &locContext);
1143 * Now build the SubPlan node and make the expr to return.
/* Note: the SubLink itself keeps the caller's isTopQual, not locContext's */
1145 return make_subplan(context->root,
1148 context->isTopQual);
1152 * We should never see a SubPlan expression in the input (since this is
1153 * the very routine that creates 'em to begin with). We shouldn't find
1154 * ourselves invoked directly on a Query, either.
1156 Assert(!is_subplan(node));
1157 Assert(!IsA(node, Query));
1160 * Because make_subplan() could return an AND or OR clause, we have to
1161 * take steps to preserve AND/OR flatness of a qual. We assume the input
1162 * has been AND/OR flattened and so we need no recursion here.
1164 * If we recurse down through anything other than an AND node, we are
1165 * definitely not at top qual level anymore. (Due to the coding here, we
1166 * will not get called on the List subnodes of an AND, so no check is
1169 if (and_clause(node))
1171 List *newargs = NIL;
1174 /* Still at qual top-level */
1175 locContext.isTopQual = context->isTopQual;
1177 foreach(l, ((BoolExpr *) node)->args)
1181 newarg = process_sublinks_mutator(lfirst(l), &locContext);
1182 if (and_clause(newarg))
1183 newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
1185 newargs = lappend(newargs, newarg);
1187 return (Node *) make_andclause(newargs);
1190 /* otherwise not at qual top-level */
1191 locContext.isTopQual = false;
1193 if (or_clause(node))
1195 List *newargs = NIL;
1198 foreach(l, ((BoolExpr *) node)->args)
1202 newarg = process_sublinks_mutator(lfirst(l), &locContext);
1203 if (or_clause(newarg))
1204 newargs = list_concat(newargs, ((BoolExpr *) newarg)->args);
1206 newargs = lappend(newargs, newarg);
1208 return (Node *) make_orclause(newargs);
1211 return expression_tree_mutator(node,
1212 process_sublinks_mutator,
1213 (void *) &locContext);
1217 * SS_finalize_plan - do final sublink processing for a completed Plan.
1219 * This recursively computes the extParam and allParam sets for every Plan
1220 * node in the given plan tree. It also optionally attaches any previously
1221 * generated InitPlans to the top plan node. (Any InitPlans should already
1222 * have been put through SS_finalize_plan.)
/*
 * SS_finalize_plan: entry point for the extParam/allParam computation.
 *
 * root is the planner state for this query level, plan is the top of the
 * plan tree to process, and attach_initplans says whether root->init_plans
 * should be hung on plan once the tree has been walked.
 *
 * Steps visible below:
 *   1. Walk root->init_plans, accumulating the extParam sets of the
 *      initplans (initExtParam), the param ids listed in their setParam
 *      (initSetParam) and their summed cost (initplan_cost).
 *   2. Build valid_params from a copy of initSetParam plus one id for each
 *      entry of root->glob->paramlist whose abslevel is below
 *      root->query_level.
 *   3. Call finalize_plan() on the tree, then free valid_params.
 *   4. If attach_initplans is set: move root->init_plans onto
 *      plan->initPlan, fold the initplan sets into plan->allParam and
 *      plan->extParam, and add initplan_cost to both cost fields.
 *
 * NOTE(review): the start value of initplan_cost, the setup and advance of
 * paramid, and the declarations of the loop cursors fall on lines that are
 * missing from this extract (the embedded numbering has gaps).  Presumably
 * paramid tracks the position within paramlist -- confirm against the full
 * file.
 */
1225 SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans)
1227 Bitmapset *valid_params,
1235 * Examine any initPlans to determine the set of external params they
1236 * reference, the set of output params they supply, and their total cost.
1237 * We'll use at least some of this info below. (Note we are assuming that
1238 * finalize_plan doesn't touch the initPlans.)
1240 * In the case where attach_initplans is false, we are assuming that the
1241 * existing initPlans are siblings that might supply params needed by the
1244 initExtParam = initSetParam = NULL;
1246 foreach(l, root->init_plans)
1248 SubPlan *initsubplan = (SubPlan *) lfirst(l);
1249 Plan *initplan = planner_subplan_get_plan(root, initsubplan);
1252 initExtParam = bms_add_members(initExtParam, initplan->extParam);
/* setParam is an integer list; each id in it is added to initSetParam */
1253 foreach(l2, initsubplan->setParam)
1255 initSetParam = bms_add_member(initSetParam, lfirst_int(l2));
1257 initplan_cost += get_initplan_cost(root, initsubplan);
1261 * Now determine the set of params that are validly referenceable in this
1262 * query level; to wit, those available from outer query levels plus the
1263 * output parameters of any initPlans. (We do not include output
1264 * parameters of regular subplans. Those should only appear within the
1265 * testexpr of SubPlan nodes, and are taken care of locally within
1266 * finalize_primnode.)
1268 * Note: this is a bit overly generous since some parameters of upper
1269 * query levels might belong to query subtrees that don't include this
1270 * query. However, valid_params is only a debugging crosscheck, so it
1271 * doesn't seem worth expending lots of cycles to try to be exact.
/* A copy is taken: valid_params is freed below, initSetParam is used after that */
1273 valid_params = bms_copy(initSetParam);
1275 foreach(l, root->glob->paramlist)
1277 PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l);
1279 if (pitem->abslevel < root->query_level)
1281 /* valid outer-level parameter */
1282 valid_params = bms_add_member(valid_params, paramid);
1289 * Now recurse through plan tree.
/* The set returned by finalize_plan is not needed here, hence the void cast */
1291 (void) finalize_plan(root, plan, valid_params);
1293 bms_free(valid_params);
1296 * Finally, attach any initPlans to the topmost plan node, and add their
1297 * extParams to the topmost node's, too. However, any setParams of the
1298 * initPlans should not be present in the topmost node's extParams, only
1299 * in its allParams. (As of PG 8.1, it's possible that some initPlans
1300 * have extParams that are setParams of other initPlans, so we have to
1301 * take care of this situation explicitly.)
1303 * We also add the eval cost of each initPlan to the startup cost of the
1304 * top node. This is a conservative overestimate, since in fact each
1305 * initPlan might be executed later than plan startup, or even not at all.
1307 if (attach_initplans)
1309 plan->initPlan = root->init_plans;
1310 root->init_plans = NIL; /* make sure they're not attached twice */
1312 /* allParam must include all these params */
1313 plan->allParam = bms_add_members(plan->allParam, initExtParam);
1314 plan->allParam = bms_add_members(plan->allParam, initSetParam);
1315 /* extParam must include any child extParam */
1316 plan->extParam = bms_add_members(plan->extParam, initExtParam);
1317 /* but extParam shouldn't include any setParams */
1318 plan->extParam = bms_del_members(plan->extParam, initSetParam);
1319 /* ensure extParam is exactly NULL if it's empty */
1320 if (bms_is_empty(plan->extParam))
1321 plan->extParam = NULL;
/* The same initplan_cost is added to the startup and the total cost */
1323 plan->startup_cost += initplan_cost;
1324 plan->total_cost += initplan_cost;
1329 * Recursive processing of all nodes in the plan tree
1331 * The return value is the computed allParam set for the given Plan node.
1332 * This is just an internal notational convenience.
/*
 * finalize_plan: recursive worker for SS_finalize_plan.
 *
 * root is the planner state, plan the node to process and valid_params the
 * set of param ids this node is allowed to reference.
 *
 * The function gathers param ids into context.paramids:
 *   - finalize_primnode() is run over the target list and the qual of every
 *     node;
 *   - a switch on nodeTag(plan) then runs finalize_primnode() over the
 *     expression fields specific to the node type (index quals, tid quals,
 *     function expression, values lists, join quals, merge and hash
 *     clauses, limit offset and count, and so on);
 *   - for a SubqueryScan the extParam set already stored in its subplan is
 *     merged in instead of walking that subplan again;
 *   - for Append, BitmapAnd and BitmapOr each member plan contributes a set
 *     through bms_add_members;
 *   - two further bms_add_members calls after the switch handle the left
 *     and right child plans.
 *
 * The collected set is then checked with bms_is_subset against
 * valid_params; a failure raises elog(ERROR).  elog(ERROR) is also raised
 * with an unrecognized-node-type message, presumably from the default arm
 * of the switch.
 *
 * Finally the set is stored on the node: both extParam and allParam become
 * NULL when the set is empty, otherwise extParam takes the set itself and
 * allParam takes a bms_copy of it.  The return value is plan->allParam.
 *
 * NOTE(review): most case labels, break statements and the trailing
 * arguments of many calls sit on lines that are missing from this extract
 * (the embedded numbering has gaps), so which node type each visible call
 * belongs to can only be read off the casts, and what exactly is merged
 * for member and child plans is cut off -- confirm against the full file.
 */
1335 finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params)
1337 finalize_primnode_context context;
1342 context.root = root;
1343 context.paramids = NULL; /* initialize set to empty */
1346 * When we call finalize_primnode, context.paramids sets are automatically
1347 * merged together. But when recursing to self, we have to do it the hard
1348 * way. We want the paramids set to include params in subplans as well as
1352 /* Find params in targetlist and qual */
1353 finalize_primnode((Node *) plan->targetlist, &context);
1354 finalize_primnode((Node *) plan->qual, &context);
1356 /* Check additional node-type-specific fields */
1357 switch (nodeTag(plan))
1360 finalize_primnode(((Result *) plan)->resconstantqual,
1365 finalize_primnode((Node *) ((IndexScan *) plan)->indexqual,
1369 * we need not look at indexqualorig, since it will have the same
1370 * param references as indexqual.
1374 case T_BitmapIndexScan:
1375 finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual,
1379 * we need not look at indexqualorig, since it will have the same
1380 * param references as indexqual.
1384 case T_BitmapHeapScan:
1385 finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig,
1390 finalize_primnode((Node *) ((TidScan *) plan)->tidquals,
1394 case T_SubqueryScan:
1397 * In a SubqueryScan, SS_finalize_plan has already been run on the
1398 * subplan by the inner invocation of subquery_planner, so there's
1399 * no need to do it again. Instead, just pull out the subplan's
1400 * extParams list, which represents the params it needs from my
1401 * level and higher levels.
1403 context.paramids = bms_add_members(context.paramids,
1404 ((SubqueryScan *) plan)->subplan->extParam);
1407 case T_FunctionScan:
1408 finalize_primnode(((FunctionScan *) plan)->funcexpr,
1413 finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists,
/* List-of-plans nodes: one merge per member plan (call is cut off here) */
1421 foreach(l, ((Append *) plan)->appendplans)
1424 bms_add_members(context.paramids,
1436 foreach(l, ((BitmapAnd *) plan)->bitmapplans)
1439 bms_add_members(context.paramids,
1451 foreach(l, ((BitmapOr *) plan)->bitmapplans)
1454 bms_add_members(context.paramids,
/* Join nodes: joinqual is scanned for each kind, plus merge or hash clauses */
1463 finalize_primnode((Node *) ((Join *) plan)->joinqual,
1468 finalize_primnode((Node *) ((Join *) plan)->joinqual,
1470 finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses,
1475 finalize_primnode((Node *) ((Join *) plan)->joinqual,
1477 finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses,
1482 finalize_primnode(((Limit *) plan)->limitOffset,
1484 finalize_primnode(((Limit *) plan)->limitCount,
1499 elog(ERROR, "unrecognized node type: %d",
1500 (int) nodeTag(plan));
1503 /* Process left and right child plans, if any */
1504 context.paramids = bms_add_members(context.paramids,
1509 context.paramids = bms_add_members(context.paramids,
1514 /* Now we have all the paramids */
/* Cross-check: every collected id has to be a member of valid_params */
1516 if (!bms_is_subset(context.paramids, valid_params))
1517 elog(ERROR, "plan should not reference subplan's variable");
1520 * Note: by definition, extParam and allParam should have the same value
1521 * in any plan node that doesn't have child initPlans. We set them
1522 * equal here, and later SS_finalize_plan will update them properly
1523 * in node(s) that it attaches initPlans to.
1525 * For speed at execution time, make sure extParam/allParam are actually
1526 * NULL if they are empty sets.
1528 if (bms_is_empty(context.paramids))
1530 plan->extParam = NULL;
1531 plan->allParam = NULL;
1535 plan->extParam = context.paramids;
1536 plan->allParam = bms_copy(context.paramids);
1539 return plan->allParam;
1543 * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given
1544 * expression tree to the result set.
/*
 * finalize_primnode: expression walker that gathers PARAM_EXEC ids.
 *
 * node is the expression tree to scan.  context->paramids is the running
 * result set, and context->root is used to look up the Plan that belongs
 * to a SubPlan node.
 *
 * Behaviour visible below, by node kind:
 *   - Param: if its paramkind is PARAM_EXEC, its paramid is added to the
 *     set.  The walker returns false either way.
 *   - SubPlan: the testexpr is scanned, the ids listed in paramIds are then
 *     removed from the set, and the args list is scanned.  Last, a copy of
 *     the extParam set of the subplan Plan, minus the ids listed in
 *     parParam, is merged into the set.  The Plan itself is not walked.
 *     Returns false.
 *   - anything else: handed to expression_tree_walker() with this function
 *     as the callback, and that result is returned.
 *
 * NOTE(review): the embedded numbering has gaps, so some lines (among them
 * the declaration of the lc cursor and any guard ahead of the first test)
 * are missing from this extract -- confirm against the full file.
 */
1547 finalize_primnode(Node *node, finalize_primnode_context *context)
1551 if (IsA(node, Param))
/* Params of any other kind are ignored */
1553 if (((Param *) node)->paramkind == PARAM_EXEC)
1555 int paramid = ((Param *) node)->paramid;
1557 context->paramids = bms_add_member(context->paramids, paramid);
1559 return false; /* no more to do here */
1561 if (is_subplan(node))
1563 SubPlan *subplan = (SubPlan *) node;
1564 Plan *plan = planner_subplan_get_plan(context->root, subplan);
1566 Bitmapset *subparamids;
1568 /* Recurse into the testexpr, but not into the Plan */
1569 finalize_primnode(subplan->testexpr, context);
1572 * Remove any param IDs of output parameters of the subplan that were
1573 * referenced in the testexpr. These are not interesting for
1574 * parameter change signaling since we always re-evaluate the subplan.
1575 * Note that this wouldn't work too well if there might be uses of the
1576 * same param IDs elsewhere in the plan, but that can't happen because
1577 * generate_new_param never tries to merge params.
1579 foreach(lc, subplan->paramIds)
1581 context->paramids = bms_del_member(context->paramids,
1585 /* Also examine args list */
1586 finalize_primnode((Node *) subplan->args, context);
1589 * Add params needed by the subplan to paramids, but excluding those
1590 * we will pass down to it.
/* Deletions are applied to a copy, not to the extParam set stored in plan */
1592 subparamids = bms_copy(plan->extParam);
1593 foreach(lc, subplan->parParam)
1595 subparamids = bms_del_member(subparamids, lfirst_int(lc));
1597 context->paramids = bms_join(context->paramids, subparamids);
1599 return false; /* no more to do here */
1601 return expression_tree_walker(node, finalize_primnode,
1606 * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
1608 * The plan is expected to return a scalar value of the indicated type.
1609 * We build an EXPR_SUBLINK SubPlan node and put it into the initplan
1610 * list for the current query level. A Param that represents the initplan's
1611 * output is returned.
1613 * We assume the plan hasn't been put through SS_finalize_plan.
1616 SS_make_initplan_from_plan(PlannerInfo *root, Plan *plan,
1617 Oid resulttype, int32 resulttypmod)
1623 * We must run SS_finalize_plan(), since that's normally done before a
1624 * subplan gets put into the initplan list. Tell it not to attach any
1625 * pre-existing initplans to this one, since they are siblings not
1626 * children of this initplan. (This is something else that could perhaps
1627 * be cleaner if we did extParam/allParam processing in setrefs.c instead
1628 * of here? See notes for materialize_finished_plan.)
1632 * Build extParam/allParam sets for plan nodes.
1634 SS_finalize_plan(root, plan, false);
1637 * Add the subplan and its rtable to the global lists.
1639 root->glob->subplans = lappend(root->glob->subplans,
1641 root->glob->subrtables = lappend(root->glob->subrtables,
1642 root->parse->rtable);
1645 * Create a SubPlan node and add it to the outer list of InitPlans.
1646 * Note it has to appear after any other InitPlans it might depend on
1647 * (see comments in ExecReScan).
1649 node = makeNode(SubPlan);
1650 node->subLinkType = EXPR_SUBLINK;
1651 node->firstColType = get_first_col_type(plan);
1652 node->plan_id = list_length(root->glob->subplans);
1654 root->init_plans = lappend(root->init_plans, node);
1657 * The node can't have any inputs (since it's an initplan), so the
1658 * parParam and args lists remain empty.
1662 * Make a Param that will be the subplan's output.
1664 prm = generate_new_param(root, resulttype, resulttypmod);
1665 node->setParam = list_make1_int(prm->paramid);