From bd3daddaf232d95b0c9ba6f99b0170a0147dd8af Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 22 Aug 2008 00:16:04 +0000 Subject: [PATCH] Arrange to convert EXISTS subqueries that are equivalent to hashable IN subqueries into the same thing you'd have gotten from IN (except always with unknownEqFalse = true, so as to get the proper semantics for an EXISTS). I believe this fixes the last case within CVS HEAD in which an EXISTS could give worse performance than an equivalent IN subquery. The tricky part of this is that if the upper query probes the EXISTS for only a few rows, the hashing implementation can actually be worse than the default, and therefore we need to make a cost-based decision about which way to use. But at the time when the planner generates plans for subqueries, it doesn't really know how many times the subquery will be executed. The least invasive solution seems to be to generate both plans and postpone the choice until execution. Therefore, in a query that has been optimized this way, EXPLAIN will show two subplans for the EXISTS, of which only one will actually get executed. There is a lot more that could be done based on this infrastructure: in particular it's interesting to consider switching to the hash plan if we start out using the non-hashed plan but find a lot more upper rows going by than we expected. I have therefore left some minor inefficiencies in place, such as initializing both subplans even though we will currently only use one. --- src/backend/catalog/dependency.c | 4 +- src/backend/executor/execQual.c | 15 +- src/backend/executor/nodeSubplan.c | 92 +++- src/backend/nodes/copyfuncs.c | 20 +- src/backend/nodes/equalfuncs.c | 15 +- src/backend/nodes/outfuncs.c | 15 +- src/backend/optimizer/path/clausesel.c | 5 +- src/backend/optimizer/path/costsize.c | 213 ++++---- src/backend/optimizer/plan/subselect.c | 647 ++++++++++++++++++------- src/backend/optimizer/util/clauses.c | 26 +- src/backend/optimizer/util/var.c | 6 +- src/backend/parser/parse_expr.c | 11 +- src/backend/rewrite/rewriteManip.c | 38 +- src/backend/utils/adt/ruleutils.c | 7 +- src/include/executor/nodeSubplan.h | 8 +- src/include/nodes/execnodes.h | 13 +- src/include/nodes/nodes.h | 4 +- src/include/nodes/primnodes.h | 26 +- src/include/optimizer/clauses.h | 3 +- src/include/optimizer/cost.h | 4 +- src/include/rewrite/rewriteManip.h | 3 +- 21 files changed, 854 insertions(+), 321 deletions(-) diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 5f5c91366f..0c85ac785f 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.78 2008/08/07 01:11:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/dependency.c,v 1.79 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1464,7 +1464,7 @@ find_expr_references_walker(Node *node, context->addrs); /* fall through to examine arguments */ } - else if (is_subplan(node)) + else if (IsA(node, SubPlan)) { /* Extra work needed here if we ever need this case */ elog(ERROR, "already-planned subqueries not supported"); diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 7761485d7c..57b2df9c00 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.231 2008/05/15 00:17:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.232 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3957,6 +3957,19 @@ ExecInitExpr(Expr *node, PlanState *parent) state = (ExprState *) sstate; } break; + case T_AlternativeSubPlan: + { + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; + AlternativeSubPlanState *asstate; + + if (!parent) + elog(ERROR, "AlternativeSubPlan found with no parent plan"); + + asstate = ExecInitAlternativeSubPlan(asplan, parent); + + state = (ExprState *) asstate; + } + break; case T_FieldSelect: { FieldSelect *fselect = (FieldSelect *) node; diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 0c9ec82b01..3ebb045f92 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.93 2008/05/12 00:00:49 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.94 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,6 +29,14 @@ #include "utils/memutils.h" +static Datum ExecSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull, + ExprDoneCond *isDone); +static Datum ExecAlternativeSubPlan(AlternativeSubPlanState *node, + ExprContext *econtext, + bool *isNull, + ExprDoneCond *isDone); static Datum ExecHashSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull); @@ -45,7 +53,7 @@ static bool slotNoNulls(TupleTableSlot *slot); * ExecSubPlan * ---------------------------------------------------------------- */ -Datum +static Datum ExecSubPlan(SubPlanState *node, ExprContext *econtext, bool *isNull, @@ -1066,3 +1074,83 @@ ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent) parent->chgParam = bms_add_member(parent->chgParam, paramid); } } + + +/* + * ExecInitAlternativeSubPlan + * + * Initialize for execution of one of a set of alternative subplans. + */ +AlternativeSubPlanState * +ExecInitAlternativeSubPlan(AlternativeSubPlan *asplan, PlanState *parent) +{ + AlternativeSubPlanState *asstate = makeNode(AlternativeSubPlanState); + double num_calls; + SubPlan *subplan1; + SubPlan *subplan2; + Cost cost1; + Cost cost2; + + asstate->xprstate.evalfunc = (ExprStateEvalFunc) ExecAlternativeSubPlan; + asstate->xprstate.expr = (Expr *) asplan; + + /* + * Initialize subplans. (Can we get away with only initializing the + * one we're going to use?) + */ + asstate->subplans = (List *) ExecInitExpr((Expr *) asplan->subplans, + parent); + + /* + * Select the one to be used. For this, we need an estimate of the + * number of executions of the subplan. We use the number of output + * rows expected from the parent plan node. This is a good estimate + * if we are in the parent's targetlist, and an underestimate (but + * probably not by more than a factor of 2) if we are in the qual. + */ + num_calls = parent->plan->plan_rows; + + /* + * The planner saved enough info so that we don't have to work very hard + * to estimate the total cost, given the number-of-calls estimate. + */ + Assert(list_length(asplan->subplans) == 2); + subplan1 = (SubPlan *) linitial(asplan->subplans); + subplan2 = (SubPlan *) lsecond(asplan->subplans); + + cost1 = subplan1->startup_cost + num_calls * subplan1->per_call_cost; + cost2 = subplan2->startup_cost + num_calls * subplan2->per_call_cost; + + if (cost1 < cost2) + asstate->active = 0; + else + asstate->active = 1; + + return asstate; +} + +/* + * ExecAlternativeSubPlan + * + * Execute one of a set of alternative subplans. + * + * Note: in future we might consider changing to different subplans on the + * fly, in case the original rowcount estimate turns out to be way off. + */ +static Datum +ExecAlternativeSubPlan(AlternativeSubPlanState *node, + ExprContext *econtext, + bool *isNull, + ExprDoneCond *isDone) +{ + /* Just pass control to the active subplan */ + SubPlanState *activesp = (SubPlanState *) list_nth(node->subplans, + node->active); + + Assert(IsA(activesp, SubPlanState)); + + return ExecSubPlan(activesp, + econtext, + isNull, + isDone); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 83322320b0..4a30a3d822 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -15,7 +15,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.400 2008/08/14 18:47:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.401 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -969,6 +969,21 @@ _copySubPlan(SubPlan *from) COPY_NODE_FIELD(setParam); COPY_NODE_FIELD(parParam); COPY_NODE_FIELD(args); + COPY_SCALAR_FIELD(startup_cost); + COPY_SCALAR_FIELD(per_call_cost); + + return newnode; +} + +/* + * _copyAlternativeSubPlan + */ +static AlternativeSubPlan * +_copyAlternativeSubPlan(AlternativeSubPlan *from) +{ + AlternativeSubPlan *newnode = makeNode(AlternativeSubPlan); + + COPY_NODE_FIELD(subplans); return newnode; } @@ -3146,6 +3161,9 @@ copyObject(void *from) case T_SubPlan: retval = _copySubPlan(from); break; + case T_AlternativeSubPlan: + retval = _copyAlternativeSubPlan(from); + break; case T_FieldSelect: retval = _copyFieldSelect(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 831a5fe19b..3a25111c60 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -18,7 +18,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.327 2008/08/14 18:47:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.328 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -314,6 +314,16 @@ _equalSubPlan(SubPlan *a, SubPlan *b) COMPARE_NODE_FIELD(setParam); COMPARE_NODE_FIELD(parParam); COMPARE_NODE_FIELD(args); + COMPARE_SCALAR_FIELD(startup_cost); + COMPARE_SCALAR_FIELD(per_call_cost); + + return true; +} + +static bool +_equalAlternativeSubPlan(AlternativeSubPlan *a, AlternativeSubPlan *b) +{ + COMPARE_NODE_FIELD(subplans); return true; } @@ -2098,6 +2108,9 @@ equal(void *a, void *b) case T_SubPlan: retval = _equalSubPlan(a, b); break; + case T_AlternativeSubPlan: + retval = _equalAlternativeSubPlan(a, b); + break; case T_FieldSelect: retval = _equalFieldSelect(a, b); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 13c824389e..76f8da1bab 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.334 2008/08/14 18:47:58 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.335 2008/08/22 00:16:03 tgl Exp $ * * NOTES * Every node type that can appear in stored rules' parsetrees *must* @@ -846,6 +846,16 @@ _outSubPlan(StringInfo str, SubPlan *node) WRITE_NODE_FIELD(setParam); WRITE_NODE_FIELD(parParam); WRITE_NODE_FIELD(args); + WRITE_FLOAT_FIELD(startup_cost, "%.2f"); + WRITE_FLOAT_FIELD(per_call_cost, "%.2f"); +} + +static void +_outAlternativeSubPlan(StringInfo str, AlternativeSubPlan *node) +{ + WRITE_NODE_TYPE("ALTERNATIVESUBPLAN"); + + WRITE_NODE_FIELD(subplans); } static void @@ -2208,6 +2218,9 @@ _outNode(StringInfo str, void *obj) case T_SubPlan: _outSubPlan(str, obj); break; + case T_AlternativeSubPlan: + _outAlternativeSubPlan(str, obj); + break; case T_FieldSelect: _outFieldSelect(str, obj); break; diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 6db42c5f58..9ffdc32e77 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.92 2008/08/16 00:01:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.93 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -680,7 +680,8 @@ clause_selectivity(PlannerInfo *root, s1 = (Selectivity) 0.3333333; } #ifdef NOT_USED - else if (is_subplan(clause)) + else if (IsA(clause, SubPlan) || + IsA(clause, AlternativeSubPlan)) { /* * Just for the moment! FIX ME! - vadim 02/04/98 diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 14c3cf1f21..487de9ee93 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -54,7 +54,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.194 2008/08/16 00:01:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.195 2008/08/22 00:16:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1868,6 +1868,93 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo) } +/* + * cost_subplan + * Figure the costs for a SubPlan (or initplan). + * + * Note: we could dig the subplan's Plan out of the root list, but in practice + * all callers have it handy already, so we make them pass it. + */ +void +cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan) +{ + QualCost sp_cost; + + /* Figure any cost for evaluating the testexpr */ + cost_qual_eval(&sp_cost, + make_ands_implicit((Expr *) subplan->testexpr), + root); + + if (subplan->useHashTable) + { + /* + * If we are using a hash table for the subquery outputs, then the + * cost of evaluating the query is a one-time cost. We charge one + * cpu_operator_cost per tuple for the work of loading the hashtable, + * too. + */ + sp_cost.startup += plan->total_cost + + cpu_operator_cost * plan->plan_rows; + + /* + * The per-tuple costs include the cost of evaluating the lefthand + * expressions, plus the cost of probing the hashtable. We already + * accounted for the lefthand expressions as part of the testexpr, + * and will also have counted one cpu_operator_cost for each + * comparison operator. That is probably too low for the probing + * cost, but it's hard to make a better estimate, so live with it for + * now. + */ + } + else + { + /* + * Otherwise we will be rescanning the subplan output on each + * evaluation. We need to estimate how much of the output we will + * actually need to scan. NOTE: this logic should agree with the + * tuple_fraction estimates used by make_subplan() in + * plan/subselect.c. + */ + Cost plan_run_cost = plan->total_cost - plan->startup_cost; + + if (subplan->subLinkType == EXISTS_SUBLINK) + { + /* we only need to fetch 1 tuple */ + sp_cost.per_tuple += plan_run_cost / plan->plan_rows; + } + else if (subplan->subLinkType == ALL_SUBLINK || + subplan->subLinkType == ANY_SUBLINK) + { + /* assume we need 50% of the tuples */ + sp_cost.per_tuple += 0.50 * plan_run_cost; + /* also charge a cpu_operator_cost per row examined */ + sp_cost.per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost; + } + else + { + /* assume we need all tuples */ + sp_cost.per_tuple += plan_run_cost; + } + + /* + * Also account for subplan's startup cost. If the subplan is + * uncorrelated or undirect correlated, AND its topmost node is a Sort + * or Material node, assume that we'll only need to pay its startup + * cost once; otherwise assume we pay the startup cost every time. + */ + if (subplan->parParam == NIL && + (IsA(plan, Sort) || + IsA(plan, Material))) + sp_cost.startup += plan->startup_cost; + else + sp_cost.per_tuple += plan->startup_cost; + } + + subplan->startup_cost = sp_cost.startup; + subplan->per_call_cost = sp_cost.per_tuple; +} + + /* * cost_qual_eval * Estimate the CPU costs of evaluating a WHERE clause. @@ -2066,79 +2153,30 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) * subplan will be executed on each evaluation, so charge accordingly. * (Sub-selects that can be executed as InitPlans have already been * removed from the expression.) - * - * An exception occurs when we have decided we can implement the - * subplan by hashing. */ SubPlan *subplan = (SubPlan *) node; - Plan *plan = planner_subplan_get_plan(context->root, subplan); - if (subplan->useHashTable) - { - /* - * If we are using a hash table for the subquery outputs, then the - * cost of evaluating the query is a one-time cost. We charge one - * cpu_operator_cost per tuple for the work of loading the - * hashtable, too. - */ - context->total.startup += plan->total_cost + - cpu_operator_cost * plan->plan_rows; - - /* - * The per-tuple costs include the cost of evaluating the lefthand - * expressions, plus the cost of probing the hashtable. Recursion - * into the testexpr will handle the lefthand expressions - * properly, and will count one cpu_operator_cost for each - * comparison operator. That is probably too low for the probing - * cost, but it's hard to make a better estimate, so live with it - * for now. - */ - } - else - { - /* - * Otherwise we will be rescanning the subplan output on each - * evaluation. We need to estimate how much of the output we will - * actually need to scan. NOTE: this logic should agree with - * get_initplan_cost, below, and with the estimates used by - * make_subplan() in plan/subselect.c. - */ - Cost plan_run_cost = plan->total_cost - plan->startup_cost; + context->total.startup += subplan->startup_cost; + context->total.per_tuple += subplan->per_call_cost; - if (subplan->subLinkType == EXISTS_SUBLINK) - { - /* we only need to fetch 1 tuple */ - context->total.per_tuple += plan_run_cost / plan->plan_rows; - } - else if (subplan->subLinkType == ALL_SUBLINK || - subplan->subLinkType == ANY_SUBLINK) - { - /* assume we need 50% of the tuples */ - context->total.per_tuple += 0.50 * plan_run_cost; - /* also charge a cpu_operator_cost per row examined */ - context->total.per_tuple += - 0.50 * plan->plan_rows * cpu_operator_cost; - } - else - { - /* assume we need all tuples */ - context->total.per_tuple += plan_run_cost; - } + /* + * We don't want to recurse into the testexpr, because it was already + * counted in the SubPlan node's costs. So we're done. + */ + return false; + } + else if (IsA(node, AlternativeSubPlan)) + { + /* + * Arbitrarily use the first alternative plan for costing. (We should + * certainly only include one alternative, and we don't yet have + * enough information to know which one the executor is most likely + * to use.) + */ + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; - /* - * Also account for subplan's startup cost. If the subplan is - * uncorrelated or undirect correlated, AND its topmost node is a - * Sort or Material node, assume that we'll only need to pay its - * startup cost once; otherwise assume we pay the startup cost - * every time. - */ - if (subplan->parParam == NIL && - (IsA(plan, Sort) || - IsA(plan, Material))) - context->total.startup += plan->startup_cost; - else - context->total.per_tuple += plan->startup_cost; - } + return cost_qual_eval_walker((Node *) linitial(asplan->subplans), + context); } /* recurse into children */ @@ -2147,43 +2185,6 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) } -/* - * get_initplan_cost - * Get the expected cost of evaluating an initPlan. - * - * Keep this in sync with cost_qual_eval_walker's handling of subplans, above, - * and with the estimates used by make_subplan() in plan/subselect.c. - */ -Cost -get_initplan_cost(PlannerInfo *root, SubPlan *subplan) -{ - Cost result; - Plan *plan = planner_subplan_get_plan(root, subplan); - - /* initPlans never use hashtables */ - Assert(!subplan->useHashTable); - /* they are never ALL or ANY, either */ - Assert(!(subplan->subLinkType == ALL_SUBLINK || - subplan->subLinkType == ANY_SUBLINK)); - - if (subplan->subLinkType == EXISTS_SUBLINK) - { - /* we only need to fetch 1 tuple */ - Cost plan_run_cost = plan->total_cost - plan->startup_cost; - - result = plan->startup_cost; - result += plan_run_cost / plan->plan_rows; - } - else - { - /* assume we need all tuples */ - result = plan->total_cost; - } - - return result; -} - - /* * approx_tuple_count * Quick-and-dirty estimation of the number of join rows passing diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 6e6e340759..ee2d936b34 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.137 2008/08/20 19:58:24 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.138 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,6 +52,9 @@ typedef struct finalize_primnode_context } finalize_primnode_context; +static Node *build_subplan(PlannerInfo *root, Plan *plan, List *rtable, + SubLinkType subLinkType, Node *testexpr, + bool adjust_testexpr, bool unknownEqFalse); static List *generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds); static List *generate_subquery_vars(PlannerInfo *root, List *tlist, @@ -61,9 +64,12 @@ static Node *convert_testexpr(PlannerInfo *root, List *subst_nodes); static Node *convert_testexpr_mutator(Node *node, convert_testexpr_context *context); -static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan); +static bool subplan_is_hashable(Plan *plan); +static bool testexpr_is_hashable(Node *testexpr); static bool hash_ok_operator(OpExpr *expr); static bool simplify_EXISTS_query(Query *query); +static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, + Node **testexpr, List **paramIds); static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); static Node *process_sublinks_mutator(Node *node, process_sublinks_context *context); @@ -229,9 +235,13 @@ get_first_col_type(Plan *plan) /* * Convert a SubLink (as created by the parser) into a SubPlan. * - * We are given the original SubLink and the already-processed testexpr - * (use this instead of the SubLink's own field). We are also told if - * this expression appears at top level of a WHERE/HAVING qual. + * We are given the SubLink's contained query, type, and testexpr. We are + * also told if this expression appears at top level of a WHERE/HAVING qual. + * + * Note: we assume that the testexpr has been AND/OR flattened (actually, + * it's been through eval_const_expressions), but not converted to + * implicit-AND form; and any SubLinks in it should already have been + * converted to SubPlans. The subquery is as yet untouched, however. * * The result is whatever we need to substitute in place of the SubLink * node in the executable expression. This will be either the SubPlan @@ -240,16 +250,14 @@ get_first_col_type(Plan *plan) * tree containing InitPlan Param nodes. */ static Node * -make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) +make_subplan(PlannerInfo *root, Query *orig_subquery, SubLinkType subLinkType, + Node *testexpr, bool isTopQual) { - Query *subquery = (Query *) (slink->subselect); + Query *subquery; + bool simple_exists = false; double tuple_fraction; - SubPlan *splan; Plan *plan; PlannerInfo *subroot; - bool isInitPlan; - Bitmapset *tmpset; - int paramid; Node *result; /* @@ -258,38 +266,37 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) * same sub-Query node, but the planner wants to scribble on the Query. * Try to clean this up when we do querytree redesign... */ - subquery = (Query *) copyObject(subquery); + subquery = (Query *) copyObject(orig_subquery); /* * If it's an EXISTS subplan, we might be able to simplify it. */ - if (slink->subLinkType == EXISTS_SUBLINK) - (void) simplify_EXISTS_query(subquery); + if (subLinkType == EXISTS_SUBLINK) + simple_exists = simplify_EXISTS_query(subquery); /* * For an EXISTS subplan, tell lower-level planner to expect that only the * first tuple will be retrieved. For ALL and ANY subplans, we will be - * able to stop evaluating if the test condition fails, so very often not - * all the tuples will be retrieved; for lack of a better idea, specify - * 50% retrieval. For EXPR and ROWCOMPARE subplans, use default behavior - * (we're only expecting one row out, anyway). + * able to stop evaluating if the test condition fails or matches, so very + * often not all the tuples will be retrieved; for lack of a better idea, + * specify 50% retrieval. For EXPR and ROWCOMPARE subplans, use default + * behavior (we're only expecting one row out, anyway). * - * NOTE: if you change these numbers, also change cost_qual_eval_walker() - * and get_initplan_cost() in path/costsize.c. + * NOTE: if you change these numbers, also change cost_subplan() in + * path/costsize.c. * - * XXX If an ALL/ANY subplan is uncorrelated, we may decide to hash or - * materialize its result below. In that case it would've been better to - * specify full retrieval. At present, however, we can only detect - * correlation or lack of it after we've made the subplan :-(. Perhaps - * detection of correlation should be done as a separate step. Meanwhile, - * we don't want to be too optimistic about the percentage of tuples - * retrieved, for fear of selecting a plan that's bad for the - * materialization case. - */ - if (slink->subLinkType == EXISTS_SUBLINK) + * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash + * its output. In that case it would've been better to specify full + * retrieval. At present, however, we can only check hashability after + * we've made the subplan :-(. (Determining whether it'll fit in work_mem + * is the really hard part.) Therefore, we don't want to be too + * optimistic about the percentage of tuples retrieved, for fear of + * selecting a plan that's bad for the materialization case. + */ + if (subLinkType == EXISTS_SUBLINK) tuple_fraction = 1.0; /* just like a LIMIT 1 */ - else if (slink->subLinkType == ALL_SUBLINK || - slink->subLinkType == ANY_SUBLINK) + else if (subLinkType == ALL_SUBLINK || + subLinkType == ANY_SUBLINK) tuple_fraction = 0.5; /* 50% */ else tuple_fraction = 0.0; /* default behavior */ @@ -302,24 +309,104 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) tuple_fraction, &subroot); + /* And convert to SubPlan or InitPlan format. */ + result = build_subplan(root, plan, subroot->parse->rtable, + subLinkType, testexpr, true, isTopQual); + + /* + * If it's a correlated EXISTS with an unimportant targetlist, we might be + * able to transform it to the equivalent of an IN and then implement it + * by hashing. We don't have enough information yet to tell which way + * is likely to be better (it depends on the expected number of executions + * of the EXISTS qual, and we are much too early in planning the outer + * query to be able to guess that). So we generate both plans, if + * possible, and leave it to the executor to decide which to use. + */ + if (simple_exists && IsA(result, SubPlan)) + { + Node *newtestexpr; + List *paramIds; + + /* Make a second copy of the original subquery */ + subquery = (Query *) copyObject(orig_subquery); + /* and re-simplify */ + simple_exists = simplify_EXISTS_query(subquery); + Assert(simple_exists); + /* See if it can be converted to an ANY query */ + subquery = convert_EXISTS_to_ANY(root, subquery, + &newtestexpr, ¶mIds); + if (subquery) + { + /* Generate the plan for the ANY subquery; we'll need all rows */ + plan = subquery_planner(root->glob, subquery, + root->query_level + 1, + 0.0, + &subroot); + + /* Now we can check if it'll fit in work_mem */ + if (subplan_is_hashable(plan)) + { + SubPlan *hashplan; + AlternativeSubPlan *asplan; + + /* OK, convert to SubPlan format. */ + hashplan = (SubPlan *) build_subplan(root, plan, + subroot->parse->rtable, + ANY_SUBLINK, newtestexpr, + false, true); + /* Check we got what we expected */ + Assert(IsA(hashplan, SubPlan)); + Assert(hashplan->parParam == NIL); + Assert(hashplan->useHashTable); + /* build_subplan won't have filled in paramIds */ + hashplan->paramIds = paramIds; + + /* Leave it to the executor to decide which plan to use */ + asplan = makeNode(AlternativeSubPlan); + asplan->subplans = list_make2(result, hashplan); + result = (Node *) asplan; + } + } + } + + return result; +} + +/* + * Build a SubPlan node given the raw inputs --- subroutine for make_subplan + * + * Returns either the SubPlan, or an expression using initplan output Params, + * as explained in the comments for make_subplan. + */ +static Node * +build_subplan(PlannerInfo *root, Plan *plan, List *rtable, + SubLinkType subLinkType, Node *testexpr, + bool adjust_testexpr, bool unknownEqFalse) +{ + Node *result; + SubPlan *splan; + bool isInitPlan; + Bitmapset *tmpset; + int paramid; + /* - * Initialize the SubPlan node. Note plan_id isn't set yet. + * Initialize the SubPlan node. Note plan_id isn't set till further down, + * likewise the cost fields. */ splan = makeNode(SubPlan); - splan->subLinkType = slink->subLinkType; + splan->subLinkType = subLinkType; splan->testexpr = NULL; splan->paramIds = NIL; splan->firstColType = get_first_col_type(plan); splan->useHashTable = false; - /* At top level of a qual, can treat UNKNOWN the same as FALSE */ - splan->unknownEqFalse = isTopQual; + splan->unknownEqFalse = unknownEqFalse; splan->setParam = NIL; splan->parParam = NIL; splan->args = NIL; /* - * Make parParam list of params that current query level will pass to this - * child plan. + * Make parParam and args lists of param IDs and expressions that current + * query level will pass to this child plan. */ tmpset = bms_copy(plan->extParam); while ((paramid = bms_first_member(tmpset)) >= 0) @@ -327,7 +414,15 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) PlannerParamItem *pitem = list_nth(root->glob->paramlist, paramid); if (pitem->abslevel == root->query_level) + { splan->parParam = lappend_int(splan->parParam, paramid); + /* + * The Var or Aggref has already been adjusted to have the correct + * varlevelsup or agglevelsup. We probably don't even need to + * copy it again, but be safe. + */ + splan->args = lappend(splan->args, copyObject(pitem->item)); + } } bms_free(tmpset); @@ -339,21 +434,23 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) * PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted by the * parser. */ - if (splan->parParam == NIL && slink->subLinkType == EXISTS_SUBLINK) + if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK) { Param *prm; + Assert(testexpr == NULL); prm = generate_new_param(root, BOOLOID, -1); splan->setParam = list_make1_int(prm->paramid); isInitPlan = true; result = (Node *) prm; } - else if (splan->parParam == NIL && slink->subLinkType == EXPR_SUBLINK) + else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK) { TargetEntry *te = linitial(plan->targetlist); Param *prm; Assert(!te->resjunk); + Assert(testexpr == NULL); prm = generate_new_param(root, exprType((Node *) te->expr), exprTypmod((Node *) te->expr)); @@ -361,13 +458,14 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) isInitPlan = true; result = (Node *) prm; } - else if (splan->parParam == NIL && slink->subLinkType == ARRAY_SUBLINK) + else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK) { TargetEntry *te = linitial(plan->targetlist); Oid arraytype; Param *prm; Assert(!te->resjunk); + Assert(testexpr == NULL); arraytype = get_array_type(exprType((Node *) te->expr)); if (!OidIsValid(arraytype)) elog(ERROR, "could not find array type for datatype %s", @@ -379,11 +477,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) isInitPlan = true; result = (Node *) prm; } - else if (splan->parParam == NIL && slink->subLinkType == ROWCOMPARE_SUBLINK) + else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK) { /* Adjust the Params */ List *params; + Assert(testexpr != NULL); params = generate_subquery_params(root, plan->targetlist, &splan->paramIds); @@ -400,14 +499,14 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) } else { - List *args; - ListCell *l; - - if (testexpr) + /* + * Adjust the Params in the testexpr, unless caller said it's not + * needed. + */ + if (testexpr && adjust_testexpr) { List *params; - /* Adjust the Params in the testexpr */ params = generate_subquery_params(root, plan->targetlist, &splan->paramIds); @@ -415,15 +514,20 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) testexpr, params); } + else + splan->testexpr = testexpr; /* * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to * initPlans, even when they are uncorrelated or undirect correlated, * because we need to scan the output of the subplan for each outer - * tuple. But if it's an IN (= ANY) test, we might be able to use a - * hashtable to avoid comparing all the tuples. + * tuple. But if it's a not-direct-correlated IN (= ANY) test, we + * might be able to use a hashtable to avoid comparing all the tuples. */ - if (subplan_is_hashable(slink, splan, plan)) + if (subLinkType == ANY_SUBLINK && + splan->parParam == NIL && + subplan_is_hashable(plan) && + testexpr_is_hashable(splan->testexpr)) splan->useHashTable = true; /* @@ -453,24 +557,6 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) plan = materialize_finished_plan(plan); } - /* - * Make splan->args from parParam. - */ - args = NIL; - foreach(l, splan->parParam) - { - PlannerParamItem *pitem = list_nth(root->glob->paramlist, - lfirst_int(l)); - - /* - * The Var or Aggref has already been adjusted to have the correct - * varlevelsup or agglevelsup. We probably don't even need to - * copy it again, but be safe. - */ - args = lappend(args, copyObject(pitem->item)); - } - splan->args = args; - result = (Node *) splan; isInitPlan = false; } @@ -478,10 +564,8 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) /* * Add the subplan and its rtable to the global lists. */ - root->glob->subplans = lappend(root->glob->subplans, - plan); - root->glob->subrtables = lappend(root->glob->subrtables, - subroot->parse->rtable); + root->glob->subplans = lappend(root->glob->subplans, plan); + root->glob->subrtables = lappend(root->glob->subrtables, rtable); splan->plan_id = list_length(root->glob->subplans); if (isInitPlan) @@ -498,6 +582,9 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs, splan->plan_id); + /* Lastly, fill in the cost estimates for use later */ + cost_subplan(root, splan, plan); + return result; } @@ -620,40 +707,12 @@ convert_testexpr_mutator(Node *node, } /* - * subplan_is_hashable: decide whether we can implement a subplan by hashing - * - * Caution: the SubPlan node is not completely filled in yet. We can rely - * on its plan and parParam fields, however. + * subplan_is_hashable: can we implement an ANY subplan by hashing? */ static bool -subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan) +subplan_is_hashable(Plan *plan) { double subquery_size; - ListCell *l; - - /* - * The sublink type must be "= ANY" --- that is, an IN operator. We - * expect that the test expression will be either a single OpExpr, or an - * AND-clause containing OpExprs. (If it's anything else then the parser - * must have determined that the operators have non-equality-like - * semantics. In the OpExpr case we can't be sure what the operator's - * semantics are like, but the test below for hashability will reject - * anything that's not equality.) - */ - if (slink->subLinkType != ANY_SUBLINK) - return false; - if (slink->testexpr == NULL || - (!IsA(slink->testexpr, OpExpr) && - !and_clause(slink->testexpr))) - return false; - - /* - * The subplan must not have any direct correlation vars --- else we'd - * have to recompute its output each time, so that the hashtable wouldn't - * gain anything. - */ - if (node->parParam != NIL) - return false; /* * The estimated size of the subquery result must fit in work_mem. (Note: @@ -666,7 +725,19 @@ subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan) if (subquery_size > work_mem * 1024L) return false; + return true; +} + +/* + * testexpr_is_hashable: is an ANY SubLink's test expression hashable? + */ +static bool +testexpr_is_hashable(Node *testexpr) +{ /* + * The testexpr must be a single OpExpr, or an AND-clause containing + * only OpExprs. + * * The combining operators must be hashable and strict. The need for * hashability is obvious, since we want to use hashing. Without * strictness, behavior in the presence of nulls is too unpredictable. We @@ -674,25 +745,28 @@ subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan) * NULL for non-null inputs, either (see nodeSubplan.c). However, hash * indexes and hash joins assume that too. */ - if (IsA(slink->testexpr, OpExpr)) + if (testexpr && IsA(testexpr, OpExpr)) { - if (!hash_ok_operator((OpExpr *) slink->testexpr)) - return false; + if (hash_ok_operator((OpExpr *) testexpr)) + return true; } - else + else if (and_clause(testexpr)) { - foreach(l, ((BoolExpr *) slink->testexpr)->args) + ListCell *l; + + foreach(l, ((BoolExpr *) testexpr)->args) { Node *andarg = (Node *) lfirst(l); if (!IsA(andarg, OpExpr)) - return false; /* probably can't happen */ + return false; if (!hash_ok_operator((OpExpr *) andarg)) return false; } + return true; } - return true; + return false; } static bool @@ -702,6 +776,10 @@ hash_ok_operator(OpExpr *expr) HeapTuple tup; Form_pg_operator optup; + /* quick out if not a binary operator */ + if (list_length(expr->args) != 2) + return false; + /* else must look up the operator properties */ tup = SearchSysCache(OPEROID, ObjectIdGetDatum(opid), 0, 0, 0); @@ -838,63 +916,6 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, return true; } -/* - * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery - * - * The only thing that matters about an EXISTS query is whether it returns - * zero or more than zero rows. Therefore, we can remove certain SQL features - * that won't affect that. The only part that is really likely to matter in - * typical usage is simplifying the targetlist: it's a common habit to write - * "SELECT * FROM" even though there is no need to evaluate any columns. - * - * Note: by suppressing the targetlist we could cause an observable behavioral - * change, namely that any errors that might occur in evaluating the tlist - * won't occur, nor will other side-effects of volatile functions. This seems - * unlikely to bother anyone in practice. - * - * Returns TRUE if was able to discard the targetlist, else FALSE. - */ -static bool -simplify_EXISTS_query(Query *query) -{ - /* - * We don't try to simplify at all if the query uses set operations, - * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these - * seem likely in normal usage and their possible effects are complex. - */ - if (query->commandType != CMD_SELECT || - query->intoClause || - query->setOperations || - query->hasAggs || - query->havingQual || - query->limitOffset || - query->limitCount || - query->rowMarks) - return false; - - /* - * Mustn't throw away the targetlist if it contains set-returning - * functions; those could affect whether zero rows are returned! - */ - if (expression_returns_set((Node *) query->targetList)) - return false; - - /* - * Otherwise, we can throw away the targetlist, as well as any GROUP, - * DISTINCT, and ORDER BY clauses; none of those clauses will change - * a nonzero-rows result to zero rows or vice versa. (Furthermore, - * since our parsetree representation of these clauses depends on the - * targetlist, we'd better throw them away if we drop the targetlist.) - */ - query->targetList = NIL; - query->groupClause = NIL; - query->distinctClause = NIL; - query->sortClause = NIL; - query->hasDistinctOn = false; - - return true; -} - /* * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join? * @@ -1044,6 +1065,288 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, return true; } +/* + * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery + * + * The only thing that matters about an EXISTS query is whether it returns + * zero or more than zero rows. Therefore, we can remove certain SQL features + * that won't affect that. The only part that is really likely to matter in + * typical usage is simplifying the targetlist: it's a common habit to write + * "SELECT * FROM" even though there is no need to evaluate any columns. + * + * Note: by suppressing the targetlist we could cause an observable behavioral + * change, namely that any errors that might occur in evaluating the tlist + * won't occur, nor will other side-effects of volatile functions. This seems + * unlikely to bother anyone in practice. + * + * Returns TRUE if was able to discard the targetlist, else FALSE. + */ +static bool +simplify_EXISTS_query(Query *query) +{ + /* + * We don't try to simplify at all if the query uses set operations, + * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these + * seem likely in normal usage and their possible effects are complex. + */ + if (query->commandType != CMD_SELECT || + query->intoClause || + query->setOperations || + query->hasAggs || + query->havingQual || + query->limitOffset || + query->limitCount || + query->rowMarks) + return false; + + /* + * Mustn't throw away the targetlist if it contains set-returning + * functions; those could affect whether zero rows are returned! + */ + if (expression_returns_set((Node *) query->targetList)) + return false; + + /* + * Otherwise, we can throw away the targetlist, as well as any GROUP, + * DISTINCT, and ORDER BY clauses; none of those clauses will change + * a nonzero-rows result to zero rows or vice versa. (Furthermore, + * since our parsetree representation of these clauses depends on the + * targetlist, we'd better throw them away if we drop the targetlist.) + */ + query->targetList = NIL; + query->groupClause = NIL; + query->distinctClause = NIL; + query->sortClause = NIL; + query->hasDistinctOn = false; + + return true; +} + +/* + * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink + * + * The subselect is expected to be a fresh copy that we can munge up, + * and to have been successfully passed through simplify_EXISTS_query. + * + * On success, the modified subselect is returned, and we store a suitable + * upper-level test expression at *testexpr, plus a list of the subselect's + * output Params at *paramIds. (The test expression is already Param-ified + * and hence need not go through convert_testexpr, which is why we have to + * deal with the Param IDs specially.) + * + * On failure, returns NULL. + */ +static Query * +convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, + Node **testexpr, List **paramIds) +{ + Node *whereClause; + List *leftargs, + *rightargs, + *opids, + *newWhere, + *tlist, + *testlist, + *paramids; + ListCell *lc, + *rc, + *oc; + AttrNumber resno; + + /* + * Query must not require a targetlist, since we have to insert a new one. + * Caller should have dealt with the case already. + */ + Assert(subselect->targetList == NIL); + + /* + * Separate out the WHERE clause. (We could theoretically also remove + * top-level plain JOIN/ON clauses, but it's probably not worth the + * trouble.) + */ + whereClause = subselect->jointree->quals; + subselect->jointree->quals = NULL; + + /* + * The rest of the sub-select must not refer to any Vars of the parent + * query. (Vars of higher levels should be okay, though.) + * + * Note: we need not check for Aggs separately because we know the + * sub-select is as yet unoptimized; any uplevel Agg must therefore + * contain an uplevel Var reference. This is not the case below ... + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + + /* + * We don't risk optimizing if the WHERE clause is volatile, either. + */ + if (contain_volatile_functions(whereClause)) + return NULL; + + /* + * Clean up the WHERE clause by doing const-simplification etc on it. + * Aside from simplifying the processing we're about to do, this is + * important for being able to pull chunks of the WHERE clause up into + * the parent query. Since we are invoked partway through the parent's + * preprocess_expression() work, earlier steps of preprocess_expression() + * wouldn't get applied to the pulled-up stuff unless we do them here. + * For the parts of the WHERE clause that get put back into the child + * query, this work is partially duplicative, but it shouldn't hurt. + * + * Note: we do not run flatten_join_alias_vars. This is OK because + * any parent aliases were flattened already, and we're not going to + * pull any child Vars (of any description) into the parent. + * + * Note: passing the parent's root to eval_const_expressions is technically + * wrong, but we can get away with it since only the boundParams (if any) + * are used, and those would be the same in a subroot. + */ + whereClause = eval_const_expressions(root, whereClause); + whereClause = (Node *) canonicalize_qual((Expr *) whereClause); + whereClause = (Node *) make_ands_implicit((Expr *) whereClause); + + /* + * We now have a flattened implicit-AND list of clauses, which we + * try to break apart into "outervar = innervar" hash clauses. + * Anything that can't be broken apart just goes back into the + * newWhere list. Note that we aren't trying hard yet to ensure + * that we have only outer or only inner on each side; we'll check + * that if we get to the end. + */ + leftargs = rightargs = opids = newWhere = NIL; + foreach(lc, (List *) whereClause) + { + OpExpr *expr = (OpExpr *) lfirst(lc); + + if (IsA(expr, OpExpr) && + hash_ok_operator(expr)) + { + Node *leftarg = (Node *) linitial(expr->args); + Node *rightarg = (Node *) lsecond(expr->args); + + if (contain_vars_of_level(leftarg, 1)) + { + leftargs = lappend(leftargs, leftarg); + rightargs = lappend(rightargs, rightarg); + opids = lappend_oid(opids, expr->opno); + continue; + } + if (contain_vars_of_level(rightarg, 1)) + { + /* + * We must commute the clause to put the outer var on the + * left, because the hashing code in nodeSubplan.c expects + * that. This probably shouldn't ever fail, since hashable + * operators ought to have commutators, but be paranoid. + */ + expr->opno = get_commutator(expr->opno); + if (OidIsValid(expr->opno) && hash_ok_operator(expr)) + { + leftargs = lappend(leftargs, rightarg); + rightargs = lappend(rightargs, leftarg); + opids = lappend_oid(opids, expr->opno); + continue; + } + /* If no commutator, no chance to optimize the WHERE clause */ + return NULL; + } + } + /* Couldn't handle it as a hash clause */ + newWhere = lappend(newWhere, expr); + } + + /* + * If we didn't find anything we could convert, fail. + */ + if (leftargs == NIL) + return NULL; + + /* + * There mustn't be any parent Vars or Aggs in the stuff that we intend to + * put back into the child query. Note: you might think we don't need to + * check for Aggs separately, because an uplevel Agg must contain an + * uplevel Var in its argument. But it is possible that the uplevel Var + * got optimized away by eval_const_expressions. Consider + * + * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END) + */ + if (contain_vars_of_level((Node *) newWhere, 1) || + contain_vars_of_level((Node *) rightargs, 1)) + return NULL; + if (root->parse->hasAggs && + (contain_aggs_of_level((Node *) newWhere, 1) || + contain_aggs_of_level((Node *) rightargs, 1))) + return NULL; + + /* + * And there can't be any child Vars in the stuff we intend to pull up. + * (Note: we'd need to check for child Aggs too, except we know the + * child has no aggs at all because of simplify_EXISTS_query's check.) + */ + if (contain_vars_of_level((Node *) leftargs, 0)) + return NULL; + + /* + * Also reject sublinks in the stuff we intend to pull up. (It might be + * possible to support this, but doesn't seem worth the complication.) + */ + if (contain_subplans((Node *) leftargs)) + return NULL; + + /* + * Okay, adjust the sublevelsup in the stuff we're pulling up. + */ + IncrementVarSublevelsUp((Node *) leftargs, -1, 1); + + /* + * Put back any child-level-only WHERE clauses. + */ + if (newWhere) + subselect->jointree->quals = (Node *) make_ands_explicit(newWhere); + + /* + * Build a new targetlist for the child that emits the expressions + * we need. Concurrently, build a testexpr for the parent using + * Params to reference the child outputs. (Since we generate Params + * directly here, there will be no need to convert the testexpr in + * build_subplan.) + */ + tlist = testlist = paramids = NIL; + resno = 1; + /* there's no "for3" so we have to chase one of the lists manually */ + oc = list_head(opids); + forboth(lc, leftargs, rc, rightargs) + { + Node *leftarg = (Node *) lfirst(lc); + Node *rightarg = (Node *) lfirst(rc); + Oid opid = lfirst_oid(oc); + Param *param; + + oc = lnext(oc); + param = generate_new_param(root, + exprType(rightarg), + exprTypmod(rightarg)); + tlist = lappend(tlist, + makeTargetEntry((Expr *) rightarg, + resno++, + NULL, + false)); + testlist = lappend(testlist, + make_opclause(opid, BOOLOID, false, + (Expr *) leftarg, (Expr *) param)); + paramids = lappend_int(paramids, param->paramid); + } + + /* Put everything where it should go, and we're done */ + subselect->targetList = tlist; + *testexpr = (Node *) make_ands_explicit(testlist); + *paramIds = paramids; + + return subselect; +} + + /* * Replace correlation vars (uplevel vars) with Params. * @@ -1130,7 +1433,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context) * Now build the SubPlan node and make the expr to return. */ return make_subplan(context->root, - sublink, + (Query *) sublink->subselect, + sublink->subLinkType, testexpr, context->isTopQual); } @@ -1140,7 +1444,8 @@ process_sublinks_mutator(Node *node, process_sublinks_context *context) * the very routine that creates 'em to begin with). We shouldn't find * ourselves invoked directly on a Query, either. */ - Assert(!is_subplan(node)); + Assert(!IsA(node, SubPlan)); + Assert(!IsA(node, AlternativeSubPlan)); Assert(!IsA(node, Query)); /* @@ -1251,7 +1556,7 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans) { initSetParam = bms_add_member(initSetParam, lfirst_int(l2)); } - initplan_cost += get_initplan_cost(root, initsubplan); + initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost; } /* @@ -1555,7 +1860,7 @@ finalize_primnode(Node *node, finalize_primnode_context *context) } return false; /* no more to do here */ } - if (is_subplan(node)) + if (IsA(node, SubPlan)) { SubPlan *subplan = (SubPlan *) node; Plan *plan = planner_subplan_get_plan(context->root, subplan); @@ -1655,6 +1960,8 @@ SS_make_initplan_from_plan(PlannerInfo *root, Plan *plan, * parParam and args lists remain empty. */ + cost_subplan(root, node, plan); + /* * Make a Param that will be the subplan's output. */ diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index ffe31bdff2..6a06b0806c 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.262 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.263 2008/08/22 00:16:04 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -360,7 +360,7 @@ make_ands_implicit(Expr *clause) * are no subqueries. There mustn't be outer-aggregate references either. * * (If you want something like this but able to deal with subqueries, - * see rewriteManip.c's checkExprHasAggs().) + * see rewriteManip.c's contain_aggs_of_level().) */ bool contain_agg_clause(Node *clause) @@ -566,6 +566,8 @@ expression_returns_set_walker(Node *node, void *context) return false; if (IsA(node, SubPlan)) return false; + if (IsA(node, AlternativeSubPlan)) + return false; if (IsA(node, ArrayExpr)) return false; if (IsA(node, RowExpr)) @@ -637,6 +639,8 @@ expression_returns_set_rows_walker(Node *node, double *count) return false; if (IsA(node, SubPlan)) return false; + if (IsA(node, AlternativeSubPlan)) + return false; if (IsA(node, ArrayExpr)) return false; if (IsA(node, RowExpr)) @@ -684,6 +688,7 @@ contain_subplans_walker(Node *node, void *context) if (node == NULL) return false; if (IsA(node, SubPlan) || + IsA(node, AlternativeSubPlan) || IsA(node, SubLink)) return true; /* abort the tree traversal and return true */ return expression_tree_walker(node, contain_subplans_walker, context); @@ -1012,6 +1017,8 @@ contain_nonstrict_functions_walker(Node *node, void *context) } if (IsA(node, SubPlan)) return true; + if (IsA(node, AlternativeSubPlan)) + return true; /* ArrayCoerceExpr is strict at the array level, regardless of elemfunc */ if (IsA(node, FieldStore)) return true; @@ -2308,7 +2315,8 @@ eval_const_expressions_mutator(Node *node, break; } } - if (IsA(node, SubPlan)) + if (IsA(node, SubPlan) || + IsA(node, AlternativeSubPlan)) { /* * Return a SubPlan unchanged --- too late to do anything with it. @@ -4156,6 +4164,8 @@ expression_tree_walker(Node *node, return true; } break; + case T_AlternativeSubPlan: + return walker(((AlternativeSubPlan *) node)->subplans, context); case T_FieldSelect: return walker(((FieldSelect *) node)->arg, context); case T_FieldStore: @@ -4630,6 +4640,16 @@ expression_tree_mutator(Node *node, return (Node *) newnode; } break; + case T_AlternativeSubPlan: + { + AlternativeSubPlan *asplan = (AlternativeSubPlan *) node; + AlternativeSubPlan *newnode; + + FLATCOPY(newnode, asplan, AlternativeSubPlan); + MUTATE(newnode->subplans, asplan->subplans, List *); + return (Node *) newnode; + } + break; case T_FieldSelect: { FieldSelect *fselect = (FieldSelect *) node; diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 290f9bb64b..f9bd59c799 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.75 2008/08/14 18:47:59 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/util/var.c,v 1.76 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -171,7 +171,7 @@ pull_varattnos_walker(Node *node, Bitmapset **varattnos) } /* Should not find a subquery or subplan */ Assert(!IsA(node, Query)); - Assert(!is_subplan(node)); + Assert(!IsA(node, SubPlan)); return expression_tree_walker(node, pull_varattnos_walker, (void *) varattnos); @@ -677,7 +677,7 @@ flatten_join_alias_vars_mutator(Node *node, return (Node *) newnode; } /* Already-planned tree not supported */ - Assert(!is_subplan(node)); + Assert(!IsA(node, SubPlan)); return expression_tree_mutator(node, flatten_join_alias_vars_mutator, (void *) context); diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 8addb53e51..6309525cab 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.229 2008/07/16 01:30:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.230 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1980,6 +1980,15 @@ exprType(Node *expr) } } break; + case T_AlternativeSubPlan: + { + /* As above, supported for the convenience of ruleutils.c */ + AlternativeSubPlan *asplan = (AlternativeSubPlan *) expr; + + /* subplans should all return the same thing */ + type = exprType((Node *) linitial(asplan->subplans)); + } + break; case T_FieldSelect: type = ((FieldSelect *) expr)->resulttype; break; diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 1202c31314..baa9330016 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.109 2008/08/14 20:31:29 heikki Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteManip.c,v 1.110 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -25,10 +25,10 @@ typedef struct { int sublevels_up; -} checkExprHasAggs_context; +} contain_aggs_of_level_context; -static bool checkExprHasAggs_walker(Node *node, - checkExprHasAggs_context *context); +static bool contain_aggs_of_level_walker(Node *node, + contain_aggs_of_level_context *context); static bool checkExprHasSubLink_walker(Node *node, void *context); static Relids offset_relid_set(Relids relids, int offset); static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid); @@ -37,32 +37,44 @@ static Relids adjust_relid_set(Relids relids, int oldrelid, int newrelid); /* * checkExprHasAggs - * Check if an expression contains an aggregate function call. + */ +bool +checkExprHasAggs(Node *node) +{ + return contain_aggs_of_level(node, 0); +} + +/* + * contain_aggs_of_level - + * Check if an expression contains an aggregate function call of a + * specified query level. * * The objective of this routine is to detect whether there are aggregates - * belonging to the initial query level. Aggregates belonging to subqueries + * belonging to the given query level. Aggregates belonging to subqueries * or outer queries do NOT cause a true result. We must recurse into * subqueries to detect outer-reference aggregates that logically belong to - * the initial query level. + * the specified query level. */ bool -checkExprHasAggs(Node *node) +contain_aggs_of_level(Node *node, int levelsup) { - checkExprHasAggs_context context; + contain_aggs_of_level_context context; - context.sublevels_up = 0; + context.sublevels_up = levelsup; /* * Must be prepared to start with a Query or a bare expression tree; if * it's a Query, we don't want to increment sublevels_up. */ return query_or_expression_tree_walker(node, - checkExprHasAggs_walker, + contain_aggs_of_level_walker, (void *) &context, 0); } static bool -checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context) +contain_aggs_of_level_walker(Node *node, + contain_aggs_of_level_context *context) { if (node == NULL) return false; @@ -79,12 +91,12 @@ checkExprHasAggs_walker(Node *node, checkExprHasAggs_context *context) context->sublevels_up++; result = query_tree_walker((Query *) node, - checkExprHasAggs_walker, + contain_aggs_of_level_walker, (void *) context, 0); context->sublevels_up--; return result; } - return expression_tree_walker(node, checkExprHasAggs_walker, + return expression_tree_walker(node, contain_aggs_of_level_walker, (void *) context); } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 0120353344..3c62e7a972 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.279 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.280 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3732,6 +3732,11 @@ get_rule_expr(Node *node, deparse_context *context, } break; + case T_AlternativeSubPlan: + /* As above, just punt */ + appendStringInfo(buf, "(alternative subplans)"); + break; + case T_FieldSelect: { FieldSelect *fselect = (FieldSelect *) node; diff --git a/src/include/executor/nodeSubplan.h b/src/include/executor/nodeSubplan.h index c7c9f4e2dc..7d12a54b61 100644 --- a/src/include/executor/nodeSubplan.h +++ b/src/include/executor/nodeSubplan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/executor/nodeSubplan.h,v 1.27 2008/01/01 19:45:57 momjian Exp $ + * $PostgreSQL: pgsql/src/include/executor/nodeSubplan.h,v 1.28 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -17,10 +17,8 @@ #include "nodes/execnodes.h" extern SubPlanState *ExecInitSubPlan(SubPlan *subplan, PlanState *parent); -extern Datum ExecSubPlan(SubPlanState *node, - ExprContext *econtext, - bool *isNull, - ExprDoneCond *isDone); + +extern AlternativeSubPlanState *ExecInitAlternativeSubPlan(AlternativeSubPlan *asplan, PlanState *parent); extern void ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ce41c750af..ee8c4a2bef 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.186 2008/08/07 03:04:03 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.187 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -627,6 +627,17 @@ typedef struct SubPlanState FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */ } SubPlanState; +/* ---------------- + * AlternativeSubPlanState node + * ---------------- + */ +typedef struct AlternativeSubPlanState +{ + ExprState xprstate; + List *subplans; /* states of alternative subplans */ + int active; /* list index of the one we're using */ +} AlternativeSubPlanState; + /* ---------------- * FieldSelectState node * ---------------- diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 5fdd23c5a9..b9e00bfdde 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.208 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/nodes.h,v 1.209 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -118,6 +118,7 @@ typedef enum NodeTag T_BoolExpr, T_SubLink, T_SubPlan, + T_AlternativeSubPlan, T_FieldSelect, T_FieldStore, T_RelabelType, @@ -160,6 +161,7 @@ typedef enum NodeTag T_ScalarArrayOpExprState, T_BoolExprState, T_SubPlanState, + T_AlternativeSubPlanState, T_FieldSelectState, T_FieldStoreState, T_CoerceViaIOState, diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index f0c55112cb..f6da1adbfa 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -10,7 +10,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.138 2008/08/02 21:32:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/nodes/primnodes.h,v 1.139 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -445,7 +445,8 @@ typedef struct SubLink * * If the sub-select becomes an initplan rather than a subplan, the executable * expression is part of the outer plan's expression tree (and the SubPlan - * node itself is not). In this case testexpr is NULL to avoid duplication. + * node itself is not, but rather is found in the outer plan's initPlan + * list). In this case testexpr is NULL to avoid duplication. * * The planner also derives lists of the values that need to be passed into * and out of the subplan. Input values are represented as a list "args" of @@ -457,6 +458,10 @@ typedef struct SubLink * is an initplan; they are listed in order by sub-select output column * position. (parParam and setParam are integer Lists, not Bitmapsets, * because their ordering is significant.) + * + * Also, the planner computes startup and per-call costs for use of the + * SubPlan. Note that these include the cost of the subquery proper, + * evaluation of the testexpr if any, and any hashtable management overhead. */ typedef struct SubPlan { @@ -482,8 +487,25 @@ typedef struct SubPlan * Params for parent plan */ List *parParam; /* indices of input Params from parent plan */ List *args; /* exprs to pass as parParam values */ + /* Estimated execution costs: */ + Cost startup_cost; /* one-time setup cost */ + Cost per_call_cost; /* cost for each subplan evaluation */ } SubPlan; +/* + * AlternativeSubPlan - expression node for a choice among SubPlans + * + * The subplans are given as a List so that the node definition need not + * change if there's ever more than two alternatives. For the moment, + * though, there are always exactly two; and the first one is the fast-start + * plan. + */ +typedef struct AlternativeSubPlan +{ + Expr xpr; + List *subplans; /* SubPlan(s) with equivalent results */ +} AlternativeSubPlan; + /* ---------------- * FieldSelect * diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 41e52d909a..1ea3d701fe 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.92 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.93 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,7 +19,6 @@ #define is_opclause(clause) ((clause) != NULL && IsA(clause, OpExpr)) #define is_funcclause(clause) ((clause) != NULL && IsA(clause, FuncExpr)) -#define is_subplan(clause) ((clause) != NULL && IsA(clause, SubPlan)) typedef struct { diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 17caeb4ee4..8ad8f7f062 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.91 2008/08/14 18:48:00 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.92 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -93,9 +93,9 @@ extern void cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo); extern void cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo); +extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan); extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root); extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root); -extern Cost get_initplan_cost(PlannerInfo *root, SubPlan *subplan); extern void set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel); extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index 110d8ac0d1..a870ce889b 100644 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.45 2008/08/14 20:31:29 heikki Exp $ + * $PostgreSQL: pgsql/src/include/rewrite/rewriteManip.h,v 1.46 2008/08/22 00:16:04 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -35,6 +35,7 @@ extern Query *getInsertSelectQuery(Query *parsetree, Query ***subquery_ptr); extern void AddQual(Query *parsetree, Node *qual); extern void AddInvertedQual(Query *parsetree, Node *qual); +extern bool contain_aggs_of_level(Node *node, int levelsup); extern bool checkExprHasAggs(Node *node); extern bool checkExprHasSubLink(Node *node); -- 2.40.0