From dfdf07aab109fd883877fd11819ac2e2981093d8 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 18 Aug 2005 17:51:12 +0000 Subject: [PATCH] Fix up LIMIT/OFFSET planning so that we cope with non-constant LIMIT or OFFSET clauses by using estimate_expression_value(). The main advantage of this is that if the expression is a Param and we have a value for the Param, we'll use that value rather than defaulting. Also, fix some thinkos in the logic for combining LIMIT/OFFSET with an externally supplied tuple fraction (this covers cases like EXISTS(...LIMIT...)). And make sure the results of all this are shown by EXPLAIN. Per a gripe from Merlin Moncure. --- src/backend/optimizer/plan/createplan.c | 84 ++++++---- src/backend/optimizer/plan/planagg.c | 5 +- src/backend/optimizer/plan/planner.c | 208 +++++++++++++++++------- src/include/optimizer/planmain.h | 5 +- 4 files changed, 201 insertions(+), 101 deletions(-) diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 697355cfdf..bc4dae5594 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.196 2005/07/28 20:26:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.197 2005/08/18 17:51:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2673,8 +2673,16 @@ make_setop(SetOpCmd cmd, Plan *lefttree, return node; } +/* + * Note: offset_est and count_est are passed in to save having to repeat + * work already done to estimate the values of the limitOffset and limitCount + * expressions. Their values are as returned by preprocess_limit (0 means + * "not relevant", -1 means "couldn't estimate"). Keep the code below in sync + * with that function! + */ Limit * -make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount) +make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, + int offset_est, int count_est) { Limit *node = makeNode(Limit); Plan *plan = &node->plan; @@ -2682,46 +2690,50 @@ make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount) copy_plan_costsize(plan, lefttree); /* - * If offset/count are constants, adjust the output rows count and - * costs accordingly. This is only a cosmetic issue if we are at top - * level, but if we are building a subquery then it's important to - * report correct info to the outer planner. + * Adjust the output rows count and costs according to the offset/limit. + * This is only a cosmetic issue if we are at top level, but if we are + * building a subquery then it's important to report correct info to the + * outer planner. + * + * When the offset or count couldn't be estimated, use 10% of the + * estimated number of rows emitted from the subplan. */ - if (limitOffset && IsA(limitOffset, Const)) + if (offset_est != 0) { - Const *limito = (Const *) limitOffset; - int32 offset = DatumGetInt32(limito->constvalue); + double offset_rows; - if (!limito->constisnull && offset > 0) - { - if (offset > plan->plan_rows) - offset = (int32) plan->plan_rows; - if (plan->plan_rows > 0) - plan->startup_cost += - (plan->total_cost - plan->startup_cost) - * ((double) offset) / plan->plan_rows; - plan->plan_rows -= offset; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - } + if (offset_est > 0) + offset_rows = (double) offset_est; + else + offset_rows = clamp_row_est(lefttree->plan_rows * 0.10); + if (offset_rows > plan->plan_rows) + offset_rows = plan->plan_rows; + if (plan->plan_rows > 0) + plan->startup_cost += + (plan->total_cost - plan->startup_cost) + * offset_rows / plan->plan_rows; + plan->plan_rows -= offset_rows; + if (plan->plan_rows < 1) + plan->plan_rows = 1; } - if (limitCount && IsA(limitCount, Const)) + + if (count_est != 0) { - Const *limitc = (Const *) limitCount; - int32 count = DatumGetInt32(limitc->constvalue); + double count_rows; - if (!limitc->constisnull && count >= 0) - { - if (count > plan->plan_rows) - count = (int32) plan->plan_rows; - if (plan->plan_rows > 0) - plan->total_cost = plan->startup_cost + - (plan->total_cost - plan->startup_cost) - * ((double) count) / plan->plan_rows; - plan->plan_rows = count; - if (plan->plan_rows < 1) - plan->plan_rows = 1; - } + if (count_est > 0) + count_rows = (double) count_est; + else + count_rows = clamp_row_est(lefttree->plan_rows * 0.10); + if (count_rows > plan->plan_rows) + count_rows = plan->plan_rows; + if (plan->plan_rows > 0) + plan->total_cost = plan->startup_cost + + (plan->total_cost - plan->startup_cost) + * count_rows / plan->plan_rows; + plan->plan_rows = count_rows; + if (plan->plan_rows < 1) + plan->plan_rows = 1; } plan->targetlist = copyObject(lefttree->targetlist); diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index 81f526ac11..6f9274fbc0 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.7 2005/07/28 20:26:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.8 2005/08/18 17:51:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -519,7 +519,8 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info, List *constant_quals) plan = (Plan *) make_limit(plan, subparse->limitOffset, - subparse->limitCount); + subparse->limitCount, + 0, 1); /* * Convert the plan into an InitPlan, and make a Param for its result. diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 334f8504df..0b0cb4eabf 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.190 2005/07/02 23:00:41 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.191 2005/08/18 17:51:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -58,8 +58,9 @@ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); static Plan *inheritance_planner(PlannerInfo *root, List *inheritlist); static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction); -static double adjust_tuple_fraction_for_limit(PlannerInfo *root, - double tuple_fraction); +static double preprocess_limit(PlannerInfo *root, + double tuple_fraction, + int *offset_est, int *count_est); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, List *sort_pathkeys, List *group_pathkeys, @@ -649,13 +650,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) { Query *parse = root->parse; List *tlist = parse->targetList; + int offset_est; + int count_est; Plan *result_plan; List *current_pathkeys; List *sort_pathkeys; - /* Tweak caller-supplied tuple_fraction if have LIMIT */ - if (parse->limitCount != NULL) - tuple_fraction = adjust_tuple_fraction_for_limit(root, tuple_fraction); + /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ + if (parse->limitCount || parse->limitOffset) + tuple_fraction = preprocess_limit(root, tuple_fraction, + &offset_est, &count_est); if (parse->setOperations) { @@ -1144,11 +1148,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. */ - if (parse->limitOffset || parse->limitCount) + if (parse->limitCount || parse->limitOffset) { result_plan = (Plan *) make_limit(result_plan, parse->limitOffset, - parse->limitCount); + parse->limitCount, + offset_est, + count_est); } /* @@ -1161,72 +1167,107 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* - * adjust_tuple_fraction_for_limit - adjust tuple fraction for LIMIT + * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses * - * If the query contains LIMIT, we adjust the caller-supplied tuple_fraction - * accordingly. This is not overridable by the caller, since it reflects plan - * actions that grouping_planner() will certainly take, not assumptions about - * context. + * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the + * results back in *count_est and *offset_est. These variables are set to + * 0 if the corresponding clause is not present, and -1 if it's present + * but we couldn't estimate the value for it. (The "0" convention is OK + * for OFFSET but a little bit bogus for LIMIT: effectively we estimate + * LIMIT 0 as though it were LIMIT 1. But this is in line with the planner's + * usual practice of never estimating less than one row.) These values will + * be passed to make_limit, which see if you change this code. + * + * The return value is the suitably adjusted tuple_fraction to use for + * planning the query. This adjustment is not overridable, since it reflects + * plan actions that grouping_planner() will certainly take, not assumptions + * about context. */ static double -adjust_tuple_fraction_for_limit(PlannerInfo *root, double tuple_fraction) +preprocess_limit(PlannerInfo *root, double tuple_fraction, + int *offset_est, int *count_est) { Query *parse = root->parse; - double limit_fraction = 0.0; + Node *est; + double limit_fraction; - /* Should not be called unless LIMIT */ - Assert(parse->limitCount != NULL); + /* Should not be called unless LIMIT or OFFSET */ + Assert(parse->limitCount || parse->limitOffset); /* - * A LIMIT clause limits the absolute number of tuples returned. However, - * if it's not a constant LIMIT then we have to punt; for lack of a better - * idea, assume 10% of the plan's result is wanted. + * Try to obtain the clause values. We use estimate_expression_value + * primarily because it can sometimes do something useful with Params. */ - if (IsA(parse->limitCount, Const)) + if (parse->limitCount) { - Const *limitc = (Const *) parse->limitCount; - int32 count = DatumGetInt32(limitc->constvalue); - - /* - * A NULL-constant LIMIT represents "LIMIT ALL", which we treat the - * same as no limit (ie, expect to retrieve all the tuples). - */ - if (!limitc->constisnull && count > 0) + est = estimate_expression_value(parse->limitCount); + if (est && IsA(est, Const)) { - limit_fraction = (double) count; - /* We must also consider the OFFSET, if present */ - if (parse->limitOffset != NULL) + if (((Const *) est)->constisnull) { - if (IsA(parse->limitOffset, Const)) - { - int32 offset; - - limitc = (Const *) parse->limitOffset; - offset = DatumGetInt32(limitc->constvalue); - if (!limitc->constisnull && offset > 0) - limit_fraction += (double) offset; - } - else - { - /* OFFSET is an expression ... punt ... */ - limit_fraction = 0.10; - } + /* NULL indicates LIMIT ALL, ie, no limit */ + *count_est = 0; /* treat as not present */ + } + else + { + *count_est = DatumGetInt32(((Const *) est)->constvalue); + if (*count_est <= 0) + *count_est = 1; /* force to at least 1 */ } } + else + *count_est = -1; /* can't estimate */ } else + *count_est = 0; /* not present */ + + if (parse->limitOffset) { - /* LIMIT is an expression ... punt ... */ - limit_fraction = 0.10; + est = estimate_expression_value(parse->limitOffset); + if (est && IsA(est, Const)) + { + if (((Const *) est)->constisnull) + { + /* Treat NULL as no offset; the executor will too */ + *offset_est = 0; /* treat as not present */ + } + else + { + *offset_est = DatumGetInt32(((Const *) est)->constvalue); + if (*offset_est < 0) + *offset_est = 0; /* less than 0 is same as 0 */ + } + } + else + *offset_est = -1; /* can't estimate */ } + else + *offset_est = 0; /* not present */ - if (limit_fraction > 0.0) + if (*count_est != 0) { + /* + * A LIMIT clause limits the absolute number of tuples returned. + * However, if it's not a constant LIMIT then we have to guess; for + * lack of a better idea, assume 10% of the plan's result is wanted. + */ + if (*count_est < 0 || *offset_est < 0) + { + /* LIMIT or OFFSET is an expression ... punt ... */ + limit_fraction = 0.10; + } + else + { + /* LIMIT (plus OFFSET, if any) is max number of tuples needed */ + limit_fraction = (double) *count_est + (double) *offset_est; + } + /* * If we have absolute limits from both caller and LIMIT, use the - * smaller value; if one is fractional and the other absolute, - * treat the fraction as a fraction of the absolute value; - * else we can multiply the two fractions together. + * smaller value; likewise if they are both fractional. If one is + * fractional and the other absolute, we can't easily determine which + * is smaller, but we use the heuristic that the absolute will usually + * be smaller. */ if (tuple_fraction >= 1.0) { @@ -1237,25 +1278,20 @@ adjust_tuple_fraction_for_limit(PlannerInfo *root, double tuple_fraction) } else { - /* caller absolute, limit fractional */ - tuple_fraction *= limit_fraction; - if (tuple_fraction < 1.0) - tuple_fraction = 1.0; + /* caller absolute, limit fractional; use caller's value */ } } else if (tuple_fraction > 0.0) { if (limit_fraction >= 1.0) { - /* caller fractional, limit absolute */ - tuple_fraction *= limit_fraction; - if (tuple_fraction < 1.0) - tuple_fraction = 1.0; + /* caller fractional, limit absolute; use limit */ + tuple_fraction = limit_fraction; } else { /* both fractional */ - tuple_fraction *= limit_fraction; + tuple_fraction = Min(tuple_fraction, limit_fraction); } } else @@ -1264,6 +1300,56 @@ adjust_tuple_fraction_for_limit(PlannerInfo *root, double tuple_fraction) tuple_fraction = limit_fraction; } } + else if (*offset_est != 0 && tuple_fraction > 0.0) + { + /* + * We have an OFFSET but no LIMIT. This acts entirely differently + * from the LIMIT case: here, we need to increase rather than + * decrease the caller's tuple_fraction, because the OFFSET acts + * to cause more tuples to be fetched instead of fewer. This only + * matters if we got a tuple_fraction > 0, however. + * + * As above, use 10% if OFFSET is present but unestimatable. + */ + if (*offset_est < 0) + limit_fraction = 0.10; + else + limit_fraction = (double) *offset_est; + + /* + * If we have absolute counts from both caller and OFFSET, add them + * together; likewise if they are both fractional. If one is + * fractional and the other absolute, we want to take the larger, + * and we heuristically assume that's the fractional one. + */ + if (tuple_fraction >= 1.0) + { + if (limit_fraction >= 1.0) + { + /* both absolute, so add them together */ + tuple_fraction += limit_fraction; + } + else + { + /* caller absolute, limit fractional; use limit */ + tuple_fraction = limit_fraction; + } + } + else + { + if (limit_fraction >= 1.0) + { + /* caller fractional, limit absolute; use caller's value */ + } + else + { + /* both fractional, so add them together */ + tuple_fraction += limit_fraction; + if (tuple_fraction >= 1.0) + tuple_fraction = 0.0; /* assume fetch all */ + } + } + } return tuple_fraction; } diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 4518635186..474a14da9a 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.86 2005/06/05 22:32:58 tgl Exp $ + * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.87 2005/08/18 17:51:12 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,7 +54,8 @@ extern Group *make_group(PlannerInfo *root, List *tlist, List *qual, extern Material *make_material(Plan *lefttree); extern Plan *materialize_finished_plan(Plan *subplan); extern Unique *make_unique(Plan *lefttree, List *distinctList); -extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount); +extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount, + int offset_est, int count_est); extern SetOp *make_setop(SetOpCmd cmd, Plan *lefttree, List *distinctList, AttrNumber flagColIdx); extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan); -- 2.40.0