]> granicus.if.org Git - postgresql/commitdiff
Fix up LIMIT/OFFSET planning so that we cope with non-constant LIMIT
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 18 Aug 2005 17:51:12 +0000 (17:51 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 18 Aug 2005 17:51:12 +0000 (17:51 +0000)
or OFFSET clauses by using estimate_expression_value().  The main advantage
of this is that if the expression is a Param and we have a value for the
Param, we'll use that value rather than defaulting.  Also, fix some
thinkos in the logic for combining LIMIT/OFFSET with an externally
supplied tuple fraction (this covers cases like EXISTS(...LIMIT...)).
And make sure the results of all this are shown by EXPLAIN.  Per a
gripe from Merlin Moncure.

src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/planagg.c
src/backend/optimizer/plan/planner.c
src/include/optimizer/planmain.h

index 697355cfdf87b056f9f8eca9d9a2ca87a24f96ff..bc4dae55945e3b8bdaf60659cabdf01aa6d424ba 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.196 2005/07/28 20:26:21 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.197 2005/08/18 17:51:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2673,8 +2673,16 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
        return node;
 }
 
+/*
+ * Note: offset_est and count_est are passed in to save having to repeat
+ * work already done to estimate the values of the limitOffset and limitCount
+ * expressions.  Their values are as returned by preprocess_limit (0 means
+ * "not relevant", -1 means "couldn't estimate").  Keep the code below in sync
+ * with that function!
+ */
 Limit *
-make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount)
+make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
+                  int offset_est, int count_est)
 {
        Limit      *node = makeNode(Limit);
        Plan       *plan = &node->plan;
@@ -2682,46 +2690,50 @@ make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount)
        copy_plan_costsize(plan, lefttree);
 
        /*
-        * If offset/count are constants, adjust the output rows count and
-        * costs accordingly.  This is only a cosmetic issue if we are at top
-        * level, but if we are building a subquery then it's important to
-        * report correct info to the outer planner.
+        * Adjust the output rows count and costs according to the offset/limit.
+        * This is only a cosmetic issue if we are at top level, but if we are
+        * building a subquery then it's important to report correct info to the
+        * outer planner.
+        *
+        * When the offset or count couldn't be estimated, use 10% of the
+        * estimated number of rows emitted from the subplan.
         */
-       if (limitOffset && IsA(limitOffset, Const))
+       if (offset_est != 0)
        {
-               Const      *limito = (Const *) limitOffset;
-               int32           offset = DatumGetInt32(limito->constvalue);
+               double          offset_rows;
 
-               if (!limito->constisnull && offset > 0)
-               {
-                       if (offset > plan->plan_rows)
-                               offset = (int32) plan->plan_rows;
-                       if (plan->plan_rows > 0)
-                               plan->startup_cost +=
-                                       (plan->total_cost - plan->startup_cost)
-                                       * ((double) offset) / plan->plan_rows;
-                       plan->plan_rows -= offset;
-                       if (plan->plan_rows < 1)
-                               plan->plan_rows = 1;
-               }
+               if (offset_est > 0)
+                       offset_rows = (double) offset_est;
+               else
+                       offset_rows = clamp_row_est(lefttree->plan_rows * 0.10);
+               if (offset_rows > plan->plan_rows)
+                       offset_rows = plan->plan_rows;
+               if (plan->plan_rows > 0)
+                       plan->startup_cost +=
+                               (plan->total_cost - plan->startup_cost)
+                               * offset_rows / plan->plan_rows;
+               plan->plan_rows -= offset_rows;
+               if (plan->plan_rows < 1)
+                       plan->plan_rows = 1;
        }
-       if (limitCount && IsA(limitCount, Const))
+
+       if (count_est != 0)
        {
-               Const      *limitc = (Const *) limitCount;
-               int32           count = DatumGetInt32(limitc->constvalue);
+               double          count_rows;
 
-               if (!limitc->constisnull && count >= 0)
-               {
-                       if (count > plan->plan_rows)
-                               count = (int32) plan->plan_rows;
-                       if (plan->plan_rows > 0)
-                               plan->total_cost = plan->startup_cost +
-                                       (plan->total_cost - plan->startup_cost)
-                                       * ((double) count) / plan->plan_rows;
-                       plan->plan_rows = count;
-                       if (plan->plan_rows < 1)
-                               plan->plan_rows = 1;
-               }
+               if (count_est > 0)
+                       count_rows = (double) count_est;
+               else
+                       count_rows = clamp_row_est(lefttree->plan_rows * 0.10);
+               if (count_rows > plan->plan_rows)
+                       count_rows = plan->plan_rows;
+               if (plan->plan_rows > 0)
+                       plan->total_cost = plan->startup_cost +
+                               (plan->total_cost - plan->startup_cost)
+                               * count_rows / plan->plan_rows;
+               plan->plan_rows = count_rows;
+               if (plan->plan_rows < 1)
+                       plan->plan_rows = 1;
        }
 
        plan->targetlist = copyObject(lefttree->targetlist);
index 81f526ac11398df91876c6ed0f3986e96691c609..6f9274fbc0b5984efb709620382188fd87d597e2 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.7 2005/07/28 20:26:21 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.8 2005/08/18 17:51:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -519,7 +519,8 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info, List *constant_quals)
 
        plan = (Plan *) make_limit(plan, 
                                                           subparse->limitOffset,
-                                                          subparse->limitCount);
+                                                          subparse->limitCount,
+                                                          0, 1);
 
        /*
         * Convert the plan into an InitPlan, and make a Param for its result.
index 334f8504dff2b0c4e4cb04e2c913b80b04190386..0b0cb4eabf665d337bc8f691238be0bfd695915b 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.190 2005/07/02 23:00:41 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.191 2005/08/18 17:51:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -58,8 +58,9 @@ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
 static Plan *inheritance_planner(PlannerInfo *root, List *inheritlist);
 static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction);
-static double adjust_tuple_fraction_for_limit(PlannerInfo *root,
-                                                                                         double tuple_fraction);
+static double preprocess_limit(PlannerInfo *root,
+                                                          double tuple_fraction,
+                                                          int *offset_est, int *count_est);
 static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
                                           Path *cheapest_path, Path *sorted_path,
                                           List *sort_pathkeys, List *group_pathkeys,
@@ -649,13 +650,16 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 {
        Query      *parse = root->parse;
        List       *tlist = parse->targetList;
+       int                     offset_est;
+       int                     count_est;
        Plan       *result_plan;
        List       *current_pathkeys;
        List       *sort_pathkeys;
 
-       /* Tweak caller-supplied tuple_fraction if have LIMIT */
-       if (parse->limitCount != NULL)
-               tuple_fraction = adjust_tuple_fraction_for_limit(root, tuple_fraction);
+       /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
+       if (parse->limitCount || parse->limitOffset)
+               tuple_fraction = preprocess_limit(root, tuple_fraction,
+                                                                                 &offset_est, &count_est);
 
        if (parse->setOperations)
        {
@@ -1144,11 +1148,13 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
        /*
         * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
         */
-       if (parse->limitOffset || parse->limitCount)
+       if (parse->limitCount || parse->limitOffset)
        {
                result_plan = (Plan *) make_limit(result_plan,
                                                                                  parse->limitOffset,
-                                                                                 parse->limitCount);
+                                                                                 parse->limitCount,
+                                                                                 offset_est,
+                                                                                 count_est);
        }
 
        /*
@@ -1161,72 +1167,107 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 }
 
 /*
- * adjust_tuple_fraction_for_limit - adjust tuple fraction for LIMIT
+ * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
  *
- * If the query contains LIMIT, we adjust the caller-supplied tuple_fraction
- * accordingly.  This is not overridable by the caller, since it reflects plan
- * actions that grouping_planner() will certainly take, not assumptions about
- * context.
+ * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
+ * results back in *count_est and *offset_est.  These variables are set to
+ * 0 if the corresponding clause is not present, and -1 if it's present
+ * but we couldn't estimate the value for it.  (The "0" convention is OK
+ * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
+ * LIMIT 0 as though it were LIMIT 1.  But this is in line with the planner's
+ * usual practice of never estimating less than one row.)  These values will
+ * be passed to make_limit, which see if you change this code.
+ *
+ * The return value is the suitably adjusted tuple_fraction to use for
+ * planning the query.  This adjustment is not overridable, since it reflects
+ * plan actions that grouping_planner() will certainly take, not assumptions
+ * about context.
  */
 static double
-adjust_tuple_fraction_for_limit(PlannerInfo *root, double tuple_fraction)
+preprocess_limit(PlannerInfo *root, double tuple_fraction,
+                                int *offset_est, int *count_est)
 {
        Query      *parse = root->parse;
-       double          limit_fraction = 0.0;
+       Node       *est;
+       double          limit_fraction;
 
-       /* Should not be called unless LIMIT */
-       Assert(parse->limitCount != NULL);
+       /* Should not be called unless LIMIT or OFFSET */
+       Assert(parse->limitCount || parse->limitOffset);
 
        /*
-        * A LIMIT clause limits the absolute number of tuples returned. However,
-        * if it's not a constant LIMIT then we have to punt; for lack of a better
-        * idea, assume 10% of the plan's result is wanted.
+        * Try to obtain the clause values.  We use estimate_expression_value
+        * primarily because it can sometimes do something useful with Params.
         */
-       if (IsA(parse->limitCount, Const))
+       if (parse->limitCount)
        {
-               Const      *limitc = (Const *) parse->limitCount;
-               int32           count = DatumGetInt32(limitc->constvalue);
-
-               /*
-                * A NULL-constant LIMIT represents "LIMIT ALL", which we treat the
-                * same as no limit (ie, expect to retrieve all the tuples).
-                */
-               if (!limitc->constisnull && count > 0)
+               est = estimate_expression_value(parse->limitCount);
+               if (est && IsA(est, Const))
                {
-                       limit_fraction = (double) count;
-                       /* We must also consider the OFFSET, if present */
-                       if (parse->limitOffset != NULL)
+                       if (((Const *) est)->constisnull)
                        {
-                               if (IsA(parse->limitOffset, Const))
-                               {
-                                       int32           offset;
-
-                                       limitc = (Const *) parse->limitOffset;
-                                       offset = DatumGetInt32(limitc->constvalue);
-                                       if (!limitc->constisnull && offset > 0)
-                                               limit_fraction += (double) offset;
-                               }
-                               else
-                               {
-                                       /* OFFSET is an expression ... punt ... */
-                                       limit_fraction = 0.10;
-                               }
+                               /* NULL indicates LIMIT ALL, ie, no limit */
+                               *count_est = 0;                 /* treat as not present */
+                       }
+                       else
+                       {
+                               *count_est = DatumGetInt32(((Const *) est)->constvalue);
+                               if (*count_est <= 0)
+                                       *count_est = 1;         /* force to at least 1 */
                        }
                }
+               else
+                       *count_est = -1;        /* can't estimate */
        }
        else
+               *count_est = 0;                 /* not present */
+
+       if (parse->limitOffset)
        {
-               /* LIMIT is an expression ... punt ... */
-               limit_fraction = 0.10;
+               est = estimate_expression_value(parse->limitOffset);
+               if (est && IsA(est, Const))
+               {
+                       if (((Const *) est)->constisnull)
+                       {
+                               /* Treat NULL as no offset; the executor will too */
+                               *offset_est = 0;                /* treat as not present */
+                       }
+                       else
+                       {
+                               *offset_est = DatumGetInt32(((Const *) est)->constvalue);
+                               if (*offset_est < 0)
+                                       *offset_est = 0;        /* less than 0 is same as 0 */
+                       }
+               }
+               else
+                       *offset_est = -1;       /* can't estimate */
        }
+       else
+               *offset_est = 0;                /* not present */
 
-       if (limit_fraction > 0.0)
+       if (*count_est != 0)
        {
+               /*
+                * A LIMIT clause limits the absolute number of tuples returned.
+                * However, if it's not a constant LIMIT then we have to guess; for
+                * lack of a better idea, assume 10% of the plan's result is wanted.
+                */
+               if (*count_est < 0 || *offset_est < 0)
+               {
+                       /* LIMIT or OFFSET is an expression ... punt ... */
+                       limit_fraction = 0.10;
+               }
+               else
+               {
+                       /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
+                       limit_fraction = (double) *count_est + (double) *offset_est;
+               }
+
                /*
                 * If we have absolute limits from both caller and LIMIT, use the
-                * smaller value; if one is fractional and the other absolute,
-                * treat the fraction as a fraction of the absolute value;
-                * else we can multiply the two fractions together.
+                * smaller value; likewise if they are both fractional.  If one is
+                * fractional and the other absolute, we can't easily determine which
+                * is smaller, but we use the heuristic that the absolute will usually
+                * be smaller.
                 */
                if (tuple_fraction >= 1.0)
                {
@@ -1237,25 +1278,20 @@ adjust_tuple_fraction_for_limit(PlannerInfo *root, double tuple_fraction)
                        }
                        else
                        {
-                               /* caller absolute, limit fractional */
-                               tuple_fraction *= limit_fraction;
-                               if (tuple_fraction < 1.0)
-                                       tuple_fraction = 1.0;
+                               /* caller absolute, limit fractional; use caller's value */
                        }
                }
                else if (tuple_fraction > 0.0)
                {
                        if (limit_fraction >= 1.0)
                        {
-                               /* caller fractional, limit absolute */
-                               tuple_fraction *= limit_fraction;
-                               if (tuple_fraction < 1.0)
-                                       tuple_fraction = 1.0;
+                               /* caller fractional, limit absolute; use limit */
+                               tuple_fraction = limit_fraction;
                        }
                        else
                        {
                                /* both fractional */
-                               tuple_fraction *= limit_fraction;
+                               tuple_fraction = Min(tuple_fraction, limit_fraction);
                        }
                }
                else
@@ -1264,6 +1300,56 @@ adjust_tuple_fraction_for_limit(PlannerInfo *root, double tuple_fraction)
                        tuple_fraction = limit_fraction;
                }
        }
+       else if (*offset_est != 0 && tuple_fraction > 0.0)
+       {
+               /*
+                * We have an OFFSET but no LIMIT.  This acts entirely differently
+                * from the LIMIT case: here, we need to increase rather than
+                * decrease the caller's tuple_fraction, because the OFFSET acts
+                * to cause more tuples to be fetched instead of fewer.  This only
+                * matters if we got a tuple_fraction > 0, however.
+                *
+                * As above, use 10% if OFFSET is present but unestimatable.
+                */
+               if (*offset_est < 0)
+                       limit_fraction = 0.10;
+               else
+                       limit_fraction = (double) *offset_est;
+
+               /*
+                * If we have absolute counts from both caller and OFFSET, add them
+                * together; likewise if they are both fractional.  If one is
+                * fractional and the other absolute, we want to take the larger,
+                * and we heuristically assume that's the fractional one.
+                */
+               if (tuple_fraction >= 1.0)
+               {
+                       if (limit_fraction >= 1.0)
+                       {
+                               /* both absolute, so add them together */
+                               tuple_fraction += limit_fraction;
+                       }
+                       else
+                       {
+                               /* caller absolute, limit fractional; use limit */
+                               tuple_fraction = limit_fraction;
+                       }
+               }
+               else
+               {
+                       if (limit_fraction >= 1.0)
+                       {
+                               /* caller fractional, limit absolute; use caller's value */
+                       }
+                       else
+                       {
+                               /* both fractional, so add them together */
+                               tuple_fraction += limit_fraction;
+                               if (tuple_fraction >= 1.0)
+                                       tuple_fraction = 0.0; /* assume fetch all */
+                       }
+               }
+       }
 
        return tuple_fraction;
 }
index 451863518605953bf8f0303ee9ac48bec4bc69f4..474a14da9a0cc80be3a6581ef09c990c0fb643a1 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.86 2005/06/05 22:32:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.87 2005/08/18 17:51:12 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -54,7 +54,8 @@ extern Group *make_group(PlannerInfo *root, List *tlist, List *qual,
 extern Material *make_material(Plan *lefttree);
 extern Plan *materialize_finished_plan(Plan *subplan);
 extern Unique *make_unique(Plan *lefttree, List *distinctList);
-extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount);
+extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
+                                                int offset_est, int count_est);
 extern SetOp *make_setop(SetOpCmd cmd, Plan *lefttree,
                   List *distinctList, AttrNumber flagColIdx);
 extern Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);