Further fallout from the MergeAppend patch.
authorTom Lane <tgl@sss.pgh.pa.us>
Thu, 18 Nov 2010 05:30:10 +0000 (00:30 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Thu, 18 Nov 2010 05:30:10 +0000 (00:30 -0500)
Fix things so that top-N sorting can be used in child Sort nodes of a
MergeAppend node, when there is a LIMIT and no intervening joins or
grouping.  Actually doing this on the executor side isn't too bad,
but it's a bit messier to get the planner to cost it properly.
Per gripe from Robert Haas.

In passing, fix an oversight in the original top-N-sorting patch:
query_planner should not assume that a LIMIT can be used to make an
explicit sort cheaper when there will be grouping or aggregation in
between.  Possibly this should be back-patched, but I'm not sure the
mistake is serious enough to be a real problem in practice.

src/backend/executor/nodeLimit.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/planmain.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/util/pathnode.c
src/include/nodes/relation.h

index ce24f1b00af789bc71d707535329c9d4b1c3347d..5ed8664f66f669c3d38d214203f2684d46d795fb 100644 (file)
@@ -25,6 +25,7 @@
 #include "executor/nodeLimit.h"
 
 static void recompute_limits(LimitState *node);
+static void pass_down_bound(LimitState *node, PlanState *child_node);
 
 
 /* ----------------------------------------------------------------
@@ -293,26 +294,35 @@ recompute_limits(LimitState *node)
        /* Set state-machine state */
        node->lstate = LIMIT_RESCAN;
 
-       /*
-        * If we have a COUNT, and our input is a Sort node, notify it that it can
-        * use bounded sort.
-        *
-        * This is a bit of a kluge, but we don't have any more-abstract way of
-        * communicating between the two nodes; and it doesn't seem worth trying
-        * to invent one without some more examples of special communication
-        * needs.
-        *
-        * Note: it is the responsibility of nodeSort.c to react properly to
-        * changes of these parameters.  If we ever do redesign this, it'd be a
-        * good idea to integrate this signaling with the parameter-change
-        * mechanism.
-        */
-       if (IsA(outerPlanState(node), SortState))
+       /* Notify child node about limit, if useful */
+       pass_down_bound(node, outerPlanState(node));
+}
+
+/*
+ * If we have a COUNT, and our input is a Sort node, notify it that it can
+ * use bounded sort.  Also, if our input is a MergeAppend, we can apply the
+ * same bound to any Sorts that are direct children of the MergeAppend,
+ * since the MergeAppend surely need read no more than that many tuples from
+ * any one input.  We also have to be prepared to look through a Result,
+ * since the planner might stick one atop MergeAppend for projection purposes.
+ *
+ * This is a bit of a kluge, but we don't have any more-abstract way of
+ * communicating between the two nodes; and it doesn't seem worth trying
+ * to invent one without some more examples of special communication needs.
+ *
+ * Note: it is the responsibility of nodeSort.c to react properly to
+ * changes of these parameters.  If we ever do redesign this, it'd be a
+ * good idea to integrate this signaling with the parameter-change mechanism.
+ */
+static void
+pass_down_bound(LimitState *node, PlanState *child_node)
+{
+       if (IsA(child_node, SortState))
        {
-               SortState  *sortState = (SortState *) outerPlanState(node);
+               SortState  *sortState = (SortState *) child_node;
                int64           tuples_needed = node->count + node->offset;
 
-               /* negative test checks for overflow */
+               /* negative test checks for overflow in sum */
                if (node->noCount || tuples_needed < 0)
                {
                        /* make sure flag gets reset if needed upon rescan */
@@ -324,6 +334,19 @@ recompute_limits(LimitState *node)
                        sortState->bound = tuples_needed;
                }
        }
+       else if (IsA(child_node, MergeAppendState))
+       {
+               MergeAppendState *maState = (MergeAppendState *) child_node;
+               int                     i;
+
+               for (i = 0; i < maState->ms_nplans; i++)
+                       pass_down_bound(node, maState->mergeplans[i]);
+       }
+       else if (IsA(child_node, ResultState))
+       {
+               if (outerPlanState(child_node))
+                       pass_down_bound(node, outerPlanState(child_node));
+       }
 }
 
 /* ----------------------------------------------------------------
index 61aea612fd4ee822b43e29069769b2106886d39f..afbfccabda50c1b8c30735e5fb469f4deeb4afa6 100644 (file)
@@ -1493,6 +1493,7 @@ _outMergeAppendPath(StringInfo str, MergeAppendPath *node)
        _outPathInfo(str, (Path *) node);
 
        WRITE_NODE_FIELD(subpaths);
+       WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
 }
 
 static void
@@ -1611,6 +1612,7 @@ _outPlannerInfo(StringInfo str, PlannerInfo *node)
        WRITE_NODE_FIELD(minmax_aggs);
        WRITE_FLOAT_FIELD(total_table_pages, "%.0f");
        WRITE_FLOAT_FIELD(tuple_fraction, "%.4f");
+       WRITE_FLOAT_FIELD(limit_tuples, "%.0f");
        WRITE_BOOL_FIELD(hasInheritedTarget);
        WRITE_BOOL_FIELD(hasJoinRTEs);
        WRITE_BOOL_FIELD(hasHavingQual);
index 7a84bd91239895e400862647bc25724275cab583..41ad512a2963e91b1efc9371c6885a8596b4c938 100644 (file)
@@ -714,7 +714,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
                if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
                        subplan = (Plan *) make_sort(root, subplan, numsortkeys,
                                                                                 sortColIdx, sortOperators, nullsFirst,
-                                                                                -1.0);
+                                                                                best_path->limit_tuples);
 
                subplans = lappend(subplans, subplan);
        }
index cab6e9e25ad24af8abcd20bd62f11c8df8319574..9e6b0b724c118799b03ae52feb97db79233c4b2e 100644 (file)
@@ -101,8 +101,9 @@ query_planner(PlannerInfo *root, List *tlist,
        ListCell   *lc;
        double          total_pages;
 
-       /* Make tuple_fraction accessible to lower-level routines */
+       /* Make tuple_fraction, limit_tuples accessible to lower-level routines */
        root->tuple_fraction = tuple_fraction;
+       root->limit_tuples = limit_tuples;
 
        *num_groups = 1;                        /* default result */
 
@@ -315,6 +316,9 @@ query_planner(PlannerInfo *root, List *tlist,
                        !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) ||
                 !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
                        tuple_fraction = 0.0;
+
+               /* In any case, limit_tuples shouldn't be specified here */
+               Assert(limit_tuples < 0);
        }
        else if (parse->hasAggs || root->hasHavingQual)
        {
@@ -323,6 +327,9 @@ query_planner(PlannerInfo *root, List *tlist,
                 * it will deliver a single result row (so leave *num_groups 1).
                 */
                tuple_fraction = 0.0;
+
+               /* limit_tuples shouldn't be specified here */
+               Assert(limit_tuples < 0);
        }
        else if (parse->distinctClause)
        {
@@ -347,6 +354,9 @@ query_planner(PlannerInfo *root, List *tlist,
                 */
                if (tuple_fraction >= 1.0)
                        tuple_fraction /= *num_groups;
+
+               /* limit_tuples shouldn't be specified here */
+               Assert(limit_tuples < 0);
        }
        else
        {
index 620888cbb86a9eddf29112ac32f815c9d4a4bd63..6324bce2403068a70d5495f61fb40fc68de6f9b5 100644 (file)
@@ -968,6 +968,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
        {
                /* No set operations, do regular planning */
                List       *sub_tlist;
+               double          sub_limit_tuples;
                AttrNumber *groupColIdx = NULL;
                bool            need_tlist_eval = true;
                QualCost        tlist_cost;
@@ -1119,13 +1120,28 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                else
                        root->query_pathkeys = NIL;
 
+               /*
+                * Figure out whether there's a hard limit on the number of rows that
+                * query_planner's result subplan needs to return.  Even if we know a
+                * hard limit overall, it doesn't apply if the query has any
+                * grouping/aggregation operations.
+                */
+               if (parse->groupClause ||
+                       parse->distinctClause ||
+                       parse->hasAggs ||
+                       parse->hasWindowFuncs ||
+                       root->hasHavingQual)
+                       sub_limit_tuples = -1.0;
+               else
+                       sub_limit_tuples = limit_tuples;
+
                /*
                 * Generate the best unsorted and presorted paths for this Query (but
                 * note there may not be any presorted path).  query_planner will also
                 * estimate the number of groups in the query, and canonicalize all
                 * the pathkeys.
                 */
-               query_planner(root, sub_tlist, tuple_fraction, limit_tuples,
+               query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
                                          &cheapest_path, &sorted_path, &dNumGroups);
 
                /*
index 20d8074bce4054cae7c3fdae251d34fe46793f0d..231d221b21e24661405cc58e9d9da776b2afb879 100644 (file)
@@ -694,6 +694,35 @@ create_merge_append_path(PlannerInfo *root,
        pathnode->path.pathkeys = pathkeys;
        pathnode->subpaths = subpaths;
 
+       /*
+        * Apply query-wide LIMIT if known and path is for sole base relation.
+        * Finding out the latter at this low level is a bit klugy.
+        */
+       pathnode->limit_tuples = root->limit_tuples;
+       if (pathnode->limit_tuples >= 0)
+       {
+               Index           rti;
+
+               for (rti = 1; rti < root->simple_rel_array_size; rti++)
+               {
+                       RelOptInfo *brel = root->simple_rel_array[rti];
+
+                       if (brel == NULL)
+                               continue;
+
+                       /* ignore RTEs that are "other rels" */
+                       if (brel->reloptkind != RELOPT_BASEREL)
+                               continue;
+
+                       if (brel != rel)
+                       {
+                               /* Oops, it's a join query */
+                               pathnode->limit_tuples = -1.0;
+                               break;
+                       }
+               }
+       }
+
        /* Add up all the costs of the input paths */
        input_startup_cost = 0;
        input_total_cost = 0;
@@ -720,7 +749,7 @@ create_merge_append_path(PlannerInfo *root,
                                          subpath->parent->width,
                                          0.0,
                                          work_mem,
-                                         -1.0);
+                                         pathnode->limit_tuples);
                        input_startup_cost += sort_path.startup_cost;
                        input_total_cost += sort_path.total_cost;
                }
index f885f5a0c46f6ceec15a06e8f0c1f3b30aab3d33..81126a236656cdec77a822b3fb3a50b59364d81e 100644 (file)
@@ -198,6 +198,7 @@ typedef struct PlannerInfo
        double          total_table_pages;              /* # of pages in all tables of query */
 
        double          tuple_fraction; /* tuple_fraction passed to query_planner */
+       double          limit_tuples;   /* limit_tuples passed to query_planner */
 
        bool            hasInheritedTarget;             /* true if parse->resultRelation is an
                                                                                 * inheritance child rel */
@@ -766,6 +767,7 @@ typedef struct MergeAppendPath
 {
        Path            path;
        List       *subpaths;           /* list of component Paths */
+       double          limit_tuples;   /* hard limit on output tuples, or -1 */
 } MergeAppendPath;
 
 /*