]> granicus.if.org Git - postgresql/blobdiff - src/backend/optimizer/plan/planner.c
Change LIMIT/OFFSET to use int8
[postgresql] / src / backend / optimizer / plan / planner.c
index 76ffe04078fabfe7014f2a9fc28d9aa3189c92f3..5545da4978fcf1a2d8adb8ebffc39054a64c135c 100644 (file)
@@ -3,12 +3,12 @@
  * planner.c
  *       The query optimizer external interface.
  *
- * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.188 2005/06/05 22:32:56 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.204 2006/07/26 00:34:48 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include <limits.h>
 
 #include "catalog/pg_operator.h"
-#include "catalog/pg_type.h"
 #include "executor/executor.h"
 #include "executor/nodeAgg.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
-#ifdef OPTIMIZER_DEBUG
-#include "nodes/print.h"
-#endif
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/subselect.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
-#include "parser/parsetree.h"
+#ifdef OPTIMIZER_DEBUG
+#include "nodes/print.h"
+#endif
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
-#include "utils/selfuncs.h"
+#include "parser/parsetree.h"
 #include "utils/syscache.h"
 
 
@@ -47,20 +45,23 @@ ParamListInfo PlannerBoundParamList = NULL;         /* current boundParams */
 
 
 /* Expression kind codes for preprocess_expression */
-#define EXPRKIND_QUAL  0
-#define EXPRKIND_TARGET 1
-#define EXPRKIND_RTFUNC 2
-#define EXPRKIND_LIMIT 3
-#define EXPRKIND_ININFO 4
+#define EXPRKIND_QUAL          0
+#define EXPRKIND_TARGET                1
+#define EXPRKIND_RTFUNC                2
+#define EXPRKIND_LIMIT         3
+#define EXPRKIND_ININFO                4
+#define EXPRKIND_APPINFO       5
 
 
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
-static Plan *inheritance_planner(PlannerInfo *root, List *inheritlist);
+static Plan *inheritance_planner(PlannerInfo *root);
 static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction);
+static double preprocess_limit(PlannerInfo *root,
+                                double tuple_fraction,
+                                int64 *offset_est, int64 *count_est);
 static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
                                           Path *cheapest_path, Path *sorted_path,
-                                          List *sort_pathkeys, List *group_pathkeys,
                                           double dNumGroups, AggClauseCounts *agg_counts);
 static bool hash_safe_grouping(PlannerInfo *root);
 static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
@@ -93,14 +94,13 @@ planner(Query *parse, bool isCursor, int cursorOptions,
         * these global state variables must be saved and restored.
         *
         * Query level and the param list cannot be moved into the per-query
-        * PlannerInfo structure since their whole purpose is communication
-        * across multiple sub-queries. Also, boundParams is explicitly info
-        * from outside the query, and so is likewise better handled as a global
-        * variable.
+        * PlannerInfo structure since their whole purpose is communication across
+        * multiple sub-queries. Also, boundParams is explicitly info from outside
+        * the query, and so is likewise better handled as a global variable.
         *
         * Note we do NOT save and restore PlannerPlanId: it exists to assign
-        * unique IDs to SubPlan nodes, and we want those IDs to be unique for
-        * the life of a backend.  Also, PlannerInitPlan is saved/restored in
+        * unique IDs to SubPlan nodes, and we want those IDs to be unique for the
+        * life of a backend.  Also, PlannerInitPlan is saved/restored in
         * subquery_planner, not here.
         */
        save_PlannerQueryLevel = PlannerQueryLevel;
@@ -116,10 +116,10 @@ planner(Query *parse, bool isCursor, int cursorOptions,
        if (isCursor)
        {
                /*
-                * We have no real idea how many tuples the user will ultimately
-                * FETCH from a cursor, but it seems a good bet that he doesn't
-                * want 'em all.  Optimize for 10% retrieval (you gotta better
-                * number?      Should this be a SETtable parameter?)
+                * We have no real idea how many tuples the user will ultimately FETCH
+                * from a cursor, but it seems a good bet that he doesn't want 'em
+                * all.  Optimize for 10% retrieval (you gotta better number?  Should
+                * this be a SETtable parameter?)
                 */
                tuple_fraction = 0.10;
        }
@@ -192,9 +192,7 @@ subquery_planner(Query *parse, double tuple_fraction,
        int                     saved_planid = PlannerPlanId;
        PlannerInfo *root;
        Plan       *plan;
-       bool            hasOuterJoins;
        List       *newHaving;
-       List       *lst;
        ListCell   *l;
 
        /* Set up for a new level of subquery */
@@ -204,14 +202,15 @@ subquery_planner(Query *parse, double tuple_fraction,
        /* Create a PlannerInfo data structure for this subquery */
        root = makeNode(PlannerInfo);
        root->parse = parse;
+       root->in_info_list = NIL;
+       root->append_rel_list = NIL;
 
        /*
-        * Look for IN clauses at the top level of WHERE, and transform them
-        * into joins.  Note that this step only handles IN clauses originally
-        * at top level of WHERE; if we pull up any subqueries in the next
-        * step, their INs are processed just before pulling them up.
+        * Look for IN clauses at the top level of WHERE, and transform them into
+        * joins.  Note that this step only handles IN clauses originally at top
+        * level of WHERE; if we pull up any subqueries in the next step, their
+        * INs are processed just before pulling them up.
         */
-       root->in_info_list = NIL;
        if (parse->hasSubLinks)
                parse->jointree->quals = pull_up_IN_clauses(root,
                                                                                                        parse->jointree->quals);
@@ -221,17 +220,21 @@ subquery_planner(Query *parse, double tuple_fraction,
         * this query.
         */
        parse->jointree = (FromExpr *)
-               pull_up_subqueries(root, (Node *) parse->jointree, false);
+               pull_up_subqueries(root, (Node *) parse->jointree, false, false);
 
        /*
-        * Detect whether any rangetable entries are RTE_JOIN kind; if not, we
-        * can avoid the expense of doing flatten_join_alias_vars().  Also
-        * check for outer joins --- if none, we can skip
-        * reduce_outer_joins(). This must be done after we have done
+        * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
+        * avoid the expense of doing flatten_join_alias_vars().  Also check for
+        * outer joins --- if none, we can skip reduce_outer_joins() and some
+        * other processing.  This must be done after we have done
         * pull_up_subqueries, of course.
+        *
+        * Note: if reduce_outer_joins manages to eliminate all outer joins,
+        * root->hasOuterJoins is not reset currently.  This is OK since its
+        * purpose is merely to suppress unnecessary processing in simple cases.
         */
        root->hasJoinRTEs = false;
-       hasOuterJoins = false;
+       root->hasOuterJoins = false;
        foreach(l, parse->rtable)
        {
                RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
@@ -241,20 +244,33 @@ subquery_planner(Query *parse, double tuple_fraction,
                        root->hasJoinRTEs = true;
                        if (IS_OUTER_JOIN(rte->jointype))
                        {
-                               hasOuterJoins = true;
+                               root->hasOuterJoins = true;
                                /* Can quit scanning once we find an outer join */
                                break;
                        }
                }
        }
 
+       /*
+        * Expand any rangetable entries that are inheritance sets into "append
+        * relations".  This can add entries to the rangetable, but they must be
+        * plain base relations not joins, so it's OK (and marginally more
+        * efficient) to do it after checking for join RTEs.  We must do it after
+        * pulling up subqueries, else we'd fail to handle inherited tables in
+        * subqueries.
+        */
+       expand_inherited_tables(root);
+
        /*
         * Set hasHavingQual to remember if HAVING clause is present.  Needed
-        * because preprocess_expression will reduce a constant-true condition
-        * to an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
+        * because preprocess_expression will reduce a constant-true condition to
+        * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
         */
        root->hasHavingQual = (parse->havingQual != NULL);
 
+       /* Clear this flag; might get set in distribute_qual_to_rels */
+       root->hasPseudoConstantQuals = false;
+
        /*
         * Do expression preprocessing on targetlist and quals.
         */
@@ -275,6 +291,9 @@ subquery_planner(Query *parse, double tuple_fraction,
        root->in_info_list = (List *)
                preprocess_expression(root, (Node *) root->in_info_list,
                                                          EXPRKIND_ININFO);
+       root->append_rel_list = (List *)
+               preprocess_expression(root, (Node *) root->append_rel_list,
+                                                         EXPRKIND_APPINFO);
 
        /* Also need to preprocess expressions for function RTEs */
        foreach(l, parse->rtable)
@@ -287,29 +306,29 @@ subquery_planner(Query *parse, double tuple_fraction,
        }
 
        /*
-        * In some cases we may want to transfer a HAVING clause into WHERE.
-        * We cannot do so if the HAVING clause contains aggregates (obviously)
-        * or volatile functions (since a HAVING clause is supposed to be executed
+        * In some cases we may want to transfer a HAVING clause into WHERE. We
+        * cannot do so if the HAVING clause contains aggregates (obviously) or
+        * volatile functions (since a HAVING clause is supposed to be executed
         * only once per group).  Also, it may be that the clause is so expensive
         * to execute that we're better off doing it only once per group, despite
         * the loss of selectivity.  This is hard to estimate short of doing the
         * entire planning process twice, so we use a heuristic: clauses
-        * containing subplans are left in HAVING.  Otherwise, we move or copy
-        * the HAVING clause into WHERE, in hopes of eliminating tuples before
+        * containing subplans are left in HAVING.  Otherwise, we move or copy the
+        * HAVING clause into WHERE, in hopes of eliminating tuples before
         * aggregation instead of after.
         *
         * If the query has explicit grouping then we can simply move such a
-        * clause into WHERE; any group that fails the clause will not be
-        * in the output because none of its tuples will reach the grouping
-        * or aggregation stage.  Otherwise we must have a degenerate
-        * (variable-free) HAVING clause, which we put in WHERE so that
-        * query_planner() can use it in a gating Result node, but also keep
-        * in HAVING to ensure that we don't emit a bogus aggregated row.
-        * (This could be done better, but it seems not worth optimizing.)
+        * clause into WHERE; any group that fails the clause will not be in the
+        * output because none of its tuples will reach the grouping or
+        * aggregation stage.  Otherwise we must have a degenerate (variable-free)
+        * HAVING clause, which we put in WHERE so that query_planner() can use it
+        * in a gating Result node, but also keep in HAVING to ensure that we
+        * don't emit a bogus aggregated row. (This could be done better, but it
+        * seems not worth optimizing.)
         *
         * Note that both havingQual and parse->jointree->quals are in
-        * implicitly-ANDed-list form at this point, even though they are
-        * declared as Node *.
+        * implicitly-ANDed-list form at this point, even though they are declared
+        * as Node *.
         */
        newHaving = NIL;
        foreach(l, (List *) parse->havingQual)
@@ -341,39 +360,27 @@ subquery_planner(Query *parse, double tuple_fraction,
        parse->havingQual = (Node *) newHaving;
 
        /*
-        * If we have any outer joins, try to reduce them to plain inner
-        * joins. This step is most easily done after we've done expression
+        * If we have any outer joins, try to reduce them to plain inner joins.
+        * This step is most easily done after we've done expression
         * preprocessing.
         */
-       if (hasOuterJoins)
+       if (root->hasOuterJoins)
                reduce_outer_joins(root);
 
        /*
-        * See if we can simplify the jointree; opportunities for this may
-        * come from having pulled up subqueries, or from flattening explicit
-        * JOIN syntax.  We must do this after flattening JOIN alias
-        * variables, since eliminating explicit JOIN nodes from the jointree
-        * will cause get_relids_for_join() to fail.  But it should happen
-        * after reduce_outer_joins, anyway.
-        */
-       parse->jointree = (FromExpr *)
-               simplify_jointree(root, (Node *) parse->jointree);
-
-       /*
-        * Do the main planning.  If we have an inherited target relation,
-        * that needs special processing, else go straight to
-        * grouping_planner.
+        * Do the main planning.  If we have an inherited target relation, that
+        * needs special processing, else go straight to grouping_planner.
         */
        if (parse->resultRelation &&
-               (lst = expand_inherited_rtentry(root, parse->resultRelation)) != NIL)
-               plan = inheritance_planner(root, lst);
+               rt_fetch(parse->resultRelation, parse->rtable)->inh)
+               plan = inheritance_planner(root);
        else
                plan = grouping_planner(root, tuple_fraction);
 
        /*
         * If any subplans were generated, or if we're inside a subplan, build
-        * initPlan list and extParam/allParam sets for plan nodes, and attach
-        * the initPlans to the top plan node.
+        * initPlan list and extParam/allParam sets for plan nodes, and attach the
+        * initPlans to the top plan node.
         */
        if (PlannerPlanId != saved_planid || PlannerQueryLevel > 1)
                SS_finalize_plan(plan, parse->rtable);
@@ -400,9 +407,9 @@ static Node *
 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 {
        /*
-        * Fall out quickly if expression is empty.  This occurs often enough
-        * to be worth checking.  Note that null->null is the correct conversion
-        * for implicit-AND result format, too.
+        * Fall out quickly if expression is empty.  This occurs often enough to
+        * be worth checking.  Note that null->null is the correct conversion for
+        * implicit-AND result format, too.
         */
        if (expr == NULL)
                return NULL;
@@ -410,8 +417,7 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
        /*
         * If the query has any join RTEs, replace join alias variables with
         * base-relation variables. We must do this before sublink processing,
-        * else sublinks expanded out from join aliases wouldn't get
-        * processed.
+        * else sublinks expanded out from join aliases wouldn't get processed.
         */
        if (root->hasJoinRTEs)
                expr = flatten_join_alias_vars(root, expr);
@@ -425,12 +431,12 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
         * with AND directly under AND, nor OR directly under OR.
         *
         * Because this is a relatively expensive process, we skip it when the
-        * query is trivial, such as "SELECT 2+2;" or "INSERT ... VALUES()".
-        * The expression will only be evaluated once anyway, so no point in
+        * query is trivial, such as "SELECT 2+2;" or "INSERT ... VALUES()". The
+        * expression will only be evaluated once anyway, so no point in
         * pre-simplifying; we can't execute it any faster than the executor can,
         * and we will waste cycles copying the tree.  Notice however that we
-        * still must do it for quals (to get AND/OR flatness); and if we are
-        * in a subquery we should not assume it will be done only once.
+        * still must do it for quals (to get AND/OR flatness); and if we are in a
+        * subquery we should not assume it will be done only once.
         */
        if (root->parse->jointree->fromlist != NIL ||
                kind == EXPRKIND_QUAL ||
@@ -455,8 +461,8 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
                expr = SS_process_sublinks(expr, (kind == EXPRKIND_QUAL));
 
        /*
-        * XXX do not insert anything here unless you have grokked the
-        * comments in SS_replace_correlation_vars ...
+        * XXX do not insert anything here unless you have grokked the comments in
+        * SS_replace_correlation_vars ...
         */
 
        /* Replace uplevel vars with Param nodes */
@@ -464,9 +470,9 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind)
                expr = SS_replace_correlation_vars(expr);
 
        /*
-        * If it's a qual or havingQual, convert it to implicit-AND format.
-        * (We don't want to do this before eval_const_expressions, since the
-        * latter would be unable to simplify a top-level AND correctly. Also,
+        * If it's a qual or havingQual, convert it to implicit-AND format. (We
+        * don't want to do this before eval_const_expressions, since the latter
+        * would be unable to simplify a top-level AND correctly. Also,
         * SS_process_sublinks expects explicit-AND format.)
         */
        if (kind == EXPRKIND_QUAL)
@@ -513,106 +519,87 @@ preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
                         (int) nodeTag(jtnode));
 }
 
-/*--------------------
+/*
  * inheritance_planner
  *       Generate a plan in the case where the result relation is an
  *       inheritance set.
  *
- * We have to handle this case differently from cases where a source
- * relation is an inheritance set.     Source inheritance is expanded at
- * the bottom of the plan tree (see allpaths.c), but target inheritance
- * has to be expanded at the top.  The reason is that for UPDATE, each
- * target relation needs a different targetlist matching its own column
- * set.  (This is not so critical for DELETE, but for simplicity we treat
- * inherited DELETE the same way.)     Fortunately, the UPDATE/DELETE target
- * can never be the nullable side of an outer join, so it's OK to generate
- * the plan this way.
- *
- * inheritlist is an integer list of RT indexes for the result relation set.
+ * We have to handle this case differently from cases where a source relation
+ * is an inheritance set. Source inheritance is expanded at the bottom of the
+ * plan tree (see allpaths.c), but target inheritance has to be expanded at
+ * the top.  The reason is that for UPDATE, each target relation needs a
+ * different targetlist matching its own column set.  Also, for both UPDATE
+ * and DELETE, the executor needs the Append plan node at the top, else it
+ * can't keep track of which table is the current target table.  Fortunately,
+ * the UPDATE/DELETE target can never be the nullable side of an outer join,
+ * so it's OK to generate the plan this way.
  *
  * Returns a query plan.
- *--------------------
  */
 static Plan *
-inheritance_planner(PlannerInfo *root, List *inheritlist)
+inheritance_planner(PlannerInfo *root)
 {
        Query      *parse = root->parse;
        int                     parentRTindex = parse->resultRelation;
-       Oid                     parentOID = getrelid(parentRTindex, parse->rtable);
-       int                     mainrtlength = list_length(parse->rtable);
        List       *subplans = NIL;
        List       *tlist = NIL;
+       PlannerInfo subroot;
        ListCell   *l;
 
-       foreach(l, inheritlist)
+       subroot.parse = NULL;           /* catch it if no matches in loop */
+
+       parse->resultRelations = NIL;
+
+       foreach(l, root->append_rel_list)
        {
-               int                     childRTindex = lfirst_int(l);
-               Oid                     childOID = getrelid(childRTindex, parse->rtable);
-               PlannerInfo subroot;
+               AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
                Plan       *subplan;
 
+               /* append_rel_list contains all append rels; ignore others */
+               if (appinfo->parent_relid != parentRTindex)
+                       continue;
+
+               /* Build target-relations list for the executor */
+               parse->resultRelations = lappend_int(parse->resultRelations,
+                                                                                        appinfo->child_relid);
+
                /*
-                * Generate modified query with this rel as target.  We have to
-                * be prepared to translate varnos in in_info_list as well as in
-                * the Query proper.
+                * Generate modified query with this rel as target.  We have to be
+                * prepared to translate varnos in in_info_list as well as in the
+                * Query proper.
                 */
                memcpy(&subroot, root, sizeof(PlannerInfo));
                subroot.parse = (Query *)
-                       adjust_inherited_attrs((Node *) parse,
-                                                                  parentRTindex, parentOID,
-                                                                  childRTindex, childOID);
+                       adjust_appendrel_attrs((Node *) parse,
+                                                                  appinfo);
                subroot.in_info_list = (List *)
-                       adjust_inherited_attrs((Node *) root->in_info_list,
-                                                                  parentRTindex, parentOID,
-                                                                  childRTindex, childOID);
+                       adjust_appendrel_attrs((Node *) root->in_info_list,
+                                                                  appinfo);
+               /* There shouldn't be any OJ info to translate, as yet */
+               Assert(subroot.oj_info_list == NIL);
 
                /* Generate plan */
                subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );
 
                subplans = lappend(subplans, subplan);
 
-               /*
-                * XXX my goodness this next bit is ugly.  Really need to think about
-                * ways to rein in planner's habit of scribbling on its input.
-                *
-                * Planning of the subquery might have modified the rangetable,
-                * either by addition of RTEs due to expansion of inherited source
-                * tables, or by changes of the Query structures inside subquery
-                * RTEs.  We have to ensure that this gets propagated back to the
-                * master copy.  However, if we aren't done planning yet, we also
-                * need to ensure that subsequent calls to grouping_planner have
-                * virgin sub-Queries to work from.  So, if we are at the last
-                * list entry, just copy the subquery rangetable back to the master
-                * copy; if we are not, then extend the master copy by adding
-                * whatever the subquery added.  (We assume these added entries
-                * will go untouched by the future grouping_planner calls.  We are
-                * also effectively assuming that sub-Queries will get planned
-                * identically each time, or at least that the impacts on their
-                * rangetables will be the same each time.  Did I say this is ugly?)
-                */
-               if (lnext(l) == NULL)
-                       parse->rtable = subroot.parse->rtable;
-               else
-               {
-                       int             subrtlength = list_length(subroot.parse->rtable);
-
-                       if (subrtlength > mainrtlength)
-                       {
-                               List       *subrt;
-
-                               subrt = list_copy_tail(subroot.parse->rtable, mainrtlength);
-                               parse->rtable = list_concat(parse->rtable, subrt);
-                               mainrtlength = subrtlength;
-                       }
-               }
-
                /* Save preprocessed tlist from first rel for use in Append */
                if (tlist == NIL)
                        tlist = subplan->targetlist;
        }
 
-       /* Save the target-relations list for the executor, too */
-       parse->resultRelations = inheritlist;
+       /*
+        * Planning might have modified the rangetable, due to changes of the
+        * Query structures inside subquery RTEs.  We have to ensure that this
+        * gets propagated back to the master copy.  But we can't do this until we
+        * are done planning, because all the calls to grouping_planner need
+        * virgin sub-Queries to work from.  (We are effectively assuming that
+        * sub-Queries will get planned identically each time, or at least that
+        * the impacts on their rangetables will be the same each time.)
+        *
+        * XXX should clean this up someday
+        */
+       parse->rtable = subroot.parse->rtable;
 
        /* Mark result as unordered (probably unnecessary) */
        root->query_pathkeys = NIL;
@@ -644,36 +631,54 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 {
        Query      *parse = root->parse;
        List       *tlist = parse->targetList;
+       int64           offset_est = 0;
+       int64           count_est = 0;
        Plan       *result_plan;
        List       *current_pathkeys;
        List       *sort_pathkeys;
+       double          dNumGroups = 0;
+
+       /* Tweak caller-supplied tuple_fraction if we have LIMIT/OFFSET */
+       if (parse->limitCount || parse->limitOffset)
+               tuple_fraction = preprocess_limit(root, tuple_fraction,
+                                                                                 &offset_est, &count_est);
 
        if (parse->setOperations)
        {
                List       *set_sortclauses;
 
                /*
-                * Construct the plan for set operations.  The result will not
-                * need any work except perhaps a top-level sort and/or LIMIT.
+                * If there's a top-level ORDER BY, assume we have to fetch all the
+                * tuples.  This might seem too simplistic given all the hackery below
+                * to possibly avoid the sort ... but a nonzero tuple_fraction is only
+                * of use to plan_set_operations() when the setop is UNION ALL, and
+                * the result of UNION ALL is always unsorted.
                 */
-               result_plan = plan_set_operations(root,
+               if (parse->sortClause)
+                       tuple_fraction = 0.0;
+
+               /*
+                * Construct the plan for set operations.  The result will not need
+                * any work except perhaps a top-level sort and/or LIMIT.
+                */
+               result_plan = plan_set_operations(root, tuple_fraction,
                                                                                  &set_sortclauses);
 
                /*
-                * Calculate pathkeys representing the sort order (if any) of the
-                * set operation's result.  We have to do this before overwriting
-                * the sort key information...
+                * Calculate pathkeys representing the sort order (if any) of the set
+                * operation's result.  We have to do this before overwriting the sort
+                * key information...
                 */
                current_pathkeys = make_pathkeys_for_sortclauses(set_sortclauses,
-                                                                                               result_plan->targetlist);
+                                                                                                       result_plan->targetlist);
                current_pathkeys = canonicalize_pathkeys(root, current_pathkeys);
 
                /*
-                * We should not need to call preprocess_targetlist, since we must
-                * be in a SELECT query node.  Instead, use the targetlist
-                * returned by plan_set_operations (since this tells whether it
-                * returned any resjunk columns!), and transfer any sort key
-                * information from the original tlist.
+                * We should not need to call preprocess_targetlist, since we must be
+                * in a SELECT query node.  Instead, use the targetlist returned by
+                * plan_set_operations (since this tells whether it returned any
+                * resjunk columns!), and transfer any sort key information from the
+                * original tlist.
                 */
                Assert(parse->commandType == CMD_SELECT);
 
@@ -703,11 +708,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                AttrNumber *groupColIdx = NULL;
                bool            need_tlist_eval = true;
                QualCost        tlist_cost;
-               double          sub_tuple_fraction;
                Path       *cheapest_path;
                Path       *sorted_path;
                Path       *best_path;
-               double          dNumGroups = 0;
                long            numGroups = 0;
                AggClauseCounts agg_counts;
                int                     numGroupCols = list_length(parse->groupClause);
@@ -719,20 +722,21 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                tlist = preprocess_targetlist(root, tlist);
 
                /*
-                * Generate appropriate target list for subplan; may be different
-                * from tlist if grouping or aggregation is needed.
+                * Generate appropriate target list for subplan; may be different from
+                * tlist if grouping or aggregation is needed.
                 */
                sub_tlist = make_subplanTargetList(root, tlist,
-                                                                                &groupColIdx, &need_tlist_eval);
+                                                                                  &groupColIdx, &need_tlist_eval);
 
                /*
-                * Calculate pathkeys that represent grouping/ordering
-                * requirements
+                * Calculate pathkeys that represent grouping/ordering requirements.
+                * Stash them in PlannerInfo so that query_planner can canonicalize
+                * them.
                 */
-               group_pathkeys = make_pathkeys_for_sortclauses(parse->groupClause,
-                                                                                                          tlist);
-               sort_pathkeys = make_pathkeys_for_sortclauses(parse->sortClause,
-                                                                                                         tlist);
+               root->group_pathkeys =
+                       make_pathkeys_for_sortclauses(parse->groupClause, tlist);
+               root->sort_pathkeys =
+                       make_pathkeys_for_sortclauses(parse->sortClause, tlist);
 
                /*
                 * Will need actual number of aggregates for estimating costs.
@@ -740,10 +744,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                 * Note: we do not attempt to detect duplicate aggregates here; a
                 * somewhat-overestimated count is okay for our present purposes.
                 *
-                * Note: think not that we can turn off hasAggs if we find no aggs.
-                * It is possible for constant-expression simplification to remove
-                * all explicit references to aggs, but we still have to follow
-                * the aggregate semantics (eg, producing only one output row).
+                * Note: think not that we can turn off hasAggs if we find no aggs. It
+                * is possible for constant-expression simplification to remove all
+                * explicit references to aggs, but we still have to follow the
+                * aggregate semantics (eg, producing only one output row).
                 */
                if (parse->hasAggs)
                {
@@ -754,229 +758,50 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                /*
                 * Figure out whether we need a sorted result from query_planner.
                 *
-                * If we have a GROUP BY clause, then we want a result sorted
-                * properly for grouping.  Otherwise, if there is an ORDER BY
-                * clause, we want to sort by the ORDER BY clause.      (Note: if we
-                * have both, and ORDER BY is a superset of GROUP BY, it would be
-                * tempting to request sort by ORDER BY --- but that might just
-                * leave us failing to exploit an available sort order at all.
-                * Needs more thought...)
+                * If we have a GROUP BY clause, then we want a result sorted properly
+                * for grouping.  Otherwise, if there is an ORDER BY clause, we want
+                * to sort by the ORDER BY clause.      (Note: if we have both, and ORDER
+                * BY is a superset of GROUP BY, it would be tempting to request sort
+                * by ORDER BY --- but that might just leave us failing to exploit an
+                * available sort order at all. Needs more thought...)
                 */
                if (parse->groupClause)
-                       root->query_pathkeys = group_pathkeys;
+                       root->query_pathkeys = root->group_pathkeys;
                else if (parse->sortClause)
-                       root->query_pathkeys = sort_pathkeys;
+                       root->query_pathkeys = root->sort_pathkeys;
                else
                        root->query_pathkeys = NIL;
 
                /*
-                * Adjust tuple_fraction if we see that we are going to apply
-                * limiting/grouping/aggregation/etc.  This is not overridable by
-                * the caller, since it reflects plan actions that this routine
-                * will certainly take, not assumptions about context.
+                * Generate the best unsorted and presorted paths for this Query (but
+                * note there may not be any presorted path).  query_planner will also
+                * estimate the number of groups in the query, and canonicalize all
+                * the pathkeys.
                 */
-               if (parse->limitCount != NULL)
-               {
-                       /*
-                        * A LIMIT clause limits the absolute number of tuples
-                        * returned. However, if it's not a constant LIMIT then we
-                        * have to punt; for lack of a better idea, assume 10% of the
-                        * plan's result is wanted.
-                        */
-                       double          limit_fraction = 0.0;
-
-                       if (IsA(parse->limitCount, Const))
-                       {
-                               Const      *limitc = (Const *) parse->limitCount;
-                               int32           count = DatumGetInt32(limitc->constvalue);
-
-                               /*
-                                * A NULL-constant LIMIT represents "LIMIT ALL", which we
-                                * treat the same as no limit (ie, expect to retrieve all
-                                * the tuples).
-                                */
-                               if (!limitc->constisnull && count > 0)
-                               {
-                                       limit_fraction = (double) count;
-                                       /* We must also consider the OFFSET, if present */
-                                       if (parse->limitOffset != NULL)
-                                       {
-                                               if (IsA(parse->limitOffset, Const))
-                                               {
-                                                       int32           offset;
-
-                                                       limitc = (Const *) parse->limitOffset;
-                                                       offset = DatumGetInt32(limitc->constvalue);
-                                                       if (!limitc->constisnull && offset > 0)
-                                                               limit_fraction += (double) offset;
-                                               }
-                                               else
-                                               {
-                                                       /* OFFSET is an expression ... punt ... */
-                                                       limit_fraction = 0.10;
-                                               }
-                                       }
-                               }
-                       }
-                       else
-                       {
-                               /* LIMIT is an expression ... punt ... */
-                               limit_fraction = 0.10;
-                       }
+               query_planner(root, sub_tlist, tuple_fraction,
+                                         &cheapest_path, &sorted_path, &dNumGroups);
 
-                       if (limit_fraction > 0.0)
-                       {
-                               /*
-                                * If we have absolute limits from both caller and LIMIT,
-                                * use the smaller value; if one is fractional and the
-                                * other absolute, treat the fraction as a fraction of the
-                                * absolute value; else we can multiply the two fractions
-                                * together.
-                                */
-                               if (tuple_fraction >= 1.0)
-                               {
-                                       if (limit_fraction >= 1.0)
-                                       {
-                                               /* both absolute */
-                                               tuple_fraction = Min(tuple_fraction, limit_fraction);
-                                       }
-                                       else
-                                       {
-                                               /* caller absolute, limit fractional */
-                                               tuple_fraction *= limit_fraction;
-                                               if (tuple_fraction < 1.0)
-                                                       tuple_fraction = 1.0;
-                                       }
-                               }
-                               else if (tuple_fraction > 0.0)
-                               {
-                                       if (limit_fraction >= 1.0)
-                                       {
-                                               /* caller fractional, limit absolute */
-                                               tuple_fraction *= limit_fraction;
-                                               if (tuple_fraction < 1.0)
-                                                       tuple_fraction = 1.0;
-                                       }
-                                       else
-                                       {
-                                               /* both fractional */
-                                               tuple_fraction *= limit_fraction;
-                                       }
-                               }
-                               else
-                               {
-                                       /* no info from caller, just use limit */
-                                       tuple_fraction = limit_fraction;
-                               }
-                       }
-               }
+               group_pathkeys = root->group_pathkeys;
+               sort_pathkeys = root->sort_pathkeys;
 
                /*
-                * With grouping or aggregation, the tuple fraction to pass to
-                * query_planner() may be different from what it is at top level.
+                * If grouping, decide whether we want to use hashed grouping.
                 */
-               sub_tuple_fraction = tuple_fraction;
-
                if (parse->groupClause)
                {
-                       /*
-                        * In GROUP BY mode, we have the little problem that we don't
-                        * really know how many input tuples will be needed to make a
-                        * group, so we can't translate an output LIMIT count into an
-                        * input count.  For lack of a better idea, assume 25% of the
-                        * input data will be processed if there is any output limit.
-                        * However, if the caller gave us a fraction rather than an
-                        * absolute count, we can keep using that fraction (which
-                        * amounts to assuming that all the groups are about the same
-                        * size).
-                        */
-                       if (sub_tuple_fraction >= 1.0)
-                               sub_tuple_fraction = 0.25;
-
-                       /*
-                        * If both GROUP BY and ORDER BY are specified, we will need
-                        * two levels of sort --- and, therefore, certainly need to
-                        * read all the input tuples --- unless ORDER BY is a subset
-                        * of GROUP BY.  (We have not yet canonicalized the pathkeys,
-                        * so must use the slower noncanonical comparison method.)
-                        */
-                       if (parse->groupClause && parse->sortClause &&
-                               !noncanonical_pathkeys_contained_in(sort_pathkeys,
-                                                                                                       group_pathkeys))
-                               sub_tuple_fraction = 0.0;
-               }
-               else if (parse->hasAggs)
-               {
-                       /*
-                        * Ungrouped aggregate will certainly want all the input
-                        * tuples.
-                        */
-                       sub_tuple_fraction = 0.0;
-               }
-               else if (parse->distinctClause)
-               {
-                       /*
-                        * SELECT DISTINCT, like GROUP, will absorb an unpredictable
-                        * number of input tuples per output tuple.  Handle the same
-                        * way.
-                        */
-                       if (sub_tuple_fraction >= 1.0)
-                               sub_tuple_fraction = 0.25;
-               }
-
-               /*
-                * Generate the best unsorted and presorted paths for this Query
-                * (but note there may not be any presorted path).
-                */
-               query_planner(root, sub_tlist, sub_tuple_fraction,
-                                         &cheapest_path, &sorted_path);
-
-               /*
-                * We couldn't canonicalize group_pathkeys and sort_pathkeys
-                * before running query_planner(), so do it now.
-                */
-               group_pathkeys = canonicalize_pathkeys(root, group_pathkeys);
-               sort_pathkeys = canonicalize_pathkeys(root, sort_pathkeys);
-
-               /*
-                * If grouping, estimate the number of groups.  (We can't do this
-                * until after running query_planner(), either.)  Then decide
-                * whether we want to use hashed grouping.
-                */
-               if (parse->groupClause)
-               {
-                       List       *groupExprs;
-                       double          cheapest_path_rows;
-
-                       /*
-                        * Beware of the possibility that cheapest_path->parent is NULL.
-                        * This could happen if user does something silly like
-                        *              SELECT 'foo' GROUP BY 1;
-                        */
-                       if (cheapest_path->parent)
-                               cheapest_path_rows = cheapest_path->parent->rows;
-                       else
-                               cheapest_path_rows = 1; /* assume non-set result */
-
-                       groupExprs = get_sortgrouplist_exprs(parse->groupClause,
-                                                                                                parse->targetList);
-                       dNumGroups = estimate_num_groups(root,
-                                                                                        groupExprs,
-                                                                                        cheapest_path_rows);
-                       /* Also want it as a long int --- but 'ware overflow! */
-                       numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
-
                        use_hashed_grouping =
                                choose_hashed_grouping(root, tuple_fraction,
                                                                           cheapest_path, sorted_path,
-                                                                          sort_pathkeys, group_pathkeys,
                                                                           dNumGroups, &agg_counts);
+
+                       /* Also convert # groups to long int --- but 'ware overflow! */
+                       numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
                }
 
                /*
                 * Select the best path.  If we are doing hashed grouping, we will
-                * always read all the input tuples, so use the cheapest-total
-                * path. Otherwise, trust query_planner's decision about which to use.
+                * always read all the input tuples, so use the cheapest-total path.
+                * Otherwise, trust query_planner's decision about which to use.
                 */
                if (use_hashed_grouping || !sorted_path)
                        best_path = cheapest_path;
@@ -984,10 +809,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                        best_path = sorted_path;
 
                /*
-                * Check to see if it's possible to optimize MIN/MAX aggregates.
-                * If so, we will forget all the work we did so far to choose a
-                * "regular" path ... but we had to do it anyway to be able to
-                * tell which way is cheaper.
+                * Check to see if it's possible to optimize MIN/MAX aggregates. If
+                * so, we will forget all the work we did so far to choose a "regular"
+                * path ... but we had to do it anyway to be able to tell which way is
+                * cheaper.
                 */
                result_plan = optimize_minmax_aggregates(root,
                                                                                                 tlist,
@@ -995,8 +820,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                if (result_plan != NULL)
                {
                        /*
-                        * optimize_minmax_aggregates generated the full plan, with
-                        * the right tlist, and it has no sort order.
+                        * optimize_minmax_aggregates generated the full plan, with the
+                        * right tlist, and it has no sort order.
                         */
                        current_pathkeys = NIL;
                }
@@ -1140,8 +965,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                 * GROUP BY without aggregation, so insert a group node (plus
                                 * the appropriate sort node, if necessary).
                                 *
-                                * Add an explicit sort if we couldn't make the path come
-                                * out the way the GROUP node needs it.
+                                * Add an explicit sort if we couldn't make the path come out
+                                * the way the GROUP node needs it.
                                 */
                                if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
                                {
@@ -1169,11 +994,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                                 * This is a degenerate case in which we are supposed to emit
                                 * either 0 or 1 row depending on whether HAVING succeeds.
                                 * Furthermore, there cannot be any variables in either HAVING
-                                * or the targetlist, so we actually do not need the FROM table
-                                * at all!  We can just throw away the plan-so-far and generate
-                                * a Result node.  This is a sufficiently unusual corner case
-                                * that it's not worth contorting the structure of this routine
-                                * to avoid having to generate the plan in the first place.
+                                * or the targetlist, so we actually do not need the FROM
+                                * table at all!  We can just throw away the plan-so-far and
+                                * generate a Result node.      This is a sufficiently unusual
+                                * corner case that it's not worth contorting the structure of
+                                * this routine to avoid having to generate the plan in the
+                                * first place.
                                 */
                                result_plan = (Plan *) make_result(tlist,
                                                                                                   parse->havingQual,
@@ -1183,8 +1009,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
        }                                                       /* end of if (setOperations) */
 
        /*
-        * If we were not able to make the plan come out in the right order,
-        * add an explicit sort step.
+        * If we were not able to make the plan come out in the right order, add
+        * an explicit sort step.
         */
        if (parse->sortClause)
        {
@@ -1206,49 +1032,230 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
                result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
 
                /*
-                * If there was grouping or aggregation, leave plan_rows as-is
-                * (ie, assume the result was already mostly unique).  If not,
-                * it's reasonable to assume the UNIQUE filter has effects
-                * comparable to GROUP BY.
+                * If there was grouping or aggregation, leave plan_rows as-is (ie,
+                * assume the result was already mostly unique).  If not, use the
+                * number of distinct-groups calculated by query_planner.
                 */
                if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs)
-               {
-                       List       *distinctExprs;
-
-                       distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
-                                                                                                       parse->targetList);
-                       result_plan->plan_rows = estimate_num_groups(root,
-                                                                                                                distinctExprs,
-                                                                                                result_plan->plan_rows);
-               }
+                       result_plan->plan_rows = dNumGroups;
        }
 
        /*
         * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
         */
-       if (parse->limitOffset || parse->limitCount)
+       if (parse->limitCount || parse->limitOffset)
        {
                result_plan = (Plan *) make_limit(result_plan,
                                                                                  parse->limitOffset,
-                                                                                 parse->limitCount);
+                                                                                 parse->limitCount,
+                                                                                 offset_est,
+                                                                                 count_est);
        }
 
        /*
-        * Return the actual output ordering in query_pathkeys for possible
-        * use by an outer query level.
+        * Return the actual output ordering in query_pathkeys for possible use by
+        * an outer query level.
         */
        root->query_pathkeys = current_pathkeys;
 
        return result_plan;
 }
 
+/*
+ * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
+ *
+ * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
+ * results back in *count_est and *offset_est.  Each is set to 0 if the
+ * clause is absent or is a NULL constant, and to -1 if it's present but we
+ * couldn't estimate its value.  Otherwise a LIMIT estimate is clamped to at
+ * least 1 and a negative OFFSET to 0.  (Treating LIMIT 0 as LIMIT 1 is a bit
+ * bogus, but it is in line with the planner's usual practice of never
+ * estimating less than one row.)  These values will be passed to make_limit;
+ * review that function if you change this code.
+ *
+ * The return value is the suitably adjusted tuple_fraction to use for
+ * planning the query. This adjustment is not overridable, since it reflects
+ * plan actions that grouping_planner() will certainly take, not assumptions
+ * about context.
+ */
+static double
+preprocess_limit(PlannerInfo *root, double tuple_fraction,
+                                int64 *offset_est, int64 *count_est)
+{
+       Query      *parse = root->parse;
+       Node       *est;
+       double          limit_fraction;
+
+       /* Should not be called unless a LIMIT or OFFSET clause is present */
+       Assert(parse->limitCount || parse->limitOffset);
+
+       /*
+        * Try to obtain the clause values.  We use estimate_expression_value
+        * primarily because it can sometimes do something useful with Params.
+        */
+       if (parse->limitCount)
+       {
+               est = estimate_expression_value(parse->limitCount);
+               if (est && IsA(est, Const))
+               {
+                       if (((Const *) est)->constisnull)
+                       {
+                               /* NULL indicates LIMIT ALL, ie, no limit */
+                               *count_est = 0; /* treat as not present */
+                       }
+                       else
+                       {
+                               *count_est = DatumGetInt64(((Const *) est)->constvalue);
+                               if (*count_est <= 0)
+                                       *count_est = 1;         /* force to at least 1 */
+                       }
+               }
+               else
+                       *count_est = -1;        /* can't estimate */
+       }
+       else
+               *count_est = 0;                 /* not present */
+
+       if (parse->limitOffset)
+       {
+               est = estimate_expression_value(parse->limitOffset);
+               if (est && IsA(est, Const))
+               {
+                       if (((Const *) est)->constisnull)
+                       {
+                               /* Treat NULL as no offset; the executor will too */
+                               *offset_est = 0;        /* treat as not present */
+                       }
+                       else
+                       {
+                               *offset_est = DatumGetInt64(((Const *) est)->constvalue);
+
+                               if (*offset_est < 0)
+                                       *offset_est = 0;        /* less than 0 is same as 0 */
+                       }
+               }
+               else
+                       *offset_est = -1;       /* can't estimate */
+       }
+       else
+               *offset_est = 0;                /* not present */
+
+       if (*count_est != 0)
+       {
+               /*
+                * A LIMIT clause limits the absolute number of tuples returned.
+                * However, if LIMIT or OFFSET can't be estimated we have to guess;
+                * for lack of a better idea, assume 10% of the plan's result is wanted.
+                */
+               if (*count_est < 0 || *offset_est < 0)
+               {
+                       /* LIMIT or OFFSET is an expression ... punt ... */
+                       limit_fraction = 0.10;
+               }
+               else
+               {
+                       /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
+                       limit_fraction = (double) *count_est + (double) *offset_est;
+               }
+
+               /*
+                * If we have absolute limits from both caller and LIMIT, use the
+                * smaller value; likewise if they are both fractional.  If one is
+                * fractional and the other absolute, we can't easily determine which
+                * is smaller, but we use the heuristic that the absolute will usually
+                * be smaller.
+                */
+               if (tuple_fraction >= 1.0)
+               {
+                       if (limit_fraction >= 1.0)
+                       {
+                               /* both absolute */
+                               tuple_fraction = Min(tuple_fraction, limit_fraction);
+                       }
+                       else
+                       {
+                               /* caller absolute, limit fractional; use caller's value */
+                       }
+               }
+               else if (tuple_fraction > 0.0)
+               {
+                       if (limit_fraction >= 1.0)
+                       {
+                               /* caller fractional, limit absolute; use limit */
+                               tuple_fraction = limit_fraction;
+                       }
+                       else
+                       {
+                               /* both fractional */
+                               tuple_fraction = Min(tuple_fraction, limit_fraction);
+                       }
+               }
+               else
+               {
+                       /* no info from caller, just use limit */
+                       tuple_fraction = limit_fraction;
+               }
+       }
+       else if (*offset_est != 0 && tuple_fraction > 0.0)
+       {
+               /*
+                * We have an OFFSET but no LIMIT.  This acts entirely differently
+                * from the LIMIT case: here, we need to increase rather than decrease
+                * the caller's tuple_fraction, because the OFFSET acts to cause more
+                * tuples to be fetched instead of fewer.  This only matters if we got
+                * a tuple_fraction > 0, however.
+                *
+                * As above, use 10% if OFFSET is present but can't be estimated.
+                */
+               if (*offset_est < 0)
+                       limit_fraction = 0.10;
+               else
+                       limit_fraction = (double) *offset_est;
+
+               /*
+                * If we have absolute counts from both caller and OFFSET, add them
+                * together; likewise if they are both fractional.  If one is
+                * fractional and the other absolute, we want to take the larger, and
+                * we heuristically assume that's the fractional one.
+                */
+               if (tuple_fraction >= 1.0)
+               {
+                       if (limit_fraction >= 1.0)
+                       {
+                               /* both absolute, so add them together */
+                               tuple_fraction += limit_fraction;
+                       }
+                       else
+                       {
+                               /* caller absolute, limit fractional; use limit */
+                               tuple_fraction = limit_fraction;
+                       }
+               }
+               else
+               {
+                       if (limit_fraction >= 1.0)
+                       {
+                               /* caller fractional, limit absolute; use caller's value */
+                       }
+                       else
+                       {
+                               /* both fractional, so add them together */
+                               tuple_fraction += limit_fraction;
+                               if (tuple_fraction >= 1.0)
+                                       tuple_fraction = 0.0;           /* assume fetch all */
+                       }
+               }
+       }
+
+       return tuple_fraction;
+}
+
 /*
  * choose_hashed_grouping - should we use hashed grouping?
  */
 static bool
 choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
                                           Path *cheapest_path, Path *sorted_path,
-                                          List *sort_pathkeys, List *group_pathkeys,
                                           double dNumGroups, AggClauseCounts *agg_counts)
 {
        int                     numGroupCols = list_length(root->parse->groupClause);
@@ -1278,9 +1285,8 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
         * Don't do it if it doesn't look like the hashtable will fit into
         * work_mem.
         *
-        * Beware here of the possibility that cheapest_path->parent is NULL.
-        * This could happen if user does something silly like
-        *              SELECT 'foo' GROUP BY 1;
+        * Beware here of the possibility that cheapest_path->parent is NULL. This
+        * could happen if user does something silly like SELECT 'foo' GROUP BY 1;
         */
        if (cheapest_path->parent)
        {
@@ -1289,12 +1295,12 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
        }
        else
        {
-               cheapest_path_rows = 1;                         /* assume non-set result */
-               cheapest_path_width = 100;                      /* arbitrary */
+               cheapest_path_rows = 1; /* assume non-set result */
+               cheapest_path_width = 100;              /* arbitrary */
        }
 
        /* Estimate per-hash-entry space at tuple width... */
-       hashentrysize = cheapest_path_width;
+       hashentrysize = MAXALIGN(cheapest_path_width) + MAXALIGN(sizeof(MinimalTupleData));
        /* plus space for pass-by-ref transition values... */
        hashentrysize += agg_counts->transitionSpace;
        /* plus the per-hash-entry overhead */
@@ -1304,20 +1310,17 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
                return false;
 
        /*
-        * See if the estimated cost is no more than doing it the other way.
-        * While avoiding the need for sorted input is usually a win, the fact
-        * that the output won't be sorted may be a loss; so we need to do an
-        * actual cost comparison.
+        * See if the estimated cost is no more than doing it the other way. While
+        * avoiding the need for sorted input is usually a win, the fact that the
+        * output won't be sorted may be a loss; so we need to do an actual cost
+        * comparison.
         *
-        * We need to consider
-        *              cheapest_path + hashagg [+ final sort]
-        * versus either
-        *              cheapest_path [+ sort] + group or agg [+ final sort]
-        * or
-        *              presorted_path + group or agg [+ final sort]
-        * where brackets indicate a step that may not be needed. We assume
-        * query_planner() will have returned a presorted path only if it's a
-        * winner compared to cheapest_path for this purpose.
+        * We need to consider "cheapest_path + hashagg [+ final sort]" versus
+        * either "cheapest_path [+ sort] + group or agg [+ final sort]" or
+        * "presorted_path + group or agg [+ final sort]", where brackets indicate a
+        * step that may not be needed. We assume query_planner() will have
+        * returned a presorted path only if it's a winner compared to
+        * cheapest_path for this purpose.
         *
         * These path variables are dummies that just hold cost fields; we don't
         * make actual Paths for these steps.
@@ -1327,8 +1330,8 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
                         cheapest_path->startup_cost, cheapest_path->total_cost,
                         cheapest_path_rows);
        /* Result of hashed agg is always unsorted */
-       if (sort_pathkeys)
-               cost_sort(&hashed_p, root, sort_pathkeys, hashed_p.total_cost,
+       if (root->sort_pathkeys)
+               cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
                                  dNumGroups, cheapest_path_width);
 
        if (sorted_path)
@@ -1343,12 +1346,11 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
                sorted_p.total_cost = cheapest_path->total_cost;
                current_pathkeys = cheapest_path->pathkeys;
        }
-       if (!pathkeys_contained_in(group_pathkeys,
-                                                          current_pathkeys))
+       if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
        {
-               cost_sort(&sorted_p, root, group_pathkeys, sorted_p.total_cost,
+               cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
                                  cheapest_path_rows, cheapest_path_width);
-               current_pathkeys = group_pathkeys;
+               current_pathkeys = root->group_pathkeys;
        }
 
        if (root->parse->hasAggs)
@@ -1361,9 +1363,9 @@ choose_hashed_grouping(PlannerInfo *root, double tuple_fraction,
                                   sorted_p.startup_cost, sorted_p.total_cost,
                                   cheapest_path_rows);
        /* The Agg or Group node will preserve ordering */
-       if (sort_pathkeys &&
-               !pathkeys_contained_in(sort_pathkeys, current_pathkeys))
-               cost_sort(&sorted_p, root, sort_pathkeys, sorted_p.total_cost,
+       if (root->sort_pathkeys &&
+               !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+               cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
                                  dNumGroups, cheapest_path_width);
 
        /*
@@ -1478,8 +1480,8 @@ make_subplanTargetList(PlannerInfo *root,
 
        /*
         * Otherwise, start with a "flattened" tlist (having just the vars
-        * mentioned in the targetlist and HAVING qual --- but not upper-
-        * level Vars; they will be replaced by Params later on).
+        * mentioned in the targetlist and HAVING qual --- but not upper-level
+        * Vars; they will be replaced by Params later on).
         */
        sub_tlist = flatten_tlist(tlist);
        extravars = pull_var_clause(parse->havingQual, false);
@@ -1489,9 +1491,8 @@ make_subplanTargetList(PlannerInfo *root,
 
        /*
         * If grouping, create sub_tlist entries for all GROUP BY expressions
-        * (GROUP BY items that are simple Vars should be in the list
-        * already), and make an array showing where the group columns are in
-        * the sub_tlist.
+        * (GROUP BY items that are simple Vars should be in the list already),
+        * and make an array showing where the group columns are in the sub_tlist.
         */
        numCols = list_length(parse->groupClause);
        if (numCols > 0)
@@ -1610,7 +1611,7 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
                Assert(orig_tlist_item != NULL);
                orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
                orig_tlist_item = lnext(orig_tlist_item);
-               if (orig_tle->resjunk)                  /* should not happen */
+               if (orig_tle->resjunk)  /* should not happen */
                        elog(ERROR, "resjunk output columns are not implemented");
                Assert(new_tle->resno == orig_tle->resno);
                new_tle->ressortgroupref = orig_tle->ressortgroupref;