granicus.if.org Git - postgresql/blob - src/backend/optimizer/plan/planner.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * planner.c
   4  *        The query optimizer external interface.
   5  *
   6  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/optimizer/plan/planner.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include <limits.h>
  19
  20 #include "executor/executor.h"
  21 #include "executor/nodeAgg.h"
  22 #include "miscadmin.h"
  23 #include "nodes/makefuncs.h"
  24 #ifdef OPTIMIZER_DEBUG
  25 #include "nodes/print.h"
  26 #endif
  27 #include "optimizer/clauses.h"
  28 #include "optimizer/cost.h"
  29 #include "optimizer/pathnode.h"
  30 #include "optimizer/paths.h"
  31 #include "optimizer/plancat.h"
  32 #include "optimizer/planmain.h"
  33 #include "optimizer/planner.h"
  34 #include "optimizer/prep.h"
  35 #include "optimizer/subselect.h"
  36 #include "optimizer/tlist.h"
  37 #include "parser/analyze.h"
  38 #include "parser/parsetree.h"
  39 #include "rewrite/rewriteManip.h"
  40 #include "utils/rel.h"
  41
  42
  43 /* GUC parameter */
  44 double          cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
  45
  46 /* Hook for plugins to get control in planner() */
  47 planner_hook_type planner_hook = NULL;
  48
  49
  50 /* Expression kind codes for preprocess_expression */
  51 #define EXPRKIND_QUAL           0
  52 #define EXPRKIND_TARGET         1
  53 #define EXPRKIND_RTFUNC         2
  54 #define EXPRKIND_VALUES         3
  55 #define EXPRKIND_LIMIT          4
  56 #define EXPRKIND_APPINFO        5
  57
  58
  59 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
  60 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
  61 static Plan *inheritance_planner(PlannerInfo *root);
  62 static Plan *grouping_planner(PlannerInfo *root, double tuple_fraction);
  63 static void preprocess_rowmarks(PlannerInfo *root);
  64 static double preprocess_limit(PlannerInfo *root,
  65                                  double tuple_fraction,
  66                                  int64 *offset_est, int64 *count_est);
  67 static void preprocess_groupclause(PlannerInfo *root);
  68 static bool choose_hashed_grouping(PlannerInfo *root,
  69                                            double tuple_fraction, double limit_tuples,
  70                                            double path_rows, int path_width,
  71                                            Path *cheapest_path, Path *sorted_path,
  72                                            double dNumGroups, AggClauseCosts *agg_costs);
  73 static bool choose_hashed_distinct(PlannerInfo *root,
  74                                            double tuple_fraction, double limit_tuples,
  75                                            double path_rows, int path_width,
  76                                            Cost cheapest_startup_cost, Cost cheapest_total_cost,
  77                                            Cost sorted_startup_cost, Cost sorted_total_cost,
  78                                            List *sorted_pathkeys,
  79                                            double dNumDistinctRows);
  80 static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
  81                                            AttrNumber **groupColIdx, bool *need_tlist_eval);
  82 static int      get_grouping_column_index(Query *parse, TargetEntry *tle);
  83 static void locate_grouping_columns(PlannerInfo *root,
  84                                                 List *tlist,
  85                                                 List *sub_tlist,
  86                                                 AttrNumber *groupColIdx);
  87 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
  88 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
  89 static List *add_volatile_sort_exprs(List *window_tlist, List *tlist,
  90                                                 List *activeWindows);
  91 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
  92                                                  List *tlist, bool canonicalize);
  93 static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc,
  94                                                    List *tlist,
  95                                                    int numSortCols, AttrNumber *sortColIdx,
  96                                                    int *partNumCols,
  97                                                    AttrNumber **partColIdx,
  98                                                    Oid **partOperators,
  99                                                    int *ordNumCols,
 100                                                    AttrNumber **ordColIdx,
 101                                                    Oid **ordOperators);
 102
 103
 104 /*****************************************************************************
 105  *
 106  *         Query optimizer entry point
 107  *
 108  * To support loadable plugins that monitor or modify planner behavior,
 109  * we provide a hook variable that lets a plugin get control before and
 110  * after the standard planning process.  The plugin would normally call
 111  * standard_planner().
 112  *
 113  * Note to plugin authors: standard_planner() scribbles on its Query input,
 114  * so you'd better copy that data structure if you want to plan more than once.
 115  *
 116  *****************************************************************************/
 117 PlannedStmt *
 118 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 119 {
 120         PlannedStmt *result;
 121
 122         if (planner_hook)
 123                 result = (*planner_hook) (parse, cursorOptions, boundParams);
 124         else
 125                 result = standard_planner(parse, cursorOptions, boundParams);
 126         return result;
 127 }
 128
 129 PlannedStmt *
 130 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 131 {
 132         PlannedStmt *result;
 133         PlannerGlobal *glob;
 134         double          tuple_fraction;
 135         PlannerInfo *root;
 136         Plan       *top_plan;
 137         ListCell   *lp,
 138                            *lr;
 139
 140         /* Cursor options may come from caller or from DECLARE CURSOR stmt */
 141         if (parse->utilityStmt &&
 142                 IsA(parse->utilityStmt, DeclareCursorStmt))
 143                 cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;
 144
 145         /*
 146          * Set up global state for this planner invocation.  This data is needed
 147          * across all levels of sub-Query that might exist in the given command,
 148          * so we keep it in a separate struct that's linked to by each per-Query
 149          * PlannerInfo.
 150          */
 151         glob = makeNode(PlannerGlobal);
 152
 153         glob->boundParams = boundParams;
 154         glob->paramlist = NIL;
 155         glob->subplans = NIL;
 156         glob->subroots = NIL;
 157         glob->rewindPlanIDs = NULL;
 158         glob->finalrtable = NIL;
 159         glob->finalrowmarks = NIL;
 160         glob->resultRelations = NIL;
 161         glob->relationOids = NIL;
 162         glob->invalItems = NIL;
 163         glob->lastPHId = 0;
 164         glob->lastRowMarkId = 0;
 165         glob->transientPlan = false;
 166
 167         /* Determine what fraction of the plan is likely to be scanned */
 168         if (cursorOptions & CURSOR_OPT_FAST_PLAN)
 169         {
 170                 /*
 171                  * We have no real idea how many tuples the user will ultimately FETCH
 172                  * from a cursor, but it is often the case that he doesn't want 'em
 173                  * all, or would prefer a fast-start plan anyway so that he can
 174                  * process some of the tuples sooner.  Use a GUC parameter to decide
 175                  * what fraction to optimize for.
 176                  */
 177                 tuple_fraction = cursor_tuple_fraction;
 178
 179                 /*
 180                  * We document cursor_tuple_fraction as simply being a fraction, which
 181                  * means the edge cases 0 and 1 have to be treated specially here.      We
 182                  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
 183                  */
 184                 if (tuple_fraction >= 1.0)
 185                         tuple_fraction = 0.0;
 186                 else if (tuple_fraction <= 0.0)
 187                         tuple_fraction = 1e-10;
 188         }
 189         else
 190         {
 191                 /* Default assumption is we need all the tuples */
 192                 tuple_fraction = 0.0;
 193         }
 194
 195         /* primary planning entry point (may recurse for subqueries) */
 196         top_plan = subquery_planner(glob, parse, NULL,
 197                                                                 false, tuple_fraction, &root);
 198
 199         /*
 200          * If creating a plan for a scrollable cursor, make sure it can run
 201          * backwards on demand.  Add a Material node at the top at need.
 202          */
 203         if (cursorOptions & CURSOR_OPT_SCROLL)
 204         {
 205                 if (!ExecSupportsBackwardScan(top_plan))
 206                         top_plan = materialize_finished_plan(top_plan);
 207         }
 208
 209         /* final cleanup of the plan */
 210         Assert(glob->finalrtable == NIL);
 211         Assert(glob->finalrowmarks == NIL);
 212         Assert(glob->resultRelations == NIL);
 213         top_plan = set_plan_references(root, top_plan);
 214         /* ... and the subplans (both regular subplans and initplans) */
 215         Assert(list_length(glob->subplans) == list_length(glob->subroots));
 216         forboth(lp, glob->subplans, lr, glob->subroots)
 217         {
 218                 Plan       *subplan = (Plan *) lfirst(lp);
 219                 PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
 220
 221                 lfirst(lp) = set_plan_references(subroot, subplan);
 222         }
 223
 224         /* build the PlannedStmt result */
 225         result = makeNode(PlannedStmt);
 226
 227         result->commandType = parse->commandType;
 228         result->hasReturning = (parse->returningList != NIL);
 229         result->hasModifyingCTE = parse->hasModifyingCTE;
 230         result->canSetTag = parse->canSetTag;
 231         result->transientPlan = glob->transientPlan;
 232         result->planTree = top_plan;
 233         result->rtable = glob->finalrtable;
 234         result->resultRelations = glob->resultRelations;
 235         result->utilityStmt = parse->utilityStmt;
 236         result->intoClause = parse->intoClause;
 237         result->subplans = glob->subplans;
 238         result->rewindPlanIDs = glob->rewindPlanIDs;
 239         result->rowMarks = glob->finalrowmarks;
 240         result->relationOids = glob->relationOids;
 241         result->invalItems = glob->invalItems;
 242         result->nParamExec = list_length(glob->paramlist);
 243
 244         return result;
 245 }
 246
 247
 248 /*--------------------
 249  * subquery_planner
 250  *        Invokes the planner on a subquery.  We recurse to here for each
 251  *        sub-SELECT found in the query tree.
 252  *
 253  * glob is the global state for the current planner run.
 254  * parse is the querytree produced by the parser & rewriter.
 255  * parent_root is the immediate parent Query's info (NULL at the top level).
 256  * hasRecursion is true if this is a recursive WITH query.
 257  * tuple_fraction is the fraction of tuples we expect will be retrieved.
 258  * tuple_fraction is interpreted as explained for grouping_planner, below.
 259  *
 260  * If subroot isn't NULL, we pass back the query's final PlannerInfo struct;
 261  * among other things this tells the output sort ordering of the plan.
 262  *
 263  * Basically, this routine does the stuff that should only be done once
 264  * per Query object.  It then calls grouping_planner.  At one time,
 265  * grouping_planner could be invoked recursively on the same Query object;
 266  * that's not currently true, but we keep the separation between the two
 267  * routines anyway, in case we need it again someday.
 268  *
 269  * subquery_planner will be called recursively to handle sub-Query nodes
 270  * found within the query's expressions and rangetable.
 271  *
 272  * Returns a query plan.
 273  *--------------------
 274  */
 275 Plan *
 276 subquery_planner(PlannerGlobal *glob, Query *parse,
 277                                  PlannerInfo *parent_root,
 278                                  bool hasRecursion, double tuple_fraction,
 279                                  PlannerInfo **subroot)
 280 {
 281         int                     num_old_subplans = list_length(glob->subplans);
 282         PlannerInfo *root;
 283         Plan       *plan;
 284         List       *newHaving;
 285         bool            hasOuterJoins;
 286         ListCell   *l;
 287
 288         /* Create a PlannerInfo data structure for this subquery */
 289         root = makeNode(PlannerInfo);
 290         root->parse = parse;
 291         root->glob = glob;
 292         root->query_level = parent_root ? parent_root->query_level + 1 : 1;
 293         root->parent_root = parent_root;
 294         root->planner_cxt = CurrentMemoryContext;
 295         root->init_plans = NIL;
 296         root->cte_plan_ids = NIL;
 297         root->eq_classes = NIL;
 298         root->append_rel_list = NIL;
 299         root->rowMarks = NIL;
 300         root->hasInheritedTarget = false;
 301
 302         root->hasRecursion = hasRecursion;
 303         if (hasRecursion)
 304                 root->wt_param_id = SS_assign_special_param(root);
 305         else
 306                 root->wt_param_id = -1;
 307         root->non_recursive_plan = NULL;
 308
 309         /*
 310          * If there is a WITH list, process each WITH query and build an initplan
 311          * SubPlan structure for it.
 312          */
 313         if (parse->cteList)
 314                 SS_process_ctes(root);
 315
 316         /*
 317          * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
 318          * to transform them into joins.  Note that this step does not descend
 319          * into subqueries; if we pull up any subqueries below, their SubLinks are
 320          * processed just before pulling them up.
 321          */
 322         if (parse->hasSubLinks)
 323                 pull_up_sublinks(root);
 324
 325         /*
 326          * Scan the rangetable for set-returning functions, and inline them if
 327          * possible (producing subqueries that might get pulled up next).
 328          * Recursion issues here are handled in the same way as for SubLinks.
 329          */
 330         inline_set_returning_functions(root);
 331
 332         /*
 333          * Check to see if any subqueries in the jointree can be merged into this
 334          * query.
 335          */
 336         parse->jointree = (FromExpr *)
 337                 pull_up_subqueries(root, (Node *) parse->jointree, NULL, NULL);
 338
 339         /*
 340          * If this is a simple UNION ALL query, flatten it into an appendrel. We
 341          * do this now because it requires applying pull_up_subqueries to the leaf
 342          * queries of the UNION ALL, which weren't touched above because they
 343          * weren't referenced by the jointree (they will be after we do this).
 344          */
 345         if (parse->setOperations)
 346                 flatten_simple_union_all(root);
 347
 348         /*
 349          * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
 350          * avoid the expense of doing flatten_join_alias_vars().  Also check for
 351          * outer joins --- if none, we can skip reduce_outer_joins(). This must be
 352          * done after we have done pull_up_subqueries, of course.
 353          */
 354         root->hasJoinRTEs = false;
 355         hasOuterJoins = false;
 356         foreach(l, parse->rtable)
 357         {
 358                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
 359
 360                 if (rte->rtekind == RTE_JOIN)
 361                 {
 362                         root->hasJoinRTEs = true;
 363                         if (IS_OUTER_JOIN(rte->jointype))
 364                         {
 365                                 hasOuterJoins = true;
 366                                 /* Can quit scanning once we find an outer join */
 367                                 break;
 368                         }
 369                 }
 370         }
 371
 372         /*
 373          * Preprocess RowMark information.      We need to do this after subquery
 374          * pullup (so that all non-inherited RTEs are present) and before
 375          * inheritance expansion (so that the info is available for
 376          * expand_inherited_tables to examine and modify).
 377          */
 378         preprocess_rowmarks(root);
 379
 380         /*
 381          * Expand any rangetable entries that are inheritance sets into "append
 382          * relations".  This can add entries to the rangetable, but they must be
 383          * plain base relations not joins, so it's OK (and marginally more
 384          * efficient) to do it after checking for join RTEs.  We must do it after
 385          * pulling up subqueries, else we'd fail to handle inherited tables in
 386          * subqueries.
 387          */
 388         expand_inherited_tables(root);
 389
 390         /*
 391          * Set hasHavingQual to remember if HAVING clause is present.  Needed
 392          * because preprocess_expression will reduce a constant-true condition to
 393          * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
 394          */
 395         root->hasHavingQual = (parse->havingQual != NULL);
 396
 397         /* Clear this flag; might get set in distribute_qual_to_rels */
 398         root->hasPseudoConstantQuals = false;
 399
 400         /*
 401          * Do expression preprocessing on targetlist and quals, as well as other
 402          * random expressions in the querytree.  Note that we do not need to
 403          * handle sort/group expressions explicitly, because they are actually
 404          * part of the targetlist.
 405          */
 406         parse->targetList = (List *)
 407                 preprocess_expression(root, (Node *) parse->targetList,
 408                                                           EXPRKIND_TARGET);
 409
 410         parse->returningList = (List *)
 411                 preprocess_expression(root, (Node *) parse->returningList,
 412                                                           EXPRKIND_TARGET);
 413
 414         preprocess_qual_conditions(root, (Node *) parse->jointree);
 415
 416         parse->havingQual = preprocess_expression(root, parse->havingQual,
 417                                                                                           EXPRKIND_QUAL);
 418
 419         foreach(l, parse->windowClause)
 420         {
 421                 WindowClause *wc = (WindowClause *) lfirst(l);
 422
 423                 /* partitionClause/orderClause are sort/group expressions */
 424                 wc->startOffset = preprocess_expression(root, wc->startOffset,
 425                                                                                                 EXPRKIND_LIMIT);
 426                 wc->endOffset = preprocess_expression(root, wc->endOffset,
 427                                                                                           EXPRKIND_LIMIT);
 428         }
 429
 430         parse->limitOffset = preprocess_expression(root, parse->limitOffset,
 431                                                                                            EXPRKIND_LIMIT);
 432         parse->limitCount = preprocess_expression(root, parse->limitCount,
 433                                                                                           EXPRKIND_LIMIT);
 434
 435         root->append_rel_list = (List *)
 436                 preprocess_expression(root, (Node *) root->append_rel_list,
 437                                                           EXPRKIND_APPINFO);
 438
 439         /* Also need to preprocess expressions for function and values RTEs */
 440         foreach(l, parse->rtable)
 441         {
 442                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
 443
 444                 if (rte->rtekind == RTE_FUNCTION)
 445                         rte->funcexpr = preprocess_expression(root, rte->funcexpr,
 446                                                                                                   EXPRKIND_RTFUNC);
 447                 else if (rte->rtekind == RTE_VALUES)
 448                         rte->values_lists = (List *)
 449                                 preprocess_expression(root, (Node *) rte->values_lists,
 450                                                                           EXPRKIND_VALUES);
 451         }
 452
 453         /*
 454          * In some cases we may want to transfer a HAVING clause into WHERE. We
 455          * cannot do so if the HAVING clause contains aggregates (obviously) or
 456          * volatile functions (since a HAVING clause is supposed to be executed
 457          * only once per group).  Also, it may be that the clause is so expensive
 458          * to execute that we're better off doing it only once per group, despite
 459          * the loss of selectivity.  This is hard to estimate short of doing the
 460          * entire planning process twice, so we use a heuristic: clauses
 461          * containing subplans are left in HAVING.      Otherwise, we move or copy the
 462          * HAVING clause into WHERE, in hopes of eliminating tuples before
 463          * aggregation instead of after.
 464          *
 465          * If the query has explicit grouping then we can simply move such a
 466          * clause into WHERE; any group that fails the clause will not be in the
 467          * output because none of its tuples will reach the grouping or
 468          * aggregation stage.  Otherwise we must have a degenerate (variable-free)
 469          * HAVING clause, which we put in WHERE so that query_planner() can use it
 470          * in a gating Result node, but also keep in HAVING to ensure that we
 471          * don't emit a bogus aggregated row. (This could be done better, but it
 472          * seems not worth optimizing.)
 473          *
 474          * Note that both havingQual and parse->jointree->quals are in
 475          * implicitly-ANDed-list form at this point, even though they are declared
 476          * as Node *.
 477          */
 478         newHaving = NIL;
 479         foreach(l, (List *) parse->havingQual)
 480         {
 481                 Node       *havingclause = (Node *) lfirst(l);
 482
 483                 if (contain_agg_clause(havingclause) ||
 484                         contain_volatile_functions(havingclause) ||
 485                         contain_subplans(havingclause))
 486                 {
 487                         /* keep it in HAVING */
 488                         newHaving = lappend(newHaving, havingclause);
 489                 }
 490                 else if (parse->groupClause)
 491                 {
 492                         /* move it to WHERE */
 493                         parse->jointree->quals = (Node *)
 494                                 lappend((List *) parse->jointree->quals, havingclause);
 495                 }
 496                 else
 497                 {
 498                         /* put a copy in WHERE, keep it in HAVING */
 499                         parse->jointree->quals = (Node *)
 500                                 lappend((List *) parse->jointree->quals,
 501                                                 copyObject(havingclause));
 502                         newHaving = lappend(newHaving, havingclause);
 503                 }
 504         }
 505         parse->havingQual = (Node *) newHaving;
 506
 507         /*
 508          * If we have any outer joins, try to reduce them to plain inner joins.
 509          * This step is most easily done after we've done expression
 510          * preprocessing.
 511          */
 512         if (hasOuterJoins)
 513                 reduce_outer_joins(root);
 514
 515         /*
 516          * Do the main planning.  If we have an inherited target relation, that
 517          * needs special processing, else go straight to grouping_planner.
 518          */
 519         if (parse->resultRelation &&
 520                 rt_fetch(parse->resultRelation, parse->rtable)->inh)
 521                 plan = inheritance_planner(root);
 522         else
 523         {
 524                 plan = grouping_planner(root, tuple_fraction);
 525                 /* If it's not SELECT, we need a ModifyTable node */
 526                 if (parse->commandType != CMD_SELECT)
 527                 {
 528                         List       *returningLists;
 529                         List       *rowMarks;
 530
 531                         /*
 532                          * Deal with the RETURNING clause if any.  It's convenient to pass
 533                          * the returningList through setrefs.c now rather than at top
 534                          * level (if we waited, handling inherited UPDATE/DELETE would be
 535                          * much harder).
 536                          */
 537                         if (parse->returningList)
 538                         {
 539                                 List       *rlist;
 540
 541                                 Assert(parse->resultRelation);
 542                                 rlist = set_returning_clause_references(root,
 543                                                                                                                 parse->returningList,
 544                                                                                                                 plan,
 545                                                                                                           parse->resultRelation);
 546                                 returningLists = list_make1(rlist);
 547                         }
 548                         else
 549                                 returningLists = NIL;
 550
 551                         /*
 552                          * If there was a FOR UPDATE/SHARE clause, the LockRows node will
 553                          * have dealt with fetching non-locked marked rows, else we need
 554                          * to have ModifyTable do that.
 555                          */
 556                         if (parse->rowMarks)
 557                                 rowMarks = NIL;
 558                         else
 559                                 rowMarks = root->rowMarks;
 560
 561                         plan = (Plan *) make_modifytable(parse->commandType,
 562                                                                                          parse->canSetTag,
 563                                                                            list_make1_int(parse->resultRelation),
 564                                                                                          list_make1(plan),
 565                                                                                          returningLists,
 566                                                                                          rowMarks,
 567                                                                                          SS_assign_special_param(root));
 568                 }
 569         }
 570
 571         /*
 572          * If any subplans were generated, or if there are any parameters to worry
 573          * about, build initPlan list and extParam/allParam sets for plan nodes,
 574          * and attach the initPlans to the top plan node.
 575          */
 576         if (list_length(glob->subplans) != num_old_subplans ||
 577                 root->glob->paramlist != NIL)
 578                 SS_finalize_plan(root, plan, true);
 579
 580         /* Return internal info if caller wants it */
 581         if (subroot)
 582                 *subroot = root;
 583
 584         return plan;
 585 }
 586
 587 /*
 588  * preprocess_expression
 589  *              Do subquery_planner's preprocessing work for an expression,
 590  *              which can be a targetlist, a WHERE clause (including JOIN/ON
 591  *              conditions), or a HAVING clause.
 592  */
 593 static Node *
 594 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 595 {
 596         /*
 597          * Fall out quickly if expression is empty.  This occurs often enough to
 598          * be worth checking.  Note that null->null is the correct conversion for
 599          * implicit-AND result format, too.
 600          */
 601         if (expr == NULL)
 602                 return NULL;
 603
 604         /*
 605          * If the query has any join RTEs, replace join alias variables with
 606          * base-relation variables. We must do this before sublink processing,
 607          * else sublinks expanded out from join aliases wouldn't get processed. We
 608          * can skip it in VALUES lists, however, since they can't contain any Vars
 609          * at all.
 610          */
 611         if (root->hasJoinRTEs && kind != EXPRKIND_VALUES)
 612                 expr = flatten_join_alias_vars(root, expr);
 613
 614         /*
 615          * Simplify constant expressions.
 616          *
 617          * Note: an essential effect of this is to convert named-argument function
 618          * calls to positional notation and insert the current actual values of
 619          * any default arguments for functions.  To ensure that happens, we *must*
 620          * process all expressions here.  Previous PG versions sometimes skipped
 621          * const-simplification if it didn't seem worth the trouble, but we can't
 622          * do that anymore.
 623          *
 624          * Note: this also flattens nested AND and OR expressions into N-argument
 625          * form.  All processing of a qual expression after this point must be
 626          * careful to maintain AND/OR flatness --- that is, do not generate a tree
 627          * with AND directly under AND, nor OR directly under OR.
 628          */
 629         expr = eval_const_expressions(root, expr);
 630
 631         /*
 632          * If it's a qual or havingQual, canonicalize it.
 633          */
 634         if (kind == EXPRKIND_QUAL)
 635         {
 636                 expr = (Node *) canonicalize_qual((Expr *) expr);
 637
 638 #ifdef OPTIMIZER_DEBUG
 639                 printf("After canonicalize_qual()\n");
 640                 pprint(expr);
 641 #endif
 642         }
 643
 644         /* Expand SubLinks to SubPlans */
 645         if (root->parse->hasSubLinks)
 646                 expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
 647
 648         /*
 649          * XXX do not insert anything here unless you have grokked the comments in
 650          * SS_replace_correlation_vars ...
 651          */
 652
 653         /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
 654         if (root->query_level > 1)
 655                 expr = SS_replace_correlation_vars(root, expr);
 656
 657         /*
 658          * If it's a qual or havingQual, convert it to implicit-AND format. (We
 659          * don't want to do this before eval_const_expressions, since the latter
 660          * would be unable to simplify a top-level AND correctly. Also,
 661          * SS_process_sublinks expects explicit-AND format.)
 662          */
 663         if (kind == EXPRKIND_QUAL)
 664                 expr = (Node *) make_ands_implicit((Expr *) expr);
 665
 666         return expr;
 667 }
 668
 669 /*
 670  * preprocess_qual_conditions
 671  *              Recursively scan the query's jointree and do subquery_planner's
 672  *              preprocessing work on each qual condition found therein.
 673  */
 674 static void
 675 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
 676 {
 677         if (jtnode == NULL)
 678                 return;
 679         if (IsA(jtnode, RangeTblRef))
 680         {
 681                 /* nothing to do here */
 682         }
 683         else if (IsA(jtnode, FromExpr))
 684         {
 685                 FromExpr   *f = (FromExpr *) jtnode;
 686                 ListCell   *l;
 687
 688                 foreach(l, f->fromlist)
 689                         preprocess_qual_conditions(root, lfirst(l));
 690
 691                 f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
 692         }
 693         else if (IsA(jtnode, JoinExpr))
 694         {
 695                 JoinExpr   *j = (JoinExpr *) jtnode;
 696
 697                 preprocess_qual_conditions(root, j->larg);
 698                 preprocess_qual_conditions(root, j->rarg);
 699
 700                 j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
 701         }
 702         else
 703                 elog(ERROR, "unrecognized node type: %d",
 704                          (int) nodeTag(jtnode));
 705 }
 706
 707 /*
 708  * inheritance_planner
 709  *        Generate a plan in the case where the result relation is an
 710  *        inheritance set.
 711  *
 712  * We have to handle this case differently from cases where a source relation
 713  * is an inheritance set. Source inheritance is expanded at the bottom of the
 714  * plan tree (see allpaths.c), but target inheritance has to be expanded at
 715  * the top.  The reason is that for UPDATE, each target relation needs a
 716  * different targetlist matching its own column set.  Fortunately,
 717  * the UPDATE/DELETE target can never be the nullable side of an outer join,
 718  * so it's OK to generate the plan this way.
 719  *
 720  * Returns a query plan.
 721  */
 722 static Plan *
 723 inheritance_planner(PlannerInfo *root)
 724 {
 725         Query      *parse = root->parse;
 726         int                     parentRTindex = parse->resultRelation;
 727         List       *final_rtable = NIL;
 728         int                     save_rel_array_size = 0;
 729         RelOptInfo **save_rel_array = NULL;
 730         List       *subplans = NIL;
 731         List       *resultRelations = NIL;
 732         List       *returningLists = NIL;
 733         List       *rowMarks;
 734         ListCell   *lc;
 735
 736         /*
 737          * We generate a modified instance of the original Query for each target
 738          * relation, plan that, and put all the plans into a list that will be
 739          * controlled by a single ModifyTable node.  All the instances share the
 740          * same rangetable, but each instance must have its own set of subquery
 741          * RTEs within the finished rangetable because (1) they are likely to get
 742          * scribbled on during planning, and (2) it's not inconceivable that
 743          * subqueries could get planned differently in different cases.  We need
 744          * not create duplicate copies of other RTE kinds, in particular not the
 745          * target relations, because they don't have either of those issues.  Not
 746          * having to duplicate the target relations is important because doing so
 747          * (1) would result in a rangetable of length O(N^2) for N targets, with
 748          * at least O(N^3) work expended here; and (2) would greatly complicate
 749          * management of the rowMarks list.
 750          */
 751         foreach(lc, root->append_rel_list)
 752         {
 753                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
 754                 PlannerInfo subroot;
 755                 Plan       *subplan;
 756                 Index           rti;
 757
 758                 /* append_rel_list contains all append rels; ignore others */
 759                 if (appinfo->parent_relid != parentRTindex)
 760                         continue;
 761
 762                 /*
 763                  * We need a working copy of the PlannerInfo so that we can control
 764                  * propagation of information back to the main copy.
 765                  */
 766                 memcpy(&subroot, root, sizeof(PlannerInfo));
 767
 768                 /*
 769                  * Generate modified query with this rel as target.  We first apply
 770                  * adjust_appendrel_attrs, which copies the Query and changes
 771                  * references to the parent RTE to refer to the current child RTE,
 772                  * then fool around with subquery RTEs.
 773                  */
 774                 subroot.parse = (Query *)
 775                         adjust_appendrel_attrs((Node *) parse,
 776                                                                    appinfo);
 777
 778                 /*
 779                  * The rowMarks list might contain references to subquery RTEs, so
 780                  * make a copy that we can apply ChangeVarNodes to.  (Fortunately,
 781                  * the executor doesn't need to see the modified copies --- we can
 782                  * just pass it the original rowMarks list.)
 783                  */
 784                 subroot.rowMarks = (List *) copyObject(root->rowMarks);
 785
 786                 /*
 787                  * Add placeholders to the child Query's rangetable list to fill the
 788                  * RT indexes already reserved for subqueries in previous children.
 789                  * These won't be referenced, so there's no need to make them very
 790                  * valid-looking.
 791                  */
 792                 while (list_length(subroot.parse->rtable) < list_length(final_rtable))
 793                         subroot.parse->rtable = lappend(subroot.parse->rtable,
 794                                                                                         makeNode(RangeTblEntry));
 795
 796                 /*
 797                  * If this isn't the first child Query, generate duplicates of all
 798                  * subquery RTEs, and adjust Var numbering to reference the duplicates.
 799                  * To simplify the loop logic, we scan the original rtable not the
 800                  * copy just made by adjust_appendrel_attrs; that should be OK since
 801                  * subquery RTEs couldn't contain any references to the target rel.
 802                  */
 803                 if (final_rtable != NIL)
 804                 {
 805                         ListCell   *lr;
 806
 807                         rti = 1;
 808                         foreach(lr, parse->rtable)
 809                         {
 810                                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lr);
 811
 812                                 if (rte->rtekind == RTE_SUBQUERY)
 813                                 {
 814                                         Index   newrti;
 815
 816                                         /*
 817                                          * The RTE can't contain any references to its own RT
 818                                          * index, so we can save a few cycles by applying
 819                                          * ChangeVarNodes before we append the RTE to the
 820                                          * rangetable.
 821                                          */
 822                                         newrti = list_length(subroot.parse->rtable) + 1;
 823                                         ChangeVarNodes((Node *) subroot.parse, rti, newrti, 0);
 824                                         ChangeVarNodes((Node *) subroot.rowMarks, rti, newrti, 0);
 825                                         rte = copyObject(rte);
 826                                         subroot.parse->rtable = lappend(subroot.parse->rtable,
 827                                                                                                         rte);
 828                                 }
 829                                 rti++;
 830                         }
 831                 }
 832
 833                 /* We needn't modify the child's append_rel_list */
 834                 /* There shouldn't be any OJ info to translate, as yet */
 835                 Assert(subroot.join_info_list == NIL);
 836                 /* and we haven't created PlaceHolderInfos, either */
 837                 Assert(subroot.placeholder_list == NIL);
 838                 /* build a separate list of initplans for each child */
 839                 subroot.init_plans = NIL;
 840                 /* hack to mark target relation as an inheritance partition */
 841                 subroot.hasInheritedTarget = true;
 842
 843                 /* Generate plan */
 844                 subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ );
 845
 846                 /*
 847                  * If this child rel was excluded by constraint exclusion, exclude it
 848                  * from the result plan.
 849                  */
 850                 if (is_dummy_plan(subplan))
 851                         continue;
 852
 853                 subplans = lappend(subplans, subplan);
 854
 855                 /*
 856                  * If this is the first non-excluded child, its post-planning rtable
 857                  * becomes the initial contents of final_rtable; otherwise, append
 858                  * just its modified subquery RTEs to final_rtable.
 859                  */
 860                 if (final_rtable == NIL)
 861                         final_rtable = subroot.parse->rtable;
 862                 else
 863                         final_rtable = list_concat(final_rtable,
 864                                                                            list_copy_tail(subroot.parse->rtable,
 865                                                                                                           list_length(final_rtable)));
 866
 867                 /*
 868                  * We need to collect all the RelOptInfos from all child plans into
 869                  * the main PlannerInfo, since setrefs.c will need them.  We use the
 870                  * last child's simple_rel_array (previous ones are too short), so we
 871                  * have to propagate forward the RelOptInfos that were already built
 872                  * in previous children.
 873                  */
 874                 Assert(subroot.simple_rel_array_size >= save_rel_array_size);
 875                 for (rti = 1; rti < save_rel_array_size; rti++)
 876                 {
 877                         RelOptInfo *brel = save_rel_array[rti];
 878
 879                         if (brel)
 880                                 subroot.simple_rel_array[rti] = brel;
 881                 }
 882                 save_rel_array_size = subroot.simple_rel_array_size;
 883                 save_rel_array = subroot.simple_rel_array;
 884
 885                 /* Make sure any initplans from this rel get into the outer list */
 886                 root->init_plans = list_concat(root->init_plans, subroot.init_plans);
 887
 888                 /* Build list of target-relation RT indexes */
 889                 resultRelations = lappend_int(resultRelations, appinfo->child_relid);
 890
 891                 /* Build list of per-relation RETURNING targetlists */
 892                 if (parse->returningList)
 893                 {
 894                         List       *rlist;
 895
 896                         rlist = set_returning_clause_references(&subroot,
 897                                                                                                 subroot.parse->returningList,
 898                                                                                                         subplan,
 899                                                                                                         appinfo->child_relid);
 900                         returningLists = lappend(returningLists, rlist);
 901                 }
 902         }
 903
 904         /* Mark result as unordered (probably unnecessary) */
 905         root->query_pathkeys = NIL;
 906
 907         /*
 908          * If we managed to exclude every child rel, return a dummy plan; it
 909          * doesn't even need a ModifyTable node.
 910          */
 911         if (subplans == NIL)
 912         {
 913                 /* although dummy, it must have a valid tlist for executor */
 914                 List       *tlist;
 915
 916                 tlist = preprocess_targetlist(root, parse->targetList);
 917                 return (Plan *) make_result(root,
 918                                                                         tlist,
 919                                                                         (Node *) list_make1(makeBoolConst(false,
 920                                                                                                                                           false)),
 921                                                                         NULL);
 922         }
 923
 924         /*
 925          * Put back the final adjusted rtable into the master copy of the Query.
 926          */
 927         parse->rtable = final_rtable;
 928         root->simple_rel_array_size = save_rel_array_size;
 929         root->simple_rel_array = save_rel_array;
 930
 931         /*
 932          * If there was a FOR UPDATE/SHARE clause, the LockRows node will have
 933          * dealt with fetching non-locked marked rows, else we need to have
 934          * ModifyTable do that.
 935          */
 936         if (parse->rowMarks)
 937                 rowMarks = NIL;
 938         else
 939                 rowMarks = root->rowMarks;
 940
 941         /* And last, tack on a ModifyTable node to do the UPDATE/DELETE work */
 942         return (Plan *) make_modifytable(parse->commandType,
 943                                                                          parse->canSetTag,
 944                                                                          resultRelations,
 945                                                                          subplans,
 946                                                                          returningLists,
 947                                                                          rowMarks,
 948                                                                          SS_assign_special_param(root));
 949 }
 950
 951 /*--------------------
 952  * grouping_planner
 953  *        Perform planning steps related to grouping, aggregation, etc.
 954  *        This primarily means adding top-level processing to the basic
 955  *        query plan produced by query_planner.
 956  *
 957  * tuple_fraction is the fraction of tuples we expect will be retrieved
 958  *
 959  * tuple_fraction is interpreted as follows:
 960  *        0: expect all tuples to be retrieved (normal case)
 961  *        0 < tuple_fraction < 1: expect the given fraction of tuples available
 962  *              from the plan to be retrieved
 963  *        tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
 964  *              expected to be retrieved (ie, a LIMIT specification)
 965  *
 966  * Returns a query plan.  Also, root->query_pathkeys is returned as the
 967  * actual output ordering of the plan (in pathkey format).
 968  *--------------------
 969  */
 970 static Plan *
 971 grouping_planner(PlannerInfo *root, double tuple_fraction)
 972 {
 973         Query      *parse = root->parse;
 974         List       *tlist = parse->targetList;
 975         int64           offset_est = 0;
 976         int64           count_est = 0;
 977         double          limit_tuples = -1.0;
 978         Plan       *result_plan;
 979         List       *current_pathkeys;
 980         double          dNumGroups = 0;
 981         bool            use_hashed_distinct = false;
 982         bool            tested_hashed_distinct = false;
 983
 984         /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
 985         if (parse->limitCount || parse->limitOffset)
 986         {
 987                 tuple_fraction = preprocess_limit(root, tuple_fraction,
 988                                                                                   &offset_est, &count_est);
 989
 990                 /*
 991                  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
 992                  * estimate the effects of using a bounded sort.
 993                  */
 994                 if (count_est > 0 && offset_est >= 0)
 995                         limit_tuples = (double) count_est + (double) offset_est;
 996         }
 997
 998         if (parse->setOperations)
 999         {
1000                 List       *set_sortclauses;
1001
1002                 /*
1003                  * If there's a top-level ORDER BY, assume we have to fetch all the
1004                  * tuples.      This might be too simplistic given all the hackery below
1005                  * to possibly avoid the sort; but the odds of accurate estimates here
1006                  * are pretty low anyway.
1007                  */
1008                 if (parse->sortClause)
1009                         tuple_fraction = 0.0;
1010
1011                 /*
1012                  * Construct the plan for set operations.  The result will not need
1013                  * any work except perhaps a top-level sort and/or LIMIT.  Note that
1014                  * any special work for recursive unions is the responsibility of
1015                  * plan_set_operations.
1016                  */
1017                 result_plan = plan_set_operations(root, tuple_fraction,
1018                                                                                   &set_sortclauses);
1019
1020                 /*
1021                  * Calculate pathkeys representing the sort order (if any) of the set
1022                  * operation's result.  We have to do this before overwriting the sort
1023                  * key information...
1024                  */
1025                 current_pathkeys = make_pathkeys_for_sortclauses(root,
1026                                                                                                                  set_sortclauses,
1027                                                                                                          result_plan->targetlist,
1028                                                                                                                  true);
1029
1030                 /*
1031                  * We should not need to call preprocess_targetlist, since we must be
1032                  * in a SELECT query node.      Instead, use the targetlist returned by
1033                  * plan_set_operations (since this tells whether it returned any
1034                  * resjunk columns!), and transfer any sort key information from the
1035                  * original tlist.
1036                  */
1037                 Assert(parse->commandType == CMD_SELECT);
1038
1039                 tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
1040                                                                                 tlist);
1041
1042                 /*
1043                  * Can't handle FOR UPDATE/SHARE here (parser should have checked
1044                  * already, but let's make sure).
1045                  */
1046                 if (parse->rowMarks)
1047                         ereport(ERROR,
1048                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1049                                          errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
1050
1051                 /*
1052                  * Calculate pathkeys that represent result ordering requirements
1053                  */
1054                 Assert(parse->distinctClause == NIL);
1055                 root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1056                                                                                                                         parse->sortClause,
1057                                                                                                                         tlist,
1058                                                                                                                         true);
1059         }
1060         else
1061         {
1062                 /* No set operations, do regular planning */
1063                 List       *sub_tlist;
1064                 double          sub_limit_tuples;
1065                 AttrNumber *groupColIdx = NULL;
1066                 bool            need_tlist_eval = true;
1067                 QualCost        tlist_cost;
1068                 Path       *cheapest_path;
1069                 Path       *sorted_path;
1070                 Path       *best_path;
1071                 long            numGroups = 0;
1072                 AggClauseCosts agg_costs;
1073                 int                     numGroupCols;
1074                 double          path_rows;
1075                 int                     path_width;
1076                 bool            use_hashed_grouping = false;
1077                 WindowFuncLists *wflists = NULL;
1078                 List       *activeWindows = NIL;
1079
1080                 MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1081
1082                 /* A recursive query should always have setOperations */
1083                 Assert(!root->hasRecursion);
1084
1085                 /* Preprocess GROUP BY clause, if any */
1086                 if (parse->groupClause)
1087                         preprocess_groupclause(root);
1088                 numGroupCols = list_length(parse->groupClause);
1089
1090                 /* Preprocess targetlist */
1091                 tlist = preprocess_targetlist(root, tlist);
1092
1093                 /*
1094                  * Locate any window functions in the tlist.  (We don't need to look
1095                  * anywhere else, since expressions used in ORDER BY will be in there
1096                  * too.)  Note that they could all have been eliminated by constant
1097                  * folding, in which case we don't need to do any more work.
1098                  */
1099                 if (parse->hasWindowFuncs)
1100                 {
1101                         wflists = find_window_functions((Node *) tlist,
1102                                                                                         list_length(parse->windowClause));
1103                         if (wflists->numWindowFuncs > 0)
1104                                 activeWindows = select_active_windows(root, wflists);
1105                         else
1106                                 parse->hasWindowFuncs = false;
1107                 }
1108
1109                 /*
1110                  * Generate appropriate target list for subplan; may be different from
1111                  * tlist if grouping or aggregation is needed.
1112                  */
1113                 sub_tlist = make_subplanTargetList(root, tlist,
1114                                                                                    &groupColIdx, &need_tlist_eval);
1115
1116                 /*
1117                  * Do aggregate preprocessing, if the query has any aggs.
1118                  *
1119                  * Note: think not that we can turn off hasAggs if we find no aggs. It
1120                  * is possible for constant-expression simplification to remove all
1121                  * explicit references to aggs, but we still have to follow the
1122                  * aggregate semantics (eg, producing only one output row).
1123                  */
1124                 if (parse->hasAggs)
1125                 {
1126                         /*
1127                          * Collect statistics about aggregates for estimating costs. Note:
1128                          * we do not attempt to detect duplicate aggregates here; a
1129                          * somewhat-overestimated cost is okay for our present purposes.
1130                          */
1131                         count_agg_clauses(root, (Node *) tlist, &agg_costs);
1132                         count_agg_clauses(root, parse->havingQual, &agg_costs);
1133
1134                         /*
1135                          * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
1136                          * adding logic between here and the optimize_minmax_aggregates
1137                          * call.  Anything that is needed in MIN/MAX-optimizable cases
1138                          * will have to be duplicated in planagg.c.
1139                          */
1140                         preprocess_minmax_aggregates(root, tlist);
1141                 }
1142
1143                 /*
1144                  * Calculate pathkeys that represent grouping/ordering requirements.
1145                  * Stash them in PlannerInfo so that query_planner can canonicalize
1146                  * them after EquivalenceClasses have been formed.      The sortClause is
1147                  * certainly sort-able, but GROUP BY and DISTINCT might not be, in
1148                  * which case we just leave their pathkeys empty.
1149                  */
1150                 if (parse->groupClause &&
1151                         grouping_is_sortable(parse->groupClause))
1152                         root->group_pathkeys =
1153                                 make_pathkeys_for_sortclauses(root,
1154                                                                                           parse->groupClause,
1155                                                                                           tlist,
1156                                                                                           false);
1157                 else
1158                         root->group_pathkeys = NIL;
1159
1160                 /* We consider only the first (bottom) window in pathkeys logic */
1161                 if (activeWindows != NIL)
1162                 {
1163                         WindowClause *wc = (WindowClause *) linitial(activeWindows);
1164
1165                         root->window_pathkeys = make_pathkeys_for_window(root,
1166                                                                                                                          wc,
1167                                                                                                                          tlist,
1168                                                                                                                          false);
1169                 }
1170                 else
1171                         root->window_pathkeys = NIL;
1172
1173                 if (parse->distinctClause &&
1174                         grouping_is_sortable(parse->distinctClause))
1175                         root->distinct_pathkeys =
1176                                 make_pathkeys_for_sortclauses(root,
1177                                                                                           parse->distinctClause,
1178                                                                                           tlist,
1179                                                                                           false);
1180                 else
1181                         root->distinct_pathkeys = NIL;
1182
1183                 root->sort_pathkeys =
1184                         make_pathkeys_for_sortclauses(root,
1185                                                                                   parse->sortClause,
1186                                                                                   tlist,
1187                                                                                   false);
1188
1189                 /*
1190                  * Figure out whether we want a sorted result from query_planner.
1191                  *
1192                  * If we have a sortable GROUP BY clause, then we want a result sorted
1193                  * properly for grouping.  Otherwise, if we have window functions to
1194                  * evaluate, we try to sort for the first window.  Otherwise, if
1195                  * there's a sortable DISTINCT clause that's more rigorous than the
1196                  * ORDER BY clause, we try to produce output that's sufficiently well
1197                  * sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
1198                  * clause, we want to sort by the ORDER BY clause.
1199                  *
1200                  * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
1201                  * superset of GROUP BY, it would be tempting to request sort by ORDER
1202                  * BY --- but that might just leave us failing to exploit an available
1203                  * sort order at all.  Needs more thought.      The choice for DISTINCT
1204                  * versus ORDER BY is much easier, since we know that the parser
1205                  * ensured that one is a superset of the other.
1206                  */
1207                 if (root->group_pathkeys)
1208                         root->query_pathkeys = root->group_pathkeys;
1209                 else if (root->window_pathkeys)
1210                         root->query_pathkeys = root->window_pathkeys;
1211                 else if (list_length(root->distinct_pathkeys) >
1212                                  list_length(root->sort_pathkeys))
1213                         root->query_pathkeys = root->distinct_pathkeys;
1214                 else if (root->sort_pathkeys)
1215                         root->query_pathkeys = root->sort_pathkeys;
1216                 else
1217                         root->query_pathkeys = NIL;
1218
1219                 /*
1220                  * Figure out whether there's a hard limit on the number of rows that
1221                  * query_planner's result subplan needs to return.  Even if we know a
1222                  * hard limit overall, it doesn't apply if the query has any
1223                  * grouping/aggregation operations.
1224                  */
1225                 if (parse->groupClause ||
1226                         parse->distinctClause ||
1227                         parse->hasAggs ||
1228                         parse->hasWindowFuncs ||
1229                         root->hasHavingQual)
1230                         sub_limit_tuples = -1.0;
1231                 else
1232                         sub_limit_tuples = limit_tuples;
1233
1234                 /*
1235                  * Generate the best unsorted and presorted paths for this Query (but
1236                  * note there may not be any presorted path).  query_planner will also
1237                  * estimate the number of groups in the query, and canonicalize all
1238                  * the pathkeys.
1239                  */
1240                 query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
1241                                           &cheapest_path, &sorted_path, &dNumGroups);
1242
1243                 /*
1244                  * Extract rowcount and width estimates for possible use in grouping
1245                  * decisions.  Beware here of the possibility that
1246                  * cheapest_path->parent is NULL (ie, there is no FROM clause).
1247                  */
1248                 if (cheapest_path->parent)
1249                 {
1250                         path_rows = cheapest_path->parent->rows;
1251                         path_width = cheapest_path->parent->width;
1252                 }
1253                 else
1254                 {
1255                         path_rows = 1;          /* assume non-set result */
1256                         path_width = 100;       /* arbitrary */
1257                 }
1258
1259                 if (parse->groupClause)
1260                 {
1261                         /*
1262                          * If grouping, decide whether to use sorted or hashed grouping.
1263                          */
1264                         use_hashed_grouping =
1265                                 choose_hashed_grouping(root,
1266                                                                            tuple_fraction, limit_tuples,
1267                                                                            path_rows, path_width,
1268                                                                            cheapest_path, sorted_path,
1269                                                                            dNumGroups, &agg_costs);
1270                         /* Also convert # groups to long int --- but 'ware overflow! */
1271                         numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
1272                 }
1273                 else if (parse->distinctClause && sorted_path &&
1274                                  !root->hasHavingQual && !parse->hasAggs && !activeWindows)
1275                 {
1276                         /*
1277                          * We'll reach the DISTINCT stage without any intermediate
1278                          * processing, so figure out whether we will want to hash or not
1279                          * so we can choose whether to use cheapest or sorted path.
1280                          */
1281                         use_hashed_distinct =
1282                                 choose_hashed_distinct(root,
1283                                                                            tuple_fraction, limit_tuples,
1284                                                                            path_rows, path_width,
1285                                                                            cheapest_path->startup_cost,
1286                                                                            cheapest_path->total_cost,
1287                                                                            sorted_path->startup_cost,
1288                                                                            sorted_path->total_cost,
1289                                                                            sorted_path->pathkeys,
1290                                                                            dNumGroups);
1291                         tested_hashed_distinct = true;
1292                 }
1293
1294                 /*
1295                  * Select the best path.  If we are doing hashed grouping, we will
1296                  * always read all the input tuples, so use the cheapest-total path.
1297                  * Otherwise, trust query_planner's decision about which to use.
1298                  */
1299                 if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
1300                         best_path = cheapest_path;
1301                 else
1302                         best_path = sorted_path;
1303
1304                 /*
1305                  * Check to see if it's possible to optimize MIN/MAX aggregates. If
1306                  * so, we will forget all the work we did so far to choose a "regular"
1307                  * path ... but we had to do it anyway to be able to tell which way is
1308                  * cheaper.
1309                  */
1310                 result_plan = optimize_minmax_aggregates(root,
1311                                                                                                  tlist,
1312                                                                                                  &agg_costs,
1313                                                                                                  best_path);
1314                 if (result_plan != NULL)
1315                 {
1316                         /*
1317                          * optimize_minmax_aggregates generated the full plan, with the
1318                          * right tlist, and it has no sort order.
1319                          */
1320                         current_pathkeys = NIL;
1321                 }
1322                 else
1323                 {
1324                         /*
1325                          * Normal case --- create a plan according to query_planner's
1326                          * results.
1327                          */
1328                         bool            need_sort_for_grouping = false;
1329
1330                         result_plan = create_plan(root, best_path);
1331                         current_pathkeys = best_path->pathkeys;
1332
1333                         /* Detect if we'll need an explicit sort for grouping */
1334                         if (parse->groupClause && !use_hashed_grouping &&
1335                           !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
1336                         {
1337                                 need_sort_for_grouping = true;
1338
1339                                 /*
1340                                  * Always override create_plan's tlist, so that we don't
1341                                  * sort useless data from a "physical" tlist.
1342                                  */
1343                                 need_tlist_eval = true;
1344                         }
1345
1346                         /*
1347                          * create_plan returns a plan with just a "flat" tlist of
1348                          * required Vars.  Usually we need to insert the sub_tlist as the
1349                          * tlist of the top plan node.  However, we can skip that if we
1350                          * determined that whatever create_plan chose to return will be
1351                          * good enough.
1352                          */
1353                         if (need_tlist_eval)
1354                         {
1355                                 /*
1356                                  * If the top-level plan node is one that cannot do expression
1357                                  * evaluation, we must insert a Result node to project the
1358                                  * desired tlist.
1359                                  */
1360                                 if (!is_projection_capable_plan(result_plan))
1361                                 {
1362                                         result_plan = (Plan *) make_result(root,
1363                                                                                                            sub_tlist,
1364                                                                                                            NULL,
1365                                                                                                            result_plan);
1366                                 }
1367                                 else
1368                                 {
1369                                         /*
1370                                          * Otherwise, just replace the subplan's flat tlist with
1371                                          * the desired tlist.
1372                                          */
1373                                         result_plan->targetlist = sub_tlist;
1374                                 }
1375
1376                                 /*
1377                                  * Also, account for the cost of evaluation of the sub_tlist.
1378                                  *
1379                                  * Up to now, we have only been dealing with "flat" tlists,
1380                                  * containing just Vars.  So their evaluation cost is zero
1381                                  * according to the model used by cost_qual_eval() (or if you
1382                                  * prefer, the cost is factored into cpu_tuple_cost).  Thus we
1383                                  * can avoid accounting for tlist cost throughout
1384                                  * query_planner() and subroutines.  But now we've inserted a
1385                                  * tlist that might contain actual operators, sub-selects, etc
1386                                  * --- so we'd better account for its cost.
1387                                  *
1388                                  * Below this point, any tlist eval cost for added-on nodes
1389                                  * should be accounted for as we create those nodes.
1390                                  * Presently, of the node types we can add on, only Agg,
1391                                  * WindowAgg, and Group project new tlists (the rest just copy
1392                                  * their input tuples) --- so make_agg(), make_windowagg() and
1393                                  * make_group() are responsible for computing the added cost.
1394                                  */
1395                                 cost_qual_eval(&tlist_cost, sub_tlist, root);
1396                                 result_plan->startup_cost += tlist_cost.startup;
1397                                 result_plan->total_cost += tlist_cost.startup +
1398                                         tlist_cost.per_tuple * result_plan->plan_rows;
1399                         }
1400                         else
1401                         {
1402                                 /*
1403                                  * Since we're using create_plan's tlist and not the one
1404                                  * make_subplanTargetList calculated, we have to refigure any
1405                                  * grouping-column indexes make_subplanTargetList computed.
1406                                  */
1407                                 locate_grouping_columns(root, tlist, result_plan->targetlist,
1408                                                                                 groupColIdx);
1409                         }
1410
1411                         /*
1412                          * Insert AGG or GROUP node if needed, plus an explicit sort step
1413                          * if necessary.
1414                          *
1415                          * HAVING clause, if any, becomes qual of the Agg or Group node.
1416                          */
1417                         if (use_hashed_grouping)
1418                         {
1419                                 /* Hashed aggregate plan --- no sort needed */
1420                                 result_plan = (Plan *) make_agg(root,
1421                                                                                                 tlist,
1422                                                                                                 (List *) parse->havingQual,
1423                                                                                                 AGG_HASHED,
1424                                                                                                 &agg_costs,
1425                                                                                                 numGroupCols,
1426                                                                                                 groupColIdx,
1427                                                                         extract_grouping_ops(parse->groupClause),
1428                                                                                                 numGroups,
1429                                                                                                 result_plan);
1430                                 /* Hashed aggregation produces randomly-ordered results */
1431                                 current_pathkeys = NIL;
1432                         }
1433                         else if (parse->hasAggs)
1434                         {
1435                                 /* Plain aggregate plan --- sort if needed */
1436                                 AggStrategy aggstrategy;
1437
1438                                 if (parse->groupClause)
1439                                 {
1440                                         if (need_sort_for_grouping)
1441                                         {
1442                                                 result_plan = (Plan *)
1443                                                         make_sort_from_groupcols(root,
1444                                                                                                          parse->groupClause,
1445                                                                                                          groupColIdx,
1446                                                                                                          result_plan);
1447                                                 current_pathkeys = root->group_pathkeys;
1448                                         }
1449                                         aggstrategy = AGG_SORTED;
1450
1451                                         /*
1452                                          * The AGG node will not change the sort ordering of its
1453                                          * groups, so current_pathkeys describes the result too.
1454                                          */
1455                                 }
1456                                 else
1457                                 {
1458                                         aggstrategy = AGG_PLAIN;
1459                                         /* Result will be only one row anyway; no sort order */
1460                                         current_pathkeys = NIL;
1461                                 }
1462
1463                                 result_plan = (Plan *) make_agg(root,
1464                                                                                                 tlist,
1465                                                                                                 (List *) parse->havingQual,
1466                                                                                                 aggstrategy,
1467                                                                                                 &agg_costs,
1468                                                                                                 numGroupCols,
1469                                                                                                 groupColIdx,
1470                                                                         extract_grouping_ops(parse->groupClause),
1471                                                                                                 numGroups,
1472                                                                                                 result_plan);
1473                         }
1474                         else if (parse->groupClause)
1475                         {
1476                                 /*
1477                                  * GROUP BY without aggregation, so insert a group node (plus
1478                                  * the appropriate sort node, if necessary).
1479                                  *
1480                                  * Add an explicit sort if we couldn't make the path come out
1481                                  * the way the GROUP node needs it.
1482                                  */
1483                                 if (need_sort_for_grouping)
1484                                 {
1485                                         result_plan = (Plan *)
1486                                                 make_sort_from_groupcols(root,
1487                                                                                                  parse->groupClause,
1488                                                                                                  groupColIdx,
1489                                                                                                  result_plan);
1490                                         current_pathkeys = root->group_pathkeys;
1491                                 }
1492
1493                                 result_plan = (Plan *) make_group(root,
1494                                                                                                   tlist,
1495                                                                                                   (List *) parse->havingQual,
1496                                                                                                   numGroupCols,
1497                                                                                                   groupColIdx,
1498                                                                         extract_grouping_ops(parse->groupClause),
1499                                                                                                   dNumGroups,
1500                                                                                                   result_plan);
1501                                 /* The Group node won't change sort ordering */
1502                         }
1503                         else if (root->hasHavingQual)
1504                         {
1505                                 /*
1506                                  * No aggregates, and no GROUP BY, but we have a HAVING qual.
1507                                  * This is a degenerate case in which we are supposed to emit
1508                                  * either 0 or 1 row depending on whether HAVING succeeds.
1509                                  * Furthermore, there cannot be any variables in either HAVING
1510                                  * or the targetlist, so we actually do not need the FROM
1511                                  * table at all!  We can just throw away the plan-so-far and
1512                                  * generate a Result node.      This is a sufficiently unusual
1513                                  * corner case that it's not worth contorting the structure of
1514                                  * this routine to avoid having to generate the plan in the
1515                                  * first place.
1516                                  */
1517                                 result_plan = (Plan *) make_result(root,
1518                                                                                                    tlist,
1519                                                                                                    parse->havingQual,
1520                                                                                                    NULL);
1521                         }
1522                 }                                               /* end of non-minmax-aggregate case */
1523
1524                 /*
1525                  * Since each window function could require a different sort order, we
1526                  * stack up a WindowAgg node for each window, with sort steps between
1527                  * them as needed.
1528                  */
1529                 if (activeWindows)
1530                 {
1531                         List       *window_tlist;
1532                         ListCell   *l;
1533
1534                         /*
1535                          * If the top-level plan node is one that cannot do expression
1536                          * evaluation, we must insert a Result node to project the desired
1537                          * tlist.  (In some cases this might not really be required, but
1538                          * it's not worth trying to avoid it.)  Note that on second and
1539                          * subsequent passes through the following loop, the top-level
1540                          * node will be a WindowAgg which we know can project; so we only
1541                          * need to check once.
1542                          */
1543                         if (!is_projection_capable_plan(result_plan))
1544                         {
1545                                 result_plan = (Plan *) make_result(root,
1546                                                                                                    NIL,
1547                                                                                                    NULL,
1548                                                                                                    result_plan);
1549                         }
1550
1551                         /*
1552                          * The "base" targetlist for all steps of the windowing process is
1553                          * a flat tlist of all Vars and Aggs needed in the result. (In
1554                          * some cases we wouldn't need to propagate all of these all the
1555                          * way to the top, since they might only be needed as inputs to
1556                          * WindowFuncs.  It's probably not worth trying to optimize that
1557                          * though.)  We also need any volatile sort expressions, because
1558                          * make_sort_from_pathkeys won't add those on its own, and anyway
1559                          * we want them evaluated only once at the bottom of the stack. As
1560                          * we climb up the stack, we add outputs for the WindowFuncs
1561                          * computed at each level.      Also, each input tlist has to present
1562                          * all the columns needed to sort the data for the next WindowAgg
1563                          * step.  That's handled internally by make_sort_from_pathkeys,
1564                          * but we need the copyObject steps here to ensure that each plan
1565                          * node has a separately modifiable tlist.
1566                          *
1567                          * Note: it's essential here to use PVC_INCLUDE_AGGREGATES so that
1568                          * Vars mentioned only in aggregate expressions aren't pulled out
1569                          * as separate targetlist entries.  Otherwise we could be putting
1570                          * ungrouped Vars directly into an Agg node's tlist, resulting in
1571                          * undefined behavior.
1572                          */
1573                         window_tlist = flatten_tlist(tlist,
1574                                                                                  PVC_INCLUDE_AGGREGATES,
1575                                                                                  PVC_INCLUDE_PLACEHOLDERS);
1576                         window_tlist = add_volatile_sort_exprs(window_tlist, tlist,
1577                                                                                                    activeWindows);
1578                         result_plan->targetlist = (List *) copyObject(window_tlist);
1579
1580                         foreach(l, activeWindows)
1581                         {
1582                                 WindowClause *wc = (WindowClause *) lfirst(l);
1583                                 List       *window_pathkeys;
1584                                 int                     partNumCols;
1585                                 AttrNumber *partColIdx;
1586                                 Oid                *partOperators;
1587                                 int                     ordNumCols;
1588                                 AttrNumber *ordColIdx;
1589                                 Oid                *ordOperators;
1590
1591                                 window_pathkeys = make_pathkeys_for_window(root,
1592                                                                                                                    wc,
1593                                                                                                                    tlist,
1594                                                                                                                    true);
1595
1596                                 /*
1597                                  * This is a bit tricky: we build a sort node even if we don't
1598                                  * really have to sort.  Even when no explicit sort is needed,
1599                                  * we need to have suitable resjunk items added to the input
1600                                  * plan's tlist for any partitioning or ordering columns that
1601                                  * aren't plain Vars.  Furthermore, this way we can use
1602                                  * existing infrastructure to identify which input columns are
1603                                  * the interesting ones.
1604                                  */
1605                                 if (window_pathkeys)
1606                                 {
1607                                         Sort       *sort_plan;
1608
1609                                         sort_plan = make_sort_from_pathkeys(root,
1610                                                                                                                 result_plan,
1611                                                                                                                 window_pathkeys,
1612                                                                                                                 -1.0);
1613                                         if (!pathkeys_contained_in(window_pathkeys,
1614                                                                                            current_pathkeys))
1615                                         {
1616                                                 /* we do indeed need to sort */
1617                                                 result_plan = (Plan *) sort_plan;
1618                                                 current_pathkeys = window_pathkeys;
1619                                         }
1620                                         /* In either case, extract the per-column information */
1621                                         get_column_info_for_window(root, wc, tlist,
1622                                                                                            sort_plan->numCols,
1623                                                                                            sort_plan->sortColIdx,
1624                                                                                            &partNumCols,
1625                                                                                            &partColIdx,
1626                                                                                            &partOperators,
1627                                                                                            &ordNumCols,
1628                                                                                            &ordColIdx,
1629                                                                                            &ordOperators);
1630                                 }
1631                                 else
1632                                 {
1633                                         /* empty window specification, nothing to sort */
1634                                         partNumCols = 0;
1635                                         partColIdx = NULL;
1636                                         partOperators = NULL;
1637                                         ordNumCols = 0;
1638                                         ordColIdx = NULL;
1639                                         ordOperators = NULL;
1640                                 }
1641
1642                                 if (lnext(l))
1643                                 {
1644                                         /* Add the current WindowFuncs to the running tlist */
1645                                         window_tlist = add_to_flat_tlist(window_tlist,
1646                                                                                    wflists->windowFuncs[wc->winref]);
1647                                 }
1648                                 else
1649                                 {
1650                                         /* Install the original tlist in the topmost WindowAgg */
1651                                         window_tlist = tlist;
1652                                 }
1653
1654                                 /* ... and make the WindowAgg plan node */
1655                                 result_plan = (Plan *)
1656                                         make_windowagg(root,
1657                                                                    (List *) copyObject(window_tlist),
1658                                                                    wflists->windowFuncs[wc->winref],
1659                                                                    wc->winref,
1660                                                                    partNumCols,
1661                                                                    partColIdx,
1662                                                                    partOperators,
1663                                                                    ordNumCols,
1664                                                                    ordColIdx,
1665                                                                    ordOperators,
1666                                                                    wc->frameOptions,
1667                                                                    wc->startOffset,
1668                                                                    wc->endOffset,
1669                                                                    result_plan);
1670                         }
1671                 }
1672         }                                                       /* end of if (setOperations) */
1673
1674         /*
1675          * If there is a DISTINCT clause, add the necessary node(s).
1676          */
1677         if (parse->distinctClause)
1678         {
1679                 double          dNumDistinctRows;
1680                 long            numDistinctRows;
1681
1682                 /*
1683                  * If there was grouping or aggregation, use the current number of
1684                  * rows as the estimated number of DISTINCT rows (ie, assume the
1685                  * result was already mostly unique).  If not, use the number of
1686                  * distinct-groups calculated by query_planner.
1687                  */
1688                 if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
1689                         dNumDistinctRows = result_plan->plan_rows;
1690                 else
1691                         dNumDistinctRows = dNumGroups;
1692
1693                 /* Also convert to long int --- but 'ware overflow! */
1694                 numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
1695
1696                 /* Choose implementation method if we didn't already */
1697                 if (!tested_hashed_distinct)
1698                 {
1699                         /*
1700                          * At this point, either hashed or sorted grouping will have to
1701                          * work from result_plan, so we pass that as both "cheapest" and
1702                          * "sorted".
1703                          */
1704                         use_hashed_distinct =
1705                                 choose_hashed_distinct(root,
1706                                                                            tuple_fraction, limit_tuples,
1707                                                                            result_plan->plan_rows,
1708                                                                            result_plan->plan_width,
1709                                                                            result_plan->startup_cost,
1710                                                                            result_plan->total_cost,
1711                                                                            result_plan->startup_cost,
1712                                                                            result_plan->total_cost,
1713                                                                            current_pathkeys,
1714                                                                            dNumDistinctRows);
1715                 }
1716
1717                 if (use_hashed_distinct)
1718                 {
1719                         /* Hashed aggregate plan --- no sort needed */
1720                         result_plan = (Plan *) make_agg(root,
1721                                                                                         result_plan->targetlist,
1722                                                                                         NIL,
1723                                                                                         AGG_HASHED,
1724                                                                                         NULL,
1725                                                                                   list_length(parse->distinctClause),
1726                                                                  extract_grouping_cols(parse->distinctClause,
1727                                                                                                         result_plan->targetlist),
1728                                                                  extract_grouping_ops(parse->distinctClause),
1729                                                                                         numDistinctRows,
1730                                                                                         result_plan);
1731                         /* Hashed aggregation produces randomly-ordered results */
1732                         current_pathkeys = NIL;
1733                 }
1734                 else
1735                 {
1736                         /*
1737                          * Use a Unique node to implement DISTINCT.  Add an explicit sort
1738                          * if we couldn't make the path come out the way the Unique node
1739                          * needs it.  If we do have to sort, always sort by the more
1740                          * rigorous of DISTINCT and ORDER BY, to avoid a second sort
1741                          * below.  However, for regular DISTINCT, don't sort now if we
1742                          * don't have to --- sorting afterwards will likely be cheaper,
1743                          * and also has the possibility of optimizing via LIMIT.  But for
1744                          * DISTINCT ON, we *must* force the final sort now, else it won't
1745                          * have the desired behavior.
1746                          */
1747                         List       *needed_pathkeys;
1748
1749                         if (parse->hasDistinctOn &&
1750                                 list_length(root->distinct_pathkeys) <
1751                                 list_length(root->sort_pathkeys))
1752                                 needed_pathkeys = root->sort_pathkeys;
1753                         else
1754                                 needed_pathkeys = root->distinct_pathkeys;
1755
1756                         if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
1757                         {
1758                                 if (list_length(root->distinct_pathkeys) >=
1759                                         list_length(root->sort_pathkeys))
1760                                         current_pathkeys = root->distinct_pathkeys;
1761                                 else
1762                                 {
1763                                         current_pathkeys = root->sort_pathkeys;
1764                                         /* Assert checks that parser didn't mess up... */
1765                                         Assert(pathkeys_contained_in(root->distinct_pathkeys,
1766                                                                                                  current_pathkeys));
1767                                 }
1768
1769                                 result_plan = (Plan *) make_sort_from_pathkeys(root,
1770                                                                                                                            result_plan,
1771                                                                                                                         current_pathkeys,
1772                                                                                                                            -1.0);
1773                         }
1774
1775                         result_plan = (Plan *) make_unique(result_plan,
1776                                                                                            parse->distinctClause);
1777                         result_plan->plan_rows = dNumDistinctRows;
1778                         /* The Unique node won't change sort ordering */
1779                 }
1780         }
1781
1782         /*
1783          * If ORDER BY was given and we were not able to make the plan come out in
1784          * the right order, add an explicit sort step.
1785          */
1786         if (parse->sortClause)
1787         {
1788                 if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
1789                 {
1790                         result_plan = (Plan *) make_sort_from_pathkeys(root,
1791                                                                                                                    result_plan,
1792                                                                                                                  root->sort_pathkeys,
1793                                                                                                                    limit_tuples);
1794                         current_pathkeys = root->sort_pathkeys;
1795                 }
1796         }
1797
1798         /*
1799          * If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we
1800          * intentionally test parse->rowMarks not root->rowMarks here. If there
1801          * are only non-locking rowmarks, they should be handled by the
1802          * ModifyTable node instead.)
1803          */
1804         if (parse->rowMarks)
1805         {
1806                 result_plan = (Plan *) make_lockrows(result_plan,
1807                                                                                          root->rowMarks,
1808                                                                                          SS_assign_special_param(root));
1809
1810                 /*
1811                  * The result can no longer be assumed sorted, since locking might
1812                  * cause the sort key columns to be replaced with new values.
1813                  */
1814                 current_pathkeys = NIL;
1815         }
1816
1817         /*
1818          * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
1819          */
1820         if (parse->limitCount || parse->limitOffset)
1821         {
1822                 result_plan = (Plan *) make_limit(result_plan,
1823                                                                                   parse->limitOffset,
1824                                                                                   parse->limitCount,
1825                                                                                   offset_est,
1826                                                                                   count_est);
1827         }
1828
1829         /*
1830          * Return the actual output ordering in query_pathkeys for possible use by
1831          * an outer query level.
1832          */
1833         root->query_pathkeys = current_pathkeys;
1834
1835         return result_plan;
1836 }
1837
1838 /*
1839  * Detect whether a plan node is a "dummy" plan created when a relation
1840  * is deemed not to need scanning due to constraint exclusion.
1841  *
1842  * Currently, such dummy plans are Result nodes with constant FALSE
1843  * filter quals (see set_dummy_rel_pathlist and create_append_plan).
1844  *
1845  * XXX this probably ought to be somewhere else, but not clear where.
1846  */
1847 bool
1848 is_dummy_plan(Plan *plan)
1849 {
1850         if (IsA(plan, Result))
1851         {
1852                 List       *rcqual = (List *) ((Result *) plan)->resconstantqual;
1853
1854                 if (list_length(rcqual) == 1)
1855                 {
1856                         Const      *constqual = (Const *) linitial(rcqual);
1857
1858                         if (constqual && IsA(constqual, Const))
1859                         {
1860                                 if (!constqual->constisnull &&
1861                                         !DatumGetBool(constqual->constvalue))
1862                                         return true;
1863                         }
1864                 }
1865         }
1866         return false;
1867 }
1868
1869 /*
1870  * Create a bitmapset of the RT indexes of live base relations
1871  *
1872  * Helper for preprocess_rowmarks ... at this point in the proceedings,
1873  * the only good way to distinguish baserels from appendrel children
1874  * is to see what is in the join tree.
1875  */
1876 static Bitmapset *
1877 get_base_rel_indexes(Node *jtnode)
1878 {
1879         Bitmapset  *result;
1880
1881         if (jtnode == NULL)
1882                 return NULL;
1883         if (IsA(jtnode, RangeTblRef))
1884         {
1885                 int                     varno = ((RangeTblRef *) jtnode)->rtindex;
1886
1887                 result = bms_make_singleton(varno);
1888         }
1889         else if (IsA(jtnode, FromExpr))
1890         {
1891                 FromExpr   *f = (FromExpr *) jtnode;
1892                 ListCell   *l;
1893
1894                 result = NULL;
1895                 foreach(l, f->fromlist)
1896                         result = bms_join(result,
1897                                                           get_base_rel_indexes(lfirst(l)));
1898         }
1899         else if (IsA(jtnode, JoinExpr))
1900         {
1901                 JoinExpr   *j = (JoinExpr *) jtnode;
1902
1903                 result = bms_join(get_base_rel_indexes(j->larg),
1904                                                   get_base_rel_indexes(j->rarg));
1905         }
1906         else
1907         {
1908                 elog(ERROR, "unrecognized node type: %d",
1909                          (int) nodeTag(jtnode));
1910                 result = NULL;                  /* keep compiler quiet */
1911         }
1912         return result;
1913 }
1914
1915 /*
1916  * preprocess_rowmarks - set up PlanRowMarks if needed
1917  */
1918 static void
1919 preprocess_rowmarks(PlannerInfo *root)
1920 {
1921         Query      *parse = root->parse;
1922         Bitmapset  *rels;
1923         List       *prowmarks;
1924         ListCell   *l;
1925         int                     i;
1926
1927         if (parse->rowMarks)
1928         {
1929                 /*
1930                  * We've got trouble if FOR UPDATE/SHARE appears inside grouping,
1931                  * since grouping renders a reference to individual tuple CTIDs
1932                  * invalid.  This is also checked at parse time, but that's
1933                  * insufficient because of rule substitution, query pullup, etc.
1934                  */
1935                 CheckSelectLocking(parse);
1936         }
1937         else
1938         {
1939                 /*
1940                  * We only need rowmarks for UPDATE, DELETE, or FOR UPDATE/SHARE.
1941                  */
1942                 if (parse->commandType != CMD_UPDATE &&
1943                         parse->commandType != CMD_DELETE)
1944                         return;
1945         }
1946
1947         /*
1948          * We need to have rowmarks for all base relations except the target. We
1949          * make a bitmapset of all base rels and then remove the items we don't
1950          * need or have FOR UPDATE/SHARE marks for.
1951          */
1952         rels = get_base_rel_indexes((Node *) parse->jointree);
1953         if (parse->resultRelation)
1954                 rels = bms_del_member(rels, parse->resultRelation);
1955
1956         /*
1957          * Convert RowMarkClauses to PlanRowMark representation.
1958          */
1959         prowmarks = NIL;
1960         foreach(l, parse->rowMarks)
1961         {
1962                 RowMarkClause *rc = (RowMarkClause *) lfirst(l);
1963                 RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
1964                 PlanRowMark *newrc;
1965
1966                 /*
1967                  * Currently, it is syntactically impossible to have FOR UPDATE
1968                  * applied to an update/delete target rel.      If that ever becomes
1969                  * possible, we should drop the target from the PlanRowMark list.
1970                  */
1971                 Assert(rc->rti != parse->resultRelation);
1972
1973                 /*
1974                  * Ignore RowMarkClauses for subqueries; they aren't real tables and
1975                  * can't support true locking.  Subqueries that got flattened into the
1976                  * main query should be ignored completely.  Any that didn't will get
1977                  * ROW_MARK_COPY items in the next loop.
1978                  */
1979                 if (rte->rtekind != RTE_RELATION)
1980                         continue;
1981
1982                 rels = bms_del_member(rels, rc->rti);
1983
1984                 newrc = makeNode(PlanRowMark);
1985                 newrc->rti = newrc->prti = rc->rti;
1986                 newrc->rowmarkId = ++(root->glob->lastRowMarkId);
1987                 if (rc->forUpdate)
1988                         newrc->markType = ROW_MARK_EXCLUSIVE;
1989                 else
1990                         newrc->markType = ROW_MARK_SHARE;
1991                 newrc->noWait = rc->noWait;
1992                 newrc->isParent = false;
1993
1994                 prowmarks = lappend(prowmarks, newrc);
1995         }
1996
1997         /*
1998          * Now, add rowmarks for any non-target, non-locked base relations.
1999          */
2000         i = 0;
2001         foreach(l, parse->rtable)
2002         {
2003                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
2004                 PlanRowMark *newrc;
2005
2006                 i++;
2007                 if (!bms_is_member(i, rels))
2008                         continue;
2009
2010                 newrc = makeNode(PlanRowMark);
2011                 newrc->rti = newrc->prti = i;
2012                 newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2013                 /* real tables support REFERENCE, anything else needs COPY */
2014                 if (rte->rtekind == RTE_RELATION &&
2015                         rte->relkind != RELKIND_FOREIGN_TABLE)
2016                         newrc->markType = ROW_MARK_REFERENCE;
2017                 else
2018                         newrc->markType = ROW_MARK_COPY;
2019                 newrc->noWait = false;  /* doesn't matter */
2020                 newrc->isParent = false;
2021
2022                 prowmarks = lappend(prowmarks, newrc);
2023         }
2024
2025         root->rowMarks = prowmarks;
2026 }
2027
2028 /*
2029  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2030  *
2031  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2032  * results back in *count_est and *offset_est.  These variables are set to
2033  * 0 if the corresponding clause is not present, and -1 if it's present
2034  * but we couldn't estimate the value for it.  (The "0" convention is OK
2035  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2036  * LIMIT 0 as though it were LIMIT 1.  But this is in line with the planner's
2037  * usual practice of never estimating less than one row.)  These values will
2038  * be passed to make_limit, which see if you change this code.
2039  *
2040  * The return value is the suitably adjusted tuple_fraction to use for
2041  * planning the query.  This adjustment is not overridable, since it reflects
2042  * plan actions that grouping_planner() will certainly take, not assumptions
2043  * about context.
2044  */
2045 static double
2046 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2047                                  int64 *offset_est, int64 *count_est)
2048 {
2049         Query      *parse = root->parse;
2050         Node       *est;
2051         double          limit_fraction;
2052
2053         /* Should not be called unless LIMIT or OFFSET */
2054         Assert(parse->limitCount || parse->limitOffset);
2055
2056         /*
2057          * Try to obtain the clause values.  We use estimate_expression_value
2058          * primarily because it can sometimes do something useful with Params.
2059          */
2060         if (parse->limitCount)
2061         {
2062                 est = estimate_expression_value(root, parse->limitCount);
2063                 if (est && IsA(est, Const))
2064                 {
2065                         if (((Const *) est)->constisnull)
2066                         {
2067                                 /* NULL indicates LIMIT ALL, ie, no limit */
2068                                 *count_est = 0; /* treat as not present */
2069                         }
2070                         else
2071                         {
2072                                 *count_est = DatumGetInt64(((Const *) est)->constvalue);
2073                                 if (*count_est <= 0)
2074                                         *count_est = 1;         /* force to at least 1 */
2075                         }
2076                 }
2077                 else
2078                         *count_est = -1;        /* can't estimate */
2079         }
2080         else
2081                 *count_est = 0;                 /* not present */
2082
2083         if (parse->limitOffset)
2084         {
2085                 est = estimate_expression_value(root, parse->limitOffset);
2086                 if (est && IsA(est, Const))
2087                 {
2088                         if (((Const *) est)->constisnull)
2089                         {
2090                                 /* Treat NULL as no offset; the executor will too */
2091                                 *offset_est = 0;        /* treat as not present */
2092                         }
2093                         else
2094                         {
2095                                 *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2096                                 if (*offset_est < 0)
2097                                         *offset_est = 0;        /* less than 0 is same as 0 */
2098                         }
2099                 }
2100                 else
2101                         *offset_est = -1;       /* can't estimate */
2102         }
2103         else
2104                 *offset_est = 0;                /* not present */
2105
2106         if (*count_est != 0)
2107         {
2108                 /*
2109                  * A LIMIT clause limits the absolute number of tuples returned.
2110                  * However, if it's not a constant LIMIT then we have to guess; for
2111                  * lack of a better idea, assume 10% of the plan's result is wanted.
2112                  */
2113                 if (*count_est < 0 || *offset_est < 0)
2114                 {
2115                         /* LIMIT or OFFSET is an expression ... punt ... */
2116                         limit_fraction = 0.10;
2117                 }
2118                 else
2119                 {
2120                         /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2121                         limit_fraction = (double) *count_est + (double) *offset_est;
2122                 }
2123
2124                 /*
2125                  * If we have absolute limits from both caller and LIMIT, use the
2126                  * smaller value; likewise if they are both fractional.  If one is
2127                  * fractional and the other absolute, we can't easily determine which
2128                  * is smaller, but we use the heuristic that the absolute will usually
2129                  * be smaller.
2130                  */
2131                 if (tuple_fraction >= 1.0)
2132                 {
2133                         if (limit_fraction >= 1.0)
2134                         {
2135                                 /* both absolute */
2136                                 tuple_fraction = Min(tuple_fraction, limit_fraction);
2137                         }
2138                         else
2139                         {
2140                                 /* caller absolute, limit fractional; use caller's value */
2141                         }
2142                 }
2143                 else if (tuple_fraction > 0.0)
2144                 {
2145                         if (limit_fraction >= 1.0)
2146                         {
2147                                 /* caller fractional, limit absolute; use limit */
2148                                 tuple_fraction = limit_fraction;
2149                         }
2150                         else
2151                         {
2152                                 /* both fractional */
2153                                 tuple_fraction = Min(tuple_fraction, limit_fraction);
2154                         }
2155                 }
2156                 else
2157                 {
2158                         /* no info from caller, just use limit */
2159                         tuple_fraction = limit_fraction;
2160                 }
2161         }
2162         else if (*offset_est != 0 && tuple_fraction > 0.0)
2163         {
2164                 /*
2165                  * We have an OFFSET but no LIMIT.      This acts entirely differently
2166                  * from the LIMIT case: here, we need to increase rather than decrease
2167                  * the caller's tuple_fraction, because the OFFSET acts to cause more
2168                  * tuples to be fetched instead of fewer.  This only matters if we got
2169                  * a tuple_fraction > 0, however.
2170                  *
2171                  * As above, use 10% if OFFSET is present but unestimatable.
2172                  */
2173                 if (*offset_est < 0)
2174                         limit_fraction = 0.10;
2175                 else
2176                         limit_fraction = (double) *offset_est;
2177
2178                 /*
2179                  * If we have absolute counts from both caller and OFFSET, add them
2180                  * together; likewise if they are both fractional.      If one is
2181                  * fractional and the other absolute, we want to take the larger, and
2182                  * we heuristically assume that's the fractional one.
2183                  */
2184                 if (tuple_fraction >= 1.0)
2185                 {
2186                         if (limit_fraction >= 1.0)
2187                         {
2188                                 /* both absolute, so add them together */
2189                                 tuple_fraction += limit_fraction;
2190                         }
2191                         else
2192                         {
2193                                 /* caller absolute, limit fractional; use limit */
2194                                 tuple_fraction = limit_fraction;
2195                         }
2196                 }
2197                 else
2198                 {
2199                         if (limit_fraction >= 1.0)
2200                         {
2201                                 /* caller fractional, limit absolute; use caller's value */
2202                         }
2203                         else
2204                         {
2205                                 /* both fractional, so add them together */
2206                                 tuple_fraction += limit_fraction;
2207                                 if (tuple_fraction >= 1.0)
2208                                         tuple_fraction = 0.0;           /* assume fetch all */
2209                         }
2210                 }
2211         }
2212
2213         return tuple_fraction;
2214 }
2215
2216
2217 /*
2218  * preprocess_groupclause - do preparatory work on GROUP BY clause
2219  *
2220  * The idea here is to adjust the ordering of the GROUP BY elements
2221  * (which in itself is semantically insignificant) to match ORDER BY,
2222  * thereby allowing a single sort operation to both implement the ORDER BY
2223  * requirement and set up for a Unique step that implements GROUP BY.
2224  *
2225  * In principle it might be interesting to consider other orderings of the
2226  * GROUP BY elements, which could match the sort ordering of other
2227  * possible plans (eg an indexscan) and thereby reduce cost.  We don't
2228  * bother with that, though.  Hashed grouping will frequently win anyway.
2229  *
2230  * Note: we need no comparable processing of the distinctClause because
2231  * the parser already enforced that that matches ORDER BY.
2232  */
2233 static void
2234 preprocess_groupclause(PlannerInfo *root)
2235 {
2236         Query      *parse = root->parse;
2237         List       *new_groupclause;
2238         bool            partial_match;
2239         ListCell   *sl;
2240         ListCell   *gl;
2241
2242         /* If no ORDER BY, nothing useful to do here */
2243         if (parse->sortClause == NIL)
2244                 return;
2245
2246         /*
2247          * Scan the ORDER BY clause and construct a list of matching GROUP BY
2248          * items, but only as far as we can make a matching prefix.
2249          *
2250          * This code assumes that the sortClause contains no duplicate items.
2251          */
2252         new_groupclause = NIL;
2253         foreach(sl, parse->sortClause)
2254         {
2255                 SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
2256
2257                 foreach(gl, parse->groupClause)
2258                 {
2259                         SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2260
2261                         if (equal(gc, sc))
2262                         {
2263                                 new_groupclause = lappend(new_groupclause, gc);
2264                                 break;
2265                         }
2266                 }
2267                 if (gl == NULL)
2268                         break;                          /* no match, so stop scanning */
2269         }
2270
2271         /* Did we match all of the ORDER BY list, or just some of it? */
2272         partial_match = (sl != NULL);
2273
2274         /* If no match at all, no point in reordering GROUP BY */
2275         if (new_groupclause == NIL)
2276                 return;
2277
2278         /*
2279          * Add any remaining GROUP BY items to the new list, but only if we were
2280          * able to make a complete match.  In other words, we only rearrange the
2281          * GROUP BY list if the result is that one list is a prefix of the other
2282          * --- otherwise there's no possibility of a common sort.  Also, give up
2283          * if there are any non-sortable GROUP BY items, since then there's no
2284          * hope anyway.
2285          */
2286         foreach(gl, parse->groupClause)
2287         {
2288                 SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2289
2290                 if (list_member_ptr(new_groupclause, gc))
2291                         continue;                       /* it matched an ORDER BY item */
2292                 if (partial_match)
2293                         return;                         /* give up, no common sort possible */
2294                 if (!OidIsValid(gc->sortop))
2295                         return;                         /* give up, GROUP BY can't be sorted */
2296                 new_groupclause = lappend(new_groupclause, gc);
2297         }
2298
2299         /* Success --- install the rearranged GROUP BY list */
2300         Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2301         parse->groupClause = new_groupclause;
2302 }
2303
2304 /*
2305  * choose_hashed_grouping - should we use hashed grouping?
2306  *
2307  * Returns TRUE to select hashing, FALSE to select sorting.
2308  */
2309 static bool
2310 choose_hashed_grouping(PlannerInfo *root,
2311                                            double tuple_fraction, double limit_tuples,
2312                                            double path_rows, int path_width,
2313                                            Path *cheapest_path, Path *sorted_path,
2314                                            double dNumGroups, AggClauseCosts *agg_costs)
2315 {
2316         Query      *parse = root->parse;
2317         int                     numGroupCols = list_length(parse->groupClause);
2318         bool            can_hash;
2319         bool            can_sort;
2320         Size            hashentrysize;
2321         List       *target_pathkeys;
2322         List       *current_pathkeys;
2323         Path            hashed_p;
2324         Path            sorted_p;
2325
2326         /*
2327          * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
2328          * aggregates.  (Doing so would imply storing *all* the input values in
2329          * the hash table, and/or running many sorts in parallel, either of which
2330          * seems like a certain loser.)
2331          */
2332         can_hash = (agg_costs->numOrderedAggs == 0 &&
2333                                 grouping_is_hashable(parse->groupClause));
2334         can_sort = grouping_is_sortable(parse->groupClause);
2335
2336         /* Quick out if only one choice is workable */
2337         if (!(can_hash && can_sort))
2338         {
2339                 if (can_hash)
2340                         return true;
2341                 else if (can_sort)
2342                         return false;
2343                 else
2344                         ereport(ERROR,
2345                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2346                                          errmsg("could not implement GROUP BY"),
2347                                          errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2348         }
2349
2350         /* Prefer sorting when enable_hashagg is off */
2351         if (!enable_hashagg)
2352                 return false;
2353
2354         /*
2355          * Don't do it if it doesn't look like the hashtable will fit into
2356          * work_mem.
2357          */
2358
2359         /* Estimate per-hash-entry space at tuple width... */
2360         hashentrysize = MAXALIGN(path_width) + MAXALIGN(sizeof(MinimalTupleData));
2361         /* plus space for pass-by-ref transition values... */
2362         hashentrysize += agg_costs->transitionSpace;
2363         /* plus the per-hash-entry overhead */
2364         hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
2365
2366         if (hashentrysize * dNumGroups > work_mem * 1024L)
2367                 return false;
2368
2369         /*
2370          * When we have both GROUP BY and DISTINCT, use the more-rigorous of
2371          * DISTINCT and ORDER BY as the assumed required output sort order. This
2372          * is an oversimplification because the DISTINCT might get implemented via
2373          * hashing, but it's not clear that the case is common enough (or that our
2374          * estimates are good enough) to justify trying to solve it exactly.
2375          */
2376         if (list_length(root->distinct_pathkeys) >
2377                 list_length(root->sort_pathkeys))
2378                 target_pathkeys = root->distinct_pathkeys;
2379         else
2380                 target_pathkeys = root->sort_pathkeys;
2381
2382         /*
2383          * See if the estimated cost is no more than doing it the other way. While
2384          * avoiding the need for sorted input is usually a win, the fact that the
2385          * output won't be sorted may be a loss; so we need to do an actual cost
2386          * comparison.
2387          *
2388          * We need to consider cheapest_path + hashagg [+ final sort] versus
2389          * either cheapest_path [+ sort] + group or agg [+ final sort] or
2390          * presorted_path + group or agg [+ final sort] where brackets indicate a
2391          * step that may not be needed. We assume query_planner() will have
2392          * returned a presorted path only if it's a winner compared to
2393          * cheapest_path for this purpose.
2394          *
2395          * These path variables are dummies that just hold cost fields; we don't
2396          * make actual Paths for these steps.
2397          */
2398         cost_agg(&hashed_p, root, AGG_HASHED, agg_costs,
2399                          numGroupCols, dNumGroups,
2400                          cheapest_path->startup_cost, cheapest_path->total_cost,
2401                          path_rows);
2402         /* Result of hashed agg is always unsorted */
2403         if (target_pathkeys)
2404                 cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost,
2405                                   dNumGroups, path_width,
2406                                   0.0, work_mem, limit_tuples);
2407
2408         if (sorted_path)
2409         {
2410                 sorted_p.startup_cost = sorted_path->startup_cost;
2411                 sorted_p.total_cost = sorted_path->total_cost;
2412                 current_pathkeys = sorted_path->pathkeys;
2413         }
2414         else
2415         {
2416                 sorted_p.startup_cost = cheapest_path->startup_cost;
2417                 sorted_p.total_cost = cheapest_path->total_cost;
2418                 current_pathkeys = cheapest_path->pathkeys;
2419         }
2420         if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
2421         {
2422                 cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,
2423                                   path_rows, path_width,
2424                                   0.0, work_mem, -1.0);
2425                 current_pathkeys = root->group_pathkeys;
2426         }
2427
2428         if (parse->hasAggs)
2429                 cost_agg(&sorted_p, root, AGG_SORTED, agg_costs,
2430                                  numGroupCols, dNumGroups,
2431                                  sorted_p.startup_cost, sorted_p.total_cost,
2432                                  path_rows);
2433         else
2434                 cost_group(&sorted_p, root, numGroupCols, dNumGroups,
2435                                    sorted_p.startup_cost, sorted_p.total_cost,
2436                                    path_rows);
2437         /* The Agg or Group node will preserve ordering */
2438         if (target_pathkeys &&
2439                 !pathkeys_contained_in(target_pathkeys, current_pathkeys))
2440                 cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost,
2441                                   dNumGroups, path_width,
2442                                   0.0, work_mem, limit_tuples);
2443
2444         /*
2445          * Now make the decision using the top-level tuple fraction.  First we
2446          * have to convert an absolute count (LIMIT) into fractional form.
2447          */
2448         if (tuple_fraction >= 1.0)
2449                 tuple_fraction /= dNumGroups;
2450
2451         if (compare_fractional_path_costs(&hashed_p, &sorted_p,
2452                                                                           tuple_fraction) < 0)
2453         {
2454                 /* Hashed is cheaper, so use it */
2455                 return true;
2456         }
2457         return false;
2458 }
2459
2460 /*
2461  * choose_hashed_distinct - should we use hashing for DISTINCT?
2462  *
2463  * This is fairly similar to choose_hashed_grouping, but there are enough
2464  * differences that it doesn't seem worth trying to unify the two functions.
2465  * (One difference is that we sometimes apply this after forming a Plan,
2466  * so the input alternatives can't be represented as Paths --- instead we
2467  * pass in the costs as individual variables.)
2468  *
2469  * But note that making the two choices independently is a bit bogus in
2470  * itself.      If the two could be combined into a single choice operation
2471  * it'd probably be better, but that seems far too unwieldy to be practical,
2472  * especially considering that the combination of GROUP BY and DISTINCT
2473  * isn't very common in real queries.  By separating them, we are giving
2474  * extra preference to using a sorting implementation when a common sort key
2475  * is available ... and that's not necessarily wrong anyway.
2476  *
2477  * Returns TRUE to select hashing, FALSE to select sorting.
2478  */
2479 static bool
2480 choose_hashed_distinct(PlannerInfo *root,
2481                                            double tuple_fraction, double limit_tuples,
2482                                            double path_rows, int path_width,
2483                                            Cost cheapest_startup_cost, Cost cheapest_total_cost,
2484                                            Cost sorted_startup_cost, Cost sorted_total_cost,
2485                                            List *sorted_pathkeys,
2486                                            double dNumDistinctRows)
2487 {
2488         Query      *parse = root->parse;
2489         int                     numDistinctCols = list_length(parse->distinctClause);
2490         bool            can_sort;
2491         bool            can_hash;
2492         Size            hashentrysize;
2493         List       *current_pathkeys;
2494         List       *needed_pathkeys;
2495         Path            hashed_p;
2496         Path            sorted_p;
2497
2498         /*
2499          * If we have a sortable DISTINCT ON clause, we always use sorting. This
2500          * enforces the expected behavior of DISTINCT ON.
2501          */
2502         can_sort = grouping_is_sortable(parse->distinctClause);
2503         if (can_sort && parse->hasDistinctOn)
2504                 return false;
2505
2506         can_hash = grouping_is_hashable(parse->distinctClause);
2507
2508         /* Quick out if only one choice is workable */
2509         if (!(can_hash && can_sort))
2510         {
2511                 if (can_hash)
2512                         return true;
2513                 else if (can_sort)
2514                         return false;
2515                 else
2516                         ereport(ERROR,
2517                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2518                                          errmsg("could not implement DISTINCT"),
2519                                          errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
2520         }
2521
2522         /* Prefer sorting when enable_hashagg is off */
2523         if (!enable_hashagg)
2524                 return false;
2525
2526         /*
2527          * Don't do it if it doesn't look like the hashtable will fit into
2528          * work_mem.
2529          */
2530         hashentrysize = MAXALIGN(path_width) + MAXALIGN(sizeof(MinimalTupleData));
2531
2532         if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
2533                 return false;
2534
2535         /*
2536          * See if the estimated cost is no more than doing it the other way. While
2537          * avoiding the need for sorted input is usually a win, the fact that the
2538          * output won't be sorted may be a loss; so we need to do an actual cost
2539          * comparison.
2540          *
2541          * We need to consider cheapest_path + hashagg [+ final sort] versus
2542          * sorted_path [+ sort] + group [+ final sort] where brackets indicate a
2543          * step that may not be needed.
2544          *
2545          * These path variables are dummies that just hold cost fields; we don't
2546          * make actual Paths for these steps.
2547          */
2548         cost_agg(&hashed_p, root, AGG_HASHED, NULL,
2549                          numDistinctCols, dNumDistinctRows,
2550                          cheapest_startup_cost, cheapest_total_cost,
2551                          path_rows);
2552
2553         /*
2554          * Result of hashed agg is always unsorted, so if ORDER BY is present we
2555          * need to charge for the final sort.
2556          */
2557         if (parse->sortClause)
2558                 cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,
2559                                   dNumDistinctRows, path_width,
2560                                   0.0, work_mem, limit_tuples);
2561
2562         /*
2563          * Now for the GROUP case.      See comments in grouping_planner about the
2564          * sorting choices here --- this code should match that code.
2565          */
2566         sorted_p.startup_cost = sorted_startup_cost;
2567         sorted_p.total_cost = sorted_total_cost;
2568         current_pathkeys = sorted_pathkeys;
2569         if (parse->hasDistinctOn &&
2570                 list_length(root->distinct_pathkeys) <
2571                 list_length(root->sort_pathkeys))
2572                 needed_pathkeys = root->sort_pathkeys;
2573         else
2574                 needed_pathkeys = root->distinct_pathkeys;
2575         if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
2576         {
2577                 if (list_length(root->distinct_pathkeys) >=
2578                         list_length(root->sort_pathkeys))
2579                         current_pathkeys = root->distinct_pathkeys;
2580                 else
2581                         current_pathkeys = root->sort_pathkeys;
2582                 cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost,
2583                                   path_rows, path_width,
2584                                   0.0, work_mem, -1.0);
2585         }
2586         cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
2587                            sorted_p.startup_cost, sorted_p.total_cost,
2588                            path_rows);
2589         if (parse->sortClause &&
2590                 !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
2591                 cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,
2592                                   dNumDistinctRows, path_width,
2593                                   0.0, work_mem, limit_tuples);
2594
2595         /*
2596          * Now make the decision using the top-level tuple fraction.  First we
2597          * have to convert an absolute count (LIMIT) into fractional form.
2598          */
2599         if (tuple_fraction >= 1.0)
2600                 tuple_fraction /= dNumDistinctRows;
2601
2602         if (compare_fractional_path_costs(&hashed_p, &sorted_p,
2603                                                                           tuple_fraction) < 0)
2604         {
2605                 /* Hashed is cheaper, so use it */
2606                 return true;
2607         }
2608         return false;
2609 }
2610
2611 /*
2612  * make_subplanTargetList
2613  *        Generate appropriate target list when grouping is required.
2614  *
2615  * When grouping_planner inserts grouping or aggregation plan nodes
2616  * above the scan/join plan constructed by query_planner+create_plan,
2617  * we typically want the scan/join plan to emit a different target list
2618  * than the outer plan nodes should have.  This routine generates the
2619  * correct target list for the scan/join subplan.
2620  *
2621  * The initial target list passed from the parser already contains entries
2622  * for all ORDER BY and GROUP BY expressions, but it will not have entries
2623  * for variables used only in HAVING clauses; so we need to add those
2624  * variables to the subplan target list.  Also, we flatten all expressions
2625  * except GROUP BY items into their component variables; the other expressions
2626  * will be computed by the inserted nodes rather than by the subplan.
2627  * For example, given a query like
2628  *              SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
2629  * we want to pass this targetlist to the subplan:
2630  *              a+b,c,d
2631  * where the a+b target will be used by the Sort/Group steps, and the
2632  * other targets will be used for computing the final results.
2633  *
2634  * If we are grouping or aggregating, *and* there are no non-Var grouping
2635  * expressions, then the returned tlist is effectively dummy; we do not
2636  * need to force it to be evaluated, because all the Vars it contains
2637  * should be present in the "flat" tlist generated by create_plan, though
2638  * possibly in a different order.  In that case we'll use create_plan's tlist,
2639  * and the tlist made here is only needed as input to query_planner to tell
2640  * it which Vars are needed in the output of the scan/join plan.
2641  *
2642  * 'tlist' is the query's target list.
2643  * 'groupColIdx' receives an array of column numbers for the GROUP BY
2644  *                      expressions (if there are any) in the returned target list.
2645  * 'need_tlist_eval' is set true if we really need to evaluate the
2646  *                      returned tlist as-is.
2647  *
2648  * The result is the targetlist to be passed to query_planner.
2649  */
2650 static List *
2651 make_subplanTargetList(PlannerInfo *root,
2652                                            List *tlist,
2653                                            AttrNumber **groupColIdx,
2654                                            bool *need_tlist_eval)
2655 {
2656         Query      *parse = root->parse;
2657         List       *sub_tlist;
2658         List       *non_group_cols;
2659         List       *non_group_vars;
2660         int                     numCols;
2661
2662         *groupColIdx = NULL;
2663
2664         /*
2665          * If we're not grouping or aggregating, there's nothing to do here;
2666          * query_planner should receive the unmodified target list.
2667          */
2668         if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual &&
2669                 !parse->hasWindowFuncs)
2670         {
2671                 *need_tlist_eval = true;
2672                 return tlist;
2673         }
2674
2675         /*
2676          * Otherwise, we must build a tlist containing all grouping columns,
2677          * plus any other Vars mentioned in the targetlist and HAVING qual.
2678          */
2679         sub_tlist = NIL;
2680         non_group_cols = NIL;
2681         *need_tlist_eval = false;       /* only eval if not flat tlist */
2682
2683         numCols = list_length(parse->groupClause);
2684         if (numCols > 0)
2685         {
2686                 /*
2687                  * If grouping, create sub_tlist entries for all GROUP BY columns, and
2688                  * make an array showing where the group columns are in the sub_tlist.
2689                  *
2690                  * Note: with this implementation, the array entries will always be
2691                  * 1..N, but we don't want callers to assume that.
2692                  */
2693                 AttrNumber *grpColIdx;
2694                 ListCell   *tl;
2695
2696                 grpColIdx = (AttrNumber *) palloc0(sizeof(AttrNumber) * numCols);
2697                 *groupColIdx = grpColIdx;
2698
2699                 foreach(tl, tlist)
2700                 {
2701                         TargetEntry *tle = (TargetEntry *) lfirst(tl);
2702                         int                     colno;
2703
2704                         colno = get_grouping_column_index(parse, tle);
2705                         if (colno >= 0)
2706                         {
2707                                 /*
2708                                  * It's a grouping column, so add it to the result tlist and
2709                                  * remember its resno in grpColIdx[].
2710                                  */
2711                                 TargetEntry *newtle;
2712
2713                                 newtle = makeTargetEntry(tle->expr,
2714                                                                                  list_length(sub_tlist) + 1,
2715                                                                                  NULL,
2716                                                                                  false);
2717                                 sub_tlist = lappend(sub_tlist, newtle);
2718
2719                                 Assert(grpColIdx[colno] == 0);  /* no dups expected */
2720                                 grpColIdx[colno] = newtle->resno;
2721
2722                                 if (!(newtle->expr && IsA(newtle->expr, Var)))
2723                                         *need_tlist_eval = true;        /* tlist contains non Vars */
2724                         }
2725                         else
2726                         {
2727                                 /*
2728                                  * Non-grouping column, so just remember the expression
2729                                  * for later call to pull_var_clause.  There's no need for
2730                                  * pull_var_clause to examine the TargetEntry node itself.
2731                                  */
2732                                 non_group_cols = lappend(non_group_cols, tle->expr);
2733                         }
2734                 }
2735         }
2736         else
2737         {
2738                 /*
2739                  * With no grouping columns, just pass whole tlist to pull_var_clause.
2740                  * Need (shallow) copy to avoid damaging input tlist below.
2741                  */
2742                 non_group_cols = list_copy(tlist);
2743         }
2744
2745         /*
2746          * If there's a HAVING clause, we'll need the Vars it uses, too.
2747          */
2748         if (parse->havingQual)
2749                 non_group_cols = lappend(non_group_cols, parse->havingQual);
2750
2751         /*
2752          * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
2753          * add them to the result tlist if not already present.  (A Var used
2754          * directly as a GROUP BY item will be present already.)  Note this
2755          * includes Vars used in resjunk items, so we are covering the needs of
2756          * ORDER BY and window specifications.  Vars used within Aggrefs will be
2757          * pulled out here, too.
2758          */
2759         non_group_vars = pull_var_clause((Node *) non_group_cols,
2760                                                                          PVC_RECURSE_AGGREGATES,
2761                                                                          PVC_INCLUDE_PLACEHOLDERS);
2762         sub_tlist = add_to_flat_tlist(sub_tlist, non_group_vars);
2763
2764         /* clean up cruft */
2765         list_free(non_group_vars);
2766         list_free(non_group_cols);
2767
2768         return sub_tlist;
2769 }
2770
2771 /*
2772  * get_grouping_column_index
2773  *              Get the GROUP BY column position, if any, of a targetlist entry.
2774  *
2775  * Returns the index (counting from 0) of the TLE in the GROUP BY list, or -1
2776  * if it's not a grouping column.  Note: the result is unique because the
2777  * parser won't make multiple groupClause entries for the same TLE.
2778  */
2779 static int
2780 get_grouping_column_index(Query *parse, TargetEntry *tle)
2781 {
2782         int                     colno = 0;
2783         Index           ressortgroupref = tle->ressortgroupref;
2784         ListCell   *gl;
2785
2786         /* No need to search groupClause if TLE hasn't got a sortgroupref */
2787         if (ressortgroupref == 0)
2788                 return -1;
2789
2790         foreach(gl, parse->groupClause)
2791         {
2792                 SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
2793
2794                 if (grpcl->tleSortGroupRef == ressortgroupref)
2795                         return colno;
2796                 colno++;
2797         }
2798
2799         return -1;
2800 }
2801
2802 /*
2803  * locate_grouping_columns
2804  *              Locate grouping columns in the tlist chosen by create_plan.
2805  *
2806  * This is only needed if we don't use the sub_tlist chosen by
2807  * make_subplanTargetList.      We have to forget the column indexes found
2808  * by that routine and re-locate the grouping exprs in the real sub_tlist.
2809  */
2810 static void
2811 locate_grouping_columns(PlannerInfo *root,
2812                                                 List *tlist,
2813                                                 List *sub_tlist,
2814                                                 AttrNumber *groupColIdx)
2815 {
2816         int                     keyno = 0;
2817         ListCell   *gl;
2818
2819         /*
2820          * No work unless grouping.
2821          */
2822         if (!root->parse->groupClause)
2823         {
2824                 Assert(groupColIdx == NULL);
2825                 return;
2826         }
2827         Assert(groupColIdx != NULL);
2828
2829         foreach(gl, root->parse->groupClause)
2830         {
2831                 SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl);
2832                 Node       *groupexpr = get_sortgroupclause_expr(grpcl, tlist);
2833                 TargetEntry *te = tlist_member(groupexpr, sub_tlist);
2834
2835                 if (!te)
2836                         elog(ERROR, "failed to locate grouping columns");
2837                 groupColIdx[keyno++] = te->resno;
2838         }
2839 }
2840
2841 /*
2842  * postprocess_setop_tlist
2843  *        Fix up targetlist returned by plan_set_operations().
2844  *
2845  * We need to transpose sort key info from the orig_tlist into new_tlist.
2846  * NOTE: this would not be good enough if we supported resjunk sort keys
2847  * for results of set operations --- then, we'd need to project a whole
2848  * new tlist to evaluate the resjunk columns.  For now, just ereport if we
2849  * find any resjunk columns in orig_tlist.
2850  */
2851 static List *
2852 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
2853 {
2854         ListCell   *l;
2855         ListCell   *orig_tlist_item = list_head(orig_tlist);
2856
2857         foreach(l, new_tlist)
2858         {
2859                 TargetEntry *new_tle = (TargetEntry *) lfirst(l);
2860                 TargetEntry *orig_tle;
2861
2862                 /* ignore resjunk columns in setop result */
2863                 if (new_tle->resjunk)
2864                         continue;
2865
2866                 Assert(orig_tlist_item != NULL);
2867                 orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
2868                 orig_tlist_item = lnext(orig_tlist_item);
2869                 if (orig_tle->resjunk)  /* should not happen */
2870                         elog(ERROR, "resjunk output columns are not implemented");
2871                 Assert(new_tle->resno == orig_tle->resno);
2872                 new_tle->ressortgroupref = orig_tle->ressortgroupref;
2873         }
2874         if (orig_tlist_item != NULL)
2875                 elog(ERROR, "resjunk output columns are not implemented");
2876         return new_tlist;
2877 }
2878
2879 /*
2880  * select_active_windows
2881  *              Create a list of the "active" window clauses (ie, those referenced
2882  *              by non-deleted WindowFuncs) in the order they are to be executed.
2883  */
2884 static List *
2885 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
2886 {
2887         List       *result;
2888         List       *actives;
2889         ListCell   *lc;
2890
2891         /* First, make a list of the active windows */
2892         actives = NIL;
2893         foreach(lc, root->parse->windowClause)
2894         {
2895                 WindowClause *wc = (WindowClause *) lfirst(lc);
2896
2897                 /* It's only active if wflists shows some related WindowFuncs */
2898                 Assert(wc->winref <= wflists->maxWinRef);
2899                 if (wflists->windowFuncs[wc->winref] != NIL)
2900                         actives = lappend(actives, wc);
2901         }
2902
2903         /*
2904          * Now, ensure that windows with identical partitioning/ordering clauses
2905          * are adjacent in the list.  This is required by the SQL standard, which
2906          * says that only one sort is to be used for such windows, even if they
2907          * are otherwise distinct (eg, different names or framing clauses).
2908          *
2909          * There is room to be much smarter here, for example detecting whether
2910          * one window's sort keys are a prefix of another's (so that sorting for
2911          * the latter would do for the former), or putting windows first that
2912          * match a sort order available for the underlying query.  For the moment
2913          * we are content with meeting the spec.
2914          */
2915         result = NIL;
2916         while (actives != NIL)
2917         {
2918                 WindowClause *wc = (WindowClause *) linitial(actives);
2919                 ListCell   *prev;
2920                 ListCell   *next;
2921
2922                 /* Move wc from actives to result */
2923                 actives = list_delete_first(actives);
2924                 result = lappend(result, wc);
2925
2926                 /* Now move any matching windows from actives to result */
2927                 prev = NULL;
2928                 for (lc = list_head(actives); lc; lc = next)
2929                 {
2930                         WindowClause *wc2 = (WindowClause *) lfirst(lc);
2931
2932                         next = lnext(lc);
2933                         /* framing options are NOT to be compared here! */
2934                         if (equal(wc->partitionClause, wc2->partitionClause) &&
2935                                 equal(wc->orderClause, wc2->orderClause))
2936                         {
2937                                 actives = list_delete_cell(actives, lc, prev);
2938                                 result = lappend(result, wc2);
2939                         }
2940                         else
2941                                 prev = lc;
2942                 }
2943         }
2944
2945         return result;
2946 }
2947
2948 /*
2949  * add_volatile_sort_exprs
2950  *              Identify any volatile sort/group expressions used by the active
2951  *              windows, and add them to window_tlist if not already present.
2952  *              Return the modified window_tlist.
2953  */
2954 static List *
2955 add_volatile_sort_exprs(List *window_tlist, List *tlist, List *activeWindows)
2956 {
2957         Bitmapset  *sgrefs = NULL;
2958         ListCell   *lc;
2959
2960         /* First, collect the sortgrouprefs of the windows into a bitmapset */
2961         foreach(lc, activeWindows)
2962         {
2963                 WindowClause *wc = (WindowClause *) lfirst(lc);
2964                 ListCell   *lc2;
2965
2966                 foreach(lc2, wc->partitionClause)
2967                 {
2968                         SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
2969
2970                         sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
2971                 }
2972                 foreach(lc2, wc->orderClause)
2973                 {
2974                         SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
2975
2976                         sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
2977                 }
2978         }
2979
2980         /*
2981          * Now scan the original tlist to find the referenced expressions. Any
2982          * that are volatile must be added to window_tlist.
2983          *
2984          * Note: we know that the input window_tlist contains no items marked with
2985          * ressortgrouprefs, so we don't have to worry about collisions of the
2986          * reference numbers.
2987          */
2988         foreach(lc, tlist)
2989         {
2990                 TargetEntry *tle = (TargetEntry *) lfirst(lc);
2991
2992                 if (tle->ressortgroupref != 0 &&
2993                         bms_is_member(tle->ressortgroupref, sgrefs) &&
2994                         contain_volatile_functions((Node *) tle->expr))
2995                 {
2996                         TargetEntry *newtle;
2997
2998                         newtle = makeTargetEntry(tle->expr,
2999                                                                          list_length(window_tlist) + 1,
3000                                                                          NULL,
3001                                                                          false);
3002                         newtle->ressortgroupref = tle->ressortgroupref;
3003                         window_tlist = lappend(window_tlist, newtle);
3004                 }
3005         }
3006
3007         return window_tlist;
3008 }
3009
3010 /*
3011  * make_pathkeys_for_window
3012  *              Create a pathkeys list describing the required input ordering
3013  *              for the given WindowClause.
3014  *
3015  * The required ordering is first the PARTITION keys, then the ORDER keys.
3016  * In the future we might try to implement windowing using hashing, in which
3017  * case the ordering could be relaxed, but for now we always sort.
3018  */
3019 static List *
3020 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
3021                                                  List *tlist, bool canonicalize)
3022 {
3023         List       *window_pathkeys;
3024         List       *window_sortclauses;
3025
3026         /* Throw error if can't sort */
3027         if (!grouping_is_sortable(wc->partitionClause))
3028                 ereport(ERROR,
3029                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3030                                  errmsg("could not implement window PARTITION BY"),
3031                                  errdetail("Window partitioning columns must be of sortable datatypes.")));
3032         if (!grouping_is_sortable(wc->orderClause))
3033                 ereport(ERROR,
3034                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3035                                  errmsg("could not implement window ORDER BY"),
3036                 errdetail("Window ordering columns must be of sortable datatypes.")));
3037
3038         /* Okay, make the combined pathkeys */
3039         window_sortclauses = list_concat(list_copy(wc->partitionClause),
3040                                                                          list_copy(wc->orderClause));
3041         window_pathkeys = make_pathkeys_for_sortclauses(root,
3042                                                                                                         window_sortclauses,
3043                                                                                                         tlist,
3044                                                                                                         canonicalize);
3045         list_free(window_sortclauses);
3046         return window_pathkeys;
3047 }
3048
3049 /*----------
3050  * get_column_info_for_window
3051  *              Get the partitioning/ordering column numbers and equality operators
3052  *              for a WindowAgg node.
3053  *
3054  * This depends on the behavior of make_pathkeys_for_window()!
3055  *
3056  * We are given the target WindowClause and an array of the input column
3057  * numbers associated with the resulting pathkeys.      In the easy case, there
3058  * are the same number of pathkey columns as partitioning + ordering columns
3059  * and we just have to copy some data around.  However, it's possible that
3060  * some of the original partitioning + ordering columns were eliminated as
3061  * redundant during the transformation to pathkeys.  (This can happen even
3062  * though the parser gets rid of obvious duplicates.  A typical scenario is a
3063  * window specification "PARTITION BY x ORDER BY y" coupled with a clause
3064  * "WHERE x = y" that causes the two sort columns to be recognized as
3065  * redundant.)  In that unusual case, we have to work a lot harder to
3066  * determine which keys are significant.
3067  *
3068  * The method used here is a bit brute-force: add the sort columns to a list
3069  * one at a time and note when the resulting pathkey list gets longer.  But
3070  * it's a sufficiently uncommon case that a faster way doesn't seem worth
3071  * the amount of code refactoring that'd be needed.
3072  *----------
3073  */
3074 static void
3075 get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist,
3076                                                    int numSortCols, AttrNumber *sortColIdx,
3077                                                    int *partNumCols,
3078                                                    AttrNumber **partColIdx,
3079                                                    Oid **partOperators,
3080                                                    int *ordNumCols,
3081                                                    AttrNumber **ordColIdx,
3082                                                    Oid **ordOperators)
3083 {
3084         int                     numPart = list_length(wc->partitionClause);
3085         int                     numOrder = list_length(wc->orderClause);
3086
3087         if (numSortCols == numPart + numOrder)
3088         {
3089                 /* easy case */
3090                 *partNumCols = numPart;
3091                 *partColIdx = sortColIdx;
3092                 *partOperators = extract_grouping_ops(wc->partitionClause);
3093                 *ordNumCols = numOrder;
3094                 *ordColIdx = sortColIdx + numPart;
3095                 *ordOperators = extract_grouping_ops(wc->orderClause);
3096         }
3097         else
3098         {
3099                 List       *sortclauses;
3100                 List       *pathkeys;
3101                 int                     scidx;
3102                 ListCell   *lc;
3103
3104                 /* first, allocate what's certainly enough space for the arrays */
3105                 *partNumCols = 0;
3106                 *partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber));
3107                 *partOperators = (Oid *) palloc(numPart * sizeof(Oid));
3108                 *ordNumCols = 0;
3109                 *ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber));
3110                 *ordOperators = (Oid *) palloc(numOrder * sizeof(Oid));
3111                 sortclauses = NIL;
3112                 pathkeys = NIL;
3113                 scidx = 0;
3114                 foreach(lc, wc->partitionClause)
3115                 {
3116                         SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
3117                         List       *new_pathkeys;
3118
3119                         sortclauses = lappend(sortclauses, sgc);
3120                         new_pathkeys = make_pathkeys_for_sortclauses(root,
3121                                                                                                                  sortclauses,
3122                                                                                                                  tlist,
3123                                                                                                                  true);
3124                         if (list_length(new_pathkeys) > list_length(pathkeys))
3125                         {
3126                                 /* this sort clause is actually significant */
3127                                 (*partColIdx)[*partNumCols] = sortColIdx[scidx++];
3128                                 (*partOperators)[*partNumCols] = sgc->eqop;
3129                                 (*partNumCols)++;
3130                                 pathkeys = new_pathkeys;
3131                         }
3132                 }
3133                 foreach(lc, wc->orderClause)
3134                 {
3135                         SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
3136                         List       *new_pathkeys;
3137
3138                         sortclauses = lappend(sortclauses, sgc);
3139                         new_pathkeys = make_pathkeys_for_sortclauses(root,
3140                                                                                                                  sortclauses,
3141                                                                                                                  tlist,
3142                                                                                                                  true);
3143                         if (list_length(new_pathkeys) > list_length(pathkeys))
3144                         {
3145                                 /* this sort clause is actually significant */
3146                                 (*ordColIdx)[*ordNumCols] = sortColIdx[scidx++];
3147                                 (*ordOperators)[*ordNumCols] = sgc->eqop;
3148                                 (*ordNumCols)++;
3149                                 pathkeys = new_pathkeys;
3150                         }
3151                 }
3152                 /* complain if we didn't eat exactly the right number of sort cols */
3153                 if (scidx != numSortCols)
3154                         elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators");
3155         }
3156 }
3157
3158
3159 /*
3160  * expression_planner
3161  *              Perform planner's transformations on a standalone expression.
3162  *
3163  * Various utility commands need to evaluate expressions that are not part
3164  * of a plannable query.  They can do so using the executor's regular
3165  * expression-execution machinery, but first the expression has to be fed
3166  * through here to transform it from parser output to something executable.
3167  *
3168  * Currently, we disallow sublinks in standalone expressions, so there's no
3169  * real "planning" involved here.  (That might not always be true though.)
3170  * What we must do is run eval_const_expressions to ensure that any function
3171  * calls are converted to positional notation and function default arguments
3172  * get inserted.  The fact that constant subexpressions get simplified is a
3173  * side-effect that is useful when the expression will get evaluated more than
3174  * once.  Also, we must fix operator function IDs.
3175  *
3176  * Note: this must not make any damaging changes to the passed-in expression
3177  * tree.  (It would actually be okay to apply fix_opfuncids to it, but since
3178  * we first do an expression_tree_mutator-based walk, what is returned will
3179  * be a new node tree.)
3180  */
3181 Expr *
3182 expression_planner(Expr *expr)
3183 {
3184         Node       *result;
3185
3186         /*
3187          * Convert named-argument function calls, insert default arguments and
3188          * simplify constant subexprs
3189          */
3190         result = eval_const_expressions(NULL, (Node *) expr);
3191
3192         /* Fill in opfuncid values if missing */
3193         fix_opfuncids(result);
3194
3195         return (Expr *) result;
3196 }
3197
3198
3199 /*
3200  * plan_cluster_use_sort
3201  *              Use the planner to decide how CLUSTER should implement sorting
3202  *
3203  * tableOid is the OID of a table to be clustered on its index indexOid
3204  * (which is already known to be a btree index).  Decide whether it's
3205  * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
3206  * Return TRUE to use sorting, FALSE to use an indexscan.
3207  *
3208  * Note: caller had better already hold some type of lock on the table.
3209  */
3210 bool
3211 plan_cluster_use_sort(Oid tableOid, Oid indexOid)
3212 {
3213         PlannerInfo *root;
3214         Query      *query;
3215         PlannerGlobal *glob;
3216         RangeTblEntry *rte;
3217         RelOptInfo *rel;
3218         IndexOptInfo *indexInfo;
3219         QualCost        indexExprCost;
3220         Cost            comparisonCost;
3221         Path       *seqScanPath;
3222         Path            seqScanAndSortPath;
3223         IndexPath  *indexScanPath;
3224         ListCell   *lc;
3225
3226         /* Set up mostly-dummy planner state */
3227         query = makeNode(Query);
3228         query->commandType = CMD_SELECT;
3229
3230         glob = makeNode(PlannerGlobal);
3231
3232         root = makeNode(PlannerInfo);
3233         root->parse = query;
3234         root->glob = glob;
3235         root->query_level = 1;
3236         root->planner_cxt = CurrentMemoryContext;
3237         root->wt_param_id = -1;
3238
3239         /* Build a minimal RTE for the rel */
3240         rte = makeNode(RangeTblEntry);
3241         rte->rtekind = RTE_RELATION;
3242         rte->relid = tableOid;
3243         rte->relkind = RELKIND_RELATION;
3244         rte->inh = false;
3245         rte->inFromCl = true;
3246         query->rtable = list_make1(rte);
3247
3248         /* Set up RTE/RelOptInfo arrays */
3249         setup_simple_rel_arrays(root);
3250
3251         /* Build RelOptInfo */
3252         rel = build_simple_rel(root, 1, RELOPT_BASEREL);
3253
3254         /* Locate IndexOptInfo for the target index */
3255         indexInfo = NULL;
3256         foreach(lc, rel->indexlist)
3257         {
3258                 indexInfo = (IndexOptInfo *) lfirst(lc);
3259                 if (indexInfo->indexoid == indexOid)
3260                         break;
3261         }
3262
3263         /*
3264          * It's possible that get_relation_info did not generate an IndexOptInfo
3265          * for the desired index; this could happen if it's not yet reached its
3266          * indcheckxmin usability horizon, or if it's a system index and we're
3267          * ignoring system indexes.  In such cases we should tell CLUSTER to not
3268          * trust the index contents but use seqscan-and-sort.
3269          */
3270         if (lc == NULL)                         /* not in the list? */
3271                 return true;                    /* use sort */
3272
3273         /*
3274          * Rather than doing all the pushups that would be needed to use
3275          * set_baserel_size_estimates, just do a quick hack for rows and width.
3276          */
3277         rel->rows = rel->tuples;
3278         rel->width = get_relation_data_width(tableOid, NULL);
3279
3280         root->total_table_pages = rel->pages;
3281
3282         /*
3283          * Determine eval cost of the index expressions, if any.  We need to
3284          * charge twice that amount for each tuple comparison that happens during
3285          * the sort, since tuplesort.c will have to re-evaluate the index
3286          * expressions each time.  (XXX that's pretty inefficient...)
3287          */
3288         cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
3289         comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
3290
3291         /* Estimate the cost of seq scan + sort */
3292         seqScanPath = create_seqscan_path(root, rel);
3293         cost_sort(&seqScanAndSortPath, root, NIL,
3294                           seqScanPath->total_cost, rel->tuples, rel->width,
3295                           comparisonCost, maintenance_work_mem, -1.0);
3296
3297         /* Estimate the cost of index scan */
3298         indexScanPath = create_index_path(root, indexInfo,
3299                                                                           NIL, NIL, NIL, NIL, NIL,
3300                                                                           ForwardScanDirection, false, NULL);
3301
3302         return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
3303 }