granicus.if.org Git - postgresql/blob - src/backend/optimizer/plan/planner.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * planner.c
   4  *        The query optimizer external interface.
   5  *
   6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        src/backend/optimizer/plan/planner.c
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15
  16 #include "postgres.h"
  17
  18 #include <limits.h>
  19 #include <math.h>
  20
  21 #include "access/htup_details.h"
  22 #include "access/parallel.h"
  23 #include "access/sysattr.h"
  24 #include "access/xact.h"
  25 #include "catalog/pg_constraint_fn.h"
  26 #include "catalog/pg_proc.h"
  27 #include "catalog/pg_type.h"
  28 #include "executor/executor.h"
  29 #include "executor/nodeAgg.h"
  30 #include "foreign/fdwapi.h"
  31 #include "miscadmin.h"
  32 #include "lib/bipartite_match.h"
  33 #include "nodes/makefuncs.h"
  34 #include "nodes/nodeFuncs.h"
  35 #ifdef OPTIMIZER_DEBUG
  36 #include "nodes/print.h"
  37 #endif
  38 #include "optimizer/clauses.h"
  39 #include "optimizer/cost.h"
  40 #include "optimizer/pathnode.h"
  41 #include "optimizer/paths.h"
  42 #include "optimizer/plancat.h"
  43 #include "optimizer/planmain.h"
  44 #include "optimizer/planner.h"
  45 #include "optimizer/prep.h"
  46 #include "optimizer/subselect.h"
  47 #include "optimizer/tlist.h"
  48 #include "optimizer/var.h"
  49 #include "parser/analyze.h"
  50 #include "parser/parsetree.h"
  51 #include "parser/parse_agg.h"
  52 #include "rewrite/rewriteManip.h"
  53 #include "storage/dsm_impl.h"
  54 #include "utils/rel.h"
  55 #include "utils/selfuncs.h"
  56 #include "utils/lsyscache.h"
  57 #include "utils/syscache.h"
  58
  59
  60 /* GUC parameters */
  61 double          cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION;
  62 int                     force_parallel_mode = FORCE_PARALLEL_OFF;
  63
  64 /* Hook for plugins to get control in planner() */
  65 planner_hook_type planner_hook = NULL;
  66
  67 /* Hook for plugins to get control when grouping_planner() plans upper rels */
  68 create_upper_paths_hook_type create_upper_paths_hook = NULL;
  69
  70
  71 /* Expression kind codes for preprocess_expression */
  72 #define EXPRKIND_QUAL                   0
  73 #define EXPRKIND_TARGET                 1
  74 #define EXPRKIND_RTFUNC                 2
  75 #define EXPRKIND_RTFUNC_LATERAL 3
  76 #define EXPRKIND_VALUES                 4
  77 #define EXPRKIND_VALUES_LATERAL 5
  78 #define EXPRKIND_LIMIT                  6
  79 #define EXPRKIND_APPINFO                7
  80 #define EXPRKIND_PHV                    8
  81 #define EXPRKIND_TABLESAMPLE    9
  82 #define EXPRKIND_ARBITER_ELEM   10
  83
  84 /* Passthrough data for standard_qp_callback */
  85 typedef struct
  86 {
  87         List       *tlist;                      /* preprocessed query targetlist */
  88         List       *activeWindows;      /* active windows, if any */
  89         List       *groupClause;        /* overrides parse->groupClause */
  90 } standard_qp_extra;
  91
  92 /* Local functions */
  93 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
  94 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
  95 static void inheritance_planner(PlannerInfo *root);
  96 static void grouping_planner(PlannerInfo *root, bool inheritance_update,
  97                                  double tuple_fraction);
  98 static void preprocess_rowmarks(PlannerInfo *root);
  99 static double preprocess_limit(PlannerInfo *root,
 100                                  double tuple_fraction,
 101                                  int64 *offset_est, int64 *count_est);
 102 static bool limit_needed(Query *parse);
 103 static void remove_useless_groupby_columns(PlannerInfo *root);
 104 static List *preprocess_groupclause(PlannerInfo *root, List *force);
 105 static List *extract_rollup_sets(List *groupingSets);
 106 static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
 107 static void standard_qp_callback(PlannerInfo *root, void *extra);
 108 static double get_number_of_groups(PlannerInfo *root,
 109                                          double path_rows,
 110                                          List *rollup_lists,
 111                                          List *rollup_groupclauses);
 112 static Size estimate_hashagg_tablesize(Path *path,
 113                                                    const AggClauseCosts *agg_costs,
 114                                                    double dNumGroups);
 115 static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 116                                           RelOptInfo *input_rel,
 117                                           PathTarget *target,
 118                                           const AggClauseCosts *agg_costs,
 119                                           List *rollup_lists,
 120                                           List *rollup_groupclauses);
 121 static RelOptInfo *create_window_paths(PlannerInfo *root,
 122                                         RelOptInfo *input_rel,
 123                                         PathTarget *input_target,
 124                                         PathTarget *output_target,
 125                                         List *tlist,
 126                                         WindowFuncLists *wflists,
 127                                         List *activeWindows);
 128 static void create_one_window_path(PlannerInfo *root,
 129                                            RelOptInfo *window_rel,
 130                                            Path *path,
 131                                            PathTarget *input_target,
 132                                            PathTarget *output_target,
 133                                            List *tlist,
 134                                            WindowFuncLists *wflists,
 135                                            List *activeWindows);
 136 static RelOptInfo *create_distinct_paths(PlannerInfo *root,
 137                                           RelOptInfo *input_rel);
 138 static RelOptInfo *create_ordered_paths(PlannerInfo *root,
 139                                          RelOptInfo *input_rel,
 140                                          PathTarget *target,
 141                                          double limit_tuples);
 142 static PathTarget *make_group_input_target(PlannerInfo *root,
 143                                                 PathTarget *final_target);
 144 static PathTarget *make_partial_grouping_target(PlannerInfo *root,
 145                                                          PathTarget *grouping_target);
 146 static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
 147 static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
 148 static PathTarget *make_window_input_target(PlannerInfo *root,
 149                                                  PathTarget *final_target,
 150                                                  List *activeWindows);
 151 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 152                                                  List *tlist);
 153 static PathTarget *make_sort_input_target(PlannerInfo *root,
 154                                            PathTarget *final_target,
 155                                            bool *have_postponed_srfs);
 156
 157
 158 /*****************************************************************************
 159  *
 160  *         Query optimizer entry point
 161  *
 162  * To support loadable plugins that monitor or modify planner behavior,
 163  * we provide a hook variable that lets a plugin get control before and
 164  * after the standard planning process.  The plugin would normally call
 165  * standard_planner().
 166  *
 167  * Note to plugin authors: standard_planner() scribbles on its Query input,
 168  * so you'd better copy that data structure if you want to plan more than once.
 169  *
 170  *****************************************************************************/
 171 PlannedStmt *
 172 planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 173 {
 174         PlannedStmt *result;
 175
 176         if (planner_hook)
 177                 result = (*planner_hook) (parse, cursorOptions, boundParams);
 178         else
 179                 result = standard_planner(parse, cursorOptions, boundParams);
 180         return result;
 181 }
 182
 183 PlannedStmt *
 184 standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
 185 {
 186         PlannedStmt *result;
 187         PlannerGlobal *glob;
 188         double          tuple_fraction;
 189         PlannerInfo *root;
 190         RelOptInfo *final_rel;
 191         Path       *best_path;
 192         Plan       *top_plan;
 193         ListCell   *lp,
 194                            *lr;
 195
 196         /* Cursor options may come from caller or from DECLARE CURSOR stmt */
 197         if (parse->utilityStmt &&
 198                 IsA(parse->utilityStmt, DeclareCursorStmt))
 199                 cursorOptions |= ((DeclareCursorStmt *) parse->utilityStmt)->options;
 200
 201         /*
 202          * Set up global state for this planner invocation.  This data is needed
 203          * across all levels of sub-Query that might exist in the given command,
 204          * so we keep it in a separate struct that's linked to by each per-Query
 205          * PlannerInfo.
 206          */
 207         glob = makeNode(PlannerGlobal);
 208
 209         glob->boundParams = boundParams;
 210         glob->subplans = NIL;
 211         glob->subroots = NIL;
 212         glob->rewindPlanIDs = NULL;
 213         glob->finalrtable = NIL;
 214         glob->finalrowmarks = NIL;
 215         glob->resultRelations = NIL;
 216         glob->relationOids = NIL;
 217         glob->invalItems = NIL;
 218         glob->nParamExec = 0;
 219         glob->lastPHId = 0;
 220         glob->lastRowMarkId = 0;
 221         glob->lastPlanNodeId = 0;
 222         glob->transientPlan = false;
 223         glob->dependsOnRole = false;
 224
 225         /*
 226          * Assess whether it's feasible to use parallel mode for this query. We
 227          * can't do this in a standalone backend, or if the command will try to
 228          * modify any data, or if this is a cursor operation, or if GUCs are set
 229          * to values that don't permit parallelism, or if parallel-unsafe
 230          * functions are present in the query tree.
 231          *
 232          * For now, we don't try to use parallel mode if we're running inside a
 233          * parallel worker.  We might eventually be able to relax this
 234          * restriction, but for now it seems best not to have parallel workers
 235          * trying to create their own parallel workers.
 236          *
 237          * We can't use parallelism in serializable mode because the predicate
 238          * locking code is not parallel-aware.  It's not catastrophic if someone
 239          * tries to run a parallel plan in serializable mode; it just won't get
 240          * any workers and will run serially.  But it seems like a good heuristic
 241          * to assume that the same serialization level will be in effect at plan
 242          * time and execution time, so don't generate a parallel plan if we're in
 243          * serializable mode.
 244          */
 245         if ((cursorOptions & CURSOR_OPT_PARALLEL_OK) != 0 &&
 246                 IsUnderPostmaster &&
 247                 dynamic_shared_memory_type != DSM_IMPL_NONE &&
 248                 parse->commandType == CMD_SELECT &&
 249                 parse->utilityStmt == NULL &&
 250                 !parse->hasModifyingCTE &&
 251                 max_parallel_workers_per_gather > 0 &&
 252                 !IsParallelWorker() &&
 253                 !IsolationIsSerializable())
 254         {
 255                 /* all the cheap tests pass, so scan the query tree */
 256                 glob->maxParallelHazard = max_parallel_hazard(parse);
 257                 glob->parallelModeOK = (glob->maxParallelHazard != PROPARALLEL_UNSAFE);
 258         }
 259         else
 260         {
 261                 /* skip the query tree scan, just assume it's unsafe */
 262                 glob->maxParallelHazard = PROPARALLEL_UNSAFE;
 263                 glob->parallelModeOK = false;
 264         }
 265
 266         /*
 267          * glob->parallelModeNeeded should tell us whether it's necessary to
 268          * impose the parallel mode restrictions, but we don't actually want to
 269          * impose them unless we choose a parallel plan, so it is normally set
 270          * only if a parallel plan is chosen (see create_gather_plan).  That way,
 271          * people who mislabel their functions but don't use parallelism anyway
 272          * aren't harmed.  But when force_parallel_mode is set, we enable the
 273          * restrictions whenever possible for testing purposes.
 274          */
 275         glob->parallelModeNeeded = glob->parallelModeOK &&
 276                 (force_parallel_mode != FORCE_PARALLEL_OFF);
 277
 278         /* Determine what fraction of the plan is likely to be scanned */
 279         if (cursorOptions & CURSOR_OPT_FAST_PLAN)
 280         {
 281                 /*
 282                  * We have no real idea how many tuples the user will ultimately FETCH
 283                  * from a cursor, but it is often the case that he doesn't want 'em
 284                  * all, or would prefer a fast-start plan anyway so that he can
 285                  * process some of the tuples sooner.  Use a GUC parameter to decide
 286                  * what fraction to optimize for.
 287                  */
 288                 tuple_fraction = cursor_tuple_fraction;
 289
 290                 /*
 291                  * We document cursor_tuple_fraction as simply being a fraction, which
 292                  * means the edge cases 0 and 1 have to be treated specially here.  We
 293                  * convert 1 to 0 ("all the tuples") and 0 to a very small fraction.
 294                  */
 295                 if (tuple_fraction >= 1.0)
 296                         tuple_fraction = 0.0;
 297                 else if (tuple_fraction <= 0.0)
 298                         tuple_fraction = 1e-10;
 299         }
 300         else
 301         {
 302                 /* Default assumption is we need all the tuples */
 303                 tuple_fraction = 0.0;
 304         }
 305
 306         /* primary planning entry point (may recurse for subqueries) */
 307         root = subquery_planner(glob, parse, NULL,
 308                                                         false, tuple_fraction);
 309
 310         /* Select best Path and turn it into a Plan */
 311         final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
 312         best_path = get_cheapest_fractional_path(final_rel, tuple_fraction);
 313
 314         top_plan = create_plan(root, best_path);
 315
 316         /*
 317          * If creating a plan for a scrollable cursor, make sure it can run
 318          * backwards on demand.  Add a Material node at the top at need.
 319          */
 320         if (cursorOptions & CURSOR_OPT_SCROLL)
 321         {
 322                 if (!ExecSupportsBackwardScan(top_plan))
 323                 {
 324                         Plan       *sub_plan = top_plan;
 325
 326                         top_plan = materialize_finished_plan(sub_plan);
 327
 328                         /*
 329                          * XXX horrid kluge: if there are any initPlans attached to the
 330                          * formerly-top plan node, move them up to the Material node. This
 331                          * prevents failure in SS_finalize_plan, which see for comments.
 332                          * We don't bother adjusting the sub_plan's cost estimate for
 333                          * this.
 334                          */
 335                         top_plan->initPlan = sub_plan->initPlan;
 336                         sub_plan->initPlan = NIL;
 337                 }
 338         }
 339
 340         /*
 341          * Optionally add a Gather node for testing purposes, provided this is
 342          * actually a safe thing to do.  (Note: we assume adding a Material node
 343          * above did not change the parallel safety of the plan, so we can still
 344          * rely on best_path->parallel_safe.  However, that flag doesn't account
 345          * for subplans, which we are unable to transmit to workers presently.)
 346          */
 347         if (force_parallel_mode != FORCE_PARALLEL_OFF &&
 348                 best_path->parallel_safe &&
 349                 glob->subplans == NIL)
 350         {
 351                 Gather     *gather = makeNode(Gather);
 352
 353                 gather->plan.targetlist = top_plan->targetlist;
 354                 gather->plan.qual = NIL;
 355                 gather->plan.lefttree = top_plan;
 356                 gather->plan.righttree = NULL;
 357                 gather->num_workers = 1;
 358                 gather->single_copy = true;
 359                 gather->invisible = (force_parallel_mode == FORCE_PARALLEL_REGRESS);
 360
 361                 /*
 362                  * Ideally we'd use cost_gather here, but setting up dummy path data
 363                  * to satisfy it doesn't seem much cleaner than knowing what it does.
 364                  */
 365                 gather->plan.startup_cost = top_plan->startup_cost +
 366                         parallel_setup_cost;
 367                 gather->plan.total_cost = top_plan->total_cost +
 368                         parallel_setup_cost + parallel_tuple_cost * top_plan->plan_rows;
 369                 gather->plan.plan_rows = top_plan->plan_rows;
 370                 gather->plan.plan_width = top_plan->plan_width;
 371                 gather->plan.parallel_aware = false;
 372
 373                 /* use parallel mode for parallel plans. */
 374                 root->glob->parallelModeNeeded = true;
 375
 376                 top_plan = &gather->plan;
 377         }
 378
 379         /*
 380          * If any Params were generated, run through the plan tree and compute
 381          * each plan node's extParam/allParam sets.  Ideally we'd merge this into
 382          * set_plan_references' tree traversal, but for now it has to be separate
 383          * because we need to visit subplans before not after main plan.
 384          */
 385         if (glob->nParamExec > 0)
 386         {
 387                 Assert(list_length(glob->subplans) == list_length(glob->subroots));
 388                 forboth(lp, glob->subplans, lr, glob->subroots)
 389                 {
 390                         Plan       *subplan = (Plan *) lfirst(lp);
 391                         PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
 392
 393                         SS_finalize_plan(subroot, subplan);
 394                 }
 395                 SS_finalize_plan(root, top_plan);
 396         }
 397
 398         /* final cleanup of the plan */
 399         Assert(glob->finalrtable == NIL);
 400         Assert(glob->finalrowmarks == NIL);
 401         Assert(glob->resultRelations == NIL);
 402         top_plan = set_plan_references(root, top_plan);
 403         /* ... and the subplans (both regular subplans and initplans) */
 404         Assert(list_length(glob->subplans) == list_length(glob->subroots));
 405         forboth(lp, glob->subplans, lr, glob->subroots)
 406         {
 407                 Plan       *subplan = (Plan *) lfirst(lp);
 408                 PlannerInfo *subroot = (PlannerInfo *) lfirst(lr);
 409
 410                 lfirst(lp) = set_plan_references(subroot, subplan);
 411         }
 412
 413         /* build the PlannedStmt result */
 414         result = makeNode(PlannedStmt);
 415
 416         result->commandType = parse->commandType;
 417         result->queryId = parse->queryId;
 418         result->hasReturning = (parse->returningList != NIL);
 419         result->hasModifyingCTE = parse->hasModifyingCTE;
 420         result->canSetTag = parse->canSetTag;
 421         result->transientPlan = glob->transientPlan;
 422         result->dependsOnRole = glob->dependsOnRole;
 423         result->parallelModeNeeded = glob->parallelModeNeeded;
 424         result->planTree = top_plan;
 425         result->rtable = glob->finalrtable;
 426         result->resultRelations = glob->resultRelations;
 427         result->utilityStmt = parse->utilityStmt;
 428         result->subplans = glob->subplans;
 429         result->rewindPlanIDs = glob->rewindPlanIDs;
 430         result->rowMarks = glob->finalrowmarks;
 431         result->relationOids = glob->relationOids;
 432         result->invalItems = glob->invalItems;
 433         result->nParamExec = glob->nParamExec;
 434
 435         return result;
 436 }
 437
 438
 439 /*--------------------
 440  * subquery_planner
 441  *        Invokes the planner on a subquery.  We recurse to here for each
 442  *        sub-SELECT found in the query tree.
 443  *
 444  * glob is the global state for the current planner run.
 445  * parse is the querytree produced by the parser & rewriter.
 446  * parent_root is the immediate parent Query's info (NULL at the top level).
 447  * hasRecursion is true if this is a recursive WITH query.
 448  * tuple_fraction is the fraction of tuples we expect will be retrieved.
 449  * tuple_fraction is interpreted as explained for grouping_planner, below.
 450  *
 451  * Basically, this routine does the stuff that should only be done once
 452  * per Query object.  It then calls grouping_planner.  At one time,
 453  * grouping_planner could be invoked recursively on the same Query object;
 454  * that's not currently true, but we keep the separation between the two
 455  * routines anyway, in case we need it again someday.
 456  *
 457  * subquery_planner will be called recursively to handle sub-Query nodes
 458  * found within the query's expressions and rangetable.
 459  *
 460  * Returns the PlannerInfo struct ("root") that contains all data generated
 461  * while planning the subquery.  In particular, the Path(s) attached to
 462  * the (UPPERREL_FINAL, NULL) upperrel represent our conclusions about the
 463  * cheapest way(s) to implement the query.  The top level will select the
 464  * best Path and pass it through createplan.c to produce a finished Plan.
 465  *--------------------
 466  */
 467 PlannerInfo *
 468 subquery_planner(PlannerGlobal *glob, Query *parse,
 469                                  PlannerInfo *parent_root,
 470                                  bool hasRecursion, double tuple_fraction)
 471 {
 472         PlannerInfo *root;
 473         List       *newWithCheckOptions;
 474         List       *newHaving;
 475         bool            hasOuterJoins;
 476         RelOptInfo *final_rel;
 477         ListCell   *l;
 478
 479         /* Create a PlannerInfo data structure for this subquery */
 480         root = makeNode(PlannerInfo);
 481         root->parse = parse;
 482         root->glob = glob;
 483         root->query_level = parent_root ? parent_root->query_level + 1 : 1;
 484         root->parent_root = parent_root;
 485         root->plan_params = NIL;
 486         root->outer_params = NULL;
 487         root->planner_cxt = CurrentMemoryContext;
 488         root->init_plans = NIL;
 489         root->cte_plan_ids = NIL;
 490         root->multiexpr_params = NIL;
 491         root->eq_classes = NIL;
 492         root->append_rel_list = NIL;
 493         root->rowMarks = NIL;
 494         memset(root->upper_rels, 0, sizeof(root->upper_rels));
 495         memset(root->upper_targets, 0, sizeof(root->upper_targets));
 496         root->processed_tlist = NIL;
 497         root->grouping_map = NULL;
 498         root->minmax_aggs = NIL;
 499         root->hasInheritedTarget = false;
 500         root->hasRecursion = hasRecursion;
 501         if (hasRecursion)
 502                 root->wt_param_id = SS_assign_special_param(root);
 503         else
 504                 root->wt_param_id = -1;
 505         root->non_recursive_path = NULL;
 506
 507         /*
 508          * If there is a WITH list, process each WITH query and build an initplan
 509          * SubPlan structure for it.
 510          */
 511         if (parse->cteList)
 512                 SS_process_ctes(root);
 513
 514         /*
 515          * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try
 516          * to transform them into joins.  Note that this step does not descend
 517          * into subqueries; if we pull up any subqueries below, their SubLinks are
 518          * processed just before pulling them up.
 519          */
 520         if (parse->hasSubLinks)
 521                 pull_up_sublinks(root);
 522
 523         /*
 524          * Scan the rangetable for set-returning functions, and inline them if
 525          * possible (producing subqueries that might get pulled up next).
 526          * Recursion issues here are handled in the same way as for SubLinks.
 527          */
 528         inline_set_returning_functions(root);
 529
 530         /*
 531          * Check to see if any subqueries in the jointree can be merged into this
 532          * query.
 533          */
 534         pull_up_subqueries(root);
 535
 536         /*
 537          * If this is a simple UNION ALL query, flatten it into an appendrel. We
 538          * do this now because it requires applying pull_up_subqueries to the leaf
 539          * queries of the UNION ALL, which weren't touched above because they
 540          * weren't referenced by the jointree (they will be after we do this).
 541          */
 542         if (parse->setOperations)
 543                 flatten_simple_union_all(root);
 544
 545         /*
 546          * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can
 547          * avoid the expense of doing flatten_join_alias_vars().  Also check for
 548          * outer joins --- if none, we can skip reduce_outer_joins().  And check
 549          * for LATERAL RTEs, too.  This must be done after we have done
 550          * pull_up_subqueries(), of course.
 551          */
 552         root->hasJoinRTEs = false;
 553         root->hasLateralRTEs = false;
 554         hasOuterJoins = false;
 555         foreach(l, parse->rtable)
 556         {
 557                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
 558
 559                 if (rte->rtekind == RTE_JOIN)
 560                 {
 561                         root->hasJoinRTEs = true;
 562                         if (IS_OUTER_JOIN(rte->jointype))
 563                                 hasOuterJoins = true;
 564                 }
 565                 if (rte->lateral)
 566                         root->hasLateralRTEs = true;
 567         }
 568
 569         /*
 570          * Preprocess RowMark information.  We need to do this after subquery
 571          * pullup (so that all non-inherited RTEs are present) and before
 572          * inheritance expansion (so that the info is available for
 573          * expand_inherited_tables to examine and modify).
 574          */
 575         preprocess_rowmarks(root);
 576
 577         /*
 578          * Expand any rangetable entries that are inheritance sets into "append
 579          * relations".  This can add entries to the rangetable, but they must be
 580          * plain base relations not joins, so it's OK (and marginally more
 581          * efficient) to do it after checking for join RTEs.  We must do it after
 582          * pulling up subqueries, else we'd fail to handle inherited tables in
 583          * subqueries.
 584          */
 585         expand_inherited_tables(root);
 586
 587         /*
 588          * Set hasHavingQual to remember if HAVING clause is present.  Needed
 589          * because preprocess_expression will reduce a constant-true condition to
 590          * an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
 591          */
 592         root->hasHavingQual = (parse->havingQual != NULL);
 593
 594         /* Clear this flag; might get set in distribute_qual_to_rels */
 595         root->hasPseudoConstantQuals = false;
 596
 597         /*
 598          * Do expression preprocessing on targetlist and quals, as well as other
 599          * random expressions in the querytree.  Note that we do not need to
 600          * handle sort/group expressions explicitly, because they are actually
 601          * part of the targetlist.
 602          */
 603         parse->targetList = (List *)
 604                 preprocess_expression(root, (Node *) parse->targetList,
 605                                                           EXPRKIND_TARGET);
 606
 607         /* Constant-folding might have removed all set-returning functions */
 608         if (parse->hasTargetSRFs)
 609                 parse->hasTargetSRFs = expression_returns_set((Node *) parse->targetList);
 610
 611         newWithCheckOptions = NIL;
 612         foreach(l, parse->withCheckOptions)
 613         {
 614                 WithCheckOption *wco = (WithCheckOption *) lfirst(l);
 615
 616                 wco->qual = preprocess_expression(root, wco->qual,
 617                                                                                   EXPRKIND_QUAL);
 618                 if (wco->qual != NULL)
 619                         newWithCheckOptions = lappend(newWithCheckOptions, wco);
 620         }
 621         parse->withCheckOptions = newWithCheckOptions;
 622
 623         parse->returningList = (List *)
 624                 preprocess_expression(root, (Node *) parse->returningList,
 625                                                           EXPRKIND_TARGET);
 626
 627         preprocess_qual_conditions(root, (Node *) parse->jointree);
 628
 629         parse->havingQual = preprocess_expression(root, parse->havingQual,
 630                                                                                           EXPRKIND_QUAL);
 631
 632         foreach(l, parse->windowClause)
 633         {
 634                 WindowClause *wc = (WindowClause *) lfirst(l);
 635
 636                 /* partitionClause/orderClause are sort/group expressions */
 637                 wc->startOffset = preprocess_expression(root, wc->startOffset,
 638                                                                                                 EXPRKIND_LIMIT);
 639                 wc->endOffset = preprocess_expression(root, wc->endOffset,
 640                                                                                           EXPRKIND_LIMIT);
 641         }
 642
 643         parse->limitOffset = preprocess_expression(root, parse->limitOffset,
 644                                                                                            EXPRKIND_LIMIT);
 645         parse->limitCount = preprocess_expression(root, parse->limitCount,
 646                                                                                           EXPRKIND_LIMIT);
 647
 648         if (parse->onConflict)
 649         {
 650                 parse->onConflict->arbiterElems = (List *)
 651                         preprocess_expression(root,
 652                                                                   (Node *) parse->onConflict->arbiterElems,
 653                                                                   EXPRKIND_ARBITER_ELEM);
 654                 parse->onConflict->arbiterWhere =
 655                         preprocess_expression(root,
 656                                                                   parse->onConflict->arbiterWhere,
 657                                                                   EXPRKIND_QUAL);
 658                 parse->onConflict->onConflictSet = (List *)
 659                         preprocess_expression(root,
 660                                                                   (Node *) parse->onConflict->onConflictSet,
 661                                                                   EXPRKIND_TARGET);
 662                 parse->onConflict->onConflictWhere =
 663                         preprocess_expression(root,
 664                                                                   parse->onConflict->onConflictWhere,
 665                                                                   EXPRKIND_QUAL);
 666                 /* exclRelTlist contains only Vars, so no preprocessing needed */
 667         }
 668
 669         root->append_rel_list = (List *)
 670                 preprocess_expression(root, (Node *) root->append_rel_list,
 671                                                           EXPRKIND_APPINFO);
 672
 673         /* Also need to preprocess expressions within RTEs */
 674         foreach(l, parse->rtable)
 675         {
 676                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
 677                 int                     kind;
 678
 679                 if (rte->rtekind == RTE_RELATION)
 680                 {
 681                         if (rte->tablesample)
 682                                 rte->tablesample = (TableSampleClause *)
 683                                         preprocess_expression(root,
 684                                                                                   (Node *) rte->tablesample,
 685                                                                                   EXPRKIND_TABLESAMPLE);
 686                 }
 687                 else if (rte->rtekind == RTE_SUBQUERY)
 688                 {
 689                         /*
 690                          * We don't want to do all preprocessing yet on the subquery's
 691                          * expressions, since that will happen when we plan it.  But if it
 692                          * contains any join aliases of our level, those have to get
 693                          * expanded now, because planning of the subquery won't do it.
 694                          * That's only possible if the subquery is LATERAL.
 695                          */
 696                         if (rte->lateral && root->hasJoinRTEs)
 697                                 rte->subquery = (Query *)
 698                                         flatten_join_alias_vars(root, (Node *) rte->subquery);
 699                 }
 700                 else if (rte->rtekind == RTE_FUNCTION)
 701                 {
 702                         /* Preprocess the function expression(s) fully */
 703                         kind = rte->lateral ? EXPRKIND_RTFUNC_LATERAL : EXPRKIND_RTFUNC;
 704                         rte->functions = (List *) preprocess_expression(root, (Node *) rte->functions, kind);
 705                 }
 706                 else if (rte->rtekind == RTE_VALUES)
 707                 {
 708                         /* Preprocess the values lists fully */
 709                         kind = rte->lateral ? EXPRKIND_VALUES_LATERAL : EXPRKIND_VALUES;
 710                         rte->values_lists = (List *)
 711                                 preprocess_expression(root, (Node *) rte->values_lists, kind);
 712                 }
 713         }
 714
 715         /*
 716          * In some cases we may want to transfer a HAVING clause into WHERE. We
 717          * cannot do so if the HAVING clause contains aggregates (obviously) or
 718          * volatile functions (since a HAVING clause is supposed to be executed
 719          * only once per group).  We also can't do this if there are any nonempty
 720          * grouping sets; moving such a clause into WHERE would potentially change
 721          * the results, if any referenced column isn't present in all the grouping
 722          * sets.  (If there are only empty grouping sets, then the HAVING clause
 723          * must be degenerate as discussed below.)
 724          *
 725          * Also, it may be that the clause is so expensive to execute that we're
 726          * better off doing it only once per group, despite the loss of
 727          * selectivity.  This is hard to estimate short of doing the entire
 728          * planning process twice, so we use a heuristic: clauses containing
 729          * subplans are left in HAVING.  Otherwise, we move or copy the HAVING
 730          * clause into WHERE, in hopes of eliminating tuples before aggregation
 731          * instead of after.
 732          *
 733          * If the query has explicit grouping then we can simply move such a
 734          * clause into WHERE; any group that fails the clause will not be in the
 735          * output because none of its tuples will reach the grouping or
 736          * aggregation stage.  Otherwise we must have a degenerate (variable-free)
 737          * HAVING clause, which we put in WHERE so that query_planner() can use it
 738          * in a gating Result node, but also keep in HAVING to ensure that we
 739          * don't emit a bogus aggregated row. (This could be done better, but it
 740          * seems not worth optimizing.)
 741          *
 742          * Note that both havingQual and parse->jointree->quals are in
 743          * implicitly-ANDed-list form at this point, even though they are declared
 744          * as Node *.
 745          */
 746         newHaving = NIL;
 747         foreach(l, (List *) parse->havingQual)
 748         {
 749                 Node       *havingclause = (Node *) lfirst(l);
 750
 751                 if ((parse->groupClause && parse->groupingSets) ||
 752                         contain_agg_clause(havingclause) ||
 753                         contain_volatile_functions(havingclause) ||
 754                         contain_subplans(havingclause))
 755                 {
 756                         /* keep it in HAVING */
 757                         newHaving = lappend(newHaving, havingclause);
 758                 }
 759                 else if (parse->groupClause && !parse->groupingSets)
 760                 {
 761                         /* move it to WHERE */
 762                         parse->jointree->quals = (Node *)
 763                                 lappend((List *) parse->jointree->quals, havingclause);
 764                 }
 765                 else
 766                 {
 767                         /* put a copy in WHERE, keep it in HAVING */
 768                         parse->jointree->quals = (Node *)
 769                                 lappend((List *) parse->jointree->quals,
 770                                                 copyObject(havingclause));
 771                         newHaving = lappend(newHaving, havingclause);
 772                 }
 773         }
 774         parse->havingQual = (Node *) newHaving;
 775
 776         /* Remove any redundant GROUP BY columns */
 777         remove_useless_groupby_columns(root);
 778
 779         /*
 780          * If we have any outer joins, try to reduce them to plain inner joins.
 781          * This step is most easily done after we've done expression
 782          * preprocessing.
 783          */
 784         if (hasOuterJoins)
 785                 reduce_outer_joins(root);
 786
 787         /*
 788          * Do the main planning.  If we have an inherited target relation, that
 789          * needs special processing, else go straight to grouping_planner.
 790          */
 791         if (parse->resultRelation &&
 792                 rt_fetch(parse->resultRelation, parse->rtable)->inh)
 793                 inheritance_planner(root);
 794         else
 795                 grouping_planner(root, false, tuple_fraction);
 796
 797         /*
 798          * Capture the set of outer-level param IDs we have access to, for use in
 799          * extParam/allParam calculations later.
 800          */
 801         SS_identify_outer_params(root);
 802
 803         /*
 804          * If any initPlans were created in this query level, increment the
 805          * surviving Paths' costs to account for them.  They won't actually get
 806          * attached to the plan tree till create_plan() runs, but we want to be
 807          * sure their costs are included now.
 808          */
 809         final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
 810         SS_charge_for_initplans(root, final_rel);
 811
 812         /*
 813          * Make sure we've identified the cheapest Path for the final rel.  (By
 814          * doing this here not in grouping_planner, we include initPlan costs in
 815          * the decision, though it's unlikely that will change anything.)
 816          */
 817         set_cheapest(final_rel);
 818
 819         return root;
 820 }
 821
 822 /*
 823  * preprocess_expression
 824  *              Do subquery_planner's preprocessing work for an expression,
 825  *              which can be a targetlist, a WHERE clause (including JOIN/ON
 826  *              conditions), a HAVING clause, or a few other things.
 827  */
 828 static Node *
 829 preprocess_expression(PlannerInfo *root, Node *expr, int kind)
 830 {
 831         /*
 832          * Fall out quickly if expression is empty.  This occurs often enough to
 833          * be worth checking.  Note that null->null is the correct conversion for
 834          * implicit-AND result format, too.
 835          */
 836         if (expr == NULL)
 837                 return NULL;
 838
 839         /*
 840          * If the query has any join RTEs, replace join alias variables with
 841          * base-relation variables.  We must do this before sublink processing,
 842          * else sublinks expanded out from join aliases would not get processed.
 843          * We can skip it in non-lateral RTE functions, VALUES lists, and
 844          * TABLESAMPLE clauses, however, since they can't contain any Vars of the
 845          * current query level.
 846          */
 847         if (root->hasJoinRTEs &&
 848                 !(kind == EXPRKIND_RTFUNC ||
 849                   kind == EXPRKIND_VALUES ||
 850                   kind == EXPRKIND_TABLESAMPLE))
 851                 expr = flatten_join_alias_vars(root, expr);
 852
 853         /*
 854          * Simplify constant expressions.
 855          *
 856          * Note: an essential effect of this is to convert named-argument function
 857          * calls to positional notation and insert the current actual values of
 858          * any default arguments for functions.  To ensure that happens, we *must*
 859          * process all expressions here.  Previous PG versions sometimes skipped
 860          * const-simplification if it didn't seem worth the trouble, but we can't
 861          * do that anymore.
 862          *
 863          * Note: this also flattens nested AND and OR expressions into N-argument
 864          * form.  All processing of a qual expression after this point must be
 865          * careful to maintain AND/OR flatness --- that is, do not generate a tree
 866          * with AND directly under AND, nor OR directly under OR.
 867          */
 868         expr = eval_const_expressions(root, expr);
 869
 870         /*
 871          * If it's a qual or havingQual, canonicalize it.
 872          */
 873         if (kind == EXPRKIND_QUAL)
 874         {
 875                 expr = (Node *) canonicalize_qual((Expr *) expr);
 876
 877 #ifdef OPTIMIZER_DEBUG
 878                 printf("After canonicalize_qual()\n");
 879                 pprint(expr);
 880 #endif
 881         }
 882
 883         /* Expand SubLinks to SubPlans */
 884         if (root->parse->hasSubLinks)
 885                 expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL));
 886
 887         /*
 888          * XXX do not insert anything here unless you have grokked the comments in
 889          * SS_replace_correlation_vars ...
 890          */
 891
 892         /* Replace uplevel vars with Param nodes (this IS possible in VALUES) */
 893         if (root->query_level > 1)
 894                 expr = SS_replace_correlation_vars(root, expr);
 895
 896         /*
 897          * If it's a qual or havingQual, convert it to implicit-AND format. (We
 898          * don't want to do this before eval_const_expressions, since the latter
 899          * would be unable to simplify a top-level AND correctly. Also,
 900          * SS_process_sublinks expects explicit-AND format.)
 901          */
 902         if (kind == EXPRKIND_QUAL)
 903                 expr = (Node *) make_ands_implicit((Expr *) expr);
 904
 905         return expr;
 906 }
 907
 908 /*
 909  * preprocess_qual_conditions
 910  *              Recursively scan the query's jointree and do subquery_planner's
 911  *              preprocessing work on each qual condition found therein.
 912  */
 913 static void
 914 preprocess_qual_conditions(PlannerInfo *root, Node *jtnode)
 915 {
 916         if (jtnode == NULL)
 917                 return;
 918         if (IsA(jtnode, RangeTblRef))
 919         {
 920                 /* nothing to do here */
 921         }
 922         else if (IsA(jtnode, FromExpr))
 923         {
 924                 FromExpr   *f = (FromExpr *) jtnode;
 925                 ListCell   *l;
 926
 927                 foreach(l, f->fromlist)
 928                         preprocess_qual_conditions(root, lfirst(l));
 929
 930                 f->quals = preprocess_expression(root, f->quals, EXPRKIND_QUAL);
 931         }
 932         else if (IsA(jtnode, JoinExpr))
 933         {
 934                 JoinExpr   *j = (JoinExpr *) jtnode;
 935
 936                 preprocess_qual_conditions(root, j->larg);
 937                 preprocess_qual_conditions(root, j->rarg);
 938
 939                 j->quals = preprocess_expression(root, j->quals, EXPRKIND_QUAL);
 940         }
 941         else
 942                 elog(ERROR, "unrecognized node type: %d",
 943                          (int) nodeTag(jtnode));
 944 }
 945
 946 /*
 947  * preprocess_phv_expression
 948  *        Do preprocessing on a PlaceHolderVar expression that's been pulled up.
 949  *
 950  * If a LATERAL subquery references an output of another subquery, and that
 951  * output must be wrapped in a PlaceHolderVar because of an intermediate outer
 952  * join, then we'll push the PlaceHolderVar expression down into the subquery
 953  * and later pull it back up during find_lateral_references, which runs after
 954  * subquery_planner has preprocessed all the expressions that were in the
 955  * current query level to start with.  So we need to preprocess it then.
 956  */
 957 Expr *
 958 preprocess_phv_expression(PlannerInfo *root, Expr *expr)
 959 {
 960         return (Expr *) preprocess_expression(root, (Node *) expr, EXPRKIND_PHV);
 961 }
 962
 963 /*
 964  * inheritance_planner
 965  *        Generate Paths in the case where the result relation is an
 966  *        inheritance set.
 967  *
 968  * We have to handle this case differently from cases where a source relation
 969  * is an inheritance set. Source inheritance is expanded at the bottom of the
 970  * plan tree (see allpaths.c), but target inheritance has to be expanded at
 971  * the top.  The reason is that for UPDATE, each target relation needs a
 972  * different targetlist matching its own column set.  Fortunately,
 973  * the UPDATE/DELETE target can never be the nullable side of an outer join,
 974  * so it's OK to generate the plan this way.
 975  *
 976  * Returns nothing; the useful output is in the Paths we attach to
 977  * the (UPPERREL_FINAL, NULL) upperrel stored in *root.
 978  *
 979  * Note that we have not done set_cheapest() on the final rel; it's convenient
 980  * to leave this to the caller.
 981  */
 982 static void
 983 inheritance_planner(PlannerInfo *root)
 984 {
 985         Query      *parse = root->parse;
 986         int                     parentRTindex = parse->resultRelation;
 987         Bitmapset  *resultRTindexes;
 988         Bitmapset  *subqueryRTindexes;
 989         Bitmapset  *modifiableARIindexes;
 990         int                     nominalRelation = -1;
 991         List       *final_rtable = NIL;
 992         int                     save_rel_array_size = 0;
 993         RelOptInfo **save_rel_array = NULL;
 994         List       *subpaths = NIL;
 995         List       *subroots = NIL;
 996         List       *resultRelations = NIL;
 997         List       *withCheckOptionLists = NIL;
 998         List       *returningLists = NIL;
 999         List       *rowMarks;
1000         RelOptInfo *final_rel;
1001         ListCell   *lc;
1002         Index           rti;
1003
1004         Assert(parse->commandType != CMD_INSERT);
1005
1006         /*
1007          * We generate a modified instance of the original Query for each target
1008          * relation, plan that, and put all the plans into a list that will be
1009          * controlled by a single ModifyTable node.  All the instances share the
1010          * same rangetable, but each instance must have its own set of subquery
1011          * RTEs within the finished rangetable because (1) they are likely to get
1012          * scribbled on during planning, and (2) it's not inconceivable that
1013          * subqueries could get planned differently in different cases.  We need
1014          * not create duplicate copies of other RTE kinds, in particular not the
1015          * target relations, because they don't have either of those issues.  Not
1016          * having to duplicate the target relations is important because doing so
1017          * (1) would result in a rangetable of length O(N^2) for N targets, with
1018          * at least O(N^3) work expended here; and (2) would greatly complicate
1019          * management of the rowMarks list.
1020          *
1021          * Note that any RTEs with security barrier quals will be turned into
1022          * subqueries during planning, and so we must create copies of them too,
1023          * except where they are target relations, which will each only be used in
1024          * a single plan.
1025          *
1026          * To begin with, we'll need a bitmapset of the target relation relids.
1027          */
1028         resultRTindexes = bms_make_singleton(parentRTindex);
1029         foreach(lc, root->append_rel_list)
1030         {
1031                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1032
1033                 if (appinfo->parent_relid == parentRTindex)
1034                         resultRTindexes = bms_add_member(resultRTindexes,
1035                                                                                          appinfo->child_relid);
1036         }
1037
1038         /*
1039          * Now, generate a bitmapset of the relids of the subquery RTEs, including
1040          * security-barrier RTEs that will become subqueries, as just explained.
1041          */
1042         subqueryRTindexes = NULL;
1043         rti = 1;
1044         foreach(lc, parse->rtable)
1045         {
1046                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1047
1048                 if (rte->rtekind == RTE_SUBQUERY ||
1049                         (rte->securityQuals != NIL &&
1050                          !bms_is_member(rti, resultRTindexes)))
1051                         subqueryRTindexes = bms_add_member(subqueryRTindexes, rti);
1052                 rti++;
1053         }
1054
1055         /*
1056          * Next, we want to identify which AppendRelInfo items contain references
1057          * to any of the aforesaid subquery RTEs.  These items will need to be
1058          * copied and modified to adjust their subquery references; whereas the
1059          * other ones need not be touched.  It's worth being tense over this
1060          * because we can usually avoid processing most of the AppendRelInfo
1061          * items, thereby saving O(N^2) space and time when the target is a large
1062          * inheritance tree.  We can identify AppendRelInfo items by their
1063          * child_relid, since that should be unique within the list.
1064          */
1065         modifiableARIindexes = NULL;
1066         if (subqueryRTindexes != NULL)
1067         {
1068                 foreach(lc, root->append_rel_list)
1069                 {
1070                         AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1071
1072                         if (bms_is_member(appinfo->parent_relid, subqueryRTindexes) ||
1073                                 bms_is_member(appinfo->child_relid, subqueryRTindexes) ||
1074                                 bms_overlap(pull_varnos((Node *) appinfo->translated_vars),
1075                                                         subqueryRTindexes))
1076                                 modifiableARIindexes = bms_add_member(modifiableARIindexes,
1077                                                                                                           appinfo->child_relid);
1078                 }
1079         }
1080
1081         /*
1082          * And now we can get on with generating a plan for each child table.
1083          */
1084         foreach(lc, root->append_rel_list)
1085         {
1086                 AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(lc);
1087                 PlannerInfo *subroot;
1088                 RelOptInfo *sub_final_rel;
1089                 Path       *subpath;
1090
1091                 /* append_rel_list contains all append rels; ignore others */
1092                 if (appinfo->parent_relid != parentRTindex)
1093                         continue;
1094
1095                 /*
1096                  * We need a working copy of the PlannerInfo so that we can control
1097                  * propagation of information back to the main copy.
1098                  */
1099                 subroot = makeNode(PlannerInfo);
1100                 memcpy(subroot, root, sizeof(PlannerInfo));
1101
1102                 /*
1103                  * Generate modified query with this rel as target.  We first apply
1104                  * adjust_appendrel_attrs, which copies the Query and changes
1105                  * references to the parent RTE to refer to the current child RTE,
1106                  * then fool around with subquery RTEs.
1107                  */
1108                 subroot->parse = (Query *)
1109                         adjust_appendrel_attrs(root,
1110                                                                    (Node *) parse,
1111                                                                    appinfo);
1112
1113                 /*
1114                  * The rowMarks list might contain references to subquery RTEs, so
1115                  * make a copy that we can apply ChangeVarNodes to.  (Fortunately, the
1116                  * executor doesn't need to see the modified copies --- we can just
1117                  * pass it the original rowMarks list.)
1118                  */
1119                 subroot->rowMarks = (List *) copyObject(root->rowMarks);
1120
1121                 /*
1122                  * The append_rel_list likewise might contain references to subquery
1123                  * RTEs (if any subqueries were flattenable UNION ALLs).  So prepare
1124                  * to apply ChangeVarNodes to that, too.  As explained above, we only
1125                  * want to copy items that actually contain such references; the rest
1126                  * can just get linked into the subroot's append_rel_list.
1127                  *
1128                  * If we know there are no such references, we can just use the outer
1129                  * append_rel_list unmodified.
1130                  */
1131                 if (modifiableARIindexes != NULL)
1132                 {
1133                         ListCell   *lc2;
1134
1135                         subroot->append_rel_list = NIL;
1136                         foreach(lc2, root->append_rel_list)
1137                         {
1138                                 AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1139
1140                                 if (bms_is_member(appinfo2->child_relid, modifiableARIindexes))
1141                                         appinfo2 = (AppendRelInfo *) copyObject(appinfo2);
1142
1143                                 subroot->append_rel_list = lappend(subroot->append_rel_list,
1144                                                                                                    appinfo2);
1145                         }
1146                 }
1147
1148                 /*
1149                  * Add placeholders to the child Query's rangetable list to fill the
1150                  * RT indexes already reserved for subqueries in previous children.
1151                  * These won't be referenced, so there's no need to make them very
1152                  * valid-looking.
1153                  */
1154                 while (list_length(subroot->parse->rtable) < list_length(final_rtable))
1155                         subroot->parse->rtable = lappend(subroot->parse->rtable,
1156                                                                                          makeNode(RangeTblEntry));
1157
1158                 /*
1159                  * If this isn't the first child Query, generate duplicates of all
1160                  * subquery (or subquery-to-be) RTEs, and adjust Var numbering to
1161                  * reference the duplicates.  To simplify the loop logic, we scan the
1162                  * original rtable not the copy just made by adjust_appendrel_attrs;
1163                  * that should be OK since subquery RTEs couldn't contain any
1164                  * references to the target rel.
1165                  */
1166                 if (final_rtable != NIL && subqueryRTindexes != NULL)
1167                 {
1168                         ListCell   *lr;
1169
1170                         rti = 1;
1171                         foreach(lr, parse->rtable)
1172                         {
1173                                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lr);
1174
1175                                 if (bms_is_member(rti, subqueryRTindexes))
1176                                 {
1177                                         Index           newrti;
1178
1179                                         /*
1180                                          * The RTE can't contain any references to its own RT
1181                                          * index, except in the security barrier quals, so we can
1182                                          * save a few cycles by applying ChangeVarNodes before we
1183                                          * append the RTE to the rangetable.
1184                                          */
1185                                         newrti = list_length(subroot->parse->rtable) + 1;
1186                                         ChangeVarNodes((Node *) subroot->parse, rti, newrti, 0);
1187                                         ChangeVarNodes((Node *) subroot->rowMarks, rti, newrti, 0);
1188                                         /* Skip processing unchanging parts of append_rel_list */
1189                                         if (modifiableARIindexes != NULL)
1190                                         {
1191                                                 ListCell   *lc2;
1192
1193                                                 foreach(lc2, subroot->append_rel_list)
1194                                                 {
1195                                                         AppendRelInfo *appinfo2 = (AppendRelInfo *) lfirst(lc2);
1196
1197                                                         if (bms_is_member(appinfo2->child_relid,
1198                                                                                           modifiableARIindexes))
1199                                                                 ChangeVarNodes((Node *) appinfo2, rti, newrti, 0);
1200                                                 }
1201                                         }
1202                                         rte = copyObject(rte);
1203                                         ChangeVarNodes((Node *) rte->securityQuals, rti, newrti, 0);
1204                                         subroot->parse->rtable = lappend(subroot->parse->rtable,
1205                                                                                                          rte);
1206                                 }
1207                                 rti++;
1208                         }
1209                 }
1210
1211                 /* There shouldn't be any OJ info to translate, as yet */
1212                 Assert(subroot->join_info_list == NIL);
1213                 /* and we haven't created PlaceHolderInfos, either */
1214                 Assert(subroot->placeholder_list == NIL);
1215                 /* hack to mark target relation as an inheritance partition */
1216                 subroot->hasInheritedTarget = true;
1217
1218                 /* Generate Path(s) for accessing this result relation */
1219                 grouping_planner(subroot, true, 0.0 /* retrieve all tuples */ );
1220
1221                 /*
1222                  * Planning may have modified the query result relation (if there were
1223                  * security barrier quals on the result RTE).
1224                  */
1225                 appinfo->child_relid = subroot->parse->resultRelation;
1226
1227                 /*
1228                  * We'll use the first child relation (even if it's excluded) as the
1229                  * nominal target relation of the ModifyTable node.  Because of the
1230                  * way expand_inherited_rtentry works, this should always be the RTE
1231                  * representing the parent table in its role as a simple member of the
1232                  * inheritance set.  (It would be logically cleaner to use the
1233                  * inheritance parent RTE as the nominal target; but since that RTE
1234                  * will not be otherwise referenced in the plan, doing so would give
1235                  * rise to confusing use of multiple aliases in EXPLAIN output for
1236                  * what the user will think is the "same" table.)
1237                  */
1238                 if (nominalRelation < 0)
1239                         nominalRelation = appinfo->child_relid;
1240
1241                 /*
1242                  * Select cheapest path in case there's more than one.  We always run
1243                  * modification queries to conclusion, so we care only for the
1244                  * cheapest-total path.
1245                  */
1246                 sub_final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
1247                 set_cheapest(sub_final_rel);
1248                 subpath = sub_final_rel->cheapest_total_path;
1249
1250                 /*
1251                  * If this child rel was excluded by constraint exclusion, exclude it
1252                  * from the result plan.
1253                  */
1254                 if (IS_DUMMY_PATH(subpath))
1255                         continue;
1256
1257                 /*
1258                  * If this is the first non-excluded child, its post-planning rtable
1259                  * becomes the initial contents of final_rtable; otherwise, append
1260                  * just its modified subquery RTEs to final_rtable.
1261                  */
1262                 if (final_rtable == NIL)
1263                         final_rtable = subroot->parse->rtable;
1264                 else
1265                 {
1266                         List       *tmp_rtable = NIL;
1267                         ListCell   *cell1,
1268                                            *cell2;
1269
1270                         /*
1271                          * Check to see if any of the original RTEs were turned into
1272                          * subqueries during planning.  Currently, this should only ever
1273                          * happen due to securityQuals being involved which push a
1274                          * relation down under a subquery, to ensure that the security
1275                          * barrier quals are evaluated first.
1276                          *
1277                          * When this happens, we want to use the new subqueries in the
1278                          * final rtable.
1279                          */
1280                         forboth(cell1, final_rtable, cell2, subroot->parse->rtable)
1281                         {
1282                                 RangeTblEntry *rte1 = (RangeTblEntry *) lfirst(cell1);
1283                                 RangeTblEntry *rte2 = (RangeTblEntry *) lfirst(cell2);
1284
1285                                 if (rte1->rtekind == RTE_RELATION &&
1286                                         rte2->rtekind == RTE_SUBQUERY)
1287                                 {
1288                                         /* Should only be when there are securityQuals today */
1289                                         Assert(rte1->securityQuals != NIL);
1290                                         tmp_rtable = lappend(tmp_rtable, rte2);
1291                                 }
1292                                 else
1293                                         tmp_rtable = lappend(tmp_rtable, rte1);
1294                         }
1295
1296                         final_rtable = list_concat(tmp_rtable,
1297                                                                            list_copy_tail(subroot->parse->rtable,
1298                                                                                                  list_length(final_rtable)));
1299                 }
1300
1301                 /*
1302                  * We need to collect all the RelOptInfos from all child plans into
1303                  * the main PlannerInfo, since setrefs.c will need them.  We use the
1304                  * last child's simple_rel_array (previous ones are too short), so we
1305                  * have to propagate forward the RelOptInfos that were already built
1306                  * in previous children.
1307                  */
1308                 Assert(subroot->simple_rel_array_size >= save_rel_array_size);
1309                 for (rti = 1; rti < save_rel_array_size; rti++)
1310                 {
1311                         RelOptInfo *brel = save_rel_array[rti];
1312
1313                         if (brel)
1314                                 subroot->simple_rel_array[rti] = brel;
1315                 }
1316                 save_rel_array_size = subroot->simple_rel_array_size;
1317                 save_rel_array = subroot->simple_rel_array;
1318
1319                 /* Make sure any initplans from this rel get into the outer list */
1320                 root->init_plans = subroot->init_plans;
1321
1322                 /* Build list of sub-paths */
1323                 subpaths = lappend(subpaths, subpath);
1324
1325                 /* Build list of modified subroots, too */
1326                 subroots = lappend(subroots, subroot);
1327
1328                 /* Build list of target-relation RT indexes */
1329                 resultRelations = lappend_int(resultRelations, appinfo->child_relid);
1330
1331                 /* Build lists of per-relation WCO and RETURNING targetlists */
1332                 if (parse->withCheckOptions)
1333                         withCheckOptionLists = lappend(withCheckOptionLists,
1334                                                                                    subroot->parse->withCheckOptions);
1335                 if (parse->returningList)
1336                         returningLists = lappend(returningLists,
1337                                                                          subroot->parse->returningList);
1338
1339                 Assert(!parse->onConflict);
1340         }
1341
1342         /* Result path must go into outer query's FINAL upperrel */
1343         final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1344
1345         /*
1346          * We don't currently worry about setting final_rel's consider_parallel
1347          * flag in this case, nor about allowing FDWs or create_upper_paths_hook
1348          * to get control here.
1349          */
1350
1351         /*
1352          * If we managed to exclude every child rel, return a dummy plan; it
1353          * doesn't even need a ModifyTable node.
1354          */
1355         if (subpaths == NIL)
1356         {
1357                 set_dummy_rel_pathlist(final_rel);
1358                 return;
1359         }
1360
1361         /*
1362          * Put back the final adjusted rtable into the master copy of the Query.
1363          * (We mustn't do this if we found no non-excluded children.)
1364          */
1365         parse->rtable = final_rtable;
1366         root->simple_rel_array_size = save_rel_array_size;
1367         root->simple_rel_array = save_rel_array;
1368         /* Must reconstruct master's simple_rte_array, too */
1369         root->simple_rte_array = (RangeTblEntry **)
1370                 palloc0((list_length(final_rtable) + 1) * sizeof(RangeTblEntry *));
1371         rti = 1;
1372         foreach(lc, final_rtable)
1373         {
1374                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1375
1376                 root->simple_rte_array[rti++] = rte;
1377         }
1378
1379         /*
1380          * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node will
1381          * have dealt with fetching non-locked marked rows, else we need to have
1382          * ModifyTable do that.
1383          */
1384         if (parse->rowMarks)
1385                 rowMarks = NIL;
1386         else
1387                 rowMarks = root->rowMarks;
1388
1389         /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
1390         add_path(final_rel, (Path *)
1391                          create_modifytable_path(root, final_rel,
1392                                                                          parse->commandType,
1393                                                                          parse->canSetTag,
1394                                                                          nominalRelation,
1395                                                                          resultRelations,
1396                                                                          subpaths,
1397                                                                          subroots,
1398                                                                          withCheckOptionLists,
1399                                                                          returningLists,
1400                                                                          rowMarks,
1401                                                                          NULL,
1402                                                                          SS_assign_special_param(root)));
1403 }
1404
1405 /*--------------------
1406  * grouping_planner
1407  *        Perform planning steps related to grouping, aggregation, etc.
1408  *
1409  * This function adds all required top-level processing to the scan/join
1410  * Path(s) produced by query_planner.
1411  *
1412  * If inheritance_update is true, we're being called from inheritance_planner
1413  * and should not include a ModifyTable step in the resulting Path(s).
1414  * (inheritance_planner will create a single ModifyTable node covering all the
1415  * target tables.)
1416  *
1417  * tuple_fraction is the fraction of tuples we expect will be retrieved.
1418  * tuple_fraction is interpreted as follows:
1419  *        0: expect all tuples to be retrieved (normal case)
1420  *        0 < tuple_fraction < 1: expect the given fraction of tuples available
1421  *              from the plan to be retrieved
1422  *        tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
1423  *              expected to be retrieved (ie, a LIMIT specification)
1424  *
1425  * Returns nothing; the useful output is in the Paths we attach to the
1426  * (UPPERREL_FINAL, NULL) upperrel in *root.  In addition,
1427  * root->processed_tlist contains the final processed targetlist.
1428  *
1429  * Note that we have not done set_cheapest() on the final rel; it's convenient
1430  * to leave this to the caller.
1431  *--------------------
1432  */
1433 static void
1434 grouping_planner(PlannerInfo *root, bool inheritance_update,
1435                                  double tuple_fraction)
1436 {
1437         Query      *parse = root->parse;
1438         List       *tlist = parse->targetList;
1439         int64           offset_est = 0;
1440         int64           count_est = 0;
1441         double          limit_tuples = -1.0;
1442         bool            have_postponed_srfs = false;
1443         double          tlist_rows;
1444         PathTarget *final_target;
1445         RelOptInfo *current_rel;
1446         RelOptInfo *final_rel;
1447         ListCell   *lc;
1448
1449         /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
1450         if (parse->limitCount || parse->limitOffset)
1451         {
1452                 tuple_fraction = preprocess_limit(root, tuple_fraction,
1453                                                                                   &offset_est, &count_est);
1454
1455                 /*
1456                  * If we have a known LIMIT, and don't have an unknown OFFSET, we can
1457                  * estimate the effects of using a bounded sort.
1458                  */
1459                 if (count_est > 0 && offset_est >= 0)
1460                         limit_tuples = (double) count_est + (double) offset_est;
1461         }
1462
1463         /* Make tuple_fraction accessible to lower-level routines */
1464         root->tuple_fraction = tuple_fraction;
1465
1466         if (parse->setOperations)
1467         {
1468                 /*
1469                  * If there's a top-level ORDER BY, assume we have to fetch all the
1470                  * tuples.  This might be too simplistic given all the hackery below
1471                  * to possibly avoid the sort; but the odds of accurate estimates here
1472                  * are pretty low anyway.  XXX try to get rid of this in favor of
1473                  * letting plan_set_operations generate both fast-start and
1474                  * cheapest-total paths.
1475                  */
1476                 if (parse->sortClause)
1477                         root->tuple_fraction = 0.0;
1478
1479                 /*
1480                  * Construct Paths for set operations.  The results will not need any
1481                  * work except perhaps a top-level sort and/or LIMIT.  Note that any
1482                  * special work for recursive unions is the responsibility of
1483                  * plan_set_operations.
1484                  */
1485                 current_rel = plan_set_operations(root);
1486
1487                 /*
1488                  * We should not need to call preprocess_targetlist, since we must be
1489                  * in a SELECT query node.  Instead, use the targetlist returned by
1490                  * plan_set_operations (since this tells whether it returned any
1491                  * resjunk columns!), and transfer any sort key information from the
1492                  * original tlist.
1493                  */
1494                 Assert(parse->commandType == CMD_SELECT);
1495
1496                 tlist = root->processed_tlist;  /* from plan_set_operations */
1497
1498                 /* for safety, copy processed_tlist instead of modifying in-place */
1499                 tlist = postprocess_setop_tlist(copyObject(tlist), parse->targetList);
1500
1501                 /* Save aside the final decorated tlist */
1502                 root->processed_tlist = tlist;
1503
1504                 /* Also extract the PathTarget form of the setop result tlist */
1505                 final_target = current_rel->cheapest_total_path->pathtarget;
1506
1507                 /*
1508                  * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have
1509                  * checked already, but let's make sure).
1510                  */
1511                 if (parse->rowMarks)
1512                         ereport(ERROR,
1513                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1514                         /*------
1515                           translator: %s is a SQL row locking clause such as FOR UPDATE */
1516                                          errmsg("%s is not allowed with UNION/INTERSECT/EXCEPT",
1517                                                         LCS_asString(((RowMarkClause *)
1518                                                                         linitial(parse->rowMarks))->strength))));
1519
1520                 /*
1521                  * Calculate pathkeys that represent result ordering requirements
1522                  */
1523                 Assert(parse->distinctClause == NIL);
1524                 root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
1525                                                                                                                         parse->sortClause,
1526                                                                                                                         tlist);
1527         }
1528         else
1529         {
1530                 /* No set operations, do regular planning */
1531                 PathTarget *sort_input_target;
1532                 PathTarget *grouping_target;
1533                 PathTarget *scanjoin_target;
1534                 bool            have_grouping;
1535                 AggClauseCosts agg_costs;
1536                 WindowFuncLists *wflists = NULL;
1537                 List       *activeWindows = NIL;
1538                 List       *rollup_lists = NIL;
1539                 List       *rollup_groupclauses = NIL;
1540                 standard_qp_extra qp_extra;
1541
1542                 /* A recursive query should always have setOperations */
1543                 Assert(!root->hasRecursion);
1544
1545                 /* Preprocess grouping sets and GROUP BY clause, if any */
1546                 if (parse->groupingSets)
1547                 {
1548                         int                *tleref_to_colnum_map;
1549                         List       *sets;
1550                         int                     maxref;
1551                         ListCell   *lc;
1552                         ListCell   *lc2;
1553                         ListCell   *lc_set;
1554
1555                         parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1);
1556
1557                         /* Identify max SortGroupRef in groupClause, for array sizing */
1558                         maxref = 0;
1559                         foreach(lc, parse->groupClause)
1560                         {
1561                                 SortGroupClause *gc = lfirst(lc);
1562
1563                                 if (gc->tleSortGroupRef > maxref)
1564                                         maxref = gc->tleSortGroupRef;
1565                         }
1566
1567                         /* Allocate workspace array for remapping */
1568                         tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int));
1569
1570                         /* Examine the rollup sets */
1571                         sets = extract_rollup_sets(parse->groupingSets);
1572
1573                         foreach(lc_set, sets)
1574                         {
1575                                 List       *current_sets = (List *) lfirst(lc_set);
1576                                 List       *groupclause;
1577                                 int                     ref;
1578
1579                                 /*
1580                                  * Reorder the current list of grouping sets into correct
1581                                  * prefix order.  If only one aggregation pass is needed, try
1582                                  * to make the list match the ORDER BY clause; if more than
1583                                  * one pass is needed, we don't bother with that.
1584                                  */
1585                                 current_sets = reorder_grouping_sets(current_sets,
1586                                                                                                          (list_length(sets) == 1
1587                                                                                                           ? parse->sortClause
1588                                                                                                           : NIL));
1589
1590                                 /*
1591                                  * Order the groupClause appropriately.  If the first grouping
1592                                  * set is empty, this can match regular GROUP BY
1593                                  * preprocessing, otherwise we have to force the groupClause
1594                                  * to match that grouping set's order.
1595                                  */
1596                                 groupclause = preprocess_groupclause(root,
1597                                                                                                          linitial(current_sets));
1598
1599                                 /*
1600                                  * Now that we've pinned down an order for the groupClause for
1601                                  * this list of grouping sets, we need to remap the entries in
1602                                  * the grouping sets from sortgrouprefs to plain indices
1603                                  * (0-based) into the groupClause for this collection of
1604                                  * grouping sets.
1605                                  */
1606                                 ref = 0;
1607                                 foreach(lc, groupclause)
1608                                 {
1609                                         SortGroupClause *gc = lfirst(lc);
1610
1611                                         tleref_to_colnum_map[gc->tleSortGroupRef] = ref++;
1612                                 }
1613
1614                                 foreach(lc, current_sets)
1615                                 {
1616                                         foreach(lc2, (List *) lfirst(lc))
1617                                         {
1618                                                 lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)];
1619                                         }
1620                                 }
1621
1622                                 /* Save the reordered sets and corresponding groupclauses */
1623                                 rollup_lists = lcons(current_sets, rollup_lists);
1624                                 rollup_groupclauses = lcons(groupclause, rollup_groupclauses);
1625                         }
1626                 }
1627                 else
1628                 {
1629                         /* Preprocess regular GROUP BY clause, if any */
1630                         if (parse->groupClause)
1631                                 parse->groupClause = preprocess_groupclause(root, NIL);
1632                 }
1633
1634                 /* Preprocess targetlist */
1635                 tlist = preprocess_targetlist(root, tlist);
1636
1637                 if (parse->onConflict)
1638                         parse->onConflict->onConflictSet =
1639                                 preprocess_onconflict_targetlist(parse->onConflict->onConflictSet,
1640                                                                                                  parse->resultRelation,
1641                                                                                                  parse->rtable);
1642
1643                 /*
1644                  * Expand any rangetable entries that have security barrier quals.
1645                  * This may add new security barrier subquery RTEs to the rangetable.
1646                  */
1647                 expand_security_quals(root, tlist);
1648
1649                 /*
1650                  * We are now done hacking up the query's targetlist.  Most of the
1651                  * remaining planning work will be done with the PathTarget
1652                  * representation of tlists, but save aside the full representation so
1653                  * that we can transfer its decoration (resnames etc) to the topmost
1654                  * tlist of the finished Plan.
1655                  */
1656                 root->processed_tlist = tlist;
1657
1658                 /*
1659                  * Collect statistics about aggregates for estimating costs, and mark
1660                  * all the aggregates with resolved aggtranstypes.  We must do this
1661                  * before slicing and dicing the tlist into various pathtargets, else
1662                  * some copies of the Aggref nodes might escape being marked with the
1663                  * correct transtypes.
1664                  *
1665                  * Note: currently, we do not detect duplicate aggregates here.  This
1666                  * may result in somewhat-overestimated cost, which is fine for our
1667                  * purposes since all Paths will get charged the same.  But at some
1668                  * point we might wish to do that detection in the planner, rather
1669                  * than during executor startup.
1670                  */
1671                 MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
1672                 if (parse->hasAggs)
1673                 {
1674                         get_agg_clause_costs(root, (Node *) tlist, AGGSPLIT_SIMPLE,
1675                                                                  &agg_costs);
1676                         get_agg_clause_costs(root, parse->havingQual, AGGSPLIT_SIMPLE,
1677                                                                  &agg_costs);
1678                 }
1679
1680                 /*
1681                  * Locate any window functions in the tlist.  (We don't need to look
1682                  * anywhere else, since expressions used in ORDER BY will be in there
1683                  * too.)  Note that they could all have been eliminated by constant
1684                  * folding, in which case we don't need to do any more work.
1685                  */
1686                 if (parse->hasWindowFuncs)
1687                 {
1688                         wflists = find_window_functions((Node *) tlist,
1689                                                                                         list_length(parse->windowClause));
1690                         if (wflists->numWindowFuncs > 0)
1691                                 activeWindows = select_active_windows(root, wflists);
1692                         else
1693                                 parse->hasWindowFuncs = false;
1694                 }
1695
1696                 /*
1697                  * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
1698                  * adding logic between here and the query_planner() call.  Anything
1699                  * that is needed in MIN/MAX-optimizable cases will have to be
1700                  * duplicated in planagg.c.
1701                  */
1702                 if (parse->hasAggs)
1703                         preprocess_minmax_aggregates(root, tlist);
1704
1705                 /*
1706                  * Figure out whether there's a hard limit on the number of rows that
1707                  * query_planner's result subplan needs to return.  Even if we know a
1708                  * hard limit overall, it doesn't apply if the query has any
1709                  * grouping/aggregation operations, or SRFs in the tlist.
1710                  */
1711                 if (parse->groupClause ||
1712                         parse->groupingSets ||
1713                         parse->distinctClause ||
1714                         parse->hasAggs ||
1715                         parse->hasWindowFuncs ||
1716                         parse->hasTargetSRFs ||
1717                         root->hasHavingQual)
1718                         root->limit_tuples = -1.0;
1719                 else
1720                         root->limit_tuples = limit_tuples;
1721
1722                 /* Set up data needed by standard_qp_callback */
1723                 qp_extra.tlist = tlist;
1724                 qp_extra.activeWindows = activeWindows;
1725                 qp_extra.groupClause =
1726                         parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause;
1727
1728                 /*
1729                  * Generate the best unsorted and presorted paths for the scan/join
1730                  * portion of this Query, ie the processing represented by the
1731                  * FROM/WHERE clauses.  (Note there may not be any presorted paths.)
1732                  * We also generate (in standard_qp_callback) pathkey representations
1733                  * of the query's sort clause, distinct clause, etc.
1734                  */
1735                 current_rel = query_planner(root, tlist,
1736                                                                         standard_qp_callback, &qp_extra);
1737
1738                 /*
1739                  * Convert the query's result tlist into PathTarget format.
1740                  *
1741                  * Note: it's desirable to not do this till after query_planner(),
1742                  * because the target width estimates can use per-Var width numbers
1743                  * that were obtained within query_planner().
1744                  */
1745                 final_target = create_pathtarget(root, tlist);
1746
1747                 /*
1748                  * If ORDER BY was given, consider whether we should use a post-sort
1749                  * projection, and compute the adjusted target for preceding steps if
1750                  * so.
1751                  */
1752                 if (parse->sortClause)
1753                         sort_input_target = make_sort_input_target(root,
1754                                                                                                            final_target,
1755                                                                                                            &have_postponed_srfs);
1756                 else
1757                         sort_input_target = final_target;
1758
1759                 /*
1760                  * If we have window functions to deal with, the output from any
1761                  * grouping step needs to be what the window functions want;
1762                  * otherwise, it should be sort_input_target.
1763                  */
1764                 if (activeWindows)
1765                         grouping_target = make_window_input_target(root,
1766                                                                                                            final_target,
1767                                                                                                            activeWindows);
1768                 else
1769                         grouping_target = sort_input_target;
1770
1771                 /*
1772                  * If we have grouping or aggregation to do, the topmost scan/join
1773                  * plan node must emit what the grouping step wants; otherwise, it
1774                  * should emit grouping_target.
1775                  */
1776                 have_grouping = (parse->groupClause || parse->groupingSets ||
1777                                                  parse->hasAggs || root->hasHavingQual);
1778                 if (have_grouping)
1779                         scanjoin_target = make_group_input_target(root, final_target);
1780                 else
1781                         scanjoin_target = grouping_target;
1782
1783                 /*
1784                  * Forcibly apply scan/join target to all the Paths for the scan/join
1785                  * rel.
1786                  *
1787                  * In principle we should re-run set_cheapest() here to identify the
1788                  * cheapest path, but it seems unlikely that adding the same tlist
1789                  * eval costs to all the paths would change that, so we don't bother.
1790                  * Instead, just assume that the cheapest-startup and cheapest-total
1791                  * paths remain so.  (There should be no parameterized paths anymore,
1792                  * so we needn't worry about updating cheapest_parameterized_paths.)
1793                  */
1794                 foreach(lc, current_rel->pathlist)
1795                 {
1796                         Path       *subpath = (Path *) lfirst(lc);
1797                         Path       *path;
1798
1799                         Assert(subpath->param_info == NULL);
1800                         path = apply_projection_to_path(root, current_rel,
1801                                                                                         subpath, scanjoin_target);
1802                         /* If we had to add a Result, path is different from subpath */
1803                         if (path != subpath)
1804                         {
1805                                 lfirst(lc) = path;
1806                                 if (subpath == current_rel->cheapest_startup_path)
1807                                         current_rel->cheapest_startup_path = path;
1808                                 if (subpath == current_rel->cheapest_total_path)
1809                                         current_rel->cheapest_total_path = path;
1810                         }
1811                 }
1812
1813                 /*
1814                  * Upper planning steps which make use of the top scan/join rel's
1815                  * partial pathlist will expect partial paths for that rel to produce
1816                  * the same output as complete paths ... and we just changed the
1817                  * output for the complete paths, so we'll need to do the same thing
1818                  * for partial paths.  But only parallel-safe expressions can be
1819                  * computed by partial paths.
1820                  */
1821                 if (current_rel->partial_pathlist &&
1822                         is_parallel_safe(root, (Node *) scanjoin_target->exprs))
1823                 {
1824                         /* Apply the scan/join target to each partial path */
1825                         foreach(lc, current_rel->partial_pathlist)
1826                         {
1827                                 Path       *subpath = (Path *) lfirst(lc);
1828                                 Path       *newpath;
1829
1830                                 /* Shouldn't have any parameterized paths anymore */
1831                                 Assert(subpath->param_info == NULL);
1832
1833                                 /*
1834                                  * Don't use apply_projection_to_path() here, because there
1835                                  * could be other pointers to these paths, and therefore we
1836                                  * mustn't modify them in place.
1837                                  */
1838                                 newpath = (Path *) create_projection_path(root,
1839                                                                                                                   current_rel,
1840                                                                                                                   subpath,
1841                                                                                                                   scanjoin_target);
1842                                 lfirst(lc) = newpath;
1843                         }
1844                 }
1845                 else
1846                 {
1847                         /*
1848                          * In the unfortunate event that scanjoin_target is not
1849                          * parallel-safe, we can't apply it to the partial paths; in that
1850                          * case, we'll need to forget about the partial paths, which
1851                          * aren't valid input for upper planning steps.
1852                          */
1853                         current_rel->partial_pathlist = NIL;
1854                 }
1855
1856                 /*
1857                  * Save the various upper-rel PathTargets we just computed into
1858                  * root->upper_targets[].  The core code doesn't use this, but it
1859                  * provides a convenient place for extensions to get at the info.  For
1860                  * consistency, we save all the intermediate targets, even though some
1861                  * of the corresponding upperrels might not be needed for this query.
1862                  */
1863                 root->upper_targets[UPPERREL_FINAL] = final_target;
1864                 root->upper_targets[UPPERREL_WINDOW] = sort_input_target;
1865                 root->upper_targets[UPPERREL_GROUP_AGG] = grouping_target;
1866
1867                 /*
1868                  * If we have grouping and/or aggregation, consider ways to implement
1869                  * that.  We build a new upperrel representing the output of this
1870                  * phase.
1871                  */
1872                 if (have_grouping)
1873                 {
1874                         current_rel = create_grouping_paths(root,
1875                                                                                                 current_rel,
1876                                                                                                 grouping_target,
1877                                                                                                 &agg_costs,
1878                                                                                                 rollup_lists,
1879                                                                                                 rollup_groupclauses);
1880                 }
1881
1882                 /*
1883                  * If we have window functions, consider ways to implement those.  We
1884                  * build a new upperrel representing the output of this phase.
1885                  */
1886                 if (activeWindows)
1887                 {
1888                         current_rel = create_window_paths(root,
1889                                                                                           current_rel,
1890                                                                                           grouping_target,
1891                                                                                           sort_input_target,
1892                                                                                           tlist,
1893                                                                                           wflists,
1894                                                                                           activeWindows);
1895                 }
1896
1897                 /*
1898                  * If there is a DISTINCT clause, consider ways to implement that. We
1899                  * build a new upperrel representing the output of this phase.
1900                  */
1901                 if (parse->distinctClause)
1902                 {
1903                         current_rel = create_distinct_paths(root,
1904                                                                                                 current_rel);
1905                 }
1906
1907         }                                                       /* end of if (setOperations) */
1908
1909         /*
1910          * If ORDER BY was given, consider ways to implement that, and generate a
1911          * new upperrel containing only paths that emit the correct ordering and
1912          * project the correct final_target.  We can apply the original
1913          * limit_tuples limit in sort costing here, but only if there are no
1914          * postponed SRFs.
1915          */
1916         if (parse->sortClause)
1917         {
1918                 current_rel = create_ordered_paths(root,
1919                                                                                    current_rel,
1920                                                                                    final_target,
1921                                                                                    have_postponed_srfs ? -1.0 :
1922                                                                                    limit_tuples);
1923         }
1924
1925         /*
1926          * If there are set-returning functions in the tlist, scale up the output
1927          * rowcounts of all surviving Paths to account for that.  Note that if any
1928          * SRFs appear in sorting or grouping columns, we'll have underestimated
1929          * the numbers of rows passing through earlier steps; but that's such a
1930          * weird usage that it doesn't seem worth greatly complicating matters to
1931          * account for it.
1932          */
1933         if (parse->hasTargetSRFs)
1934                 tlist_rows = tlist_returns_set_rows(tlist);
1935         else
1936                 tlist_rows = 1;
1937
1938         if (tlist_rows > 1)
1939         {
1940                 foreach(lc, current_rel->pathlist)
1941                 {
1942                         Path       *path = (Path *) lfirst(lc);
1943
1944                         /*
1945                          * We assume that execution costs of the tlist as such were
1946                          * already accounted for.  However, it still seems appropriate to
1947                          * charge something more for the executor's general costs of
1948                          * processing the added tuples.  The cost is probably less than
1949                          * cpu_tuple_cost, though, so we arbitrarily use half of that.
1950                          */
1951                         path->total_cost += path->rows * (tlist_rows - 1) *
1952                                 cpu_tuple_cost / 2;
1953
1954                         path->rows *= tlist_rows;
1955                 }
1956                 /* No need to run set_cheapest; we're keeping all paths anyway. */
1957         }
1958
1959         /*
1960          * Now we are prepared to build the final-output upperrel.
1961          */
1962         final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
1963
1964         /*
1965          * If the input rel is marked consider_parallel and there's nothing that's
1966          * not parallel-safe in the LIMIT clause, then the final_rel can be marked
1967          * consider_parallel as well.  Note that if the query has rowMarks or is
1968          * not a SELECT, consider_parallel will be false for every relation in the
1969          * query.
1970          */
1971         if (current_rel->consider_parallel &&
1972                 is_parallel_safe(root, parse->limitOffset) &&
1973                 is_parallel_safe(root, parse->limitCount))
1974                 final_rel->consider_parallel = true;
1975
1976         /*
1977          * If the current_rel belongs to a single FDW, so does the final_rel.
1978          */
1979         final_rel->serverid = current_rel->serverid;
1980         final_rel->userid = current_rel->userid;
1981         final_rel->useridiscurrent = current_rel->useridiscurrent;
1982         final_rel->fdwroutine = current_rel->fdwroutine;
1983
1984         /*
1985          * Generate paths for the final_rel.  Insert all surviving paths, with
1986          * LockRows, Limit, and/or ModifyTable steps added if needed.
1987          */
1988         foreach(lc, current_rel->pathlist)
1989         {
1990                 Path       *path = (Path *) lfirst(lc);
1991
1992                 /*
1993                  * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node.
1994                  * (Note: we intentionally test parse->rowMarks not root->rowMarks
1995                  * here.  If there are only non-locking rowmarks, they should be
1996                  * handled by the ModifyTable node instead.  However, root->rowMarks
1997                  * is what goes into the LockRows node.)
1998                  */
1999                 if (parse->rowMarks)
2000                 {
2001                         path = (Path *) create_lockrows_path(root, final_rel, path,
2002                                                                                                  root->rowMarks,
2003                                                                                           SS_assign_special_param(root));
2004                 }
2005
2006                 /*
2007                  * If there is a LIMIT/OFFSET clause, add the LIMIT node.
2008                  */
2009                 if (limit_needed(parse))
2010                 {
2011                         path = (Path *) create_limit_path(root, final_rel, path,
2012                                                                                           parse->limitOffset,
2013                                                                                           parse->limitCount,
2014                                                                                           offset_est, count_est);
2015                 }
2016
2017                 /*
2018                  * If this is an INSERT/UPDATE/DELETE, and we're not being called from
2019                  * inheritance_planner, add the ModifyTable node.
2020                  */
2021                 if (parse->commandType != CMD_SELECT && !inheritance_update)
2022                 {
2023                         List       *withCheckOptionLists;
2024                         List       *returningLists;
2025                         List       *rowMarks;
2026
2027                         /*
2028                          * Set up the WITH CHECK OPTION and RETURNING lists-of-lists, if
2029                          * needed.
2030                          */
2031                         if (parse->withCheckOptions)
2032                                 withCheckOptionLists = list_make1(parse->withCheckOptions);
2033                         else
2034                                 withCheckOptionLists = NIL;
2035
2036                         if (parse->returningList)
2037                                 returningLists = list_make1(parse->returningList);
2038                         else
2039                                 returningLists = NIL;
2040
2041                         /*
2042                          * If there was a FOR [KEY] UPDATE/SHARE clause, the LockRows node
2043                          * will have dealt with fetching non-locked marked rows, else we
2044                          * need to have ModifyTable do that.
2045                          */
2046                         if (parse->rowMarks)
2047                                 rowMarks = NIL;
2048                         else
2049                                 rowMarks = root->rowMarks;
2050
2051                         path = (Path *)
2052                                 create_modifytable_path(root, final_rel,
2053                                                                                 parse->commandType,
2054                                                                                 parse->canSetTag,
2055                                                                                 parse->resultRelation,
2056                                                                                 list_make1_int(parse->resultRelation),
2057                                                                                 list_make1(path),
2058                                                                                 list_make1(root),
2059                                                                                 withCheckOptionLists,
2060                                                                                 returningLists,
2061                                                                                 rowMarks,
2062                                                                                 parse->onConflict,
2063                                                                                 SS_assign_special_param(root));
2064                 }
2065
2066                 /* And shove it into final_rel */
2067                 add_path(final_rel, path);
2068         }
2069
2070         /*
2071          * If there is an FDW that's responsible for all baserels of the query,
2072          * let it consider adding ForeignPaths.
2073          */
2074         if (final_rel->fdwroutine &&
2075                 final_rel->fdwroutine->GetForeignUpperPaths)
2076                 final_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_FINAL,
2077                                                                                                         current_rel, final_rel);
2078
2079         /* Let extensions possibly add some more paths */
2080         if (create_upper_paths_hook)
2081                 (*create_upper_paths_hook) (root, UPPERREL_FINAL,
2082                                                                         current_rel, final_rel);
2083
2084         /* Note: currently, we leave it to callers to do set_cheapest() */
2085 }
2086
2087
2088 /*
2089  * Detect whether a plan node is a "dummy" plan created when a relation
2090  * is deemed not to need scanning due to constraint exclusion.
2091  *
2092  * Currently, such dummy plans are Result nodes with constant FALSE
2093  * filter quals (see set_dummy_rel_pathlist and create_append_plan).
2094  *
2095  * XXX this probably ought to be somewhere else, but not clear where.
2096  */
2097 bool
2098 is_dummy_plan(Plan *plan)
2099 {
2100         if (IsA(plan, Result))
2101         {
2102                 List       *rcqual = (List *) ((Result *) plan)->resconstantqual;
2103
2104                 if (list_length(rcqual) == 1)
2105                 {
2106                         Const      *constqual = (Const *) linitial(rcqual);
2107
2108                         if (constqual && IsA(constqual, Const))
2109                         {
2110                                 if (!constqual->constisnull &&
2111                                         !DatumGetBool(constqual->constvalue))
2112                                         return true;
2113                         }
2114                 }
2115         }
2116         return false;
2117 }
2118
2119 /*
2120  * Create a bitmapset of the RT indexes of live base relations
2121  *
2122  * Helper for preprocess_rowmarks ... at this point in the proceedings,
2123  * the only good way to distinguish baserels from appendrel children
2124  * is to see what is in the join tree.
2125  */
2126 static Bitmapset *
2127 get_base_rel_indexes(Node *jtnode)
2128 {
2129         Bitmapset  *result;
2130
2131         if (jtnode == NULL)
2132                 return NULL;
2133         if (IsA(jtnode, RangeTblRef))
2134         {
2135                 int                     varno = ((RangeTblRef *) jtnode)->rtindex;
2136
2137                 result = bms_make_singleton(varno);
2138         }
2139         else if (IsA(jtnode, FromExpr))
2140         {
2141                 FromExpr   *f = (FromExpr *) jtnode;
2142                 ListCell   *l;
2143
2144                 result = NULL;
2145                 foreach(l, f->fromlist)
2146                         result = bms_join(result,
2147                                                           get_base_rel_indexes(lfirst(l)));
2148         }
2149         else if (IsA(jtnode, JoinExpr))
2150         {
2151                 JoinExpr   *j = (JoinExpr *) jtnode;
2152
2153                 result = bms_join(get_base_rel_indexes(j->larg),
2154                                                   get_base_rel_indexes(j->rarg));
2155         }
2156         else
2157         {
2158                 elog(ERROR, "unrecognized node type: %d",
2159                          (int) nodeTag(jtnode));
2160                 result = NULL;                  /* keep compiler quiet */
2161         }
2162         return result;
2163 }
2164
2165 /*
2166  * preprocess_rowmarks - set up PlanRowMarks if needed
2167  */
2168 static void
2169 preprocess_rowmarks(PlannerInfo *root)
2170 {
2171         Query      *parse = root->parse;
2172         Bitmapset  *rels;
2173         List       *prowmarks;
2174         ListCell   *l;
2175         int                     i;
2176
2177         if (parse->rowMarks)
2178         {
2179                 /*
2180                  * We've got trouble if FOR [KEY] UPDATE/SHARE appears inside
2181                  * grouping, since grouping renders a reference to individual tuple
2182                  * CTIDs invalid.  This is also checked at parse time, but that's
2183                  * insufficient because of rule substitution, query pullup, etc.
2184                  */
2185                 CheckSelectLocking(parse, ((RowMarkClause *)
2186                                                                    linitial(parse->rowMarks))->strength);
2187         }
2188         else
2189         {
2190                 /*
2191                  * We only need rowmarks for UPDATE, DELETE, or FOR [KEY]
2192                  * UPDATE/SHARE.
2193                  */
2194                 if (parse->commandType != CMD_UPDATE &&
2195                         parse->commandType != CMD_DELETE)
2196                         return;
2197         }
2198
2199         /*
2200          * We need to have rowmarks for all base relations except the target. We
2201          * make a bitmapset of all base rels and then remove the items we don't
2202          * need or have FOR [KEY] UPDATE/SHARE marks for.
2203          */
2204         rels = get_base_rel_indexes((Node *) parse->jointree);
2205         if (parse->resultRelation)
2206                 rels = bms_del_member(rels, parse->resultRelation);
2207
2208         /*
2209          * Convert RowMarkClauses to PlanRowMark representation.
2210          */
2211         prowmarks = NIL;
2212         foreach(l, parse->rowMarks)
2213         {
2214                 RowMarkClause *rc = (RowMarkClause *) lfirst(l);
2215                 RangeTblEntry *rte = rt_fetch(rc->rti, parse->rtable);
2216                 PlanRowMark *newrc;
2217
2218                 /*
2219                  * Currently, it is syntactically impossible to have FOR UPDATE et al
2220                  * applied to an update/delete target rel.  If that ever becomes
2221                  * possible, we should drop the target from the PlanRowMark list.
2222                  */
2223                 Assert(rc->rti != parse->resultRelation);
2224
2225                 /*
2226                  * Ignore RowMarkClauses for subqueries; they aren't real tables and
2227                  * can't support true locking.  Subqueries that got flattened into the
2228                  * main query should be ignored completely.  Any that didn't will get
2229                  * ROW_MARK_COPY items in the next loop.
2230                  */
2231                 if (rte->rtekind != RTE_RELATION)
2232                         continue;
2233
2234                 rels = bms_del_member(rels, rc->rti);
2235
2236                 newrc = makeNode(PlanRowMark);
2237                 newrc->rti = newrc->prti = rc->rti;
2238                 newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2239                 newrc->markType = select_rowmark_type(rte, rc->strength);
2240                 newrc->allMarkTypes = (1 << newrc->markType);
2241                 newrc->strength = rc->strength;
2242                 newrc->waitPolicy = rc->waitPolicy;
2243                 newrc->isParent = false;
2244
2245                 prowmarks = lappend(prowmarks, newrc);
2246         }
2247
2248         /*
2249          * Now, add rowmarks for any non-target, non-locked base relations.
2250          */
2251         i = 0;
2252         foreach(l, parse->rtable)
2253         {
2254                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
2255                 PlanRowMark *newrc;
2256
2257                 i++;
2258                 if (!bms_is_member(i, rels))
2259                         continue;
2260
2261                 newrc = makeNode(PlanRowMark);
2262                 newrc->rti = newrc->prti = i;
2263                 newrc->rowmarkId = ++(root->glob->lastRowMarkId);
2264                 newrc->markType = select_rowmark_type(rte, LCS_NONE);
2265                 newrc->allMarkTypes = (1 << newrc->markType);
2266                 newrc->strength = LCS_NONE;
2267                 newrc->waitPolicy = LockWaitBlock;              /* doesn't matter */
2268                 newrc->isParent = false;
2269
2270                 prowmarks = lappend(prowmarks, newrc);
2271         }
2272
2273         root->rowMarks = prowmarks;
2274 }
2275
2276 /*
2277  * Select RowMarkType to use for a given table
2278  */
2279 RowMarkType
2280 select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength)
2281 {
2282         if (rte->rtekind != RTE_RELATION)
2283         {
2284                 /* If it's not a table at all, use ROW_MARK_COPY */
2285                 return ROW_MARK_COPY;
2286         }
2287         else if (rte->relkind == RELKIND_FOREIGN_TABLE)
2288         {
2289                 /* Let the FDW select the rowmark type, if it wants to */
2290                 FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid);
2291
2292                 if (fdwroutine->GetForeignRowMarkType != NULL)
2293                         return fdwroutine->GetForeignRowMarkType(rte, strength);
2294                 /* Otherwise, use ROW_MARK_COPY by default */
2295                 return ROW_MARK_COPY;
2296         }
2297         else
2298         {
2299                 /* Regular table, apply the appropriate lock type */
2300                 switch (strength)
2301                 {
2302                         case LCS_NONE:
2303
2304                                 /*
2305                                  * We don't need a tuple lock, only the ability to re-fetch
2306                                  * the row.  Regular tables support ROW_MARK_REFERENCE, but if
2307                                  * this RTE has security barrier quals, it will be turned into
2308                                  * a subquery during planning, so use ROW_MARK_COPY.
2309                                  *
2310                                  * This is only necessary for LCS_NONE, since real tuple locks
2311                                  * on an RTE with security barrier quals are supported by
2312                                  * pushing the lock down into the subquery --- see
2313                                  * expand_security_qual.
2314                                  */
2315                                 if (rte->securityQuals != NIL)
2316                                         return ROW_MARK_COPY;
2317                                 return ROW_MARK_REFERENCE;
2318                                 break;
2319                         case LCS_FORKEYSHARE:
2320                                 return ROW_MARK_KEYSHARE;
2321                                 break;
2322                         case LCS_FORSHARE:
2323                                 return ROW_MARK_SHARE;
2324                                 break;
2325                         case LCS_FORNOKEYUPDATE:
2326                                 return ROW_MARK_NOKEYEXCLUSIVE;
2327                                 break;
2328                         case LCS_FORUPDATE:
2329                                 return ROW_MARK_EXCLUSIVE;
2330                                 break;
2331                 }
2332                 elog(ERROR, "unrecognized LockClauseStrength %d", (int) strength);
2333                 return ROW_MARK_EXCLUSIVE;              /* keep compiler quiet */
2334         }
2335 }
2336
2337 /*
2338  * preprocess_limit - do pre-estimation for LIMIT and/or OFFSET clauses
2339  *
2340  * We try to estimate the values of the LIMIT/OFFSET clauses, and pass the
2341  * results back in *count_est and *offset_est.  These variables are set to
2342  * 0 if the corresponding clause is not present, and -1 if it's present
2343  * but we couldn't estimate the value for it.  (The "0" convention is OK
2344  * for OFFSET but a little bit bogus for LIMIT: effectively we estimate
2345  * LIMIT 0 as though it were LIMIT 1.  But this is in line with the planner's
2346  * usual practice of never estimating less than one row.)  These values will
2347  * be passed to create_limit_path, which see if you change this code.
2348  *
2349  * The return value is the suitably adjusted tuple_fraction to use for
2350  * planning the query.  This adjustment is not overridable, since it reflects
2351  * plan actions that grouping_planner() will certainly take, not assumptions
2352  * about context.
2353  */
2354 static double
2355 preprocess_limit(PlannerInfo *root, double tuple_fraction,
2356                                  int64 *offset_est, int64 *count_est)
2357 {
2358         Query      *parse = root->parse;
2359         Node       *est;
2360         double          limit_fraction;
2361
2362         /* Should not be called unless LIMIT or OFFSET */
2363         Assert(parse->limitCount || parse->limitOffset);
2364
2365         /*
2366          * Try to obtain the clause values.  We use estimate_expression_value
2367          * primarily because it can sometimes do something useful with Params.
2368          */
2369         if (parse->limitCount)
2370         {
2371                 est = estimate_expression_value(root, parse->limitCount);
2372                 if (est && IsA(est, Const))
2373                 {
2374                         if (((Const *) est)->constisnull)
2375                         {
2376                                 /* NULL indicates LIMIT ALL, ie, no limit */
2377                                 *count_est = 0; /* treat as not present */
2378                         }
2379                         else
2380                         {
2381                                 *count_est = DatumGetInt64(((Const *) est)->constvalue);
2382                                 if (*count_est <= 0)
2383                                         *count_est = 1;         /* force to at least 1 */
2384                         }
2385                 }
2386                 else
2387                         *count_est = -1;        /* can't estimate */
2388         }
2389         else
2390                 *count_est = 0;                 /* not present */
2391
2392         if (parse->limitOffset)
2393         {
2394                 est = estimate_expression_value(root, parse->limitOffset);
2395                 if (est && IsA(est, Const))
2396                 {
2397                         if (((Const *) est)->constisnull)
2398                         {
2399                                 /* Treat NULL as no offset; the executor will too */
2400                                 *offset_est = 0;        /* treat as not present */
2401                         }
2402                         else
2403                         {
2404                                 *offset_est = DatumGetInt64(((Const *) est)->constvalue);
2405                                 if (*offset_est < 0)
2406                                         *offset_est = 0;        /* treat as not present */
2407                         }
2408                 }
2409                 else
2410                         *offset_est = -1;       /* can't estimate */
2411         }
2412         else
2413                 *offset_est = 0;                /* not present */
2414
2415         if (*count_est != 0)
2416         {
2417                 /*
2418                  * A LIMIT clause limits the absolute number of tuples returned.
2419                  * However, if it's not a constant LIMIT then we have to guess; for
2420                  * lack of a better idea, assume 10% of the plan's result is wanted.
2421                  */
2422                 if (*count_est < 0 || *offset_est < 0)
2423                 {
2424                         /* LIMIT or OFFSET is an expression ... punt ... */
2425                         limit_fraction = 0.10;
2426                 }
2427                 else
2428                 {
2429                         /* LIMIT (plus OFFSET, if any) is max number of tuples needed */
2430                         limit_fraction = (double) *count_est + (double) *offset_est;
2431                 }
2432
2433                 /*
2434                  * If we have absolute limits from both caller and LIMIT, use the
2435                  * smaller value; likewise if they are both fractional.  If one is
2436                  * fractional and the other absolute, we can't easily determine which
2437                  * is smaller, but we use the heuristic that the absolute will usually
2438                  * be smaller.
2439                  */
2440                 if (tuple_fraction >= 1.0)
2441                 {
2442                         if (limit_fraction >= 1.0)
2443                         {
2444                                 /* both absolute */
2445                                 tuple_fraction = Min(tuple_fraction, limit_fraction);
2446                         }
2447                         else
2448                         {
2449                                 /* caller absolute, limit fractional; use caller's value */
2450                         }
2451                 }
2452                 else if (tuple_fraction > 0.0)
2453                 {
2454                         if (limit_fraction >= 1.0)
2455                         {
2456                                 /* caller fractional, limit absolute; use limit */
2457                                 tuple_fraction = limit_fraction;
2458                         }
2459                         else
2460                         {
2461                                 /* both fractional */
2462                                 tuple_fraction = Min(tuple_fraction, limit_fraction);
2463                         }
2464                 }
2465                 else
2466                 {
2467                         /* no info from caller, just use limit */
2468                         tuple_fraction = limit_fraction;
2469                 }
2470         }
2471         else if (*offset_est != 0 && tuple_fraction > 0.0)
2472         {
2473                 /*
2474                  * We have an OFFSET but no LIMIT.  This acts entirely differently
2475                  * from the LIMIT case: here, we need to increase rather than decrease
2476                  * the caller's tuple_fraction, because the OFFSET acts to cause more
2477                  * tuples to be fetched instead of fewer.  This only matters if we got
2478                  * a tuple_fraction > 0, however.
2479                  *
2480                  * As above, use 10% if OFFSET is present but unestimatable.
2481                  */
2482                 if (*offset_est < 0)
2483                         limit_fraction = 0.10;
2484                 else
2485                         limit_fraction = (double) *offset_est;
2486
2487                 /*
2488                  * If we have absolute counts from both caller and OFFSET, add them
2489                  * together; likewise if they are both fractional.  If one is
2490                  * fractional and the other absolute, we want to take the larger, and
2491                  * we heuristically assume that's the fractional one.
2492                  */
2493                 if (tuple_fraction >= 1.0)
2494                 {
2495                         if (limit_fraction >= 1.0)
2496                         {
2497                                 /* both absolute, so add them together */
2498                                 tuple_fraction += limit_fraction;
2499                         }
2500                         else
2501                         {
2502                                 /* caller absolute, limit fractional; use limit */
2503                                 tuple_fraction = limit_fraction;
2504                         }
2505                 }
2506                 else
2507                 {
2508                         if (limit_fraction >= 1.0)
2509                         {
2510                                 /* caller fractional, limit absolute; use caller's value */
2511                         }
2512                         else
2513                         {
2514                                 /* both fractional, so add them together */
2515                                 tuple_fraction += limit_fraction;
2516                                 if (tuple_fraction >= 1.0)
2517                                         tuple_fraction = 0.0;           /* assume fetch all */
2518                         }
2519                 }
2520         }
2521
2522         return tuple_fraction;
2523 }
2524
2525 /*
2526  * limit_needed - do we actually need a Limit plan node?
2527  *
2528  * If we have constant-zero OFFSET and constant-null LIMIT, we can skip adding
2529  * a Limit node.  This is worth checking for because "OFFSET 0" is a common
2530  * locution for an optimization fence.  (Because other places in the planner
2531  * merely check whether parse->limitOffset isn't NULL, it will still work as
2532  * an optimization fence --- we're just suppressing unnecessary run-time
2533  * overhead.)
2534  *
2535  * This might look like it could be merged into preprocess_limit, but there's
2536  * a key distinction: here we need hard constants in OFFSET/LIMIT, whereas
2537  * in preprocess_limit it's good enough to consider estimated values.
2538  */
2539 static bool
2540 limit_needed(Query *parse)
2541 {
2542         Node       *node;
2543
2544         node = parse->limitCount;
2545         if (node)
2546         {
2547                 if (IsA(node, Const))
2548                 {
2549                         /* NULL indicates LIMIT ALL, ie, no limit */
2550                         if (!((Const *) node)->constisnull)
2551                                 return true;    /* LIMIT with a constant value */
2552                 }
2553                 else
2554                         return true;            /* non-constant LIMIT */
2555         }
2556
2557         node = parse->limitOffset;
2558         if (node)
2559         {
2560                 if (IsA(node, Const))
2561                 {
2562                         /* Treat NULL as no offset; the executor would too */
2563                         if (!((Const *) node)->constisnull)
2564                         {
2565                                 int64           offset = DatumGetInt64(((Const *) node)->constvalue);
2566
2567                                 if (offset != 0)
2568                                         return true;    /* OFFSET with a nonzero value */
2569                         }
2570                 }
2571                 else
2572                         return true;            /* non-constant OFFSET */
2573         }
2574
2575         return false;                           /* don't need a Limit plan node */
2576 }
2577
2578
2579 /*
2580  * remove_useless_groupby_columns
2581  *              Remove any columns in the GROUP BY clause that are redundant due to
2582  *              being functionally dependent on other GROUP BY columns.
2583  *
2584  * Since some other DBMSes do not allow references to ungrouped columns, it's
2585  * not unusual to find all columns listed in GROUP BY even though listing the
2586  * primary-key columns would be sufficient.  Deleting such excess columns
2587  * avoids redundant sorting work, so it's worth doing.  When we do this, we
2588  * must mark the plan as dependent on the pkey constraint (compare the
2589  * parser's check_ungrouped_columns() and check_functional_grouping()).
2590  *
2591  * In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
2592  * index as the determining columns.  But as with check_functional_grouping(),
2593  * there's currently no way to represent dependency on a NOT NULL constraint,
2594  * so we consider only the pkey for now.
2595  */
2596 static void
2597 remove_useless_groupby_columns(PlannerInfo *root)
2598 {
2599         Query      *parse = root->parse;
2600         Bitmapset **groupbyattnos;
2601         Bitmapset **surplusvars;
2602         ListCell   *lc;
2603         int                     relid;
2604
2605         /* No chance to do anything if there are less than two GROUP BY items */
2606         if (list_length(parse->groupClause) < 2)
2607                 return;
2608
2609         /* Don't fiddle with the GROUP BY clause if the query has grouping sets */
2610         if (parse->groupingSets)
2611                 return;
2612
2613         /*
2614          * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
2615          * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
2616          * that are GROUP BY items.
2617          */
2618         groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2619                                                                                    (list_length(parse->rtable) + 1));
2620         foreach(lc, parse->groupClause)
2621         {
2622                 SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2623                 TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2624                 Var                *var = (Var *) tle->expr;
2625
2626                 /*
2627                  * Ignore non-Vars and Vars from other query levels.
2628                  *
2629                  * XXX in principle, stable expressions containing Vars could also be
2630                  * removed, if all the Vars are functionally dependent on other GROUP
2631                  * BY items.  But it's not clear that such cases occur often enough to
2632                  * be worth troubling over.
2633                  */
2634                 if (!IsA(var, Var) ||
2635                         var->varlevelsup > 0)
2636                         continue;
2637
2638                 /* OK, remember we have this Var */
2639                 relid = var->varno;
2640                 Assert(relid <= list_length(parse->rtable));
2641                 groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
2642                                                  var->varattno - FirstLowInvalidHeapAttributeNumber);
2643         }
2644
2645         /*
2646          * Consider each relation and see if it is possible to remove some of its
2647          * Vars from GROUP BY.  For simplicity and speed, we do the actual removal
2648          * in a separate pass.  Here, we just fill surplusvars[k] with a bitmapset
2649          * of the column attnos of RTE k that are removable GROUP BY items.
2650          */
2651         surplusvars = NULL;                     /* don't allocate array unless required */
2652         relid = 0;
2653         foreach(lc, parse->rtable)
2654         {
2655                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
2656                 Bitmapset  *relattnos;
2657                 Bitmapset  *pkattnos;
2658                 Oid                     constraintOid;
2659
2660                 relid++;
2661
2662                 /* Only plain relations could have primary-key constraints */
2663                 if (rte->rtekind != RTE_RELATION)
2664                         continue;
2665
2666                 /* Nothing to do unless this rel has multiple Vars in GROUP BY */
2667                 relattnos = groupbyattnos[relid];
2668                 if (bms_membership(relattnos) != BMS_MULTIPLE)
2669                         continue;
2670
2671                 /*
2672                  * Can't remove any columns for this rel if there is no suitable
2673                  * (i.e., nondeferrable) primary key constraint.
2674                  */
2675                 pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
2676                 if (pkattnos == NULL)
2677                         continue;
2678
2679                 /*
2680                  * If the primary key is a proper subset of relattnos then we have
2681                  * some items in the GROUP BY that can be removed.
2682                  */
2683                 if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
2684                 {
2685                         /*
2686                          * To easily remember whether we've found anything to do, we don't
2687                          * allocate the surplusvars[] array until we find something.
2688                          */
2689                         if (surplusvars == NULL)
2690                                 surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
2691                                                                                    (list_length(parse->rtable) + 1));
2692
2693                         /* Remember the attnos of the removable columns */
2694                         surplusvars[relid] = bms_difference(relattnos, pkattnos);
2695
2696                         /* Also, mark the resulting plan as dependent on this constraint */
2697                         parse->constraintDeps = lappend_oid(parse->constraintDeps,
2698                                                                                                 constraintOid);
2699                 }
2700         }
2701
2702         /*
2703          * If we found any surplus Vars, build a new GROUP BY clause without them.
2704          * (Note: this may leave some TLEs with unreferenced ressortgroupref
2705          * markings, but that's harmless.)
2706          */
2707         if (surplusvars != NULL)
2708         {
2709                 List       *new_groupby = NIL;
2710
2711                 foreach(lc, parse->groupClause)
2712                 {
2713                         SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
2714                         TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
2715                         Var                *var = (Var *) tle->expr;
2716
2717                         /*
2718                          * New list must include non-Vars, outer Vars, and anything not
2719                          * marked as surplus.
2720                          */
2721                         if (!IsA(var, Var) ||
2722                                 var->varlevelsup > 0 ||
2723                         !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
2724                                                    surplusvars[var->varno]))
2725                                 new_groupby = lappend(new_groupby, sgc);
2726                 }
2727
2728                 parse->groupClause = new_groupby;
2729         }
2730 }
2731
2732 /*
2733  * preprocess_groupclause - do preparatory work on GROUP BY clause
2734  *
2735  * The idea here is to adjust the ordering of the GROUP BY elements
2736  * (which in itself is semantically insignificant) to match ORDER BY,
2737  * thereby allowing a single sort operation to both implement the ORDER BY
2738  * requirement and set up for a Unique step that implements GROUP BY.
2739  *
2740  * In principle it might be interesting to consider other orderings of the
2741  * GROUP BY elements, which could match the sort ordering of other
2742  * possible plans (eg an indexscan) and thereby reduce cost.  We don't
2743  * bother with that, though.  Hashed grouping will frequently win anyway.
2744  *
2745  * Note: we need no comparable processing of the distinctClause because
2746  * the parser already enforced that that matches ORDER BY.
2747  *
2748  * For grouping sets, the order of items is instead forced to agree with that
2749  * of the grouping set (and items not in the grouping set are skipped). The
2750  * work of sorting the order of grouping set elements to match the ORDER BY if
2751  * possible is done elsewhere.
2752  */
2753 static List *
2754 preprocess_groupclause(PlannerInfo *root, List *force)
2755 {
2756         Query      *parse = root->parse;
2757         List       *new_groupclause = NIL;
2758         bool            partial_match;
2759         ListCell   *sl;
2760         ListCell   *gl;
2761
2762         /* For grouping sets, we need to force the ordering */
2763         if (force)
2764         {
2765                 foreach(sl, force)
2766                 {
2767                         Index           ref = lfirst_int(sl);
2768                         SortGroupClause *cl = get_sortgroupref_clause(ref, parse->groupClause);
2769
2770                         new_groupclause = lappend(new_groupclause, cl);
2771                 }
2772
2773                 return new_groupclause;
2774         }
2775
2776         /* If no ORDER BY, nothing useful to do here */
2777         if (parse->sortClause == NIL)
2778                 return parse->groupClause;
2779
2780         /*
2781          * Scan the ORDER BY clause and construct a list of matching GROUP BY
2782          * items, but only as far as we can make a matching prefix.
2783          *
2784          * This code assumes that the sortClause contains no duplicate items.
2785          */
2786         foreach(sl, parse->sortClause)
2787         {
2788                 SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
2789
2790                 foreach(gl, parse->groupClause)
2791                 {
2792                         SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2793
2794                         if (equal(gc, sc))
2795                         {
2796                                 new_groupclause = lappend(new_groupclause, gc);
2797                                 break;
2798                         }
2799                 }
2800                 if (gl == NULL)
2801                         break;                          /* no match, so stop scanning */
2802         }
2803
2804         /* Did we match all of the ORDER BY list, or just some of it? */
2805         partial_match = (sl != NULL);
2806
2807         /* If no match at all, no point in reordering GROUP BY */
2808         if (new_groupclause == NIL)
2809                 return parse->groupClause;
2810
2811         /*
2812          * Add any remaining GROUP BY items to the new list, but only if we were
2813          * able to make a complete match.  In other words, we only rearrange the
2814          * GROUP BY list if the result is that one list is a prefix of the other
2815          * --- otherwise there's no possibility of a common sort.  Also, give up
2816          * if there are any non-sortable GROUP BY items, since then there's no
2817          * hope anyway.
2818          */
2819         foreach(gl, parse->groupClause)
2820         {
2821                 SortGroupClause *gc = (SortGroupClause *) lfirst(gl);
2822
2823                 if (list_member_ptr(new_groupclause, gc))
2824                         continue;                       /* it matched an ORDER BY item */
2825                 if (partial_match)
2826                         return parse->groupClause;      /* give up, no common sort possible */
2827                 if (!OidIsValid(gc->sortop))
2828                         return parse->groupClause;      /* give up, GROUP BY can't be sorted */
2829                 new_groupclause = lappend(new_groupclause, gc);
2830         }
2831
2832         /* Success --- install the rearranged GROUP BY list */
2833         Assert(list_length(parse->groupClause) == list_length(new_groupclause));
2834         return new_groupclause;
2835 }
2836
2837 /*
2838  * Extract lists of grouping sets that can be implemented using a single
2839  * rollup-type aggregate pass each. Returns a list of lists of grouping sets.
2840  *
2841  * Input must be sorted with smallest sets first. Result has each sublist
2842  * sorted with smallest sets first.
2843  *
2844  * We want to produce the absolute minimum possible number of lists here to
2845  * avoid excess sorts. Fortunately, there is an algorithm for this; the problem
2846  * of finding the minimal partition of a partially-ordered set into chains
2847  * (which is what we need, taking the list of grouping sets as a poset ordered
2848  * by set inclusion) can be mapped to the problem of finding the maximum
2849  * cardinality matching on a bipartite graph, which is solvable in polynomial
2850  * time with a worst case of no worse than O(n^2.5) and usually much
2851  * better. Since our N is at most 4096, we don't need to consider fallbacks to
2852  * heuristic or approximate methods.  (Planning time for a 12-d cube is under
2853  * half a second on my modest system even with optimization off and assertions
2854  * on.)
2855  */
2856 static List *
2857 extract_rollup_sets(List *groupingSets)
2858 {
2859         int                     num_sets_raw = list_length(groupingSets);
2860         int                     num_empty = 0;
2861         int                     num_sets = 0;   /* distinct sets */
2862         int                     num_chains = 0;
2863         List       *result = NIL;
2864         List      **results;
2865         List      **orig_sets;
2866         Bitmapset **set_masks;
2867         int                *chains;
2868         short     **adjacency;
2869         short      *adjacency_buf;
2870         BipartiteMatchState *state;
2871         int                     i;
2872         int                     j;
2873         int                     j_size;
2874         ListCell   *lc1 = list_head(groupingSets);
2875         ListCell   *lc;
2876
2877         /*
2878          * Start by stripping out empty sets.  The algorithm doesn't require this,
2879          * but the planner currently needs all empty sets to be returned in the
2880          * first list, so we strip them here and add them back after.
2881          */
2882         while (lc1 && lfirst(lc1) == NIL)
2883         {
2884                 ++num_empty;
2885                 lc1 = lnext(lc1);
2886         }
2887
2888         /* bail out now if it turns out that all we had were empty sets. */
2889         if (!lc1)
2890                 return list_make1(groupingSets);
2891
2892         /*----------
2893          * We don't strictly need to remove duplicate sets here, but if we don't,
2894          * they tend to become scattered through the result, which is a bit
2895          * confusing (and irritating if we ever decide to optimize them out).
2896          * So we remove them here and add them back after.
2897          *
2898          * For each non-duplicate set, we fill in the following:
2899          *
2900          * orig_sets[i] = list of the original set lists
2901          * set_masks[i] = bitmapset for testing inclusion
2902          * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices
2903          *
2904          * chains[i] will be the result group this set is assigned to.
2905          *
2906          * We index all of these from 1 rather than 0 because it is convenient
2907          * to leave 0 free for the NIL node in the graph algorithm.
2908          *----------
2909          */
2910         orig_sets = palloc0((num_sets_raw + 1) * sizeof(List *));
2911         set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *));
2912         adjacency = palloc0((num_sets_raw + 1) * sizeof(short *));
2913         adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short));
2914
2915         j_size = 0;
2916         j = 0;
2917         i = 1;
2918
2919         for_each_cell(lc, lc1)
2920         {
2921                 List       *candidate = lfirst(lc);
2922                 Bitmapset  *candidate_set = NULL;
2923                 ListCell   *lc2;
2924                 int                     dup_of = 0;
2925
2926                 foreach(lc2, candidate)
2927                 {
2928                         candidate_set = bms_add_member(candidate_set, lfirst_int(lc2));
2929                 }
2930
2931                 /* we can only be a dup if we're the same length as a previous set */
2932                 if (j_size == list_length(candidate))
2933                 {
2934                         int                     k;
2935
2936                         for (k = j; k < i; ++k)
2937                         {
2938                                 if (bms_equal(set_masks[k], candidate_set))
2939                                 {
2940                                         dup_of = k;
2941                                         break;
2942                                 }
2943                         }
2944                 }
2945                 else if (j_size < list_length(candidate))
2946                 {
2947                         j_size = list_length(candidate);
2948                         j = i;
2949                 }
2950
2951                 if (dup_of > 0)
2952                 {
2953                         orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate);
2954                         bms_free(candidate_set);
2955                 }
2956                 else
2957                 {
2958                         int                     k;
2959                         int                     n_adj = 0;
2960
2961                         orig_sets[i] = list_make1(candidate);
2962                         set_masks[i] = candidate_set;
2963
2964                         /* fill in adjacency list; no need to compare equal-size sets */
2965
2966                         for (k = j - 1; k > 0; --k)
2967                         {
2968                                 if (bms_is_subset(set_masks[k], candidate_set))
2969                                         adjacency_buf[++n_adj] = k;
2970                         }
2971
2972                         if (n_adj > 0)
2973                         {
2974                                 adjacency_buf[0] = n_adj;
2975                                 adjacency[i] = palloc((n_adj + 1) * sizeof(short));
2976                                 memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short));
2977                         }
2978                         else
2979                                 adjacency[i] = NULL;
2980
2981                         ++i;
2982                 }
2983         }
2984
2985         num_sets = i - 1;
2986
2987         /*
2988          * Apply the graph matching algorithm to do the work.
2989          */
2990         state = BipartiteMatch(num_sets, num_sets, adjacency);
2991
2992         /*
2993          * Now, the state->pair* fields have the info we need to assign sets to
2994          * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or
2995          * pair_vu[v] = u (both will be true, but we check both so that we can do
2996          * it in one pass)
2997          */
2998         chains = palloc0((num_sets + 1) * sizeof(int));
2999
3000         for (i = 1; i <= num_sets; ++i)
3001         {
3002                 int                     u = state->pair_vu[i];
3003                 int                     v = state->pair_uv[i];
3004
3005                 if (u > 0 && u < i)
3006                         chains[i] = chains[u];
3007                 else if (v > 0 && v < i)
3008                         chains[i] = chains[v];
3009                 else
3010                         chains[i] = ++num_chains;
3011         }
3012
3013         /* build result lists. */
3014         results = palloc0((num_chains + 1) * sizeof(List *));
3015
3016         for (i = 1; i <= num_sets; ++i)
3017         {
3018                 int                     c = chains[i];
3019
3020                 Assert(c > 0);
3021
3022                 results[c] = list_concat(results[c], orig_sets[i]);
3023         }
3024
3025         /* push any empty sets back on the first list. */
3026         while (num_empty-- > 0)
3027                 results[1] = lcons(NIL, results[1]);
3028
3029         /* make result list */
3030         for (i = 1; i <= num_chains; ++i)
3031                 result = lappend(result, results[i]);
3032
3033         /*
3034          * Free all the things.
3035          *
3036          * (This is over-fussy for small sets but for large sets we could have
3037          * tied up a nontrivial amount of memory.)
3038          */
3039         BipartiteMatchFree(state);
3040         pfree(results);
3041         pfree(chains);
3042         for (i = 1; i <= num_sets; ++i)
3043                 if (adjacency[i])
3044                         pfree(adjacency[i]);
3045         pfree(adjacency);
3046         pfree(adjacency_buf);
3047         pfree(orig_sets);
3048         for (i = 1; i <= num_sets; ++i)
3049                 bms_free(set_masks[i]);
3050         pfree(set_masks);
3051
3052         return result;
3053 }
3054
3055 /*
3056  * Reorder the elements of a list of grouping sets such that they have correct
3057  * prefix relationships.
3058  *
3059  * The input must be ordered with smallest sets first; the result is returned
3060  * with largest sets first.  Note that the result shares no list substructure
3061  * with the input, so it's safe for the caller to modify it later.
3062  *
3063  * If we're passed in a sortclause, we follow its order of columns to the
3064  * extent possible, to minimize the chance that we add unnecessary sorts.
3065  * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a
3066  * gets implemented in one pass.)
3067  */
3068 static List *
3069 reorder_grouping_sets(List *groupingsets, List *sortclause)
3070 {
3071         ListCell   *lc;
3072         ListCell   *lc2;
3073         List       *previous = NIL;
3074         List       *result = NIL;
3075
3076         foreach(lc, groupingsets)
3077         {
3078                 List       *candidate = lfirst(lc);
3079                 List       *new_elems = list_difference_int(candidate, previous);
3080
3081                 if (list_length(new_elems) > 0)
3082                 {
3083                         while (list_length(sortclause) > list_length(previous))
3084                         {
3085                                 SortGroupClause *sc = list_nth(sortclause, list_length(previous));
3086                                 int                     ref = sc->tleSortGroupRef;
3087
3088                                 if (list_member_int(new_elems, ref))
3089                                 {
3090                                         previous = lappend_int(previous, ref);
3091                                         new_elems = list_delete_int(new_elems, ref);
3092                                 }
3093                                 else
3094                                 {
3095                                         /* diverged from the sortclause; give up on it */
3096                                         sortclause = NIL;
3097                                         break;
3098                                 }
3099                         }
3100
3101                         foreach(lc2, new_elems)
3102                         {
3103                                 previous = lappend_int(previous, lfirst_int(lc2));
3104                         }
3105                 }
3106
3107                 result = lcons(list_copy(previous), result);
3108                 list_free(new_elems);
3109         }
3110
3111         list_free(previous);
3112
3113         return result;
3114 }
3115
3116 /*
3117  * Compute query_pathkeys and other pathkeys during plan generation
3118  */
3119 static void
3120 standard_qp_callback(PlannerInfo *root, void *extra)
3121 {
3122         Query      *parse = root->parse;
3123         standard_qp_extra *qp_extra = (standard_qp_extra *) extra;
3124         List       *tlist = qp_extra->tlist;
3125         List       *activeWindows = qp_extra->activeWindows;
3126
3127         /*
3128          * Calculate pathkeys that represent grouping/ordering requirements.  The
3129          * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not
3130          * be, in which case we just leave their pathkeys empty.
3131          */
3132         if (qp_extra->groupClause &&
3133                 grouping_is_sortable(qp_extra->groupClause))
3134                 root->group_pathkeys =
3135                         make_pathkeys_for_sortclauses(root,
3136                                                                                   qp_extra->groupClause,
3137                                                                                   tlist);
3138         else
3139                 root->group_pathkeys = NIL;
3140
3141         /* We consider only the first (bottom) window in pathkeys logic */
3142         if (activeWindows != NIL)
3143         {
3144                 WindowClause *wc = (WindowClause *) linitial(activeWindows);
3145
3146                 root->window_pathkeys = make_pathkeys_for_window(root,
3147                                                                                                                  wc,
3148                                                                                                                  tlist);
3149         }
3150         else
3151                 root->window_pathkeys = NIL;
3152
3153         if (parse->distinctClause &&
3154                 grouping_is_sortable(parse->distinctClause))
3155                 root->distinct_pathkeys =
3156                         make_pathkeys_for_sortclauses(root,
3157                                                                                   parse->distinctClause,
3158                                                                                   tlist);
3159         else
3160                 root->distinct_pathkeys = NIL;
3161
3162         root->sort_pathkeys =
3163                 make_pathkeys_for_sortclauses(root,
3164                                                                           parse->sortClause,
3165                                                                           tlist);
3166
3167         /*
3168          * Figure out whether we want a sorted result from query_planner.
3169          *
3170          * If we have a sortable GROUP BY clause, then we want a result sorted
3171          * properly for grouping.  Otherwise, if we have window functions to
3172          * evaluate, we try to sort for the first window.  Otherwise, if there's a
3173          * sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
3174          * we try to produce output that's sufficiently well sorted for the
3175          * DISTINCT.  Otherwise, if there is an ORDER BY clause, we want to sort
3176          * by the ORDER BY clause.
3177          *
3178          * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
3179          * of GROUP BY, it would be tempting to request sort by ORDER BY --- but
3180          * that might just leave us failing to exploit an available sort order at
3181          * all.  Needs more thought.  The choice for DISTINCT versus ORDER BY is
3182          * much easier, since we know that the parser ensured that one is a
3183          * superset of the other.
3184          */
3185         if (root->group_pathkeys)
3186                 root->query_pathkeys = root->group_pathkeys;
3187         else if (root->window_pathkeys)
3188                 root->query_pathkeys = root->window_pathkeys;
3189         else if (list_length(root->distinct_pathkeys) >
3190                          list_length(root->sort_pathkeys))
3191                 root->query_pathkeys = root->distinct_pathkeys;
3192         else if (root->sort_pathkeys)
3193                 root->query_pathkeys = root->sort_pathkeys;
3194         else
3195                 root->query_pathkeys = NIL;
3196 }
3197
3198 /*
3199  * Estimate number of groups produced by grouping clauses (1 if not grouping)
3200  *
3201  * path_rows: number of output rows from scan/join step
3202  * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
3203  * rollup_groupclauses: list of grouping clauses for grouping sets,
3204  *              or NIL if not doing grouping sets
3205  */
3206 static double
3207 get_number_of_groups(PlannerInfo *root,
3208                                          double path_rows,
3209                                          List *rollup_lists,
3210                                          List *rollup_groupclauses)
3211 {
3212         Query      *parse = root->parse;
3213         double          dNumGroups;
3214
3215         if (parse->groupClause)
3216         {
3217                 List       *groupExprs;
3218
3219                 if (parse->groupingSets)
3220                 {
3221                         /* Add up the estimates for each grouping set */
3222                         ListCell   *lc,
3223                                            *lc2;
3224
3225                         dNumGroups = 0;
3226                         forboth(lc, rollup_groupclauses, lc2, rollup_lists)
3227                         {
3228                                 List       *groupClause = (List *) lfirst(lc);
3229                                 List       *gsets = (List *) lfirst(lc2);
3230                                 ListCell   *lc3;
3231
3232                                 groupExprs = get_sortgrouplist_exprs(groupClause,
3233                                                                                                          parse->targetList);
3234
3235                                 foreach(lc3, gsets)
3236                                 {
3237                                         List       *gset = (List *) lfirst(lc3);
3238
3239                                         dNumGroups += estimate_num_groups(root,
3240                                                                                                           groupExprs,
3241                                                                                                           path_rows,
3242                                                                                                           &gset);
3243                                 }
3244                         }
3245                 }
3246                 else
3247                 {
3248                         /* Plain GROUP BY */
3249                         groupExprs = get_sortgrouplist_exprs(parse->groupClause,
3250                                                                                                  parse->targetList);
3251
3252                         dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
3253                                                                                          NULL);
3254                 }
3255         }
3256         else if (parse->groupingSets)
3257         {
3258                 /* Empty grouping sets ... one result row for each one */
3259                 dNumGroups = list_length(parse->groupingSets);
3260         }
3261         else if (parse->hasAggs || root->hasHavingQual)
3262         {
3263                 /* Plain aggregation, one result row */
3264                 dNumGroups = 1;
3265         }
3266         else
3267         {
3268                 /* Not grouping */
3269                 dNumGroups = 1;
3270         }
3271
3272         return dNumGroups;
3273 }
3274
3275 /*
3276  * estimate_hashagg_tablesize
3277  *        estimate the number of bytes that a hash aggregate hashtable will
3278  *        require based on the agg_costs, path width and dNumGroups.
3279  */
3280 static Size
3281 estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
3282                                                    double dNumGroups)
3283 {
3284         Size            hashentrysize;
3285
3286         /* Estimate per-hash-entry space at tuple width... */
3287         hashentrysize = MAXALIGN(path->pathtarget->width) +
3288                 MAXALIGN(SizeofMinimalTupleHeader);
3289
3290         /* plus space for pass-by-ref transition values... */
3291         hashentrysize += agg_costs->transitionSpace;
3292         /* plus the per-hash-entry overhead */
3293         hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3294
3295         /*
3296          * Note that this disregards the effect of fill-factor and growth policy
3297          * of the hash-table. That's probably ok, given default the default
3298          * fill-factor is relatively high. It'd be hard to meaningfully factor in
3299          * "double-in-size" growth policies here.
3300          */
3301         return hashentrysize * dNumGroups;
3302 }
3303
3304 /*
3305  * create_grouping_paths
3306  *
3307  * Build a new upperrel containing Paths for grouping and/or aggregation.
3308  *
3309  * input_rel: contains the source-data Paths
3310  * target: the pathtarget for the result Paths to compute
3311  * agg_costs: cost info about all aggregates in query (in AGGSPLIT_SIMPLE mode)
3312  * rollup_lists: list of grouping sets, or NIL if not doing grouping sets
3313  * rollup_groupclauses: list of grouping clauses for grouping sets,
3314  *              or NIL if not doing grouping sets
3315  *
3316  * Note: all Paths in input_rel are expected to return the target computed
3317  * by make_group_input_target.
3318  *
3319  * We need to consider sorted and hashed aggregation in the same function,
3320  * because otherwise (1) it would be harder to throw an appropriate error
3321  * message if neither way works, and (2) we should not allow hashtable size
3322  * considerations to dissuade us from using hashing if sorting is not possible.
3323  */
3324 static RelOptInfo *
3325 create_grouping_paths(PlannerInfo *root,
3326                                           RelOptInfo *input_rel,
3327                                           PathTarget *target,
3328                                           const AggClauseCosts *agg_costs,
3329                                           List *rollup_lists,
3330                                           List *rollup_groupclauses)
3331 {
3332         Query      *parse = root->parse;
3333         Path       *cheapest_path = input_rel->cheapest_total_path;
3334         RelOptInfo *grouped_rel;
3335         PathTarget *partial_grouping_target = NULL;
3336         AggClauseCosts agg_partial_costs;       /* parallel only */
3337         AggClauseCosts agg_final_costs;         /* parallel only */
3338         Size            hashaggtablesize;
3339         double          dNumGroups;
3340         double          dNumPartialGroups = 0;
3341         bool            can_hash;
3342         bool            can_sort;
3343         bool            try_parallel_aggregation;
3344
3345         ListCell   *lc;
3346
3347         /* For now, do all work in the (GROUP_AGG, NULL) upperrel */
3348         grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
3349
3350         /*
3351          * If the input relation is not parallel-safe, then the grouped relation
3352          * can't be parallel-safe, either.  Otherwise, it's parallel-safe if the
3353          * target list and HAVING quals are parallel-safe.
3354          */
3355         if (input_rel->consider_parallel &&
3356                 is_parallel_safe(root, (Node *) target->exprs) &&
3357                 is_parallel_safe(root, (Node *) parse->havingQual))
3358                 grouped_rel->consider_parallel = true;
3359
3360         /*
3361          * If the input rel belongs to a single FDW, so does the grouped rel.
3362          */
3363         grouped_rel->serverid = input_rel->serverid;
3364         grouped_rel->userid = input_rel->userid;
3365         grouped_rel->useridiscurrent = input_rel->useridiscurrent;
3366         grouped_rel->fdwroutine = input_rel->fdwroutine;
3367
3368         /*
3369          * Check for degenerate grouping.
3370          */
3371         if ((root->hasHavingQual || parse->groupingSets) &&
3372                 !parse->hasAggs && parse->groupClause == NIL)
3373         {
3374                 /*
3375                  * We have a HAVING qual and/or grouping sets, but no aggregates and
3376                  * no GROUP BY (which implies that the grouping sets are all empty).
3377                  *
3378                  * This is a degenerate case in which we are supposed to emit either
3379                  * zero or one row for each grouping set depending on whether HAVING
3380                  * succeeds.  Furthermore, there cannot be any variables in either
3381                  * HAVING or the targetlist, so we actually do not need the FROM table
3382                  * at all!      We can just throw away the plan-so-far and generate a
3383                  * Result node.  This is a sufficiently unusual corner case that it's
3384                  * not worth contorting the structure of this module to avoid having
3385                  * to generate the earlier paths in the first place.
3386                  */
3387                 int                     nrows = list_length(parse->groupingSets);
3388                 Path       *path;
3389
3390                 if (nrows > 1)
3391                 {
3392                         /*
3393                          * Doesn't seem worthwhile writing code to cons up a
3394                          * generate_series or a values scan to emit multiple rows. Instead
3395                          * just make N clones and append them.  (With a volatile HAVING
3396                          * clause, this means you might get between 0 and N output rows.
3397                          * Offhand I think that's desired.)
3398                          */
3399                         List       *paths = NIL;
3400
3401                         while (--nrows >= 0)
3402                         {
3403                                 path = (Path *)
3404                                         create_result_path(root, grouped_rel,
3405                                                                            target,
3406                                                                            (List *) parse->havingQual);
3407                                 paths = lappend(paths, path);
3408                         }
3409                         path = (Path *)
3410                                 create_append_path(grouped_rel,
3411                                                                    paths,
3412                                                                    NULL,
3413                                                                    0);
3414                         path->pathtarget = target;
3415                 }
3416                 else
3417                 {
3418                         /* No grouping sets, or just one, so one output row */
3419                         path = (Path *)
3420                                 create_result_path(root, grouped_rel,
3421                                                                    target,
3422                                                                    (List *) parse->havingQual);
3423                 }
3424
3425                 add_path(grouped_rel, path);
3426
3427                 /* No need to consider any other alternatives. */
3428                 set_cheapest(grouped_rel);
3429
3430                 return grouped_rel;
3431         }
3432
3433         /*
3434          * Estimate number of groups.
3435          */
3436         dNumGroups = get_number_of_groups(root,
3437                                                                           cheapest_path->rows,
3438                                                                           rollup_lists,
3439                                                                           rollup_groupclauses);
3440
3441         /*
3442          * Determine whether it's possible to perform sort-based implementations
3443          * of grouping.  (Note that if groupClause is empty,
3444          * grouping_is_sortable() is trivially true, and all the
3445          * pathkeys_contained_in() tests will succeed too, so that we'll consider
3446          * every surviving input path.)
3447          */
3448         can_sort = grouping_is_sortable(parse->groupClause);
3449
3450         /*
3451          * Determine whether we should consider hash-based implementations of
3452          * grouping.
3453          *
3454          * Hashed aggregation only applies if we're grouping.  We currently can't
3455          * hash if there are grouping sets, though.
3456          *
3457          * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
3458          * aggregates.  (Doing so would imply storing *all* the input values in
3459          * the hash table, and/or running many sorts in parallel, either of which
3460          * seems like a certain loser.)  We similarly don't support ordered-set
3461          * aggregates in hashed aggregation, but that case is also included in the
3462          * numOrderedAggs count.
3463          *
3464          * Note: grouping_is_hashable() is much more expensive to check than the
3465          * other gating conditions, so we want to do it last.
3466          */
3467         can_hash = (parse->groupClause != NIL &&
3468                                 parse->groupingSets == NIL &&
3469                                 agg_costs->numOrderedAggs == 0 &&
3470                                 grouping_is_hashable(parse->groupClause));
3471
3472         /*
3473          * If grouped_rel->consider_parallel is true, then paths that we generate
3474          * for this grouping relation could be run inside of a worker, but that
3475          * doesn't mean we can actually use the PartialAggregate/FinalizeAggregate
3476          * execution strategy.  Figure that out.
3477          */
3478         if (!grouped_rel->consider_parallel)
3479         {
3480                 /* Not even parallel-safe. */
3481                 try_parallel_aggregation = false;
3482         }
3483         else if (input_rel->partial_pathlist == NIL)
3484         {
3485                 /* Nothing to use as input for partial aggregate. */
3486                 try_parallel_aggregation = false;
3487         }
3488         else if (!parse->hasAggs && parse->groupClause == NIL)
3489         {
3490                 /*
3491                  * We don't know how to do parallel aggregation unless we have either
3492                  * some aggregates or a grouping clause.
3493                  */
3494                 try_parallel_aggregation = false;
3495         }
3496         else if (parse->groupingSets)
3497         {
3498                 /* We don't know how to do grouping sets in parallel. */
3499                 try_parallel_aggregation = false;
3500         }
3501         else if (agg_costs->hasNonPartial || agg_costs->hasNonSerial)
3502         {
3503                 /* Insufficient support for partial mode. */
3504                 try_parallel_aggregation = false;
3505         }
3506         else
3507         {
3508                 /* Everything looks good. */
3509                 try_parallel_aggregation = true;
3510         }
3511
3512         /*
3513          * Before generating paths for grouped_rel, we first generate any possible
3514          * partial paths; that way, later code can easily consider both parallel
3515          * and non-parallel approaches to grouping.  Note that the partial paths
3516          * we generate here are also partially aggregated, so simply pushing a
3517          * Gather node on top is insufficient to create a final path, as would be
3518          * the case for a scan/join rel.
3519          */
3520         if (try_parallel_aggregation)
3521         {
3522                 Path       *cheapest_partial_path = linitial(input_rel->partial_pathlist);
3523
3524                 /*
3525                  * Build target list for partial aggregate paths.  These paths cannot
3526                  * just emit the same tlist as regular aggregate paths, because (1) we
3527                  * must include Vars and Aggrefs needed in HAVING, which might not
3528                  * appear in the result tlist, and (2) the Aggrefs must be set in
3529                  * partial mode.
3530                  */
3531                 partial_grouping_target = make_partial_grouping_target(root, target);
3532
3533                 /* Estimate number of partial groups. */
3534                 dNumPartialGroups = get_number_of_groups(root,
3535                                                                                                  cheapest_partial_path->rows,
3536                                                                                                  NIL,
3537                                                                                                  NIL);
3538
3539                 /*
3540                  * Collect statistics about aggregates for estimating costs of
3541                  * performing aggregation in parallel.
3542                  */
3543                 MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3544                 MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3545                 if (parse->hasAggs)
3546                 {
3547                         /* partial phase */
3548                         get_agg_clause_costs(root, (Node *) partial_grouping_target->exprs,
3549                                                                  AGGSPLIT_INITIAL_SERIAL,
3550                                                                  &agg_partial_costs);
3551
3552                         /* final phase */
3553                         get_agg_clause_costs(root, (Node *) target->exprs,
3554                                                                  AGGSPLIT_FINAL_DESERIAL,
3555                                                                  &agg_final_costs);
3556                         get_agg_clause_costs(root, parse->havingQual,
3557                                                                  AGGSPLIT_FINAL_DESERIAL,
3558                                                                  &agg_final_costs);
3559                 }
3560
3561                 if (can_sort)
3562                 {
3563                         /* This was checked before setting try_parallel_aggregation */
3564                         Assert(parse->hasAggs || parse->groupClause);
3565
3566                         /*
3567                          * Use any available suitably-sorted path as input, and also
3568                          * consider sorting the cheapest partial path.
3569                          */
3570                         foreach(lc, input_rel->partial_pathlist)
3571                         {
3572                                 Path       *path = (Path *) lfirst(lc);
3573                                 bool            is_sorted;
3574
3575                                 is_sorted = pathkeys_contained_in(root->group_pathkeys,
3576                                                                                                   path->pathkeys);
3577                                 if (path == cheapest_partial_path || is_sorted)
3578                                 {
3579                                         /* Sort the cheapest partial path, if it isn't already */
3580                                         if (!is_sorted)
3581                                                 path = (Path *) create_sort_path(root,
3582                                                                                                                  grouped_rel,
3583                                                                                                                  path,
3584                                                                                                                  root->group_pathkeys,
3585                                                                                                                  -1.0);
3586
3587                                         if (parse->hasAggs)
3588                                                 add_partial_path(grouped_rel, (Path *)
3589                                                                                  create_agg_path(root,
3590                                                                                                                  grouped_rel,
3591                                                                                                                  path,
3592                                                                                                          partial_grouping_target,
3593                                                                  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3594                                                                                                          AGGSPLIT_INITIAL_SERIAL,
3595                                                                                                                  parse->groupClause,
3596                                                                                                                  NIL,
3597                                                                                                                  &agg_partial_costs,
3598                                                                                                                  dNumPartialGroups));
3599                                         else
3600                                                 add_partial_path(grouped_rel, (Path *)
3601                                                                                  create_group_path(root,
3602                                                                                                                    grouped_rel,
3603                                                                                                                    path,
3604                                                                                                          partial_grouping_target,
3605                                                                                                                    parse->groupClause,
3606                                                                                                                    NIL,
3607                                                                                                                  dNumPartialGroups));
3608                                 }
3609                         }
3610                 }
3611
3612                 if (can_hash)
3613                 {
3614                         /* Checked above */
3615                         Assert(parse->hasAggs || parse->groupClause);
3616
3617                         hashaggtablesize =
3618                                 estimate_hashagg_tablesize(cheapest_partial_path,
3619                                                                                    &agg_partial_costs,
3620                                                                                    dNumPartialGroups);
3621
3622                         /*
3623                          * Tentatively produce a partial HashAgg Path, depending on if it
3624                          * looks as if the hash table will fit in work_mem.
3625                          */
3626                         if (hashaggtablesize < work_mem * 1024L)
3627                         {
3628                                 add_partial_path(grouped_rel, (Path *)
3629                                                                  create_agg_path(root,
3630                                                                                                  grouped_rel,
3631                                                                                                  cheapest_partial_path,
3632                                                                                                  partial_grouping_target,
3633                                                                                                  AGG_HASHED,
3634                                                                                                  AGGSPLIT_INITIAL_SERIAL,
3635                                                                                                  parse->groupClause,
3636                                                                                                  NIL,
3637                                                                                                  &agg_partial_costs,
3638                                                                                                  dNumPartialGroups));
3639                         }
3640                 }
3641         }
3642
3643         /* Build final grouping paths */
3644         if (can_sort)
3645         {
3646                 /*
3647                  * Use any available suitably-sorted path as input, and also consider
3648                  * sorting the cheapest-total path.
3649                  */
3650                 foreach(lc, input_rel->pathlist)
3651                 {
3652                         Path       *path = (Path *) lfirst(lc);
3653                         bool            is_sorted;
3654
3655                         is_sorted = pathkeys_contained_in(root->group_pathkeys,
3656                                                                                           path->pathkeys);
3657                         if (path == cheapest_path || is_sorted)
3658                         {
3659                                 /* Sort the cheapest-total path if it isn't already sorted */
3660                                 if (!is_sorted)
3661                                         path = (Path *) create_sort_path(root,
3662                                                                                                          grouped_rel,
3663                                                                                                          path,
3664                                                                                                          root->group_pathkeys,
3665                                                                                                          -1.0);
3666
3667                                 /* Now decide what to stick atop it */
3668                                 if (parse->groupingSets)
3669                                 {
3670                                         /*
3671                                          * We have grouping sets, possibly with aggregation.  Make
3672                                          * a GroupingSetsPath.
3673                                          */
3674                                         add_path(grouped_rel, (Path *)
3675                                                          create_groupingsets_path(root,
3676                                                                                                           grouped_rel,
3677                                                                                                           path,
3678                                                                                                           target,
3679                                                                                                   (List *) parse->havingQual,
3680                                                                                                           rollup_lists,
3681                                                                                                           rollup_groupclauses,
3682                                                                                                           agg_costs,
3683                                                                                                           dNumGroups));
3684                                 }
3685                                 else if (parse->hasAggs)
3686                                 {
3687                                         /*
3688                                          * We have aggregation, possibly with plain GROUP BY. Make
3689                                          * an AggPath.
3690                                          */
3691                                         add_path(grouped_rel, (Path *)
3692                                                          create_agg_path(root,
3693                                                                                          grouped_rel,
3694                                                                                          path,
3695                                                                                          target,
3696                                                                  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3697                                                                                          AGGSPLIT_SIMPLE,
3698                                                                                          parse->groupClause,
3699                                                                                          (List *) parse->havingQual,
3700                                                                                          agg_costs,
3701                                                                                          dNumGroups));
3702                                 }
3703                                 else if (parse->groupClause)
3704                                 {
3705                                         /*
3706                                          * We have GROUP BY without aggregation or grouping sets.
3707                                          * Make a GroupPath.
3708                                          */
3709                                         add_path(grouped_rel, (Path *)
3710                                                          create_group_path(root,
3711                                                                                            grouped_rel,
3712                                                                                            path,
3713                                                                                            target,
3714                                                                                            parse->groupClause,
3715                                                                                            (List *) parse->havingQual,
3716                                                                                            dNumGroups));
3717                                 }
3718                                 else
3719                                 {
3720                                         /* Other cases should have been handled above */
3721                                         Assert(false);
3722                                 }
3723                         }
3724                 }
3725
3726                 /*
3727                  * Now generate a complete GroupAgg Path atop of the cheapest partial
3728                  * path. We need only bother with the cheapest path here, as the
3729                  * output of Gather is never sorted.
3730                  */
3731                 if (grouped_rel->partial_pathlist)
3732                 {
3733                         Path       *path = (Path *) linitial(grouped_rel->partial_pathlist);
3734                         double          total_groups = path->rows * path->parallel_workers;
3735
3736                         path = (Path *) create_gather_path(root,
3737                                                                                            grouped_rel,
3738                                                                                            path,
3739                                                                                            partial_grouping_target,
3740                                                                                            NULL,
3741                                                                                            &total_groups);
3742
3743                         /*
3744                          * Since Gather's output is always unsorted, we'll need to sort,
3745                          * unless there's no GROUP BY clause or a degenerate (constant)
3746                          * one, in which case there will only be a single group.
3747                          */
3748                         if (root->group_pathkeys)
3749                                 path = (Path *) create_sort_path(root,
3750                                                                                                  grouped_rel,
3751                                                                                                  path,
3752                                                                                                  root->group_pathkeys,
3753                                                                                                  -1.0);
3754
3755                         if (parse->hasAggs)
3756                                 add_path(grouped_rel, (Path *)
3757                                                  create_agg_path(root,
3758                                                                                  grouped_rel,
3759                                                                                  path,
3760                                                                                  target,
3761                                                                  parse->groupClause ? AGG_SORTED : AGG_PLAIN,
3762                                                                                  AGGSPLIT_FINAL_DESERIAL,
3763                                                                                  parse->groupClause,
3764                                                                                  (List *) parse->havingQual,
3765                                                                                  &agg_final_costs,
3766                                                                                  dNumGroups));
3767                         else
3768                                 add_path(grouped_rel, (Path *)
3769                                                  create_group_path(root,
3770                                                                                    grouped_rel,
3771                                                                                    path,
3772                                                                                    target,
3773                                                                                    parse->groupClause,
3774                                                                                    (List *) parse->havingQual,
3775                                                                                    dNumGroups));
3776                 }
3777         }
3778
3779         if (can_hash)
3780         {
3781                 hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
3782                                                                                                           agg_costs,
3783                                                                                                           dNumGroups);
3784
3785                 /*
3786                  * Provided that the estimated size of the hashtable does not exceed
3787                  * work_mem, we'll generate a HashAgg Path, although if we were unable
3788                  * to sort above, then we'd better generate a Path, so that we at
3789                  * least have one.
3790                  */
3791                 if (hashaggtablesize < work_mem * 1024L ||
3792                         grouped_rel->pathlist == NIL)
3793                 {
3794                         /*
3795                          * We just need an Agg over the cheapest-total input path, since
3796                          * input order won't matter.
3797                          */
3798                         add_path(grouped_rel, (Path *)
3799                                          create_agg_path(root, grouped_rel,
3800                                                                          cheapest_path,
3801                                                                          target,
3802                                                                          AGG_HASHED,
3803                                                                          AGGSPLIT_SIMPLE,
3804                                                                          parse->groupClause,
3805                                                                          (List *) parse->havingQual,
3806                                                                          agg_costs,
3807                                                                          dNumGroups));
3808                 }
3809
3810                 /*
3811                  * Generate a HashAgg Path atop of the cheapest partial path. Once
3812                  * again, we'll only do this if it looks as though the hash table
3813                  * won't exceed work_mem.
3814                  */
3815                 if (grouped_rel->partial_pathlist)
3816                 {
3817                         Path       *path = (Path *) linitial(grouped_rel->partial_pathlist);
3818
3819                         hashaggtablesize = estimate_hashagg_tablesize(path,
3820                                                                                                                   &agg_final_costs,
3821                                                                                                                   dNumGroups);
3822
3823                         if (hashaggtablesize < work_mem * 1024L)
3824                         {
3825                                 double          total_groups = path->rows * path->parallel_workers;
3826
3827                                 path = (Path *) create_gather_path(root,
3828                                                                                                    grouped_rel,
3829                                                                                                    path,
3830                                                                                                    partial_grouping_target,
3831                                                                                                    NULL,
3832                                                                                                    &total_groups);
3833
3834                                 add_path(grouped_rel, (Path *)
3835                                                  create_agg_path(root,
3836                                                                                  grouped_rel,
3837                                                                                  path,
3838                                                                                  target,
3839                                                                                  AGG_HASHED,
3840                                                                                  AGGSPLIT_FINAL_DESERIAL,
3841                                                                                  parse->groupClause,
3842                                                                                  (List *) parse->havingQual,
3843                                                                                  &agg_final_costs,
3844                                                                                  dNumGroups));
3845                         }
3846                 }
3847         }
3848
3849         /* Give a helpful error if we failed to find any implementation */
3850         if (grouped_rel->pathlist == NIL)
3851                 ereport(ERROR,
3852                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3853                                  errmsg("could not implement GROUP BY"),
3854                                  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
3855
3856         /*
3857          * If there is an FDW that's responsible for all baserels of the query,
3858          * let it consider adding ForeignPaths.
3859          */
3860         if (grouped_rel->fdwroutine &&
3861                 grouped_rel->fdwroutine->GetForeignUpperPaths)
3862                 grouped_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_GROUP_AGG,
3863                                                                                                           input_rel, grouped_rel);
3864
3865         /* Let extensions possibly add some more paths */
3866         if (create_upper_paths_hook)
3867                 (*create_upper_paths_hook) (root, UPPERREL_GROUP_AGG,
3868                                                                         input_rel, grouped_rel);
3869
3870         /* Now choose the best path(s) */
3871         set_cheapest(grouped_rel);
3872
3873         return grouped_rel;
3874 }
3875
3876 /*
3877  * create_window_paths
3878  *
3879  * Build a new upperrel containing Paths for window-function evaluation.
3880  *
3881  * input_rel: contains the source-data Paths
3882  * input_target: result of make_window_input_target
3883  * output_target: what the topmost WindowAggPath should return
3884  * tlist: query's target list (needed to look up pathkeys)
3885  * wflists: result of find_window_functions
3886  * activeWindows: result of select_active_windows
3887  *
3888  * Note: all Paths in input_rel are expected to return input_target.
3889  */
3890 static RelOptInfo *
3891 create_window_paths(PlannerInfo *root,
3892                                         RelOptInfo *input_rel,
3893                                         PathTarget *input_target,
3894                                         PathTarget *output_target,
3895                                         List *tlist,
3896                                         WindowFuncLists *wflists,
3897                                         List *activeWindows)
3898 {
3899         RelOptInfo *window_rel;
3900         ListCell   *lc;
3901
3902         /* For now, do all work in the (WINDOW, NULL) upperrel */
3903         window_rel = fetch_upper_rel(root, UPPERREL_WINDOW, NULL);
3904
3905         /*
3906          * If the input relation is not parallel-safe, then the window relation
3907          * can't be parallel-safe, either.  Otherwise, we need to examine the
3908          * target list and active windows for non-parallel-safe constructs.
3909          */
3910         if (input_rel->consider_parallel &&
3911                 is_parallel_safe(root, (Node *) output_target->exprs) &&
3912                 is_parallel_safe(root, (Node *) activeWindows))
3913                 window_rel->consider_parallel = true;
3914
3915         /*
3916          * If the input rel belongs to a single FDW, so does the window rel.
3917          */
3918         window_rel->serverid = input_rel->serverid;
3919         window_rel->userid = input_rel->userid;
3920         window_rel->useridiscurrent = input_rel->useridiscurrent;
3921         window_rel->fdwroutine = input_rel->fdwroutine;
3922
3923         /*
3924          * Consider computing window functions starting from the existing
3925          * cheapest-total path (which will likely require a sort) as well as any
3926          * existing paths that satisfy root->window_pathkeys (which won't).
3927          */
3928         foreach(lc, input_rel->pathlist)
3929         {
3930                 Path       *path = (Path *) lfirst(lc);
3931
3932                 if (path == input_rel->cheapest_total_path ||
3933                         pathkeys_contained_in(root->window_pathkeys, path->pathkeys))
3934                         create_one_window_path(root,
3935                                                                    window_rel,
3936                                                                    path,
3937                                                                    input_target,
3938                                                                    output_target,
3939                                                                    tlist,
3940                                                                    wflists,
3941                                                                    activeWindows);
3942         }
3943
3944         /*
3945          * If there is an FDW that's responsible for all baserels of the query,
3946          * let it consider adding ForeignPaths.
3947          */
3948         if (window_rel->fdwroutine &&
3949                 window_rel->fdwroutine->GetForeignUpperPaths)
3950                 window_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_WINDOW,
3951                                                                                                          input_rel, window_rel);
3952
3953         /* Let extensions possibly add some more paths */
3954         if (create_upper_paths_hook)
3955                 (*create_upper_paths_hook) (root, UPPERREL_WINDOW,
3956                                                                         input_rel, window_rel);
3957
3958         /* Now choose the best path(s) */
3959         set_cheapest(window_rel);
3960
3961         return window_rel;
3962 }
3963
3964 /*
3965  * Stack window-function implementation steps atop the given Path, and
3966  * add the result to window_rel.
3967  *
3968  * window_rel: upperrel to contain result
3969  * path: input Path to use (must return input_target)
3970  * input_target: result of make_window_input_target
3971  * output_target: what the topmost WindowAggPath should return
3972  * tlist: query's target list (needed to look up pathkeys)
3973  * wflists: result of find_window_functions
3974  * activeWindows: result of select_active_windows
3975  */
3976 static void
3977 create_one_window_path(PlannerInfo *root,
3978                                            RelOptInfo *window_rel,
3979                                            Path *path,
3980                                            PathTarget *input_target,
3981                                            PathTarget *output_target,
3982                                            List *tlist,
3983                                            WindowFuncLists *wflists,
3984                                            List *activeWindows)
3985 {
3986         PathTarget *window_target;
3987         ListCell   *l;
3988
3989         /*
3990          * Since each window clause could require a different sort order, we stack
3991          * up a WindowAgg node for each clause, with sort steps between them as
3992          * needed.  (We assume that select_active_windows chose a good order for
3993          * executing the clauses in.)
3994          *
3995          * input_target should contain all Vars and Aggs needed for the result.
3996          * (In some cases we wouldn't need to propagate all of these all the way
3997          * to the top, since they might only be needed as inputs to WindowFuncs.
3998          * It's probably not worth trying to optimize that though.)  It must also
3999          * contain all window partitioning and sorting expressions, to ensure
4000          * they're computed only once at the bottom of the stack (that's critical
4001          * for volatile functions).  As we climb up the stack, we'll add outputs
4002          * for the WindowFuncs computed at each level.
4003          */
4004         window_target = input_target;
4005
4006         foreach(l, activeWindows)
4007         {
4008                 WindowClause *wc = (WindowClause *) lfirst(l);
4009                 List       *window_pathkeys;
4010
4011                 window_pathkeys = make_pathkeys_for_window(root,
4012                                                                                                    wc,
4013                                                                                                    tlist);
4014
4015                 /* Sort if necessary */
4016                 if (!pathkeys_contained_in(window_pathkeys, path->pathkeys))
4017                 {
4018                         path = (Path *) create_sort_path(root, window_rel,
4019                                                                                          path,
4020                                                                                          window_pathkeys,
4021                                                                                          -1.0);
4022                 }
4023
4024                 if (lnext(l))
4025                 {
4026                         /*
4027                          * Add the current WindowFuncs to the output target for this
4028                          * intermediate WindowAggPath.  We must copy window_target to
4029                          * avoid changing the previous path's target.
4030                          *
4031                          * Note: a WindowFunc adds nothing to the target's eval costs; but
4032                          * we do need to account for the increase in tlist width.
4033                          */
4034                         ListCell   *lc2;
4035
4036                         window_target = copy_pathtarget(window_target);
4037                         foreach(lc2, wflists->windowFuncs[wc->winref])
4038                         {
4039                                 WindowFunc *wfunc = (WindowFunc *) lfirst(lc2);
4040
4041                                 Assert(IsA(wfunc, WindowFunc));
4042                                 add_column_to_pathtarget(window_target, (Expr *) wfunc, 0);
4043                                 window_target->width += get_typavgwidth(wfunc->wintype, -1);
4044                         }
4045                 }
4046                 else
4047                 {
4048                         /* Install the goal target in the topmost WindowAgg */
4049                         window_target = output_target;
4050                 }
4051
4052                 path = (Path *)
4053                         create_windowagg_path(root, window_rel, path, window_target,
4054                                                                   wflists->windowFuncs[wc->winref],
4055                                                                   wc,
4056                                                                   window_pathkeys);
4057         }
4058
4059         add_path(window_rel, path);
4060 }
4061
4062 /*
4063  * create_distinct_paths
4064  *
4065  * Build a new upperrel containing Paths for SELECT DISTINCT evaluation.
4066  *
4067  * input_rel: contains the source-data Paths
4068  *
4069  * Note: input paths should already compute the desired pathtarget, since
4070  * Sort/Unique won't project anything.
4071  */
4072 static RelOptInfo *
4073 create_distinct_paths(PlannerInfo *root,
4074                                           RelOptInfo *input_rel)
4075 {
4076         Query      *parse = root->parse;
4077         Path       *cheapest_input_path = input_rel->cheapest_total_path;
4078         RelOptInfo *distinct_rel;
4079         double          numDistinctRows;
4080         bool            allow_hash;
4081         Path       *path;
4082         ListCell   *lc;
4083
4084         /* For now, do all work in the (DISTINCT, NULL) upperrel */
4085         distinct_rel = fetch_upper_rel(root, UPPERREL_DISTINCT, NULL);
4086
4087         /*
4088          * We don't compute anything at this level, so distinct_rel will be
4089          * parallel-safe if the input rel is parallel-safe.  In particular, if
4090          * there is a DISTINCT ON (...) clause, any path for the input_rel will
4091          * output those expressions, and will not be parallel-safe unless those
4092          * expressions are parallel-safe.
4093          */
4094         distinct_rel->consider_parallel = input_rel->consider_parallel;
4095
4096         /*
4097          * If the input rel belongs to a single FDW, so does the distinct_rel.
4098          */
4099         distinct_rel->serverid = input_rel->serverid;
4100         distinct_rel->userid = input_rel->userid;
4101         distinct_rel->useridiscurrent = input_rel->useridiscurrent;
4102         distinct_rel->fdwroutine = input_rel->fdwroutine;
4103
4104         /* Estimate number of distinct rows there will be */
4105         if (parse->groupClause || parse->groupingSets || parse->hasAggs ||
4106                 root->hasHavingQual)
4107         {
4108                 /*
4109                  * If there was grouping or aggregation, use the number of input rows
4110                  * as the estimated number of DISTINCT rows (ie, assume the input is
4111                  * already mostly unique).
4112                  */
4113                 numDistinctRows = cheapest_input_path->rows;
4114         }
4115         else
4116         {
4117                 /*
4118                  * Otherwise, the UNIQUE filter has effects comparable to GROUP BY.
4119                  */
4120                 List       *distinctExprs;
4121
4122                 distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
4123                                                                                                 parse->targetList);
4124                 numDistinctRows = estimate_num_groups(root, distinctExprs,
4125                                                                                           cheapest_input_path->rows,
4126                                                                                           NULL);
4127         }
4128
4129         /*
4130          * Consider sort-based implementations of DISTINCT, if possible.
4131          */
4132         if (grouping_is_sortable(parse->distinctClause))
4133         {
4134                 /*
4135                  * First, if we have any adequately-presorted paths, just stick a
4136                  * Unique node on those.  Then consider doing an explicit sort of the
4137                  * cheapest input path and Unique'ing that.
4138                  *
4139                  * When we have DISTINCT ON, we must sort by the more rigorous of
4140                  * DISTINCT and ORDER BY, else it won't have the desired behavior.
4141                  * Also, if we do have to do an explicit sort, we might as well use
4142                  * the more rigorous ordering to avoid a second sort later.  (Note
4143                  * that the parser will have ensured that one clause is a prefix of
4144                  * the other.)
4145                  */
4146                 List       *needed_pathkeys;
4147
4148                 if (parse->hasDistinctOn &&
4149                         list_length(root->distinct_pathkeys) <
4150                         list_length(root->sort_pathkeys))
4151                         needed_pathkeys = root->sort_pathkeys;
4152                 else
4153                         needed_pathkeys = root->distinct_pathkeys;
4154
4155                 foreach(lc, input_rel->pathlist)
4156                 {
4157                         Path       *path = (Path *) lfirst(lc);
4158
4159                         if (pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4160                         {
4161                                 add_path(distinct_rel, (Path *)
4162                                                  create_upper_unique_path(root, distinct_rel,
4163                                                                                                   path,
4164                                                                                 list_length(root->distinct_pathkeys),
4165                                                                                                   numDistinctRows));
4166                         }
4167                 }
4168
4169                 /* For explicit-sort case, always use the more rigorous clause */
4170                 if (list_length(root->distinct_pathkeys) <
4171                         list_length(root->sort_pathkeys))
4172                 {
4173                         needed_pathkeys = root->sort_pathkeys;
4174                         /* Assert checks that parser didn't mess up... */
4175                         Assert(pathkeys_contained_in(root->distinct_pathkeys,
4176                                                                                  needed_pathkeys));
4177                 }
4178                 else
4179                         needed_pathkeys = root->distinct_pathkeys;
4180
4181                 path = cheapest_input_path;
4182                 if (!pathkeys_contained_in(needed_pathkeys, path->pathkeys))
4183                         path = (Path *) create_sort_path(root, distinct_rel,
4184                                                                                          path,
4185                                                                                          needed_pathkeys,
4186                                                                                          -1.0);
4187
4188                 add_path(distinct_rel, (Path *)
4189                                  create_upper_unique_path(root, distinct_rel,
4190                                                                                   path,
4191                                                                                 list_length(root->distinct_pathkeys),
4192                                                                                   numDistinctRows));
4193         }
4194
4195         /*
4196          * Consider hash-based implementations of DISTINCT, if possible.
4197          *
4198          * If we were not able to make any other types of path, we *must* hash or
4199          * die trying.  If we do have other choices, there are several things that
4200          * should prevent selection of hashing: if the query uses DISTINCT ON
4201          * (because it won't really have the expected behavior if we hash), or if
4202          * enable_hashagg is off, or if it looks like the hashtable will exceed
4203          * work_mem.
4204          *
4205          * Note: grouping_is_hashable() is much more expensive to check than the
4206          * other gating conditions, so we want to do it last.
4207          */
4208         if (distinct_rel->pathlist == NIL)
4209                 allow_hash = true;              /* we have no alternatives */
4210         else if (parse->hasDistinctOn || !enable_hashagg)
4211                 allow_hash = false;             /* policy-based decision not to hash */
4212         else
4213         {
4214                 Size            hashentrysize;
4215
4216                 /* Estimate per-hash-entry space at tuple width... */
4217                 hashentrysize = MAXALIGN(cheapest_input_path->pathtarget->width) +
4218                         MAXALIGN(SizeofMinimalTupleHeader);
4219                 /* plus the per-hash-entry overhead */
4220                 hashentrysize += hash_agg_entry_size(0);
4221
4222                 /* Allow hashing only if hashtable is predicted to fit in work_mem */
4223                 allow_hash = (hashentrysize * numDistinctRows <= work_mem * 1024L);
4224         }
4225
4226         if (allow_hash && grouping_is_hashable(parse->distinctClause))
4227         {
4228                 /* Generate hashed aggregate path --- no sort needed */
4229                 add_path(distinct_rel, (Path *)
4230                                  create_agg_path(root,
4231                                                                  distinct_rel,
4232                                                                  cheapest_input_path,
4233                                                                  cheapest_input_path->pathtarget,
4234                                                                  AGG_HASHED,
4235                                                                  AGGSPLIT_SIMPLE,
4236                                                                  parse->distinctClause,
4237                                                                  NIL,
4238                                                                  NULL,
4239                                                                  numDistinctRows));
4240         }
4241
4242         /* Give a helpful error if we failed to find any implementation */
4243         if (distinct_rel->pathlist == NIL)
4244                 ereport(ERROR,
4245                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4246                                  errmsg("could not implement DISTINCT"),
4247                                  errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
4248
4249         /*
4250          * If there is an FDW that's responsible for all baserels of the query,
4251          * let it consider adding ForeignPaths.
4252          */
4253         if (distinct_rel->fdwroutine &&
4254                 distinct_rel->fdwroutine->GetForeignUpperPaths)
4255                 distinct_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_DISTINCT,
4256                                                                                                         input_rel, distinct_rel);
4257
4258         /* Let extensions possibly add some more paths */
4259         if (create_upper_paths_hook)
4260                 (*create_upper_paths_hook) (root, UPPERREL_DISTINCT,
4261                                                                         input_rel, distinct_rel);
4262
4263         /* Now choose the best path(s) */
4264         set_cheapest(distinct_rel);
4265
4266         return distinct_rel;
4267 }
4268
4269 /*
4270  * create_ordered_paths
4271  *
4272  * Build a new upperrel containing Paths for ORDER BY evaluation.
4273  *
4274  * All paths in the result must satisfy the ORDER BY ordering.
4275  * The only new path we need consider is an explicit sort on the
4276  * cheapest-total existing path.
4277  *
4278  * input_rel: contains the source-data Paths
4279  * target: the output tlist the result Paths must emit
4280  * limit_tuples: estimated bound on the number of output tuples,
4281  *              or -1 if no LIMIT or couldn't estimate
4282  */
4283 static RelOptInfo *
4284 create_ordered_paths(PlannerInfo *root,
4285                                          RelOptInfo *input_rel,
4286                                          PathTarget *target,
4287                                          double limit_tuples)
4288 {
4289         Path       *cheapest_input_path = input_rel->cheapest_total_path;
4290         RelOptInfo *ordered_rel;
4291         ListCell   *lc;
4292
4293         /* For now, do all work in the (ORDERED, NULL) upperrel */
4294         ordered_rel = fetch_upper_rel(root, UPPERREL_ORDERED, NULL);
4295
4296         /*
4297          * If the input relation is not parallel-safe, then the ordered relation
4298          * can't be parallel-safe, either.  Otherwise, it's parallel-safe if the
4299          * target list is parallel-safe.
4300          */
4301         if (input_rel->consider_parallel &&
4302                 is_parallel_safe(root, (Node *) target->exprs))
4303                 ordered_rel->consider_parallel = true;
4304
4305         /*
4306          * If the input rel belongs to a single FDW, so does the ordered_rel.
4307          */
4308         ordered_rel->serverid = input_rel->serverid;
4309         ordered_rel->userid = input_rel->userid;
4310         ordered_rel->useridiscurrent = input_rel->useridiscurrent;
4311         ordered_rel->fdwroutine = input_rel->fdwroutine;
4312
4313         foreach(lc, input_rel->pathlist)
4314         {
4315                 Path       *path = (Path *) lfirst(lc);
4316                 bool            is_sorted;
4317
4318                 is_sorted = pathkeys_contained_in(root->sort_pathkeys,
4319                                                                                   path->pathkeys);
4320                 if (path == cheapest_input_path || is_sorted)
4321                 {
4322                         if (!is_sorted)
4323                         {
4324                                 /* An explicit sort here can take advantage of LIMIT */
4325                                 path = (Path *) create_sort_path(root,
4326                                                                                                  ordered_rel,
4327                                                                                                  path,
4328                                                                                                  root->sort_pathkeys,
4329                                                                                                  limit_tuples);
4330                         }
4331
4332                         /* Add projection step if needed */
4333                         if (path->pathtarget != target)
4334                                 path = apply_projection_to_path(root, ordered_rel,
4335                                                                                                 path, target);
4336
4337                         add_path(ordered_rel, path);
4338                 }
4339         }
4340
4341         /*
4342          * If there is an FDW that's responsible for all baserels of the query,
4343          * let it consider adding ForeignPaths.
4344          */
4345         if (ordered_rel->fdwroutine &&
4346                 ordered_rel->fdwroutine->GetForeignUpperPaths)
4347                 ordered_rel->fdwroutine->GetForeignUpperPaths(root, UPPERREL_ORDERED,
4348                                                                                                           input_rel, ordered_rel);
4349
4350         /* Let extensions possibly add some more paths */
4351         if (create_upper_paths_hook)
4352                 (*create_upper_paths_hook) (root, UPPERREL_ORDERED,
4353                                                                         input_rel, ordered_rel);
4354
4355         /*
4356          * No need to bother with set_cheapest here; grouping_planner does not
4357          * need us to do it.
4358          */
4359         Assert(ordered_rel->pathlist != NIL);
4360
4361         return ordered_rel;
4362 }
4363
4364
4365 /*
4366  * make_group_input_target
4367  *        Generate appropriate PathTarget for initial input to grouping nodes.
4368  *
4369  * If there is grouping or aggregation, the scan/join subplan cannot emit
4370  * the query's final targetlist; for example, it certainly can't emit any
4371  * aggregate function calls.  This routine generates the correct target
4372  * for the scan/join subplan.
4373  *
4374  * The query target list passed from the parser already contains entries
4375  * for all ORDER BY and GROUP BY expressions, but it will not have entries
4376  * for variables used only in HAVING clauses; so we need to add those
4377  * variables to the subplan target list.  Also, we flatten all expressions
4378  * except GROUP BY items into their component variables; other expressions
4379  * will be computed by the upper plan nodes rather than by the subplan.
4380  * For example, given a query like
4381  *              SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
4382  * we want to pass this targetlist to the subplan:
4383  *              a+b,c,d
4384  * where the a+b target will be used by the Sort/Group steps, and the
4385  * other targets will be used for computing the final results.
4386  *
4387  * 'final_target' is the query's final target list (in PathTarget form)
4388  *
4389  * The result is the PathTarget to be computed by the Paths returned from
4390  * query_planner().
4391  */
4392 static PathTarget *
4393 make_group_input_target(PlannerInfo *root, PathTarget *final_target)
4394 {
4395         Query      *parse = root->parse;
4396         PathTarget *input_target;
4397         List       *non_group_cols;
4398         List       *non_group_vars;
4399         int                     i;
4400         ListCell   *lc;
4401
4402         /*
4403          * We must build a target containing all grouping columns, plus any other
4404          * Vars mentioned in the query's targetlist and HAVING qual.
4405          */
4406         input_target = create_empty_pathtarget();
4407         non_group_cols = NIL;
4408
4409         i = 0;
4410         foreach(lc, final_target->exprs)
4411         {
4412                 Expr       *expr = (Expr *) lfirst(lc);
4413                 Index           sgref = get_pathtarget_sortgroupref(final_target, i);
4414
4415                 if (sgref && parse->groupClause &&
4416                         get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
4417                 {
4418                         /*
4419                          * It's a grouping column, so add it to the input target as-is.
4420                          */
4421                         add_column_to_pathtarget(input_target, expr, sgref);
4422                 }
4423                 else
4424                 {
4425                         /*
4426                          * Non-grouping column, so just remember the expression for later
4427                          * call to pull_var_clause.
4428                          */
4429                         non_group_cols = lappend(non_group_cols, expr);
4430                 }
4431
4432                 i++;
4433         }
4434
4435         /*
4436          * If there's a HAVING clause, we'll need the Vars it uses, too.
4437          */
4438         if (parse->havingQual)
4439                 non_group_cols = lappend(non_group_cols, parse->havingQual);
4440
4441         /*
4442          * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
4443          * add them to the input target if not already present.  (A Var used
4444          * directly as a GROUP BY item will be present already.)  Note this
4445          * includes Vars used in resjunk items, so we are covering the needs of
4446          * ORDER BY and window specifications.  Vars used within Aggrefs and
4447          * WindowFuncs will be pulled out here, too.
4448          */
4449         non_group_vars = pull_var_clause((Node *) non_group_cols,
4450                                                                          PVC_RECURSE_AGGREGATES |
4451                                                                          PVC_RECURSE_WINDOWFUNCS |
4452                                                                          PVC_INCLUDE_PLACEHOLDERS);
4453         add_new_columns_to_pathtarget(input_target, non_group_vars);
4454
4455         /* clean up cruft */
4456         list_free(non_group_vars);
4457         list_free(non_group_cols);
4458
4459         /* XXX this causes some redundant cost calculation ... */
4460         return set_pathtarget_cost_width(root, input_target);
4461 }
4462
4463 /*
4464  * make_partial_grouping_target
4465  *        Generate appropriate PathTarget for output of partial aggregate
4466  *        (or partial grouping, if there are no aggregates) nodes.
4467  *
4468  * A partial aggregation node needs to emit all the same aggregates that
4469  * a regular aggregation node would, plus any aggregates used in HAVING;
4470  * except that the Aggref nodes should be marked as partial aggregates.
4471  *
4472  * In addition, we'd better emit any Vars and PlaceholderVars that are
4473  * used outside of Aggrefs in the aggregation tlist and HAVING.  (Presumably,
4474  * these would be Vars that are grouped by or used in grouping expressions.)
4475  *
4476  * grouping_target is the tlist to be emitted by the topmost aggregation step.
4477  * We get the HAVING clause out of *root.
4478  */
4479 static PathTarget *
4480 make_partial_grouping_target(PlannerInfo *root, PathTarget *grouping_target)
4481 {
4482         Query      *parse = root->parse;
4483         PathTarget *partial_target;
4484         List       *non_group_cols;
4485         List       *non_group_exprs;
4486         int                     i;
4487         ListCell   *lc;
4488
4489         partial_target = create_empty_pathtarget();
4490         non_group_cols = NIL;
4491
4492         i = 0;
4493         foreach(lc, grouping_target->exprs)
4494         {
4495                 Expr       *expr = (Expr *) lfirst(lc);
4496                 Index           sgref = get_pathtarget_sortgroupref(grouping_target, i);
4497
4498                 if (sgref && parse->groupClause &&
4499                         get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
4500                 {
4501                         /*
4502                          * It's a grouping column, so add it to the partial_target as-is.
4503                          * (This allows the upper agg step to repeat the grouping calcs.)
4504                          */
4505                         add_column_to_pathtarget(partial_target, expr, sgref);
4506                 }
4507                 else
4508                 {
4509                         /*
4510                          * Non-grouping column, so just remember the expression for later
4511                          * call to pull_var_clause.
4512                          */
4513                         non_group_cols = lappend(non_group_cols, expr);
4514                 }
4515
4516                 i++;
4517         }
4518
4519         /*
4520          * If there's a HAVING clause, we'll need the Vars/Aggrefs it uses, too.
4521          */
4522         if (parse->havingQual)
4523                 non_group_cols = lappend(non_group_cols, parse->havingQual);
4524
4525         /*
4526          * Pull out all the Vars, PlaceHolderVars, and Aggrefs mentioned in
4527          * non-group cols (plus HAVING), and add them to the partial_target if not
4528          * already present.  (An expression used directly as a GROUP BY item will
4529          * be present already.)  Note this includes Vars used in resjunk items, so
4530          * we are covering the needs of ORDER BY and window specifications.
4531          */
4532         non_group_exprs = pull_var_clause((Node *) non_group_cols,
4533                                                                           PVC_INCLUDE_AGGREGATES |
4534                                                                           PVC_RECURSE_WINDOWFUNCS |
4535                                                                           PVC_INCLUDE_PLACEHOLDERS);
4536
4537         add_new_columns_to_pathtarget(partial_target, non_group_exprs);
4538
4539         /*
4540          * Adjust Aggrefs to put them in partial mode.  At this point all Aggrefs
4541          * are at the top level of the target list, so we can just scan the list
4542          * rather than recursing through the expression trees.
4543          */
4544         foreach(lc, partial_target->exprs)
4545         {
4546                 Aggref     *aggref = (Aggref *) lfirst(lc);
4547
4548                 if (IsA(aggref, Aggref))
4549                 {
4550                         Aggref     *newaggref;
4551
4552                         /*
4553                          * We shouldn't need to copy the substructure of the Aggref node,
4554                          * but flat-copy the node itself to avoid damaging other trees.
4555                          */
4556                         newaggref = makeNode(Aggref);
4557                         memcpy(newaggref, aggref, sizeof(Aggref));
4558
4559                         /* For now, assume serialization is required */
4560                         mark_partial_aggref(newaggref, AGGSPLIT_INITIAL_SERIAL);
4561
4562                         lfirst(lc) = newaggref;
4563                 }
4564         }
4565
4566         /* clean up cruft */
4567         list_free(non_group_exprs);
4568         list_free(non_group_cols);
4569
4570         /* XXX this causes some redundant cost calculation ... */
4571         return set_pathtarget_cost_width(root, partial_target);
4572 }
4573
4574 /*
4575  * mark_partial_aggref
4576  *        Adjust an Aggref to make it represent a partial-aggregation step.
4577  *
4578  * The Aggref node is modified in-place; caller must do any copying required.
4579  */
4580 void
4581 mark_partial_aggref(Aggref *agg, AggSplit aggsplit)
4582 {
4583         /* aggtranstype should be computed by this point */
4584         Assert(OidIsValid(agg->aggtranstype));
4585         /* ... but aggsplit should still be as the parser left it */
4586         Assert(agg->aggsplit == AGGSPLIT_SIMPLE);
4587
4588         /* Mark the Aggref with the intended partial-aggregation mode */
4589         agg->aggsplit = aggsplit;
4590
4591         /*
4592          * Adjust result type if needed.  Normally, a partial aggregate returns
4593          * the aggregate's transition type; but if that's INTERNAL and we're
4594          * serializing, it returns BYTEA instead.
4595          */
4596         if (DO_AGGSPLIT_SKIPFINAL(aggsplit))
4597         {
4598                 if (agg->aggtranstype == INTERNALOID && DO_AGGSPLIT_SERIALIZE(aggsplit))
4599                         agg->aggtype = BYTEAOID;
4600                 else
4601                         agg->aggtype = agg->aggtranstype;
4602         }
4603 }
4604
4605 /*
4606  * postprocess_setop_tlist
4607  *        Fix up targetlist returned by plan_set_operations().
4608  *
4609  * We need to transpose sort key info from the orig_tlist into new_tlist.
4610  * NOTE: this would not be good enough if we supported resjunk sort keys
4611  * for results of set operations --- then, we'd need to project a whole
4612  * new tlist to evaluate the resjunk columns.  For now, just ereport if we
4613  * find any resjunk columns in orig_tlist.
4614  */
4615 static List *
4616 postprocess_setop_tlist(List *new_tlist, List *orig_tlist)
4617 {
4618         ListCell   *l;
4619         ListCell   *orig_tlist_item = list_head(orig_tlist);
4620
4621         foreach(l, new_tlist)
4622         {
4623                 TargetEntry *new_tle = (TargetEntry *) lfirst(l);
4624                 TargetEntry *orig_tle;
4625
4626                 /* ignore resjunk columns in setop result */
4627                 if (new_tle->resjunk)
4628                         continue;
4629
4630                 Assert(orig_tlist_item != NULL);
4631                 orig_tle = (TargetEntry *) lfirst(orig_tlist_item);
4632                 orig_tlist_item = lnext(orig_tlist_item);
4633                 if (orig_tle->resjunk)  /* should not happen */
4634                         elog(ERROR, "resjunk output columns are not implemented");
4635                 Assert(new_tle->resno == orig_tle->resno);
4636                 new_tle->ressortgroupref = orig_tle->ressortgroupref;
4637         }
4638         if (orig_tlist_item != NULL)
4639                 elog(ERROR, "resjunk output columns are not implemented");
4640         return new_tlist;
4641 }
4642
4643 /*
4644  * select_active_windows
4645  *              Create a list of the "active" window clauses (ie, those referenced
4646  *              by non-deleted WindowFuncs) in the order they are to be executed.
4647  */
4648 static List *
4649 select_active_windows(PlannerInfo *root, WindowFuncLists *wflists)
4650 {
4651         List       *result;
4652         List       *actives;
4653         ListCell   *lc;
4654
4655         /* First, make a list of the active windows */
4656         actives = NIL;
4657         foreach(lc, root->parse->windowClause)
4658         {
4659                 WindowClause *wc = (WindowClause *) lfirst(lc);
4660
4661                 /* It's only active if wflists shows some related WindowFuncs */
4662                 Assert(wc->winref <= wflists->maxWinRef);
4663                 if (wflists->windowFuncs[wc->winref] != NIL)
4664                         actives = lappend(actives, wc);
4665         }
4666
4667         /*
4668          * Now, ensure that windows with identical partitioning/ordering clauses
4669          * are adjacent in the list.  This is required by the SQL standard, which
4670          * says that only one sort is to be used for such windows, even if they
4671          * are otherwise distinct (eg, different names or framing clauses).
4672          *
4673          * There is room to be much smarter here, for example detecting whether
4674          * one window's sort keys are a prefix of another's (so that sorting for
4675          * the latter would do for the former), or putting windows first that
4676          * match a sort order available for the underlying query.  For the moment
4677          * we are content with meeting the spec.
4678          */
4679         result = NIL;
4680         while (actives != NIL)
4681         {
4682                 WindowClause *wc = (WindowClause *) linitial(actives);
4683                 ListCell   *prev;
4684                 ListCell   *next;
4685
4686                 /* Move wc from actives to result */
4687                 actives = list_delete_first(actives);
4688                 result = lappend(result, wc);
4689
4690                 /* Now move any matching windows from actives to result */
4691                 prev = NULL;
4692                 for (lc = list_head(actives); lc; lc = next)
4693                 {
4694                         WindowClause *wc2 = (WindowClause *) lfirst(lc);
4695
4696                         next = lnext(lc);
4697                         /* framing options are NOT to be compared here! */
4698                         if (equal(wc->partitionClause, wc2->partitionClause) &&
4699                                 equal(wc->orderClause, wc2->orderClause))
4700                         {
4701                                 actives = list_delete_cell(actives, lc, prev);
4702                                 result = lappend(result, wc2);
4703                         }
4704                         else
4705                                 prev = lc;
4706                 }
4707         }
4708
4709         return result;
4710 }
4711
4712 /*
4713  * make_window_input_target
4714  *        Generate appropriate PathTarget for initial input to WindowAgg nodes.
4715  *
4716  * When the query has window functions, this function computes the desired
4717  * target to be computed by the node just below the first WindowAgg.
4718  * This tlist must contain all values needed to evaluate the window functions,
4719  * compute the final target list, and perform any required final sort step.
4720  * If multiple WindowAggs are needed, each intermediate one adds its window
4721  * function results onto this base tlist; only the topmost WindowAgg computes
4722  * the actual desired target list.
4723  *
4724  * This function is much like make_group_input_target, though not quite enough
4725  * like it to share code.  As in that function, we flatten most expressions
4726  * into their component variables.  But we do not want to flatten window
4727  * PARTITION BY/ORDER BY clauses, since that might result in multiple
4728  * evaluations of them, which would be bad (possibly even resulting in
4729  * inconsistent answers, if they contain volatile functions).
4730  * Also, we must not flatten GROUP BY clauses that were left unflattened by
4731  * make_group_input_target, because we may no longer have access to the
4732  * individual Vars in them.
4733  *
4734  * Another key difference from make_group_input_target is that we don't
4735  * flatten Aggref expressions, since those are to be computed below the
4736  * window functions and just referenced like Vars above that.
4737  *
4738  * 'final_target' is the query's final target list (in PathTarget form)
4739  * 'activeWindows' is the list of active windows previously identified by
4740  *                      select_active_windows.
4741  *
4742  * The result is the PathTarget to be computed by the plan node immediately
4743  * below the first WindowAgg node.
4744  */
4745 static PathTarget *
4746 make_window_input_target(PlannerInfo *root,
4747                                                  PathTarget *final_target,
4748                                                  List *activeWindows)
4749 {
4750         Query      *parse = root->parse;
4751         PathTarget *input_target;
4752         Bitmapset  *sgrefs;
4753         List       *flattenable_cols;
4754         List       *flattenable_vars;
4755         int                     i;
4756         ListCell   *lc;
4757
4758         Assert(parse->hasWindowFuncs);
4759
4760         /*
4761          * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
4762          * into a bitmapset for convenient reference below.
4763          */
4764         sgrefs = NULL;
4765         foreach(lc, activeWindows)
4766         {
4767                 WindowClause *wc = (WindowClause *) lfirst(lc);
4768                 ListCell   *lc2;
4769
4770                 foreach(lc2, wc->partitionClause)
4771                 {
4772                         SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
4773
4774                         sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
4775                 }
4776                 foreach(lc2, wc->orderClause)
4777                 {
4778                         SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc2);
4779
4780                         sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef);
4781                 }
4782         }
4783
4784         /* Add in sortgroupref numbers of GROUP BY clauses, too */
4785         foreach(lc, parse->groupClause)
4786         {
4787                 SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);
4788
4789                 sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
4790         }
4791
4792         /*
4793          * Construct a target containing all the non-flattenable targetlist items,
4794          * and save aside the others for a moment.
4795          */
4796         input_target = create_empty_pathtarget();
4797         flattenable_cols = NIL;
4798
4799         i = 0;
4800         foreach(lc, final_target->exprs)
4801         {
4802                 Expr       *expr = (Expr *) lfirst(lc);
4803                 Index           sgref = get_pathtarget_sortgroupref(final_target, i);
4804
4805                 /*
4806                  * Don't want to deconstruct window clauses or GROUP BY items.  (Note
4807                  * that such items can't contain window functions, so it's okay to
4808                  * compute them below the WindowAgg nodes.)
4809                  */
4810                 if (sgref != 0 && bms_is_member(sgref, sgrefs))
4811                 {
4812                         /*
4813                          * Don't want to deconstruct this value, so add it to the input
4814                          * target as-is.
4815                          */
4816                         add_column_to_pathtarget(input_target, expr, sgref);
4817                 }
4818                 else
4819                 {
4820                         /*
4821                          * Column is to be flattened, so just remember the expression for
4822                          * later call to pull_var_clause.
4823                          */
4824                         flattenable_cols = lappend(flattenable_cols, expr);
4825                 }
4826
4827                 i++;
4828         }
4829
4830         /*
4831          * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
4832          * add them to the input target if not already present.  (Some might be
4833          * there already because they're used directly as window/group clauses.)
4834          *
4835          * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that any
4836          * Aggrefs are placed in the Agg node's tlist and not left to be computed
4837          * at higher levels.  On the other hand, we should recurse into
4838          * WindowFuncs to make sure their input expressions are available.
4839          */
4840         flattenable_vars = pull_var_clause((Node *) flattenable_cols,
4841                                                                            PVC_INCLUDE_AGGREGATES |
4842                                                                            PVC_RECURSE_WINDOWFUNCS |
4843                                                                            PVC_INCLUDE_PLACEHOLDERS);
4844         add_new_columns_to_pathtarget(input_target, flattenable_vars);
4845
4846         /* clean up cruft */
4847         list_free(flattenable_vars);
4848         list_free(flattenable_cols);
4849
4850         /* XXX this causes some redundant cost calculation ... */
4851         return set_pathtarget_cost_width(root, input_target);
4852 }
4853
4854 /*
4855  * make_pathkeys_for_window
4856  *              Create a pathkeys list describing the required input ordering
4857  *              for the given WindowClause.
4858  *
4859  * The required ordering is first the PARTITION keys, then the ORDER keys.
4860  * In the future we might try to implement windowing using hashing, in which
4861  * case the ordering could be relaxed, but for now we always sort.
4862  *
4863  * Caution: if you change this, see createplan.c's get_column_info_for_window!
4864  */
4865 static List *
4866 make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
4867                                                  List *tlist)
4868 {
4869         List       *window_pathkeys;
4870         List       *window_sortclauses;
4871
4872         /* Throw error if can't sort */
4873         if (!grouping_is_sortable(wc->partitionClause))
4874                 ereport(ERROR,
4875                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4876                                  errmsg("could not implement window PARTITION BY"),
4877                                  errdetail("Window partitioning columns must be of sortable datatypes.")));
4878         if (!grouping_is_sortable(wc->orderClause))
4879                 ereport(ERROR,
4880                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4881                                  errmsg("could not implement window ORDER BY"),
4882                 errdetail("Window ordering columns must be of sortable datatypes.")));
4883
4884         /* Okay, make the combined pathkeys */
4885         window_sortclauses = list_concat(list_copy(wc->partitionClause),
4886                                                                          list_copy(wc->orderClause));
4887         window_pathkeys = make_pathkeys_for_sortclauses(root,
4888                                                                                                         window_sortclauses,
4889                                                                                                         tlist);
4890         list_free(window_sortclauses);
4891         return window_pathkeys;
4892 }
4893
4894 /*
4895  * make_sort_input_target
4896  *        Generate appropriate PathTarget for initial input to Sort step.
4897  *
4898  * If the query has ORDER BY, this function chooses the target to be computed
4899  * by the node just below the Sort (and DISTINCT, if any, since Unique can't
4900  * project) steps.  This might or might not be identical to the query's final
4901  * output target.
4902  *
4903  * The main argument for keeping the sort-input tlist the same as the final
4904  * is that we avoid a separate projection node (which will be needed if
4905  * they're different, because Sort can't project).  However, there are also
4906  * advantages to postponing tlist evaluation till after the Sort: it ensures
4907  * a consistent order of evaluation for any volatile functions in the tlist,
4908  * and if there's also a LIMIT, we can stop the query without ever computing
4909  * tlist functions for later rows, which is beneficial for both volatile and
4910  * expensive functions.
4911  *
4912  * Our current policy is to postpone volatile expressions till after the sort
4913  * unconditionally (assuming that that's possible, ie they are in plain tlist
4914  * columns and not ORDER BY/GROUP BY/DISTINCT columns).  We also prefer to
4915  * postpone set-returning expressions, because running them beforehand would
4916  * bloat the sort dataset, and because it might cause unexpected output order
4917  * if the sort isn't stable.  However there's a constraint on that: all SRFs
4918  * in the tlist should be evaluated at the same plan step, so that they can
4919  * run in sync in ExecTargetList.  So if any SRFs are in sort columns, we
4920  * mustn't postpone any SRFs.  (Note that in principle that policy should
4921  * probably get applied to the group/window input targetlists too, but we
4922  * have not done that historically.)  Lastly, expensive expressions are
4923  * postponed if there is a LIMIT, or if root->tuple_fraction shows that
4924  * partial evaluation of the query is possible (if neither is true, we expect
4925  * to have to evaluate the expressions for every row anyway), or if there are
4926  * any volatile or set-returning expressions (since once we've put in a
4927  * projection at all, it won't cost any more to postpone more stuff).
4928  *
4929  * Another issue that could potentially be considered here is that
4930  * evaluating tlist expressions could result in data that's either wider
4931  * or narrower than the input Vars, thus changing the volume of data that
4932  * has to go through the Sort.  However, we usually have only a very bad
4933  * idea of the output width of any expression more complex than a Var,
4934  * so for now it seems too risky to try to optimize on that basis.
4935  *
4936  * Note that if we do produce a modified sort-input target, and then the
4937  * query ends up not using an explicit Sort, no particular harm is done:
4938  * we'll initially use the modified target for the preceding path nodes,
4939  * but then change them to the final target with apply_projection_to_path.
4940  * Moreover, in such a case the guarantees about evaluation order of
4941  * volatile functions still hold, since the rows are sorted already.
4942  *
4943  * This function has some things in common with make_group_input_target and
4944  * make_window_input_target, though the detailed rules for what to do are
4945  * different.  We never flatten/postpone any grouping or ordering columns;
4946  * those are needed before the sort.  If we do flatten a particular
4947  * expression, we leave Aggref and WindowFunc nodes alone, since those were
4948  * computed earlier.
4949  *
4950  * 'final_target' is the query's final target list (in PathTarget form)
4951  * 'have_postponed_srfs' is an output argument, see below
4952  *
4953  * The result is the PathTarget to be computed by the plan node immediately
4954  * below the Sort step (and the Distinct step, if any).  This will be
4955  * exactly final_target if we decide a projection step wouldn't be helpful.
4956  *
4957  * In addition, *have_postponed_srfs is set to TRUE if we choose to postpone
4958  * any set-returning functions to after the Sort.
4959  */
4960 static PathTarget *
4961 make_sort_input_target(PlannerInfo *root,
4962                                            PathTarget *final_target,
4963                                            bool *have_postponed_srfs)
4964 {
4965         Query      *parse = root->parse;
4966         PathTarget *input_target;
4967         int                     ncols;
4968         bool       *col_is_srf;
4969         bool       *postpone_col;
4970         bool            have_srf;
4971         bool            have_volatile;
4972         bool            have_expensive;
4973         bool            have_srf_sortcols;
4974         bool            postpone_srfs;
4975         List       *postponable_cols;
4976         List       *postponable_vars;
4977         int                     i;
4978         ListCell   *lc;
4979
4980         /* Shouldn't get here unless query has ORDER BY */
4981         Assert(parse->sortClause);
4982
4983         *have_postponed_srfs = false;           /* default result */
4984
4985         /* Inspect tlist and collect per-column information */
4986         ncols = list_length(final_target->exprs);
4987         col_is_srf = (bool *) palloc0(ncols * sizeof(bool));
4988         postpone_col = (bool *) palloc0(ncols * sizeof(bool));
4989         have_srf = have_volatile = have_expensive = have_srf_sortcols = false;
4990
4991         i = 0;
4992         foreach(lc, final_target->exprs)
4993         {
4994                 Expr       *expr = (Expr *) lfirst(lc);
4995
4996                 /*
4997                  * If the column has a sortgroupref, assume it has to be evaluated
4998                  * before sorting.  Generally such columns would be ORDER BY, GROUP
4999                  * BY, etc targets.  One exception is columns that were removed from
5000                  * GROUP BY by remove_useless_groupby_columns() ... but those would
5001                  * only be Vars anyway.  There don't seem to be any cases where it
5002                  * would be worth the trouble to double-check.
5003                  */
5004                 if (get_pathtarget_sortgroupref(final_target, i) == 0)
5005                 {
5006                         /*
5007                          * Check for SRF or volatile functions.  Check the SRF case first
5008                          * because we must know whether we have any postponed SRFs.
5009                          */
5010                         if (parse->hasTargetSRFs &&
5011                                 expression_returns_set((Node *) expr))
5012                         {
5013                                 /* We'll decide below whether these are postponable */
5014                                 col_is_srf[i] = true;
5015                                 have_srf = true;
5016                         }
5017                         else if (contain_volatile_functions((Node *) expr))
5018                         {
5019                                 /* Unconditionally postpone */
5020                                 postpone_col[i] = true;
5021                                 have_volatile = true;
5022                         }
5023                         else
5024                         {
5025                                 /*
5026                                  * Else check the cost.  XXX it's annoying to have to do this
5027                                  * when set_pathtarget_cost_width() just did it.  Refactor to
5028                                  * allow sharing the work?
5029                                  */
5030                                 QualCost        cost;
5031
5032                                 cost_qual_eval_node(&cost, (Node *) expr, root);
5033
5034                                 /*
5035                                  * We arbitrarily define "expensive" as "more than 10X
5036                                  * cpu_operator_cost".  Note this will take in any PL function
5037                                  * with default cost.
5038                                  */
5039                                 if (cost.per_tuple > 10 * cpu_operator_cost)
5040                                 {
5041                                         postpone_col[i] = true;
5042                                         have_expensive = true;
5043                                 }
5044                         }
5045                 }
5046                 else
5047                 {
5048                         /* For sortgroupref cols, just check if any contain SRFs */
5049                         if (!have_srf_sortcols &&
5050                                 parse->hasTargetSRFs &&
5051                                 expression_returns_set((Node *) expr))
5052                                 have_srf_sortcols = true;
5053                 }
5054
5055                 i++;
5056         }
5057
5058         /*
5059          * We can postpone SRFs if we have some but none are in sortgroupref cols.
5060          */
5061         postpone_srfs = (have_srf && !have_srf_sortcols);
5062
5063         /*
5064          * If we don't need a post-sort projection, just return final_target.
5065          */
5066         if (!(postpone_srfs || have_volatile ||
5067                   (have_expensive &&
5068                    (parse->limitCount || root->tuple_fraction > 0))))
5069                 return final_target;
5070
5071         /*
5072          * Report whether the post-sort projection will contain set-returning
5073          * functions.  This is important because it affects whether the Sort can
5074          * rely on the query's LIMIT (if any) to bound the number of rows it needs
5075          * to return.
5076          */
5077         *have_postponed_srfs = postpone_srfs;
5078
5079         /*
5080          * Construct the sort-input target, taking all non-postponable columns and
5081          * then adding Vars, PlaceHolderVars, Aggrefs, and WindowFuncs found in
5082          * the postponable ones.
5083          */
5084         input_target = create_empty_pathtarget();
5085         postponable_cols = NIL;
5086
5087         i = 0;
5088         foreach(lc, final_target->exprs)
5089         {
5090                 Expr       *expr = (Expr *) lfirst(lc);
5091
5092                 if (postpone_col[i] || (postpone_srfs && col_is_srf[i]))
5093                         postponable_cols = lappend(postponable_cols, expr);
5094                 else
5095                         add_column_to_pathtarget(input_target, expr,
5096                                                            get_pathtarget_sortgroupref(final_target, i));
5097
5098                 i++;
5099         }
5100
5101         /*
5102          * Pull out all the Vars, Aggrefs, and WindowFuncs mentioned in
5103          * postponable columns, and add them to the sort-input target if not
5104          * already present.  (Some might be there already.)  We mustn't
5105          * deconstruct Aggrefs or WindowFuncs here, since the projection node
5106          * would be unable to recompute them.
5107          */
5108         postponable_vars = pull_var_clause((Node *) postponable_cols,
5109                                                                            PVC_INCLUDE_AGGREGATES |
5110                                                                            PVC_INCLUDE_WINDOWFUNCS |
5111                                                                            PVC_INCLUDE_PLACEHOLDERS);
5112         add_new_columns_to_pathtarget(input_target, postponable_vars);
5113
5114         /* clean up cruft */
5115         list_free(postponable_vars);
5116         list_free(postponable_cols);
5117
5118         /* XXX this represents even more redundant cost calculation ... */
5119         return set_pathtarget_cost_width(root, input_target);
5120 }
5121
5122 /*
5123  * get_cheapest_fractional_path
5124  *        Find the cheapest path for retrieving a specified fraction of all
5125  *        the tuples expected to be returned by the given relation.
5126  *
5127  * We interpret tuple_fraction the same way as grouping_planner.
5128  *
5129  * We assume set_cheapest() has been run on the given rel.
5130  */
5131 Path *
5132 get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction)
5133 {
5134         Path       *best_path = rel->cheapest_total_path;
5135         ListCell   *l;
5136
5137         /* If all tuples will be retrieved, just return the cheapest-total path */
5138         if (tuple_fraction <= 0.0)
5139                 return best_path;
5140
5141         /* Convert absolute # of tuples to a fraction; no need to clamp to 0..1 */
5142         if (tuple_fraction >= 1.0 && best_path->rows > 0)
5143                 tuple_fraction /= best_path->rows;
5144
5145         foreach(l, rel->pathlist)
5146         {
5147                 Path       *path = (Path *) lfirst(l);
5148
5149                 if (path == rel->cheapest_total_path ||
5150                  compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0)
5151                         continue;
5152
5153                 best_path = path;
5154         }
5155
5156         return best_path;
5157 }
5158
5159 /*
5160  * expression_planner
5161  *              Perform planner's transformations on a standalone expression.
5162  *
5163  * Various utility commands need to evaluate expressions that are not part
5164  * of a plannable query.  They can do so using the executor's regular
5165  * expression-execution machinery, but first the expression has to be fed
5166  * through here to transform it from parser output to something executable.
5167  *
5168  * Currently, we disallow sublinks in standalone expressions, so there's no
5169  * real "planning" involved here.  (That might not always be true though.)
5170  * What we must do is run eval_const_expressions to ensure that any function
5171  * calls are converted to positional notation and function default arguments
5172  * get inserted.  The fact that constant subexpressions get simplified is a
5173  * side-effect that is useful when the expression will get evaluated more than
5174  * once.  Also, we must fix operator function IDs.
5175  *
5176  * Note: this must not make any damaging changes to the passed-in expression
5177  * tree.  (It would actually be okay to apply fix_opfuncids to it, but since
5178  * we first do an expression_tree_mutator-based walk, what is returned will
5179  * be a new node tree.)
5180  */
5181 Expr *
5182 expression_planner(Expr *expr)
5183 {
5184         Node       *result;
5185
5186         /*
5187          * Convert named-argument function calls, insert default arguments and
5188          * simplify constant subexprs
5189          */
5190         result = eval_const_expressions(NULL, (Node *) expr);
5191
5192         /* Fill in opfuncid values if missing */
5193         fix_opfuncids(result);
5194
5195         return (Expr *) result;
5196 }
5197
5198
5199 /*
5200  * plan_cluster_use_sort
5201  *              Use the planner to decide how CLUSTER should implement sorting
5202  *
5203  * tableOid is the OID of a table to be clustered on its index indexOid
5204  * (which is already known to be a btree index).  Decide whether it's
5205  * cheaper to do an indexscan or a seqscan-plus-sort to execute the CLUSTER.
5206  * Return TRUE to use sorting, FALSE to use an indexscan.
5207  *
5208  * Note: caller had better already hold some type of lock on the table.
5209  */
5210 bool
5211 plan_cluster_use_sort(Oid tableOid, Oid indexOid)
5212 {
5213         PlannerInfo *root;
5214         Query      *query;
5215         PlannerGlobal *glob;
5216         RangeTblEntry *rte;
5217         RelOptInfo *rel;
5218         IndexOptInfo *indexInfo;
5219         QualCost        indexExprCost;
5220         Cost            comparisonCost;
5221         Path       *seqScanPath;
5222         Path            seqScanAndSortPath;
5223         IndexPath  *indexScanPath;
5224         ListCell   *lc;
5225
5226         /* We can short-circuit the cost comparison if indexscans are disabled */
5227         if (!enable_indexscan)
5228                 return true;                    /* use sort */
5229
5230         /* Set up mostly-dummy planner state */
5231         query = makeNode(Query);
5232         query->commandType = CMD_SELECT;
5233
5234         glob = makeNode(PlannerGlobal);
5235
5236         root = makeNode(PlannerInfo);
5237         root->parse = query;
5238         root->glob = glob;
5239         root->query_level = 1;
5240         root->planner_cxt = CurrentMemoryContext;
5241         root->wt_param_id = -1;
5242
5243         /* Build a minimal RTE for the rel */
5244         rte = makeNode(RangeTblEntry);
5245         rte->rtekind = RTE_RELATION;
5246         rte->relid = tableOid;
5247         rte->relkind = RELKIND_RELATION;        /* Don't be too picky. */
5248         rte->lateral = false;
5249         rte->inh = false;
5250         rte->inFromCl = true;
5251         query->rtable = list_make1(rte);
5252
5253         /* Set up RTE/RelOptInfo arrays */
5254         setup_simple_rel_arrays(root);
5255
5256         /* Build RelOptInfo */
5257         rel = build_simple_rel(root, 1, RELOPT_BASEREL);
5258
5259         /* Locate IndexOptInfo for the target index */
5260         indexInfo = NULL;
5261         foreach(lc, rel->indexlist)
5262         {
5263                 indexInfo = (IndexOptInfo *) lfirst(lc);
5264                 if (indexInfo->indexoid == indexOid)
5265                         break;
5266         }
5267
5268         /*
5269          * It's possible that get_relation_info did not generate an IndexOptInfo
5270          * for the desired index; this could happen if it's not yet reached its
5271          * indcheckxmin usability horizon, or if it's a system index and we're
5272          * ignoring system indexes.  In such cases we should tell CLUSTER to not
5273          * trust the index contents but use seqscan-and-sort.
5274          */
5275         if (lc == NULL)                         /* not in the list? */
5276                 return true;                    /* use sort */
5277
5278         /*
5279          * Rather than doing all the pushups that would be needed to use
5280          * set_baserel_size_estimates, just do a quick hack for rows and width.
5281          */
5282         rel->rows = rel->tuples;
5283         rel->reltarget->width = get_relation_data_width(tableOid, NULL);
5284
5285         root->total_table_pages = rel->pages;
5286
5287         /*
5288          * Determine eval cost of the index expressions, if any.  We need to
5289          * charge twice that amount for each tuple comparison that happens during
5290          * the sort, since tuplesort.c will have to re-evaluate the index
5291          * expressions each time.  (XXX that's pretty inefficient...)
5292          */
5293         cost_qual_eval(&indexExprCost, indexInfo->indexprs, root);
5294         comparisonCost = 2.0 * (indexExprCost.startup + indexExprCost.per_tuple);
5295
5296         /* Estimate the cost of seq scan + sort */
5297         seqScanPath = create_seqscan_path(root, rel, NULL, 0);
5298         cost_sort(&seqScanAndSortPath, root, NIL,
5299                           seqScanPath->total_cost, rel->tuples, rel->reltarget->width,
5300                           comparisonCost, maintenance_work_mem, -1.0);
5301
5302         /* Estimate the cost of index scan */
5303         indexScanPath = create_index_path(root, indexInfo,
5304                                                                           NIL, NIL, NIL, NIL, NIL,
5305                                                                           ForwardScanDirection, false,
5306                                                                           NULL, 1.0);
5307
5308         return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
5309 }