X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;f=src%2Fbackend%2Foptimizer%2Fplan%2Fplanner.c;h=841d85f7397da28f478290fdaae80de563a89c53;hb=511db38ace2690f19816465baed07cefe535bfec;hp=27d7b3dea26e6c80d03eb536360618b186b19fc7;hpb=2e4094dad86bd0d2a55bf6a47ae9c75068afadc4;p=postgresql diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 27d7b3dea2..841d85f739 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3,12 +3,12 @@ * planner.c * The query optimizer external interface. * - * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.229 2008/03/28 02:00:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.250 2009/01/01 17:23:44 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -42,6 +42,9 @@ #include "utils/syscache.h" +/* GUC parameter */ +double cursor_tuple_fraction = DEFAULT_CURSOR_TUPLE_FRACTION; + /* Hook for plugins to get control in planner() */ planner_hook_type planner_hook = NULL; @@ -52,8 +55,7 @@ planner_hook_type planner_hook = NULL; #define EXPRKIND_RTFUNC 2 #define EXPRKIND_VALUES 3 #define EXPRKIND_LIMIT 4 -#define EXPRKIND_ININFO 5 -#define EXPRKIND_APPINFO 6 +#define EXPRKIND_APPINFO 5 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); @@ -64,12 +66,15 @@ static bool is_dummy_plan(Plan *plan); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, int64 *offset_est, int64 *count_est); -static Oid *extract_grouping_ops(List *groupClause); +static void preprocess_groupclause(PlannerInfo *root); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, Path *cheapest_path, Path *sorted_path, - Oid *groupOperators, double dNumGroups, - AggClauseCounts *agg_counts); + double dNumGroups, AggClauseCounts *agg_counts); +static bool choose_hashed_distinct(PlannerInfo *root, + Plan *input_plan, List *input_pathkeys, + double tuple_fraction, double limit_tuples, + double dNumDistinctRows); static List *make_subplanTargetList(PlannerInfo *root, List *tlist, AttrNumber **groupColIdx, bool *need_tlist_eval); static void locate_grouping_columns(PlannerInfo *root, @@ -77,6 +82,18 @@ static void locate_grouping_columns(PlannerInfo *root, List *sub_tlist, AttrNumber *groupColIdx); static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist); +static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists); +static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, bool canonicalize); +static void get_column_info_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, + int numSortCols, AttrNumber *sortColIdx, + int *partNumCols, + AttrNumber **partColIdx, + Oid **partOperators, + int *ordNumCols, + AttrNumber **ordColIdx, + Oid **ordOperators); /***************************************************************************** @@ -135,6 +152,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) glob->rewindPlanIDs = NULL; glob->finalrtable = NIL; glob->relationOids = NIL; + glob->invalItems = NIL; + glob->lastPHId = 0; glob->transientPlan = false; /* Determine what fraction of the plan is likely to be scanned */ @@ -142,11 +161,23 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) { /* * We have no real idea how many tuples the user will ultimately FETCH - * from a cursor, but it seems a good bet that he doesn't want 'em - * all. Optimize for 10% retrieval (you gotta better number? Should - * this be a SETtable parameter?) + * from a cursor, but it is often the case that he doesn't want 'em + * all, or would prefer a fast-start plan anyway so that he can + * process some of the tuples sooner. Use a GUC parameter to decide + * what fraction to optimize for. + */ + tuple_fraction = cursor_tuple_fraction; + + /* + * We document cursor_tuple_fraction as simply being a fraction, + * which means the edge cases 0 and 1 have to be treated specially + * here. We convert 1 to 0 ("all the tuples") and 0 to a very small + * fraction. */ - tuple_fraction = 0.10; + if (tuple_fraction >= 1.0) + tuple_fraction = 0.0; + else if (tuple_fraction <= 0.0) + tuple_fraction = 1e-10; } else { @@ -155,7 +186,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) } /* primary planning entry point (may recurse for subqueries) */ - top_plan = subquery_planner(glob, parse, 1, tuple_fraction, &root); + top_plan = subquery_planner(glob, parse, NULL, + false, tuple_fraction, &root); /* * If creating a plan for a scrollable cursor, make sure it can run @@ -196,6 +228,7 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) result->returningLists = root->returningLists; result->rowMarks = parse->rowMarks; result->relationOids = glob->relationOids; + result->invalItems = glob->invalItems; result->nParamExec = list_length(glob->paramlist); return result; @@ -209,7 +242,8 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) * * glob is the global state for the current planner run. * parse is the querytree produced by the parser & rewriter. - * level is the current recursion depth (1 at the top-level Query). + * parent_root is the immediate parent Query's info (NULL at the top level). + * hasRecursion is true if this is a recursive WITH query. * tuple_fraction is the fraction of tuples we expect will be retrieved. * tuple_fraction is interpreted as explained for grouping_planner, below. * @@ -230,40 +264,56 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) */ Plan * subquery_planner(PlannerGlobal *glob, Query *parse, - Index level, double tuple_fraction, + PlannerInfo *parent_root, + bool hasRecursion, double tuple_fraction, PlannerInfo **subroot) { int num_old_subplans = list_length(glob->subplans); PlannerInfo *root; Plan *plan; List *newHaving; + bool hasOuterJoins; ListCell *l; /* Create a PlannerInfo data structure for this subquery */ root = makeNode(PlannerInfo); root->parse = parse; root->glob = glob; - root->query_level = level; + root->query_level = parent_root ? parent_root->query_level + 1 : 1; + root->parent_root = parent_root; root->planner_cxt = CurrentMemoryContext; root->init_plans = NIL; + root->cte_plan_ids = NIL; root->eq_classes = NIL; - root->in_info_list = NIL; root->append_rel_list = NIL; + root->hasRecursion = hasRecursion; + if (hasRecursion) + root->wt_param_id = SS_assign_worktable_param(root); + else + root->wt_param_id = -1; + root->non_recursive_plan = NULL; + + /* + * If there is a WITH list, process each WITH query and build an + * initplan SubPlan structure for it. + */ + if (parse->cteList) + SS_process_ctes(root); + /* - * Look for IN clauses at the top level of WHERE, and transform them into - * joins. Note that this step only handles IN clauses originally at top - * level of WHERE; if we pull up any subqueries below, their INs are + * Look for ANY and EXISTS SubLinks in WHERE and JOIN/ON clauses, and try + * to transform them into joins. Note that this step does not descend + * into subqueries; if we pull up any subqueries below, their SubLinks are * processed just before pulling them up. */ if (parse->hasSubLinks) - parse->jointree->quals = pull_up_IN_clauses(root, - parse->jointree->quals); + pull_up_sublinks(root); /* * Scan the rangetable for set-returning functions, and inline them * if possible (producing subqueries that might get pulled up next). - * Recursion issues here are handled in the same way as for IN clauses. + * Recursion issues here are handled in the same way as for SubLinks. */ inline_set_returning_functions(root); @@ -277,16 +327,11 @@ subquery_planner(PlannerGlobal *glob, Query *parse, /* * Detect whether any rangetable entries are RTE_JOIN kind; if not, we can * avoid the expense of doing flatten_join_alias_vars(). Also check for - * outer joins --- if none, we can skip reduce_outer_joins() and some - * other processing. This must be done after we have done - * pull_up_subqueries, of course. - * - * Note: if reduce_outer_joins manages to eliminate all outer joins, - * root->hasOuterJoins is not reset currently. This is OK since its - * purpose is merely to suppress unnecessary processing in simple cases. + * outer joins --- if none, we can skip reduce_outer_joins(). + * This must be done after we have done pull_up_subqueries, of course. */ root->hasJoinRTEs = false; - root->hasOuterJoins = false; + hasOuterJoins = false; foreach(l, parse->rtable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); @@ -296,7 +341,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, root->hasJoinRTEs = true; if (IS_OUTER_JOIN(rte->jointype)) { - root->hasOuterJoins = true; + hasOuterJoins = true; /* Can quit scanning once we find an outer join */ break; } @@ -344,9 +389,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse, parse->limitCount = preprocess_expression(root, parse->limitCount, EXPRKIND_LIMIT); - root->in_info_list = (List *) - preprocess_expression(root, (Node *) root->in_info_list, - EXPRKIND_ININFO); root->append_rel_list = (List *) preprocess_expression(root, (Node *) root->append_rel_list, EXPRKIND_APPINFO); @@ -424,7 +466,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, * This step is most easily done after we've done expression * preprocessing. */ - if (root->hasOuterJoins) + if (hasOuterJoins) reduce_outer_joins(root); /* @@ -444,7 +486,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, */ if (list_length(glob->subplans) != num_old_subplans || root->query_level > 1) - SS_finalize_plan(root, plan); + SS_finalize_plan(root, plan, true); /* Return internal info if caller wants it */ if (subroot) @@ -483,27 +525,18 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind) /* * Simplify constant expressions. * + * Note: one essential effect here is to insert the current actual values + * of any default arguments for functions. To ensure that happens, we + * *must* process all expressions here. Previous PG versions sometimes + * skipped const-simplification if it didn't seem worth the trouble, but + * we can't do that anymore. + * * Note: this also flattens nested AND and OR expressions into N-argument * form. All processing of a qual expression after this point must be * careful to maintain AND/OR flatness --- that is, do not generate a tree * with AND directly under AND, nor OR directly under OR. - * - * Because this is a relatively expensive process, we skip it when the - * query is trivial, such as "SELECT 2+2;" or "INSERT ... VALUES()". The - * expression will only be evaluated once anyway, so no point in - * pre-simplifying; we can't execute it any faster than the executor can, - * and we will waste cycles copying the tree. Notice however that we - * still must do it for quals (to get AND/OR flatness); and if we are in a - * subquery we should not assume it will be done only once. - * - * For VALUES lists we never do this at all, again on the grounds that we - * should optimize for one-time evaluation. */ - if (kind != EXPRKIND_VALUES && - (root->parse->jointree->fromlist != NIL || - kind == EXPRKIND_QUAL || - root->query_level > 1)) - expr = eval_const_expressions(expr); + expr = eval_const_expressions(root, expr); /* * If it's a qual or havingQual, canonicalize it. @@ -621,20 +654,19 @@ inheritance_planner(PlannerInfo *root) continue; /* - * Generate modified query with this rel as target. We have to be - * prepared to translate varnos in in_info_list as well as in the - * Query proper. + * Generate modified query with this rel as target. */ memcpy(&subroot, root, sizeof(PlannerInfo)); subroot.parse = (Query *) adjust_appendrel_attrs((Node *) parse, appinfo); - subroot.in_info_list = (List *) - adjust_appendrel_attrs((Node *) root->in_info_list, - appinfo); + subroot.returningLists = NIL; subroot.init_plans = NIL; + /* We needn't modify the child's append_rel_list */ /* There shouldn't be any OJ info to translate, as yet */ - Assert(subroot.oj_info_list == NIL); + Assert(subroot.join_info_list == NIL); + /* and we haven't created PlaceHolderInfos, either */ + Assert(subroot.placeholder_list == NIL); /* Generate plan */ subplan = grouping_planner(&subroot, 0.0 /* retrieve all tuples */ ); @@ -740,7 +772,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) double limit_tuples = -1.0; Plan *result_plan; List *current_pathkeys; - List *sort_pathkeys; double dNumGroups = 0; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ @@ -763,17 +794,18 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * If there's a top-level ORDER BY, assume we have to fetch all the - * tuples. This might seem too simplistic given all the hackery below - * to possibly avoid the sort ... but a nonzero tuple_fraction is only - * of use to plan_set_operations() when the setop is UNION ALL, and - * the result of UNION ALL is always unsorted. + * tuples. This might be too simplistic given all the hackery below + * to possibly avoid the sort; but the odds of accurate estimates + * here are pretty low anyway. */ if (parse->sortClause) tuple_fraction = 0.0; /* * Construct the plan for set operations. The result will not need - * any work except perhaps a top-level sort and/or LIMIT. + * any work except perhaps a top-level sort and/or LIMIT. Note that + * any special work for recursive unions is the responsibility of + * plan_set_operations. */ result_plan = plan_set_operations(root, tuple_fraction, &set_sortclauses); @@ -797,7 +829,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) */ Assert(parse->commandType == CMD_SELECT); - tlist = postprocess_setop_tlist(result_plan->targetlist, tlist); + tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist), + tlist); /* * Can't handle FOR UPDATE/SHARE here (parser should have checked @@ -811,18 +844,17 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * Calculate pathkeys that represent result ordering requirements */ - sort_pathkeys = make_pathkeys_for_sortclauses(root, - parse->sortClause, - tlist, - true); + Assert(parse->distinctClause == NIL); + root->sort_pathkeys = make_pathkeys_for_sortclauses(root, + parse->sortClause, + tlist, + true); } else { /* No set operations, do regular planning */ List *sub_tlist; - List *group_pathkeys; AttrNumber *groupColIdx = NULL; - Oid *groupOperators = NULL; bool need_tlist_eval = true; QualCost tlist_cost; Path *cheapest_path; @@ -830,29 +862,40 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) Path *best_path; long numGroups = 0; AggClauseCounts agg_counts; - int numGroupCols = list_length(parse->groupClause); + int numGroupCols; bool use_hashed_grouping = false; + WindowFuncLists *wflists = NULL; + List *activeWindows = NIL; MemSet(&agg_counts, 0, sizeof(AggClauseCounts)); + /* A recursive query should always have setOperations */ + Assert(!root->hasRecursion); + + /* Preprocess GROUP BY clause, if any */ + if (parse->groupClause) + preprocess_groupclause(root); + numGroupCols = list_length(parse->groupClause); + + /* Preprocess targetlist */ + tlist = preprocess_targetlist(root, tlist); + /* - * If the query involves ungrouped aggregation, then it can produce - * at most one row, so we can ignore any ORDER BY or DISTINCT - * request. This isn't all that exciting as an optimization, but it - * prevents a corner case when optimize_minmax_aggregates succeeds: - * if ORDER BY or DISTINCT were present we'd try, and fail, to match - * the EquivalenceClasses we're about to build with the modified - * targetlist entries it will create. + * Locate any window functions in the tlist. (We don't need to look + * anywhere else, since expressions used in ORDER BY will be in there + * too.) Note that they could all have been eliminated by constant + * folding, in which case we don't need to do any more work. */ - if (parse->hasAggs && parse->groupClause == NIL) + if (parse->hasWindowFuncs) { - parse->sortClause = NIL; - parse->distinctClause = NIL; + wflists = find_window_functions((Node *) tlist, + list_length(parse->windowClause)); + if (wflists->numWindowFuncs > 0) + activeWindows = select_active_windows(root, wflists); + else + parse->hasWindowFuncs = false; } - /* Preprocess targetlist */ - tlist = preprocess_targetlist(root, tlist); - /* * Generate appropriate target list for subplan; may be different from * tlist if grouping or aggregation is needed. @@ -863,13 +906,43 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize - * them after EquivalenceClasses have been formed. + * them after EquivalenceClasses have been formed. The sortClause + * is certainly sort-able, but GROUP BY and DISTINCT might not be, + * in which case we just leave their pathkeys empty. */ - root->group_pathkeys = - make_pathkeys_for_sortclauses(root, - parse->groupClause, - tlist, - false); + if (parse->groupClause && + grouping_is_sortable(parse->groupClause)) + root->group_pathkeys = + make_pathkeys_for_sortclauses(root, + parse->groupClause, + tlist, + false); + else + root->group_pathkeys = NIL; + + /* We consider only the first (bottom) window in pathkeys logic */ + if (activeWindows != NIL) + { + WindowClause *wc = (WindowClause *) linitial(activeWindows); + + root->window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist, + false); + } + else + root->window_pathkeys = NIL; + + if (parse->distinctClause && + grouping_is_sortable(parse->distinctClause)) + root->distinct_pathkeys = + make_pathkeys_for_sortclauses(root, + parse->distinctClause, + tlist, + false); + else + root->distinct_pathkeys = NIL; + root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, @@ -894,18 +967,31 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* - * Figure out whether we need a sorted result from query_planner. + * Figure out whether we want a sorted result from query_planner. + * + * If we have a sortable GROUP BY clause, then we want a result sorted + * properly for grouping. Otherwise, if we have window functions to + * evaluate, we try to sort for the first window. Otherwise, if + * there's a sortable DISTINCT clause that's more rigorous than the + * ORDER BY clause, we try to produce output that's sufficiently well + * sorted for the DISTINCT. Otherwise, if there is an ORDER BY + * clause, we want to sort by the ORDER BY clause. * - * If we have a GROUP BY clause, then we want a result sorted properly - * for grouping. Otherwise, if there is an ORDER BY clause, we want - * to sort by the ORDER BY clause. (Note: if we have both, and ORDER - * BY is a superset of GROUP BY, it would be tempting to request sort - * by ORDER BY --- but that might just leave us failing to exploit an - * available sort order at all. Needs more thought...) + * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a + * superset of GROUP BY, it would be tempting to request sort by ORDER + * BY --- but that might just leave us failing to exploit an available + * sort order at all. Needs more thought. The choice for DISTINCT + * versus ORDER BY is much easier, since we know that the parser + * ensured that one is a superset of the other. */ - if (parse->groupClause) + if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; - else if (parse->sortClause) + else if (root->window_pathkeys) + root->query_pathkeys = root->window_pathkeys; + else if (list_length(root->distinct_pathkeys) > + list_length(root->sort_pathkeys)) + root->query_pathkeys = root->distinct_pathkeys; + else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else root->query_pathkeys = NIL; @@ -919,21 +1005,40 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) query_planner(root, sub_tlist, tuple_fraction, limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); - group_pathkeys = root->group_pathkeys; - sort_pathkeys = root->sort_pathkeys; - /* - * If grouping, extract the grouping operators and decide whether we - * want to use hashed grouping. + * If grouping, decide whether to use sorted or hashed grouping. */ if (parse->groupClause) { - groupOperators = extract_grouping_ops(parse->groupClause); - use_hashed_grouping = - choose_hashed_grouping(root, tuple_fraction, limit_tuples, - cheapest_path, sorted_path, - groupOperators, dNumGroups, - &agg_counts); + bool can_hash; + bool can_sort; + + /* + * Executor doesn't support hashed aggregation with DISTINCT + * aggregates. (Doing so would imply storing *all* the input + * values in the hash table, which seems like a certain loser.) + */ + can_hash = (agg_counts.numDistinctAggs == 0 && + grouping_is_hashable(parse->groupClause)); + can_sort = grouping_is_sortable(parse->groupClause); + if (can_hash && can_sort) + { + /* we have a meaningful choice to make ... */ + use_hashed_grouping = + choose_hashed_grouping(root, + tuple_fraction, limit_tuples, + cheapest_path, sorted_path, + dNumGroups, &agg_counts); + } + else if (can_hash) + use_hashed_grouping = true; + else if (can_sort) + use_hashed_grouping = false; + else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); /* Also convert # groups to long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); @@ -972,9 +1077,23 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Normal case --- create a plan according to query_planner's * results. */ + bool need_sort_for_grouping = false; + result_plan = create_plan(root, best_path); current_pathkeys = best_path->pathkeys; + /* Detect if we'll need an explicit sort for grouping */ + if (parse->groupClause && !use_hashed_grouping && + !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) + { + need_sort_for_grouping = true; + /* + * Always override query_planner's tlist, so that we don't + * sort useless data from a "physical" tlist. + */ + need_tlist_eval = true; + } + /* * create_plan() returns a plan with just a "flat" tlist of * required Vars. Usually we need to insert the sub_tlist as the @@ -1019,10 +1138,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * * Below this point, any tlist eval cost for added-on nodes * should be accounted for as we create those nodes. - * Presently, of the node types we can add on, only Agg and - * Group project new tlists (the rest just copy their input - * tuples) --- so make_agg() and make_group() are responsible - * for computing the added cost. + * Presently, of the node types we can add on, only Agg, + * WindowAgg, and Group project new tlists (the rest just copy + * their input tuples) --- so make_agg(), make_windowagg() and + * make_group() are responsible for computing the added cost. */ cost_qual_eval(&tlist_cost, sub_tlist, root); result_plan->startup_cost += tlist_cost.startup; @@ -1055,7 +1174,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) AGG_HASHED, numGroupCols, groupColIdx, - groupOperators, + extract_grouping_ops(parse->groupClause), numGroups, agg_counts.numAggs, result_plan); @@ -1069,15 +1188,14 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) if (parse->groupClause) { - if (!pathkeys_contained_in(group_pathkeys, - current_pathkeys)) + if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); - current_pathkeys = group_pathkeys; + current_pathkeys = root->group_pathkeys; } aggstrategy = AGG_SORTED; @@ -1099,7 +1217,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) aggstrategy, numGroupCols, groupColIdx, - groupOperators, + extract_grouping_ops(parse->groupClause), numGroups, agg_counts.numAggs, result_plan); @@ -1113,14 +1231,14 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * Add an explicit sort if we couldn't make the path come out * the way the GROUP node needs it. */ - if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) + if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); - current_pathkeys = group_pathkeys; + current_pathkeys = root->group_pathkeys; } result_plan = (Plan *) make_group(root, @@ -1128,7 +1246,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) (List *) parse->havingQual, numGroupCols, groupColIdx, - groupOperators, + extract_grouping_ops(parse->groupClause), dNumGroups, result_plan); /* The Group node won't change sort ordering */ @@ -1153,38 +1271,283 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) NULL); } } /* end of non-minmax-aggregate case */ + + /* + * Since each window function could require a different sort order, + * we stack up a WindowAgg node for each window, with sort steps + * between them as needed. + */ + if (activeWindows) + { + List *window_tlist; + ListCell *l; + + /* + * If the top-level plan node is one that cannot do expression + * evaluation, we must insert a Result node to project the + * desired tlist. (In some cases this might not really be + * required, but it's not worth trying to avoid it.) Note that + * on second and subsequent passes through the following loop, + * the top-level node will be a WindowAgg which we know can + * project; so we only need to check once. + */ + if (!is_projection_capable_plan(result_plan)) + { + result_plan = (Plan *) make_result(root, + NIL, + NULL, + result_plan); + } + + /* + * The "base" targetlist for all steps of the windowing process + * is a flat tlist of all Vars and Aggs needed in the result. + * (In some cases we wouldn't need to propagate all of these + * all the way to the top, since they might only be needed as + * inputs to WindowFuncs. It's probably not worth trying to + * optimize that though.) As we climb up the stack, we add + * outputs for the WindowFuncs computed at each level. Also, + * each input tlist has to present all the columns needed to + * sort the data for the next WindowAgg step. That's handled + * internally by make_sort_from_pathkeys, but we need the + * copyObject steps here to ensure that each plan node has + * a separately modifiable tlist. + */ + window_tlist = flatten_tlist(tlist); + if (parse->hasAggs) + window_tlist = add_to_flat_tlist(window_tlist, + pull_agg_clause((Node *) tlist)); + result_plan->targetlist = (List *) copyObject(window_tlist); + + foreach(l, activeWindows) + { + WindowClause *wc = (WindowClause *) lfirst(l); + List *window_pathkeys; + int partNumCols; + AttrNumber *partColIdx; + Oid *partOperators; + int ordNumCols; + AttrNumber *ordColIdx; + Oid *ordOperators; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + tlist, + true); + + /* + * This is a bit tricky: we build a sort node even if we don't + * really have to sort. Even when no explicit sort is needed, + * we need to have suitable resjunk items added to the input + * plan's tlist for any partitioning or ordering columns that + * aren't plain Vars. Furthermore, this way we can use + * existing infrastructure to identify which input columns are + * the interesting ones. + */ + if (window_pathkeys) + { + Sort *sort_plan; + + sort_plan = make_sort_from_pathkeys(root, + result_plan, + window_pathkeys, + -1.0); + if (!pathkeys_contained_in(window_pathkeys, + current_pathkeys)) + { + /* we do indeed need to sort */ + result_plan = (Plan *) sort_plan; + current_pathkeys = window_pathkeys; + } + /* In either case, extract the per-column information */ + get_column_info_for_window(root, wc, tlist, + sort_plan->numCols, + sort_plan->sortColIdx, + &partNumCols, + &partColIdx, + &partOperators, + &ordNumCols, + &ordColIdx, + &ordOperators); + } + else + { + /* empty window specification, nothing to sort */ + partNumCols = 0; + partColIdx = NULL; + partOperators = NULL; + ordNumCols = 0; + ordColIdx = NULL; + ordOperators = NULL; + } + + if (lnext(l)) + { + /* Add the current WindowFuncs to the running tlist */ + window_tlist = add_to_flat_tlist(window_tlist, + wflists->windowFuncs[wc->winref]); + } + else + { + /* Install the original tlist in the topmost WindowAgg */ + window_tlist = tlist; + } + + /* ... and make the WindowAgg plan node */ + result_plan = (Plan *) + make_windowagg(root, + (List *) copyObject(window_tlist), + list_length(wflists->windowFuncs[wc->winref]), + wc->winref, + partNumCols, + partColIdx, + partOperators, + ordNumCols, + ordColIdx, + ordOperators, + wc->frameOptions, + result_plan); + } + } } /* end of if (setOperations) */ /* - * If we were not able to make the plan come out in the right order, add - * an explicit sort step. + * If there is a DISTINCT clause, add the necessary node(s). */ - if (parse->sortClause) + if (parse->distinctClause) { - if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys)) + double dNumDistinctRows; + long numDistinctRows; + bool use_hashed_distinct; + bool can_sort; + bool can_hash; + + /* + * If there was grouping or aggregation, use the current number of + * rows as the estimated number of DISTINCT rows (ie, assume the + * result was already mostly unique). If not, use the number of + * distinct-groups calculated by query_planner. + */ + if (parse->groupClause || root->hasHavingQual || parse->hasAggs) + dNumDistinctRows = result_plan->plan_rows; + else + dNumDistinctRows = dNumGroups; + + /* Also convert to long int --- but 'ware overflow! */ + numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); + + /* + * If we have a sortable DISTINCT ON clause, we always use sorting. + * This enforces the expected behavior of DISTINCT ON. + */ + can_sort = grouping_is_sortable(parse->distinctClause); + if (can_sort && parse->hasDistinctOn) + use_hashed_distinct = false; + else { - result_plan = (Plan *) make_sort_from_pathkeys(root, - result_plan, - sort_pathkeys, - limit_tuples); - current_pathkeys = sort_pathkeys; + can_hash = grouping_is_hashable(parse->distinctClause); + if (can_hash && can_sort) + { + /* we have a meaningful choice to make ... */ + use_hashed_distinct = + choose_hashed_distinct(root, + result_plan, current_pathkeys, + tuple_fraction, limit_tuples, + dNumDistinctRows); + } + else if (can_hash) + use_hashed_distinct = true; + else if (can_sort) + use_hashed_distinct = false; + else + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement DISTINCT"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + use_hashed_distinct = false; /* keep compiler quiet */ + } + } + + if (use_hashed_distinct) + { + /* Hashed aggregate plan --- no sort needed */ + result_plan = (Plan *) make_agg(root, + result_plan->targetlist, + NIL, + AGG_HASHED, + list_length(parse->distinctClause), + extract_grouping_cols(parse->distinctClause, + result_plan->targetlist), + extract_grouping_ops(parse->distinctClause), + numDistinctRows, + 0, + result_plan); + /* Hashed aggregation produces randomly-ordered results */ + current_pathkeys = NIL; + } + else + { + /* + * Use a Unique node to implement DISTINCT. Add an explicit sort + * if we couldn't make the path come out the way the Unique node + * needs it. If we do have to sort, always sort by the more + * rigorous of DISTINCT and ORDER BY, to avoid a second sort + * below. However, for regular DISTINCT, don't sort now if we + * don't have to --- sorting afterwards will likely be cheaper, + * and also has the possibility of optimizing via LIMIT. But + * for DISTINCT ON, we *must* force the final sort now, else + * it won't have the desired behavior. + */ + List *needed_pathkeys; + + if (parse->hasDistinctOn && + list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + needed_pathkeys = root->sort_pathkeys; + else + needed_pathkeys = root->distinct_pathkeys; + + if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) + { + if (list_length(root->distinct_pathkeys) >= + list_length(root->sort_pathkeys)) + current_pathkeys = root->distinct_pathkeys; + else + { + current_pathkeys = root->sort_pathkeys; + /* Assert checks that parser didn't mess up... */ + Assert(pathkeys_contained_in(root->distinct_pathkeys, + current_pathkeys)); + } + + result_plan = (Plan *) make_sort_from_pathkeys(root, + result_plan, + current_pathkeys, + -1.0); + } + + result_plan = (Plan *) make_unique(result_plan, + parse->distinctClause); + result_plan->plan_rows = dNumDistinctRows; + /* The Unique node won't change sort ordering */ } } /* - * If there is a DISTINCT clause, add the UNIQUE node. + * If ORDER BY was given and we were not able to make the plan come out in + * the right order, add an explicit sort step. */ - if (parse->distinctClause) + if (parse->sortClause) { - result_plan = (Plan *) make_unique(result_plan, parse->distinctClause); - - /* - * If there was grouping or aggregation, leave plan_rows as-is (ie, - * assume the result was already mostly unique). If not, use the - * number of distinct-groups calculated by query_planner. - */ - if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs) - result_plan->plan_rows = dNumGroups; + if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + { + result_plan = (Plan *) make_sort_from_pathkeys(root, + result_plan, + root->sort_pathkeys, + limit_tuples); + current_pathkeys = root->sort_pathkeys; + } } /* @@ -1450,72 +1813,117 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction, return tuple_fraction; } + /* - * extract_grouping_ops - make an array of the equality operator OIDs - * for the GROUP BY clause + * preprocess_groupclause - do preparatory work on GROUP BY clause + * + * The idea here is to adjust the ordering of the GROUP BY elements + * (which in itself is semantically insignificant) to match ORDER BY, + * thereby allowing a single sort operation to both implement the ORDER BY + * requirement and set up for a Unique step that implements GROUP BY. + * + * In principle it might be interesting to consider other orderings of the + * GROUP BY elements, which could match the sort ordering of other + * possible plans (eg an indexscan) and thereby reduce cost. We don't + * bother with that, though. Hashed grouping will frequently win anyway. + * + * Note: we need no comparable processing of the distinctClause because + * the parser already enforced that that matches ORDER BY. */ -static Oid * -extract_grouping_ops(List *groupClause) +static void +preprocess_groupclause(PlannerInfo *root) { - int numCols = list_length(groupClause); - int colno = 0; - Oid *groupOperators; - ListCell *glitem; + Query *parse = root->parse; + List *new_groupclause; + bool partial_match; + ListCell *sl; + ListCell *gl; - groupOperators = (Oid *) palloc(sizeof(Oid) * numCols); + /* If no ORDER BY, nothing useful to do here */ + if (parse->sortClause == NIL) + return; - foreach(glitem, groupClause) + /* + * Scan the ORDER BY clause and construct a list of matching GROUP BY + * items, but only as far as we can make a matching prefix. + * + * This code assumes that the sortClause contains no duplicate items. + */ + new_groupclause = NIL; + foreach(sl, parse->sortClause) { - GroupClause *groupcl = (GroupClause *) lfirst(glitem); + SortGroupClause *sc = (SortGroupClause *) lfirst(sl); + + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(gl); + + if (equal(gc, sc)) + { + new_groupclause = lappend(new_groupclause, gc); + break; + } + } + if (gl == NULL) + break; /* no match, so stop scanning */ + } + + /* Did we match all of the ORDER BY list, or just some of it? */ + partial_match = (sl != NULL); - groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop); - if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */ - elog(ERROR, "could not find equality operator for ordering operator %u", - groupcl->sortop); - colno++; + /* If no match at all, no point in reordering GROUP BY */ + if (new_groupclause == NIL) + return; + + /* + * Add any remaining GROUP BY items to the new list, but only if we + * were able to make a complete match. In other words, we only + * rearrange the GROUP BY list if the result is that one list is a + * prefix of the other --- otherwise there's no possibility of a + * common sort. Also, give up if there are any non-sortable GROUP BY + * items, since then there's no hope anyway. + */ + foreach(gl, parse->groupClause) + { + SortGroupClause *gc = (SortGroupClause *) lfirst(gl); + + if (list_member_ptr(new_groupclause, gc)) + continue; /* it matched an ORDER BY item */ + if (partial_match) + return; /* give up, no common sort possible */ + if (!OidIsValid(gc->sortop)) + return; /* give up, GROUP BY can't be sorted */ + new_groupclause = lappend(new_groupclause, gc); } - return groupOperators; + /* Success --- install the rearranged GROUP BY list */ + Assert(list_length(parse->groupClause) == list_length(new_groupclause)); + parse->groupClause = new_groupclause; } /* * choose_hashed_grouping - should we use hashed grouping? + * + * Note: this is only applied when both alternatives are actually feasible. */ static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, Path *cheapest_path, Path *sorted_path, - Oid *groupOperators, double dNumGroups, - AggClauseCounts *agg_counts) + double dNumGroups, AggClauseCounts *agg_counts) { int numGroupCols = list_length(root->parse->groupClause); double cheapest_path_rows; int cheapest_path_width; Size hashentrysize; + List *target_pathkeys; List *current_pathkeys; Path hashed_p; Path sorted_p; - int i; - /* - * Check can't-do-it conditions, including whether the grouping operators - * are hashjoinable. (We assume hashing is OK if they are marked - * oprcanhash. If there isn't actually a supporting hash function, the - * executor will complain at runtime.) - * - * Executor doesn't support hashed aggregation with DISTINCT aggregates. - * (Doing so would imply storing *all* the input values in the hash table, - * which seems like a certain loser.) - */ + /* Prefer sorting when enable_hashagg is off */ if (!enable_hashagg) return false; - if (agg_counts->numDistinctAggs != 0) - return false; - for (i = 0; i < numGroupCols; i++) - { - if (!op_hashjoinable(groupOperators[i])) - return false; - } /* * Don't do it if it doesn't look like the hashtable will fit into @@ -1545,6 +1953,20 @@ choose_hashed_grouping(PlannerInfo *root, if (hashentrysize * dNumGroups > work_mem * 1024L) return false; + /* + * When we have both GROUP BY and DISTINCT, use the more-rigorous of + * DISTINCT and ORDER BY as the assumed required output sort order. + * This is an oversimplification because the DISTINCT might get + * implemented via hashing, but it's not clear that the case is common + * enough (or that our estimates are good enough) to justify trying to + * solve it exactly. + */ + if (list_length(root->distinct_pathkeys) > + list_length(root->sort_pathkeys)) + target_pathkeys = root->distinct_pathkeys; + else + target_pathkeys = root->sort_pathkeys; + /* * See if the estimated cost is no more than doing it the other way. While * avoiding the need for sorted input is usually a win, the fact that the @@ -1566,8 +1988,8 @@ choose_hashed_grouping(PlannerInfo *root, cheapest_path->startup_cost, cheapest_path->total_cost, cheapest_path_rows); /* Result of hashed agg is always unsorted */ - if (root->sort_pathkeys) - cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, + if (target_pathkeys) + cost_sort(&hashed_p, root, target_pathkeys, hashed_p.total_cost, dNumGroups, cheapest_path_width, limit_tuples); if (sorted_path) @@ -1599,9 +2021,9 @@ choose_hashed_grouping(PlannerInfo *root, sorted_p.startup_cost, sorted_p.total_cost, cheapest_path_rows); /* The Agg or Group node will preserve ordering */ - if (root->sort_pathkeys && - !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) - cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, + if (target_pathkeys && + !pathkeys_contained_in(target_pathkeys, current_pathkeys)) + cost_sort(&sorted_p, root, target_pathkeys, sorted_p.total_cost, dNumGroups, cheapest_path_width, limit_tuples); /* @@ -1620,6 +2042,120 @@ choose_hashed_grouping(PlannerInfo *root, return false; } +/* + * choose_hashed_distinct - should we use hashing for DISTINCT? + * + * This is fairly similar to choose_hashed_grouping, but there are enough + * differences that it doesn't seem worth trying to unify the two functions. + * + * But note that making the two choices independently is a bit bogus in + * itself. If the two could be combined into a single choice operation + * it'd probably be better, but that seems far too unwieldy to be practical, + * especially considering that the combination of GROUP BY and DISTINCT + * isn't very common in real queries. By separating them, we are giving + * extra preference to using a sorting implementation when a common sort key + * is available ... and that's not necessarily wrong anyway. + * + * Note: this is only applied when both alternatives are actually feasible. + */ +static bool +choose_hashed_distinct(PlannerInfo *root, + Plan *input_plan, List *input_pathkeys, + double tuple_fraction, double limit_tuples, + double dNumDistinctRows) +{ + int numDistinctCols = list_length(root->parse->distinctClause); + Size hashentrysize; + List *current_pathkeys; + List *needed_pathkeys; + Path hashed_p; + Path sorted_p; + + /* Prefer sorting when enable_hashagg is off */ + if (!enable_hashagg) + return false; + + /* + * Don't do it if it doesn't look like the hashtable will fit into + * work_mem. + */ + hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData)); + + if (hashentrysize * dNumDistinctRows > work_mem * 1024L) + return false; + + /* + * See if the estimated cost is no more than doing it the other way. While + * avoiding the need for sorted input is usually a win, the fact that the + * output won't be sorted may be a loss; so we need to do an actual cost + * comparison. + * + * We need to consider input_plan + hashagg [+ final sort] versus + * input_plan [+ sort] + group [+ final sort] where brackets indicate + * a step that may not be needed. + * + * These path variables are dummies that just hold cost fields; we don't + * make actual Paths for these steps. + */ + cost_agg(&hashed_p, root, AGG_HASHED, 0, + numDistinctCols, dNumDistinctRows, + input_plan->startup_cost, input_plan->total_cost, + input_plan->plan_rows); + /* + * Result of hashed agg is always unsorted, so if ORDER BY is present + * we need to charge for the final sort. + */ + if (root->parse->sortClause) + cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, + dNumDistinctRows, input_plan->plan_width, limit_tuples); + + /* + * Now for the GROUP case. See comments in grouping_planner about the + * sorting choices here --- this code should match that code. + */ + sorted_p.startup_cost = input_plan->startup_cost; + sorted_p.total_cost = input_plan->total_cost; + current_pathkeys = input_pathkeys; + if (root->parse->hasDistinctOn && + list_length(root->distinct_pathkeys) < + list_length(root->sort_pathkeys)) + needed_pathkeys = root->sort_pathkeys; + else + needed_pathkeys = root->distinct_pathkeys; + if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) + { + if (list_length(root->distinct_pathkeys) >= + list_length(root->sort_pathkeys)) + current_pathkeys = root->distinct_pathkeys; + else + current_pathkeys = root->sort_pathkeys; + cost_sort(&sorted_p, root, current_pathkeys, sorted_p.total_cost, + input_plan->plan_rows, input_plan->plan_width, -1.0); + } + cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows, + sorted_p.startup_cost, sorted_p.total_cost, + input_plan->plan_rows); + if (root->parse->sortClause && + !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) + cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, + dNumDistinctRows, input_plan->plan_width, limit_tuples); + + /* + * Now make the decision using the top-level tuple fraction. First we + * have to convert an absolute count (LIMIT) into fractional form. + */ + if (tuple_fraction >= 1.0) + tuple_fraction /= dNumDistinctRows; + + if (compare_fractional_path_costs(&hashed_p, &sorted_p, + tuple_fraction) < 0) + { + /* Hashed is cheaper, so use it */ + return true; + } + return false; +} + /*--------------- * make_subplanTargetList * Generate appropriate target list when grouping is required. @@ -1678,7 +2214,8 @@ make_subplanTargetList(PlannerInfo *root, * If we're not grouping or aggregating, there's nothing to do here; * query_planner should receive the unmodified target list. */ - if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual) + if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual && + !parse->hasWindowFuncs) { *need_tlist_eval = true; return tlist; @@ -1686,11 +2223,13 @@ make_subplanTargetList(PlannerInfo *root, /* * Otherwise, start with a "flattened" tlist (having just the vars - * mentioned in the targetlist and HAVING qual --- but not upper- level - * Vars; they will be replaced by Params later on). + * mentioned in the targetlist and HAVING qual --- but not upper-level + * Vars; they will be replaced by Params later on). Note this includes + * vars used in resjunk items, so we are covering the needs of ORDER BY + * and window specifications. */ sub_tlist = flatten_tlist(tlist); - extravars = pull_var_clause(parse->havingQual, false); + extravars = pull_var_clause(parse->havingQual, true); sub_tlist = add_to_flat_tlist(sub_tlist, extravars); list_free(extravars); *need_tlist_eval = false; /* only eval if not flat tlist */ @@ -1712,19 +2251,21 @@ make_subplanTargetList(PlannerInfo *root, foreach(gl, parse->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(gl); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te = NULL; - ListCell *sl; + TargetEntry *te; - /* Find or make a matching sub_tlist entry */ - foreach(sl, sub_tlist) - { - te = (TargetEntry *) lfirst(sl); - if (equal(groupexpr, te->expr)) - break; - } - if (!sl) + /* + * Find or make a matching sub_tlist entry. If the groupexpr + * isn't a Var, no point in searching. (Note that the parser + * won't make multiple groupClause entries for the same TLE.) + */ + if (groupexpr && IsA(groupexpr, Var)) + te = tlist_member(groupexpr, sub_tlist); + else + te = NULL; + + if (!te) { te = makeTargetEntry((Expr *) groupexpr, list_length(sub_tlist) + 1, @@ -1748,7 +2289,7 @@ make_subplanTargetList(PlannerInfo *root, * * This is only needed if we don't use the sub_tlist chosen by * make_subplanTargetList. We have to forget the column indexes found - * by that routine and re-locate the grouping vars in the real sub_tlist. + * by that routine and re-locate the grouping exprs in the real sub_tlist. */ static void locate_grouping_columns(PlannerInfo *root, @@ -1771,20 +2312,12 @@ locate_grouping_columns(PlannerInfo *root, foreach(gl, root->parse->groupClause) { - GroupClause *grpcl = (GroupClause *) lfirst(gl); + SortGroupClause *grpcl = (SortGroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); - TargetEntry *te = NULL; - ListCell *sl; + TargetEntry *te = tlist_member(groupexpr, sub_tlist); - foreach(sl, sub_tlist) - { - te = (TargetEntry *) lfirst(sl); - if (equal(groupexpr, te->expr)) - break; - } - if (!sl) + if (!te) elog(ERROR, "failed to locate grouping columns"); - groupColIdx[keyno++] = te->resno; } } @@ -1826,3 +2359,220 @@ postprocess_setop_tlist(List *new_tlist, List *orig_tlist) elog(ERROR, "resjunk output columns are not implemented"); return new_tlist; } + +/* + * select_active_windows + * Create a list of the "active" window clauses (ie, those referenced + * by non-deleted WindowFuncs) in the order they are to be executed. + */ +static List * +select_active_windows(PlannerInfo *root, WindowFuncLists *wflists) +{ + List *result; + List *actives; + ListCell *lc; + + /* First, make a list of the active windows */ + actives = NIL; + foreach(lc, root->parse->windowClause) + { + WindowClause *wc = (WindowClause *) lfirst(lc); + + /* It's only active if wflists shows some related WindowFuncs */ + Assert(wc->winref <= wflists->maxWinRef); + if (wflists->windowFuncs[wc->winref] != NIL) + actives = lappend(actives, wc); + } + + /* + * Now, ensure that windows with identical partitioning/ordering clauses + * are adjacent in the list. This is required by the SQL standard, which + * says that only one sort is to be used for such windows, even if they + * are otherwise distinct (eg, different names or framing clauses). + * + * There is room to be much smarter here, for example detecting whether + * one window's sort keys are a prefix of another's (so that sorting + * for the latter would do for the former), or putting windows first + * that match a sort order available for the underlying query. For the + * moment we are content with meeting the spec. + */ + result = NIL; + while (actives != NIL) + { + WindowClause *wc = (WindowClause *) linitial(actives); + ListCell *prev; + ListCell *next; + + /* Move wc from actives to result */ + actives = list_delete_first(actives); + result = lappend(result, wc); + + /* Now move any matching windows from actives to result */ + prev = NULL; + for (lc = list_head(actives); lc; lc = next) + { + WindowClause *wc2 = (WindowClause *) lfirst(lc); + + next = lnext(lc); + /* framing options are NOT to be compared here! */ + if (equal(wc->partitionClause, wc2->partitionClause) && + equal(wc->orderClause, wc2->orderClause)) + { + actives = list_delete_cell(actives, lc, prev); + result = lappend(result, wc2); + } + else + prev = lc; + } + } + + return result; +} + +/* + * make_pathkeys_for_window + * Create a pathkeys list describing the required input ordering + * for the given WindowClause. + * + * The required ordering is first the PARTITION keys, then the ORDER keys. + * In the future we might try to implement windowing using hashing, in which + * case the ordering could be relaxed, but for now we always sort. + */ +static List * +make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, + List *tlist, bool canonicalize) +{ + List *window_pathkeys; + List *window_sortclauses; + + /* Throw error if can't sort */ + if (!grouping_is_sortable(wc->partitionClause)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement window PARTITION BY"), + errdetail("Window partitioning columns must be of sortable datatypes."))); + if (!grouping_is_sortable(wc->orderClause)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement window ORDER BY"), + errdetail("Window ordering columns must be of sortable datatypes."))); + + /* Okay, make the combined pathkeys */ + window_sortclauses = list_concat(list_copy(wc->partitionClause), + list_copy(wc->orderClause)); + window_pathkeys = make_pathkeys_for_sortclauses(root, + window_sortclauses, + tlist, + canonicalize); + list_free(window_sortclauses); + return window_pathkeys; +} + +/*---------- + * get_column_info_for_window + * Get the partitioning/ordering column numbers and equality operators + * for a WindowAgg node. + * + * This depends on the behavior of make_pathkeys_for_window()! + * + * We are given the target WindowClause and an array of the input column + * numbers associated with the resulting pathkeys. In the easy case, there + * are the same number of pathkey columns as partitioning + ordering columns + * and we just have to copy some data around. However, it's possible that + * some of the original partitioning + ordering columns were eliminated as + * redundant during the transformation to pathkeys. (This can happen even + * though the parser gets rid of obvious duplicates. A typical scenario is a + * window specification "PARTITION BY x ORDER BY y" coupled with a clause + * "WHERE x = y" that causes the two sort columns to be recognized as + * redundant.) In that unusual case, we have to work a lot harder to + * determine which keys are significant. + * + * The method used here is a bit brute-force: add the sort columns to a list + * one at a time and note when the resulting pathkey list gets longer. But + * it's a sufficiently uncommon case that a faster way doesn't seem worth + * the amount of code refactoring that'd be needed. + *---------- + */ +static void +get_column_info_for_window(PlannerInfo *root, WindowClause *wc, List *tlist, + int numSortCols, AttrNumber *sortColIdx, + int *partNumCols, + AttrNumber **partColIdx, + Oid **partOperators, + int *ordNumCols, + AttrNumber **ordColIdx, + Oid **ordOperators) +{ + int numPart = list_length(wc->partitionClause); + int numOrder = list_length(wc->orderClause); + + if (numSortCols == numPart + numOrder) + { + /* easy case */ + *partNumCols = numPart; + *partColIdx = sortColIdx; + *partOperators = extract_grouping_ops(wc->partitionClause); + *ordNumCols = numOrder; + *ordColIdx = sortColIdx + numPart; + *ordOperators = extract_grouping_ops(wc->orderClause); + } + else + { + List *sortclauses; + List *pathkeys; + int scidx; + ListCell *lc; + + /* first, allocate what's certainly enough space for the arrays */ + *partNumCols = 0; + *partColIdx = (AttrNumber *) palloc(numPart * sizeof(AttrNumber)); + *partOperators = (Oid *) palloc(numPart * sizeof(Oid)); + *ordNumCols = 0; + *ordColIdx = (AttrNumber *) palloc(numOrder * sizeof(AttrNumber)); + *ordOperators = (Oid *) palloc(numOrder * sizeof(Oid)); + sortclauses = NIL; + pathkeys = NIL; + scidx = 0; + foreach(lc, wc->partitionClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + List *new_pathkeys; + + sortclauses = lappend(sortclauses, sgc); + new_pathkeys = make_pathkeys_for_sortclauses(root, + sortclauses, + tlist, + true); + if (list_length(new_pathkeys) > list_length(pathkeys)) + { + /* this sort clause is actually significant */ + *partColIdx[*partNumCols] = sortColIdx[scidx++]; + *partOperators[*partNumCols] = sgc->eqop; + (*partNumCols)++; + pathkeys = new_pathkeys; + } + } + foreach(lc, wc->orderClause) + { + SortGroupClause *sgc = (SortGroupClause *) lfirst(lc); + List *new_pathkeys; + + sortclauses = lappend(sortclauses, sgc); + new_pathkeys = make_pathkeys_for_sortclauses(root, + sortclauses, + tlist, + true); + if (list_length(new_pathkeys) > list_length(pathkeys)) + { + /* this sort clause is actually significant */ + *ordColIdx[*ordNumCols] = sortColIdx[scidx++]; + *ordOperators[*ordNumCols] = sgc->eqop; + (*ordNumCols)++; + pathkeys = new_pathkeys; + } + } + /* complain if we didn't eat exactly the right number of sort cols */ + if (scidx != numSortCols) + elog(ERROR, "failed to deconstruct sort operators into partitioning/ordering operators"); + } +}