From e2f1eb0ee30d144628ab523432320f174a2c8966 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 22 Mar 2018 12:49:48 -0400 Subject: [PATCH] Implement partition-wise grouping/aggregation. If the partition keys of input relation are part of the GROUP BY clause, all the rows belonging to a given group come from a single partition. This allows aggregation/grouping over a partitioned relation to be broken down into aggregation/grouping on each partition. This should be no worse, and often better, than the normal approach. If the GROUP BY clause does not contain all the partition keys, we can still perform partial aggregation for each partition and then finalize aggregation after appending the partial results. This is less certain to be a win, but it's still useful. Jeevan Chalke, Ashutosh Bapat, Robert Haas. The larger patch series of which this patch is a part was also reviewed and tested by Antonin Houska, Rajkumar Raghuwanshi, David Rowley, Dilip Kumar, Konstantin Knizhnik, Pascal Legrand, and Rafia Sabih. Discussion: http://postgr.es/m/CAM2+6=V64_xhstVHie0Rz=KPEQnLJMZt_e314P0jaT_oJ9MR8A@mail.gmail.com --- doc/src/sgml/config.sgml | 20 + src/backend/optimizer/README | 14 + src/backend/optimizer/path/allpaths.c | 17 +- src/backend/optimizer/path/costsize.c | 1 + src/backend/optimizer/plan/createplan.c | 10 +- src/backend/optimizer/plan/planner.c | 873 ++++++++--- src/backend/utils/misc/guc.c | 9 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/nodes/relation.h | 75 +- src/include/optimizer/cost.h | 1 + src/include/optimizer/paths.h | 2 + .../regress/expected/partition_aggregate.out | 1393 +++++++++++++++++ src/test/regress/expected/sysviews.out | 37 +- src/test/regress/parallel_schedule | 2 +- src/test/regress/serial_schedule | 1 + src/test/regress/sql/partition_aggregate.sql | 292 ++++ src/tools/pgindent/typedefs.list | 2 + 17 files changed, 2544 insertions(+), 206 deletions(-) create mode 100644 src/test/regress/expected/partition_aggregate.out create mode 100644 src/test/regress/sql/partition_aggregate.sql diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 0d61dcb179..4d899e3b24 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3821,6 +3821,26 @@ ANY num_sync ( + enable_partitionwise_aggregate (boolean) + + enable_partitionwise_aggregate configuration parameter + + + + + Enables or disables the query planner's use of partitionwise grouping + or aggregation, which allows grouping or aggregation on partitioned + tables to be performed separately for each partition. If the GROUP + BY clause does not include the partition keys, only partial + aggregation can be performed on a per-partition basis, and + finalization must be performed later. Because partitionwise grouping + or aggregation can use significantly more CPU time and memory during + planning, the default is off. + + + + enable_seqscan (boolean) diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README index 3e254c8b2d..c29b79a0c3 100644 --- a/src/backend/optimizer/README +++ b/src/backend/optimizer/README @@ -1079,6 +1079,7 @@ busy for a long time to come. Partitionwise joins ------------------- + A join between two similarly partitioned tables can be broken down into joins between their matching partitions if there exists an equi-join condition between the partition keys of the joining tables. The equi-join between @@ -1102,3 +1103,16 @@ any two partitioned relations with same partitioning scheme point to the same PartitionSchemeData object.
This reduces memory consumed by PartitionSchemeData objects and makes it easy to compare the partition schemes of joining relations. + +Partition-wise aggregates/grouping +---------------------------------- + +If the GROUP BY clause contains all of the partition keys, all the rows +that belong to a given group must come from a single partition; therefore, +aggregation can be done completely separately for each partition. Otherwise, +partial aggregates can be computed for each partition, and then finalized +after appending the results from the individual partitions. This technique of +breaking down aggregation or grouping over a partitioned relation into +aggregation or grouping over its partitions is called partitionwise +aggregation. Especially when the partition keys match the GROUP BY clause, +this can be significantly faster than the regular method. diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 8735e29807..43f4e75748 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -134,8 +134,6 @@ static void subquery_push_qual(Query *subquery, static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual); static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); -static void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, - List *live_childrels); /* @@ -1326,7 +1324,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * parameterization or ordering. Similarly it collects partial paths from * non-dummy children to create partial append paths. */ -static void +void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, List *live_childrels) { @@ -1413,8 +1411,12 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, * If child has an unparameterized cheapest-total path, add that to * the unparameterized Append path we are constructing for the parent. * If not, there's no workable unparameterized path. + * + * With partitionwise aggregates, the child rel's pathlist may be + * empty, so don't assume that a path exists here.
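[Editor's illustration of the partitionwise-aggregation behavior described in the README addition above. This psql sketch is not part of the patch: the table t, its partitions, and its data are invented purely for illustration, and whether the planner actually picks a partitionwise plan still depends on cost estimates.]

-- Illustrative sketch only.
SET enable_partitionwise_aggregate = on;

CREATE TABLE t (key int, val int) PARTITION BY LIST (key);
CREATE TABLE t_p1 PARTITION OF t FOR VALUES IN (1, 2);
CREATE TABLE t_p2 PARTITION OF t FOR VALUES IN (3, 4);
INSERT INTO t SELECT 1 + i % 4, i FROM generate_series(1, 1000) i;
ANALYZE t;

-- GROUP BY contains the partition key: every row of a group lives in a single
-- partition, so each partition can be aggregated completely on its own and
-- the per-partition results appended (full partitionwise aggregation).
EXPLAIN (COSTS OFF) SELECT key, sum(val) FROM t GROUP BY key;

-- GROUP BY does not contain the partition key: a group can span partitions,
-- so at most partial aggregates can be computed per partition; they are
-- appended and then finalized.
EXPLAIN (COSTS OFF) SELECT val % 10, sum(val) FROM t GROUP BY val % 10;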
*/ - if (childrel->cheapest_total_path->param_info == NULL) + if (childrel->pathlist != NIL && + childrel->cheapest_total_path->param_info == NULL) accumulate_append_subpath(childrel->cheapest_total_path, &subpaths, NULL); else @@ -1682,6 +1684,13 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); Path *subpath; + if (childrel->pathlist == NIL) + { + /* failed to make a suitable path for this child */ + subpaths_valid = false; + break; + } + subpath = get_cheapest_parameterized_child_path(root, childrel, required_outer); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 36b3dfabb8..47729de896 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -135,6 +135,7 @@ bool enable_mergejoin = true; bool enable_hashjoin = true; bool enable_gathermerge = true; bool enable_partitionwise_join = false; +bool enable_partitionwise_aggregate = false; bool enable_parallel_append = true; bool enable_parallel_hash = true; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9ae1bf31d5..8b4f031d96 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1670,7 +1670,15 @@ create_sort_plan(PlannerInfo *root, SortPath *best_path, int flags) subplan = create_plan_recurse(root, best_path->subpath, flags | CP_SMALL_TLIST); - plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, NULL); + /* + * make_sort_from_pathkeys() indirectly calls find_ec_member_for_tle(), + * which will ignore any child EC members that don't belong to the given + * relids. Thus, if this sort path is based on a child relation, we must + * pass its relids. + */ + plan = make_sort_from_pathkeys(subplan, best_path->path.pathkeys, + IS_OTHER_REL(best_path->subpath->parent) ? + best_path->path.parent->relids : NULL); copy_generic_path_info(&plan->plan, (Path *) best_path); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 85805ff5c7..54f2da70cb 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -93,25 +93,6 @@ typedef struct List *groupClause; /* overrides parse->groupClause */ } standard_qp_extra; -/* - * Various flags indicating what kinds of grouping are possible. - * - * GROUPING_CAN_USE_SORT should be set if it's possible to perform - * sort-based implementations of grouping. When grouping sets are in use, - * this will be true if sorting is potentially usable for any of the grouping - * sets, even if it's not usable for all of them. - * - * GROUPING_CAN_USE_HASH should be set if it's possible to perform - * hash-based implementations of grouping. - * - * GROUPING_CAN_PARTIAL_AGG should be set if the aggregation is of a type - * for which we support partial aggregation (not, for example, grouping sets). - * It says nothing about parallel-safety or the availability of suitable paths. 
- */ -#define GROUPING_CAN_USE_SORT 0x0001 -#define GROUPING_CAN_USE_HASH 0x0002 -#define GROUPING_CAN_PARTIAL_AGG 0x0004 - /* * Data specific to grouping sets */ @@ -164,11 +145,16 @@ static bool is_degenerate_grouping(PlannerInfo *root); static void create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel); +static RelOptInfo *make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + PathTarget *target, bool target_parallel_safe, + Node *havingQual); static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd, int flags); + grouping_sets_data *gd, + GroupPathExtraData *extra, + RelOptInfo **partially_grouped_rel_p); static void consider_groupingsets_paths(PlannerInfo *root, RelOptInfo *grouped_rel, Path *path, @@ -221,19 +207,34 @@ static void add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - const AggClauseCosts *agg_final_costs, - grouping_sets_data *gd, bool can_sort, bool can_hash, - double dNumGroups, List *havingQual); + grouping_sets_data *gd, + double dNumGroups, + GroupPathExtraData *extra); static RelOptInfo *create_partial_grouping_paths(PlannerInfo *root, RelOptInfo *grouped_rel, RelOptInfo *input_rel, grouping_sets_data *gd, - bool can_sort, - bool can_hash, - AggClauseCosts *agg_final_costs); + GroupPathExtraData *extra, + bool force_rel_creation); static void gather_grouping_paths(PlannerInfo *root, RelOptInfo *rel); static bool can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs); +static void apply_scanjoin_target_to_paths(PlannerInfo *root, + RelOptInfo *rel, + PathTarget *scanjoin_target, + bool scanjoin_target_parallel_safe, + bool modify_in_place); +static void create_partitionwise_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + PartitionwiseAggregateType patype, + GroupPathExtraData *extra); +static bool group_by_has_partkey(RelOptInfo *input_rel, + List *targetList, + List *groupClause); /***************************************************************************** @@ -1950,74 +1951,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* * Forcibly apply SRF-free scan/join target to all the Paths for the * scan/join rel. - * - * In principle we should re-run set_cheapest() here to identify the - * cheapest path, but it seems unlikely that adding the same tlist - * eval costs to all the paths would change that, so we don't bother. - * Instead, just assume that the cheapest-startup and cheapest-total - * paths remain so. (There should be no parameterized paths anymore, - * so we needn't worry about updating cheapest_parameterized_paths.) 
*/ - foreach(lc, current_rel->pathlist) - { - Path *subpath = (Path *) lfirst(lc); - Path *path; - - Assert(subpath->param_info == NULL); - path = apply_projection_to_path(root, current_rel, - subpath, scanjoin_target); - /* If we had to add a Result, path is different from subpath */ - if (path != subpath) - { - lfirst(lc) = path; - if (subpath == current_rel->cheapest_startup_path) - current_rel->cheapest_startup_path = path; - if (subpath == current_rel->cheapest_total_path) - current_rel->cheapest_total_path = path; - } - } - - /* - * Upper planning steps which make use of the top scan/join rel's - * partial pathlist will expect partial paths for that rel to produce - * the same output as complete paths ... and we just changed the - * output for the complete paths, so we'll need to do the same thing - * for partial paths. But only parallel-safe expressions can be - * computed by partial paths. - */ - if (current_rel->partial_pathlist && scanjoin_target_parallel_safe) - { - /* Apply the scan/join target to each partial path */ - foreach(lc, current_rel->partial_pathlist) - { - Path *subpath = (Path *) lfirst(lc); - Path *newpath; - - /* Shouldn't have any parameterized paths anymore */ - Assert(subpath->param_info == NULL); - - /* - * Don't use apply_projection_to_path() here, because there - * could be other pointers to these paths, and therefore we - * mustn't modify them in place. - */ - newpath = (Path *) create_projection_path(root, - current_rel, - subpath, - scanjoin_target); - lfirst(lc) = newpath; - } - } - else - { - /* - * In the unfortunate event that scanjoin_target is not - * parallel-safe, we can't apply it to the partial paths; in that - * case, we'll need to forget about the partial paths, which - * aren't valid input for upper planning steps. - */ - current_rel->partial_pathlist = NIL; - } + apply_scanjoin_target_to_paths(root, current_rel, scanjoin_target, + scanjoin_target_parallel_safe, true); /* Now fix things up if scan/join target contains SRFs */ if (parse->hasTargetSRFs) @@ -3705,30 +3641,14 @@ create_grouping_paths(PlannerInfo *root, { Query *parse = root->parse; RelOptInfo *grouped_rel; + RelOptInfo *partially_grouped_rel; /* - * For now, all aggregated paths are added to the (GROUP_AGG, NULL) - * upperrel. - */ - grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); - grouped_rel->reltarget = target; - - /* - * If the input relation is not parallel-safe, then the grouped relation - * can't be parallel-safe, either. Otherwise, it's parallel-safe if the - * target list and HAVING quals are parallel-safe. - */ - if (input_rel->consider_parallel && target_parallel_safe && - is_parallel_safe(root, (Node *) parse->havingQual)) - grouped_rel->consider_parallel = true; - - /* - * If the input rel belongs to a single FDW, so does the grouped rel. + * Create grouping relation to hold fully aggregated grouping and/or + * aggregation paths. 
*/ - grouped_rel->serverid = input_rel->serverid; - grouped_rel->userid = input_rel->userid; - grouped_rel->useridiscurrent = input_rel->useridiscurrent; - grouped_rel->fdwroutine = input_rel->fdwroutine; + grouped_rel = make_grouping_rel(root, input_rel, target, + target_parallel_safe, parse->havingQual); /* * Create either paths for a degenerate grouping or paths for ordinary @@ -3739,6 +3659,7 @@ create_grouping_paths(PlannerInfo *root, else { int flags = 0; + GroupPathExtraData extra; /* * Determine whether it's possible to perform sort-based @@ -3786,14 +3707,87 @@ create_grouping_paths(PlannerInfo *root, if (can_partial_agg(root, agg_costs)) flags |= GROUPING_CAN_PARTIAL_AGG; + extra.flags = flags; + extra.target = target; + extra.target_parallel_safe = target_parallel_safe; + extra.havingQual = parse->havingQual; + extra.targetList = parse->targetList; + extra.partial_costs_set = false; + + /* + * Determine whether partitionwise aggregation is in theory possible. + * It can be disabled by the user, and for now, we don't try to + * support grouping sets. create_ordinary_grouping_paths() will check + * additional conditions, such as whether input_rel is partitioned. + */ + if (enable_partitionwise_aggregate && !parse->groupingSets) + extra.patype = PARTITIONWISE_AGGREGATE_FULL; + else + extra.patype = PARTITIONWISE_AGGREGATE_NONE; + create_ordinary_grouping_paths(root, input_rel, grouped_rel, - agg_costs, gd, flags); + agg_costs, gd, &extra, + &partially_grouped_rel); } set_cheapest(grouped_rel); return grouped_rel; } +/* + * make_grouping_rel + * + * Create a new grouping rel and set basic properties. + * + * input_rel represents the underlying scan/join relation. + * target is the output expected from the grouping relation. + */ +static RelOptInfo * +make_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, + PathTarget *target, bool target_parallel_safe, + Node *havingQual) +{ + RelOptInfo *grouped_rel; + + if (IS_OTHER_REL(input_rel)) + { + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, + input_rel->relids); + grouped_rel->reloptkind = RELOPT_OTHER_UPPER_REL; + } + else + { + /* + * By tradition, the relids set for the main grouping relation is + * NULL. (This could be changed, but might require adjustments + * elsewhere.) + */ + grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); + } + + /* Set target. */ + grouped_rel->reltarget = target; + + /* + * If the input relation is not parallel-safe, then the grouped relation + * can't be parallel-safe, either. Otherwise, it's parallel-safe if the + * target list and HAVING quals are parallel-safe. + */ + if (input_rel->consider_parallel && target_parallel_safe && + is_parallel_safe(root, (Node *) havingQual)) + grouped_rel->consider_parallel = true; + + /* + * If the input rel belongs to a single FDW, so does the grouped rel. + */ + grouped_rel->serverid = input_rel->serverid; + grouped_rel->userid = input_rel->userid; + grouped_rel->useridiscurrent = input_rel->useridiscurrent; + grouped_rel->fdwroutine = input_rel->fdwroutine; + + return grouped_rel; +} + /* * is_degenerate_grouping * @@ -3881,55 +3875,117 @@ create_degenerate_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * because otherwise (1) it would be harder to throw an appropriate error * message if neither way works, and (2) we should not allow hashtable size * considerations to dissuade us from using hashing if sorting is not possible. 
+ * + * *partially_grouped_rel_p will be set to the partially grouped rel which this + * function creates, or to NULL if it doesn't create one. */ static void create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, const AggClauseCosts *agg_costs, - grouping_sets_data *gd, int flags) + grouping_sets_data *gd, + GroupPathExtraData *extra, + RelOptInfo **partially_grouped_rel_p) { - Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; RelOptInfo *partially_grouped_rel = NULL; - AggClauseCosts agg_final_costs; /* parallel only */ double dNumGroups; - bool can_hash = (flags & GROUPING_CAN_USE_HASH) != 0; - bool can_sort = (flags & GROUPING_CAN_USE_SORT) != 0; + PartitionwiseAggregateType patype = PARTITIONWISE_AGGREGATE_NONE; /* - * Estimate number of groups. + * If this is the topmost grouping relation or if the parent relation is + * doing some form of partitionwise aggregation, then we may be able to do + * it at this level also. However, if the input relation is not + * partitioned, partitionwise aggregate is impossible, and if it is dummy, + * partitionwise aggregate is pointless. */ - dNumGroups = get_number_of_groups(root, - cheapest_path->rows, - gd, - parse->targetList); + if (extra->patype != PARTITIONWISE_AGGREGATE_NONE && + input_rel->part_scheme && input_rel->part_rels && + !IS_DUMMY_REL(input_rel)) + { + /* + * If this is the topmost relation or if the parent relation is doing + * full partitionwise aggregation, then we can do full partitionwise + * aggregation provided that the GROUP BY clause contains all of the + * partitioning columns at this level. Otherwise, we can do at most + * partial partitionwise aggregation. But if partial aggregation is + * not supported in general then we can't use it for partitionwise + * aggregation either. + */ + if (extra->patype == PARTITIONWISE_AGGREGATE_FULL && + group_by_has_partkey(input_rel, extra->targetList, + root->parse->groupClause)) + patype = PARTITIONWISE_AGGREGATE_FULL; + else if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0) + patype = PARTITIONWISE_AGGREGATE_PARTIAL; + else + patype = PARTITIONWISE_AGGREGATE_NONE; + } /* * Before generating paths for grouped_rel, we first generate any possible * partially grouped paths; that way, later code can easily consider both * parallel and non-parallel approaches to grouping. */ - MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts)); - if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL - && (flags & GROUPING_CAN_PARTIAL_AGG) != 0) + if ((extra->flags & GROUPING_CAN_PARTIAL_AGG) != 0) { + bool force_rel_creation; + + /* + * If we're doing partitionwise aggregation at this level, force + * creation of a partially_grouped_rel so we can add partitionwise + * paths to it. + */ + force_rel_creation = (patype == PARTITIONWISE_AGGREGATE_PARTIAL); + partially_grouped_rel = create_partial_grouping_paths(root, grouped_rel, input_rel, gd, - can_sort, - can_hash, - &agg_final_costs); + extra, + force_rel_creation); + } + + /* Set out parameter. */ + *partially_grouped_rel_p = partially_grouped_rel; + + /* Apply partitionwise aggregation technique, if possible. */ + if (patype != PARTITIONWISE_AGGREGATE_NONE) + create_partitionwise_grouping_paths(root, input_rel, grouped_rel, + partially_grouped_rel, agg_costs, + gd, patype, extra); + + /* If we are doing partial aggregation only, return. 
*/ + if (extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL) + { + Assert(partially_grouped_rel); + + if (partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); + + return; + } + + /* Gather any partially grouped partial paths. */ + if (partially_grouped_rel && partially_grouped_rel->partial_pathlist) + { gather_grouping_paths(root, partially_grouped_rel); set_cheapest(partially_grouped_rel); } + /* + * Estimate number of groups. + */ + dNumGroups = get_number_of_groups(root, + cheapest_path->rows, + gd, + extra->targetList); + /* Build final grouping paths */ add_paths_to_grouping_rel(root, input_rel, grouped_rel, - partially_grouped_rel, agg_costs, - &agg_final_costs, gd, can_sort, can_hash, - dNumGroups, (List *) parse->havingQual); + partially_grouped_rel, agg_costs, gd, + dNumGroups, extra); /* Give a helpful error if we failed to find any implementation */ if (grouped_rel->pathlist == NIL) @@ -6103,13 +6159,16 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *grouped_rel, RelOptInfo *partially_grouped_rel, const AggClauseCosts *agg_costs, - const AggClauseCosts *agg_final_costs, - grouping_sets_data *gd, bool can_sort, bool can_hash, - double dNumGroups, List *havingQual) + grouping_sets_data *gd, double dNumGroups, + GroupPathExtraData *extra) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; ListCell *lc; + bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; + bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + List *havingQual = (List *) extra->havingQual; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; if (can_sort) { @@ -6302,6 +6361,15 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, dNumGroups)); } } + + /* + * When partitionwise aggregate is used, we might have fully aggregated + * paths in the partial pathlist, because add_paths_to_append_rel() will + * consider a path for grouped_rel consisting of a Parallel Append of + * non-partial paths from each child. + */ + if (grouped_rel->partial_pathlist != NIL) + gather_grouping_paths(root, grouped_rel); } /* @@ -6316,23 +6384,58 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel, * All paths for this new upper relation -- both partial and non-partial -- * have been partially aggregated but require a subsequent FinalizeAggregate * step. + * + * NB: This function is allowed to return NULL if it determines that there is + * no real need to create a new RelOptInfo. */ static RelOptInfo * create_partial_grouping_paths(PlannerInfo *root, RelOptInfo *grouped_rel, RelOptInfo *input_rel, grouping_sets_data *gd, - bool can_sort, - bool can_hash, - AggClauseCosts *agg_final_costs) + GroupPathExtraData *extra, + bool force_rel_creation) { Query *parse = root->parse; RelOptInfo *partially_grouped_rel; - AggClauseCosts agg_partial_costs; - Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); - Size hashaggtablesize; + AggClauseCosts *agg_partial_costs = &extra->agg_partial_costs; + AggClauseCosts *agg_final_costs = &extra->agg_final_costs; + Path *cheapest_partial_path = NULL; + Path *cheapest_total_path = NULL; double dNumPartialGroups = 0; + double dNumPartialPartialGroups = 0; ListCell *lc; + bool can_hash = (extra->flags & GROUPING_CAN_USE_HASH) != 0; + bool can_sort = (extra->flags & GROUPING_CAN_USE_SORT) != 0; + + /* + * Consider whether we should generate partially aggregated non-partial + * paths. 
We can only do this if we have a non-partial path, and only if + * the parent of the input rel is performing partial partitionwise + * aggregation. (Note that extra->patype is the type of partitionwise + * aggregation being used at the parent level, not this level.) + */ + if (input_rel->pathlist != NIL && + extra->patype == PARTITIONWISE_AGGREGATE_PARTIAL) + cheapest_total_path = input_rel->cheapest_total_path; + + /* + * If parallelism is possible for grouped_rel, then we should consider + * generating partially-grouped partial paths. However, if the input rel + * has no partial paths, then we can't. + */ + if (grouped_rel->consider_parallel && input_rel->partial_pathlist != NIL) + cheapest_partial_path = linitial(input_rel->partial_pathlist); + + /* + * If we can't partially aggregate partial paths, and we can't partially + * aggregate non-partial paths, then don't bother creating the new + * RelOptInfo at all, unless the caller specified force_rel_creation. + */ + if (cheapest_total_path == NULL && + cheapest_partial_path == NULL && + !force_rel_creation) + return NULL; /* * Build a new upper relation to represent the result of partially @@ -6343,6 +6446,7 @@ create_partial_grouping_paths(PlannerInfo *root, grouped_rel->relids); partially_grouped_rel->consider_parallel = grouped_rel->consider_parallel; + partially_grouped_rel->reloptkind = grouped_rel->reloptkind; partially_grouped_rel->serverid = grouped_rel->serverid; partially_grouped_rel->userid = grouped_rel->userid; partially_grouped_rel->useridiscurrent = grouped_rel->useridiscurrent; @@ -6356,39 +6460,53 @@ create_partial_grouping_paths(PlannerInfo *root, */ partially_grouped_rel->reltarget = make_partial_grouping_target(root, grouped_rel->reltarget, - (Node *) parse->havingQual); + extra->havingQual); - /* - * Collect statistics about aggregates for estimating costs of performing - * aggregation in parallel. - */ - MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts)); - if (parse->hasAggs) + if (!extra->partial_costs_set) { - List *partial_target_exprs; - - /* partial phase */ - partial_target_exprs = partially_grouped_rel->reltarget->exprs; - get_agg_clause_costs(root, (Node *) partial_target_exprs, - AGGSPLIT_INITIAL_SERIAL, - &agg_partial_costs); - - /* final phase */ - get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs, - AGGSPLIT_FINAL_DESERIAL, - agg_final_costs); - get_agg_clause_costs(root, parse->havingQual, - AGGSPLIT_FINAL_DESERIAL, - agg_final_costs); + /* + * Collect statistics about aggregates for estimating costs of + * performing aggregation in parallel. + */ + MemSet(agg_partial_costs, 0, sizeof(AggClauseCosts)); + MemSet(agg_final_costs, 0, sizeof(AggClauseCosts)); + if (parse->hasAggs) + { + List *partial_target_exprs; + + /* partial phase */ + partial_target_exprs = partially_grouped_rel->reltarget->exprs; + get_agg_clause_costs(root, (Node *) partial_target_exprs, + AGGSPLIT_INITIAL_SERIAL, + agg_partial_costs); + + /* final phase */ + get_agg_clause_costs(root, (Node *) grouped_rel->reltarget->exprs, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + get_agg_clause_costs(root, extra->havingQual, + AGGSPLIT_FINAL_DESERIAL, + agg_final_costs); + } + + extra->partial_costs_set = true; } /* Estimate number of partial groups. 
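[Editor's note, to make the partial/finalize split concrete: a partial aggregate emits a combinable state (for avg, effectively a running sum and count) rather than a final value, and the finalize step combines those per-partition states. The decomposition can be written out by hand in plain SQL; this is only an illustration of the algebra, not of the executor's actual transition-state machinery (which uses AGGSPLIT_INITIAL_SERIAL / AGGSPLIT_FINAL_DESERIAL, as costed here).]

-- Illustrative sketch only: average of 1..100 computed by combining two
-- per-"partition" partial results.
SELECT sum(s) / sum(n)::numeric AS avg_combined        -- "finalize" step
FROM (
    SELECT sum(i) AS s, count(*) AS n                  -- "partial" step, first half
    FROM generate_series(1, 50) AS i
    UNION ALL
    SELECT sum(i), count(*)                            -- "partial" step, second half
    FROM generate_series(51, 100) AS i
) AS partials;
-- Returns 50.5, the same as SELECT avg(i) FROM generate_series(1, 100) AS i;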
*/ - dNumPartialGroups = get_number_of_groups(root, - cheapest_partial_path->rows, - gd, - parse->targetList); - - if (can_sort) + if (cheapest_total_path != NULL) + dNumPartialGroups = + get_number_of_groups(root, + cheapest_total_path->rows, + gd, + extra->targetList); + if (cheapest_partial_path != NULL) + dNumPartialPartialGroups = + get_number_of_groups(root, + cheapest_partial_path->rows, + gd, + extra->targetList); + + if (can_sort && cheapest_total_path != NULL) { /* This should have been checked previously */ Assert(parse->hasAggs || parse->groupClause); @@ -6397,6 +6515,50 @@ create_partial_grouping_paths(PlannerInfo *root, * Use any available suitably-sorted path as input, and also consider * sorting the cheapest partial path. */ + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, + path->pathkeys); + if (path == cheapest_total_path || is_sorted) + { + /* Sort the cheapest partial path, if it isn't already */ + if (!is_sorted) + path = (Path *) create_sort_path(root, + partially_grouped_rel, + path, + root->group_pathkeys, + -1.0); + + if (parse->hasAggs) + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + path, + partially_grouped_rel->reltarget, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + else + add_path(partially_grouped_rel, (Path *) + create_group_path(root, + partially_grouped_rel, + path, + parse->groupClause, + NIL, + dNumPartialGroups)); + } + } + } + + if (can_sort && cheapest_partial_path != NULL) + { + /* Similar to above logic, but for partial paths. */ foreach(lc, input_rel->partial_pathlist) { Path *path = (Path *) lfirst(lc); @@ -6424,8 +6586,8 @@ create_partial_grouping_paths(PlannerInfo *root, AGGSPLIT_INITIAL_SERIAL, parse->groupClause, NIL, - &agg_partial_costs, - dNumPartialGroups)); + agg_partial_costs, + dNumPartialPartialGroups)); else add_partial_path(partially_grouped_rel, (Path *) create_group_path(root, @@ -6433,26 +6595,56 @@ create_partial_grouping_paths(PlannerInfo *root, path, parse->groupClause, NIL, - dNumPartialGroups)); + dNumPartialPartialGroups)); } } } - if (can_hash) + if (can_hash && cheapest_total_path != NULL) { + Size hashaggtablesize; + /* Checked above */ Assert(parse->hasAggs || parse->groupClause); hashaggtablesize = - estimate_hashagg_tablesize(cheapest_partial_path, - &agg_partial_costs, + estimate_hashagg_tablesize(cheapest_total_path, + agg_partial_costs, dNumPartialGroups); /* * Tentatively produce a partial HashAgg Path, depending on if it * looks as if the hash table will fit in work_mem. */ - if (hashaggtablesize < work_mem * 1024L) + if (hashaggtablesize < work_mem * 1024L && + cheapest_total_path != NULL) + { + add_path(partially_grouped_rel, (Path *) + create_agg_path(root, + partially_grouped_rel, + cheapest_total_path, + partially_grouped_rel->reltarget, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, + parse->groupClause, + NIL, + agg_partial_costs, + dNumPartialGroups)); + } + } + + if (can_hash && cheapest_partial_path != NULL) + { + Size hashaggtablesize; + + hashaggtablesize = + estimate_hashagg_tablesize(cheapest_partial_path, + agg_partial_costs, + dNumPartialPartialGroups); + + /* Do the same for partial paths. 
*/ + if (hashaggtablesize < work_mem * 1024L && + cheapest_partial_path != NULL) { add_partial_path(partially_grouped_rel, (Path *) create_agg_path(root, @@ -6463,8 +6655,8 @@ create_partial_grouping_paths(PlannerInfo *root, AGGSPLIT_INITIAL_SERIAL, parse->groupClause, NIL, - &agg_partial_costs, - dNumPartialGroups)); + agg_partial_costs, + dNumPartialPartialGroups)); } } @@ -6565,3 +6757,324 @@ can_partial_agg(PlannerInfo *root, const AggClauseCosts *agg_costs) /* Everything looks good. */ return true; } + +/* + * apply_scanjoin_target_to_paths + * + * Applies scan/join target to all the Paths for the scan/join rel. + */ +static void +apply_scanjoin_target_to_paths(PlannerInfo *root, + RelOptInfo *rel, + PathTarget *scanjoin_target, + bool scanjoin_target_parallel_safe, + bool modify_in_place) +{ + ListCell *lc; + + /* + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist eval + * costs to all the paths would change that, so we don't bother. Instead, + * just assume that the cheapest-startup and cheapest-total paths remain + * so. (There should be no parameterized paths anymore, so we needn't + * worry about updating cheapest_parameterized_paths.) + */ + foreach(lc, rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath; + + Assert(subpath->param_info == NULL); + + /* + * Don't use apply_projection_to_path() when modify_in_place is false, + * because there could be other pointers to these paths, and therefore + * we mustn't modify them in place. + */ + if (modify_in_place) + newpath = apply_projection_to_path(root, rel, subpath, + scanjoin_target); + else + newpath = (Path *) create_projection_path(root, rel, subpath, + scanjoin_target); + + /* If we had to add a Result, newpath is different from subpath */ + if (newpath != subpath) + { + lfirst(lc) = newpath; + if (subpath == rel->cheapest_startup_path) + rel->cheapest_startup_path = newpath; + if (subpath == rel->cheapest_total_path) + rel->cheapest_total_path = newpath; + } + } + + /* + * Upper planning steps which make use of the top scan/join rel's partial + * pathlist will expect partial paths for that rel to produce the same + * output as complete paths ... and we just changed the output for the + * complete paths, so we'll need to do the same thing for partial paths. + * But only parallel-safe expressions can be computed by partial paths. + */ + if (rel->partial_pathlist && scanjoin_target_parallel_safe) + { + /* Apply the scan/join target to each partial path */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath; + + /* Shouldn't have any parameterized paths anymore */ + Assert(subpath->param_info == NULL); + + /* + * Don't use apply_projection_to_path() here, because there could + * be other pointers to these paths, and therefore we mustn't + * modify them in place. + */ + newpath = (Path *) create_projection_path(root, + rel, + subpath, + scanjoin_target); + lfirst(lc) = newpath; + } + } + else + { + /* + * In the unfortunate event that scanjoin_target is not parallel-safe, + * we can't apply it to the partial paths; in that case, we'll need to + * forget about the partial paths, which aren't valid input for upper + * planning steps. + */ + rel->partial_pathlist = NIL; + } +} + +/* + * create_partitionwise_grouping_paths + * + * If the partition keys of input relation are part of the GROUP BY clause, all + * the rows belonging to a given group come from a single partition. 
This + * allows aggregation/grouping over a partitioned relation to be broken down + * into aggregation/grouping on each partition. This should be no worse, and + * often better, than the normal approach. + * + * However, if the GROUP BY clause does not contain all the partition keys, + * rows from a given group may be spread across multiple partitions. In that + * case, we perform partial aggregation for each group, append the results, + * and then finalize aggregation. This is less certain to win than the + * previous case. It may win if the PartialAggregate stage greatly reduces + * the number of groups, because fewer rows will pass through the Append node. + * It may lose if we have lots of small groups. + */ +static void +create_partitionwise_grouping_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *grouped_rel, + RelOptInfo *partially_grouped_rel, + const AggClauseCosts *agg_costs, + grouping_sets_data *gd, + PartitionwiseAggregateType patype, + GroupPathExtraData *extra) +{ + int nparts = input_rel->nparts; + int cnt_parts; + List *grouped_live_children = NIL; + List *partially_grouped_live_children = NIL; + PathTarget *target = extra->target; + + Assert(patype != PARTITIONWISE_AGGREGATE_NONE); + Assert(patype != PARTITIONWISE_AGGREGATE_PARTIAL || + partially_grouped_rel != NULL); + + /* Add paths for partitionwise aggregation/grouping. */ + for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++) + { + RelOptInfo *child_input_rel = input_rel->part_rels[cnt_parts]; + PathTarget *child_target = copy_pathtarget(target); + AppendRelInfo **appinfos; + int nappinfos; + PathTarget *scanjoin_target; + GroupPathExtraData child_extra; + RelOptInfo *child_grouped_rel; + RelOptInfo *child_partially_grouped_rel; + + /* Input child rel must have a path */ + Assert(child_input_rel->pathlist != NIL); + + /* + * Copy the given "extra" structure as is and then override the + * members specific to this child. + */ + memcpy(&child_extra, extra, sizeof(child_extra)); + + appinfos = find_appinfos_by_relids(root, child_input_rel->relids, + &nappinfos); + + child_target->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) target->exprs, + nappinfos, appinfos); + child_extra.target = child_target; + + /* Translate havingQual and targetList. */ + child_extra.havingQual = (Node *) + adjust_appendrel_attrs(root, + extra->havingQual, + nappinfos, appinfos); + child_extra.targetList = (List *) + adjust_appendrel_attrs(root, + (Node *) extra->targetList, + nappinfos, appinfos); + + /* + * extra->patype was the value computed for our parent rel; patype is + * the value for this relation. For the child, our value is its + * parent rel's value. + */ + child_extra.patype = patype; + + /* + * Create grouping relation to hold fully aggregated grouping and/or + * aggregation paths for the child. + */ + child_grouped_rel = make_grouping_rel(root, child_input_rel, + child_target, + extra->target_parallel_safe, + child_extra.havingQual); + + /* Ignore empty children. They contribute nothing. */ + if (IS_DUMMY_REL(child_input_rel)) + { + mark_dummy_rel(child_grouped_rel); + + continue; + } + + /* + * Copy pathtarget from underneath scan/join as we are modifying it + * and translate its Vars with respect to this appendrel. The input + * relation's reltarget might not be the final scanjoin_target, but + * the pathtarget any given individual path should be. 
+ */ + scanjoin_target = + copy_pathtarget(input_rel->cheapest_startup_path->pathtarget); + scanjoin_target->exprs = (List *) + adjust_appendrel_attrs(root, + (Node *) scanjoin_target->exprs, + nappinfos, appinfos); + + /* + * Forcibly apply scan/join target to all the Paths for the scan/join + * rel. + */ + apply_scanjoin_target_to_paths(root, child_input_rel, scanjoin_target, + extra->target_parallel_safe, false); + + /* Create grouping paths for this child relation. */ + create_ordinary_grouping_paths(root, child_input_rel, + child_grouped_rel, + agg_costs, gd, &child_extra, + &child_partially_grouped_rel); + + if (child_partially_grouped_rel) + { + partially_grouped_live_children = + lappend(partially_grouped_live_children, + child_partially_grouped_rel); + } + + if (patype == PARTITIONWISE_AGGREGATE_FULL) + { + set_cheapest(child_grouped_rel); + grouped_live_children = lappend(grouped_live_children, + child_grouped_rel); + } + + pfree(appinfos); + } + + /* + * All children can't be dummy at this point. If they are, then the parent + * too marked as dummy. + */ + Assert(grouped_live_children != NIL || + partially_grouped_live_children != NIL); + + /* + * Try to create append paths for partially grouped children. For full + * partitionwise aggregation, we might have paths in the partial_pathlist + * if parallel aggregation is possible. For partial partitionwise + * aggregation, we may have paths in both pathlist and partial_pathlist. + */ + if (partially_grouped_rel) + { + add_paths_to_append_rel(root, partially_grouped_rel, + partially_grouped_live_children); + + /* + * We need call set_cheapest, since the finalization step will use the + * cheapest path from the rel. + */ + if (partially_grouped_rel->pathlist) + set_cheapest(partially_grouped_rel); + } + + /* If possible, create append paths for fully grouped children. */ + if (patype == PARTITIONWISE_AGGREGATE_FULL) + add_paths_to_append_rel(root, grouped_rel, grouped_live_children); +} + +/* + * group_by_has_partkey + * + * Returns true, if all the partition keys of the given relation are part of + * the GROUP BY clauses, false otherwise. + */ +static bool +group_by_has_partkey(RelOptInfo *input_rel, + List *targetList, + List *groupClause) +{ + List *groupexprs = get_sortgrouplist_exprs(groupClause, targetList); + int cnt = 0; + int partnatts; + + /* Input relation should be partitioned. */ + Assert(input_rel->part_scheme); + + /* Rule out early, if there are no partition keys present. */ + if (!input_rel->partexprs) + return false; + + partnatts = input_rel->part_scheme->partnatts; + + for (cnt = 0; cnt < partnatts; cnt++) + { + List *partexprs = input_rel->partexprs[cnt]; + ListCell *lc; + bool found = false; + + foreach(lc, partexprs) + { + Expr *partexpr = lfirst(lc); + + if (list_member(groupexprs, partexpr)) + { + found = true; + break; + } + } + + /* + * If none of the partition key expressions match with any of the + * GROUP BY expression, return false. 
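[Editor's illustration of group_by_has_partkey(), not taken from the patch's regression tests: with a multi-column partition key, every key column must appear in the GROUP BY clause for the function to return true. Table t2 and its partitions are invented for this sketch, and the plan actually chosen still depends on costs.]

-- Illustrative sketch only.
SET enable_partitionwise_aggregate = on;

CREATE TABLE t2 (a int, b int, val int) PARTITION BY RANGE (a, b);
CREATE TABLE t2_p1 PARTITION OF t2 FOR VALUES FROM (0, 0) TO (10, 10);
CREATE TABLE t2_p2 PARTITION OF t2 FOR VALUES FROM (10, 10) TO (20, 20);

-- GROUP BY covers both partition key columns: group_by_has_partkey() returns
-- true, so full partitionwise aggregation can be considered.
EXPLAIN (COSTS OFF) SELECT a, b, count(*) FROM t2 GROUP BY a, b;

-- GROUP BY covers only one of the key columns: the function returns false,
-- so only partial partitionwise aggregation can be considered.
EXPLAIN (COSTS OFF) SELECT a, count(*) FROM t2 GROUP BY a;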
+ */ + if (!found) + return false; + } + + return true; +} diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index afb1007842..a3e39dc344 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -923,6 +923,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"enable_partitionwise_aggregate", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables partitionwise aggregation and grouping."), + NULL + }, + &enable_partitionwise_aggregate, + false, + NULL, NULL, NULL + }, { {"enable_parallel_append", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of parallel append plans."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 91eacacdc9..1e0f411c40 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -306,6 +306,7 @@ #enable_sort = on #enable_tidscan = on #enable_partitionwise_join = off +#enable_partitionwise_aggregate = off #enable_parallel_hash = on # - Planner Cost Constants - diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d576aa7350..abbbda9e91 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -553,6 +553,7 @@ typedef enum RelOptKind RELOPT_OTHER_MEMBER_REL, RELOPT_OTHER_JOINREL, RELOPT_UPPER_REL, + RELOPT_OTHER_UPPER_REL, RELOPT_DEADREL } RelOptKind; @@ -570,12 +571,15 @@ typedef enum RelOptKind (rel)->reloptkind == RELOPT_OTHER_JOINREL) /* Is the given relation an upper relation? */ -#define IS_UPPER_REL(rel) ((rel)->reloptkind == RELOPT_UPPER_REL) +#define IS_UPPER_REL(rel) \ + ((rel)->reloptkind == RELOPT_UPPER_REL || \ + (rel)->reloptkind == RELOPT_OTHER_UPPER_REL) /* Is the given relation an "other" relation? */ #define IS_OTHER_REL(rel) \ ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \ - (rel)->reloptkind == RELOPT_OTHER_JOINREL) + (rel)->reloptkind == RELOPT_OTHER_JOINREL || \ + (rel)->reloptkind == RELOPT_OTHER_UPPER_REL) typedef struct RelOptInfo { @@ -2291,6 +2295,73 @@ typedef struct JoinPathExtraData Relids param_source_rels; } JoinPathExtraData; +/* + * Various flags indicating what kinds of grouping are possible. + * + * GROUPING_CAN_USE_SORT should be set if it's possible to perform + * sort-based implementations of grouping. When grouping sets are in use, + * this will be true if sorting is potentially usable for any of the grouping + * sets, even if it's not usable for all of them. + * + * GROUPING_CAN_USE_HASH should be set if it's possible to perform + * hash-based implementations of grouping. + * + * GROUPING_CAN_PARTIAL_AGG should be set if the aggregation is of a type + * for which we support partial aggregation (not, for example, grouping sets). + * It says nothing about parallel-safety or the availability of suitable paths. + */ +#define GROUPING_CAN_USE_SORT 0x0001 +#define GROUPING_CAN_USE_HASH 0x0002 +#define GROUPING_CAN_PARTIAL_AGG 0x0004 + +/* + * What kind of partitionwise aggregation is in use? + * + * PARTITIONWISE_AGGREGATE_NONE: Not used. + * + * PARTITIONWISE_AGGREGATE_FULL: Aggregate each partition separately, and + * append the results. + * + * PARTITIONWISE_AGGREGATE_PARTIAL: Partially aggregate each partition + * separately, append the results, and then finalize aggregation. 
+ */ +typedef enum +{ + PARTITIONWISE_AGGREGATE_NONE, + PARTITIONWISE_AGGREGATE_FULL, + PARTITIONWISE_AGGREGATE_PARTIAL +} PartitionwiseAggregateType; + +/* + * Struct for extra information passed to subroutines of create_grouping_paths + * + * flags indicating what kinds of grouping are possible. + * partial_costs_set is true if the agg_partial_costs and agg_final_costs + * have been initialized. + * agg_partial_costs gives partial aggregation costs. + * agg_final_costs gives finalization costs. + * target is the PathTarget to be used while creating paths. + * target_parallel_safe is true if target is parallel safe. + * havingQual gives list of quals to be applied after aggregation. + * targetList gives list of columns to be projected. + * patype is the type of partitionwise aggregation that is being performed. + */ +typedef struct +{ + /* Data which remains constant once set. */ + int flags; + bool partial_costs_set; + AggClauseCosts agg_partial_costs; + AggClauseCosts agg_final_costs; + + /* Data which may differ across partitions. */ + PathTarget *target; + bool target_parallel_safe; + Node *havingQual; + List *targetList; + PartitionwiseAggregateType patype; +} GroupPathExtraData; + /* * For speed reasons, cost estimation for join paths is performed in two * phases: the first phase tries to quickly derive a lower bound for the diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 132e35551b..d3269eae71 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -68,6 +68,7 @@ extern PGDLLIMPORT bool enable_mergejoin; extern PGDLLIMPORT bool enable_hashjoin; extern PGDLLIMPORT bool enable_gathermerge; extern PGDLLIMPORT bool enable_partitionwise_join; +extern PGDLLIMPORT bool enable_partitionwise_aggregate; extern PGDLLIMPORT bool enable_parallel_append; extern PGDLLIMPORT bool enable_parallel_hash; extern PGDLLIMPORT int constraint_exclusion; diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 94f9bb2b57..50e180c554 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -236,5 +236,7 @@ extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel); extern PathKey *make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first); +extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, + List *live_childrels); #endif /* PATHS_H */ diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out new file mode 100644 index 0000000000..bf8272e433 --- /dev/null +++ b/src/test/regress/expected/partition_aggregate.out @@ -0,0 +1,1393 @@ +-- +-- PARTITION_AGGREGATE +-- Test partitionwise aggregation on partitioned tables +-- +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. +SET enable_partitionwise_join TO true; +-- +-- Tests for list partitioned tables. 
+-- +CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); +CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003'); +CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0004', '0005', '0006', '0007'); +CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0008', '0009', '0010', '0011'); +INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab; +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)), (avg(pagg_tab_p1.b)) + -> Append + -> HashAggregate + Group Key: pagg_tab_p1.c + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.c + Filter: (avg(pagg_tab_p2.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.c + Filter: (avg(pagg_tab_p3.d) < '15'::numeric) + -> Seq Scan on pagg_tab_p3 +(15 rows) + +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; + c | sum | avg | count | min | max +------+------+---------------------+-------+-----+----- + 0000 | 2000 | 12.0000000000000000 | 250 | 0 | 24 + 0001 | 2250 | 13.0000000000000000 | 250 | 1 | 25 + 0002 | 2500 | 14.0000000000000000 | 250 | 2 | 26 + 0006 | 2500 | 12.0000000000000000 | 250 | 2 | 24 + 0007 | 2750 | 13.0000000000000000 | 250 | 3 | 25 + 0008 | 2000 | 14.0000000000000000 | 250 | 0 | 26 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. 
+EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b)), (avg(pagg_tab_p1.b)) + -> Finalize HashAggregate + Group Key: pagg_tab_p1.a + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + -> Partial HashAggregate + Group Key: pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + -> Partial HashAggregate + Group Key: pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 +(15 rows) + +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; + a | sum | avg | count | min | max +----+------+---------------------+-------+-----+----- + 0 | 1500 | 10.0000000000000000 | 150 | 0 | 20 + 1 | 1650 | 11.0000000000000000 | 150 | 1 | 21 + 2 | 1800 | 12.0000000000000000 | 150 | 2 | 22 + 3 | 1950 | 13.0000000000000000 | 150 | 3 | 23 + 4 | 2100 | 14.0000000000000000 | 150 | 4 | 24 + 10 | 1500 | 10.0000000000000000 | 150 | 10 | 20 + 11 | 1650 | 11.0000000000000000 | 150 | 11 | 21 + 12 | 1800 | 12.0000000000000000 | 150 | 12 | 22 + 13 | 1950 | 13.0000000000000000 | 150 | 13 | 23 + 14 | 2100 | 14.0000000000000000 | 150 | 14 | 24 +(10 rows) + +-- Check with multiple columns in GROUP BY +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; + QUERY PLAN +------------------------------------------------- + Append + -> HashAggregate + Group Key: pagg_tab_p1.a, pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.a, pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.a, pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- Check with multiple columns in GROUP BY, order in GROUP BY is reversed +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; + QUERY PLAN +------------------------------------------------- + Append + -> HashAggregate + Group Key: pagg_tab_p1.c, pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.c, pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.c, pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- Check with multiple columns in GROUP BY, order in target-list is reversed +EXPLAIN (COSTS OFF) +SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; + QUERY PLAN +------------------------------------------------- + Append + -> HashAggregate + Group Key: pagg_tab_p1.a, pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> HashAggregate + Group Key: pagg_tab_p2.a, pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> HashAggregate + Group Key: pagg_tab_p3.a, pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- Test when input relation for grouping is dummy +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; + QUERY PLAN +-------------------------------- + HashAggregate + Group Key: pagg_tab.c + -> Result + One-Time Filter: false +(4 rows) + +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; + c | sum +---+----- +(0 rows) + +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; + QUERY PLAN +-------------------------------- + GroupAggregate + Group Key: pagg_tab.c + -> Result + One-Time Filter: false +(4 rows) + +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; + c | sum +---+----- +(0 rows) + +-- Test GroupAggregate 
paths by disabling hash aggregates. +SET enable_hashagg TO false; +-- When GROUP BY clause matches full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)), (avg(pagg_tab_p1.b)) + -> Append + -> GroupAggregate + Group Key: pagg_tab_p1.c + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> GroupAggregate + Group Key: pagg_tab_p2.c + Filter: (avg(pagg_tab_p2.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> GroupAggregate + Group Key: pagg_tab_p3.c + Filter: (avg(pagg_tab_p3.d) < '15'::numeric) + -> Sort + Sort Key: pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(21 rows) + +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + c | sum | avg | count +------+------+---------------------+------- + 0000 | 2000 | 12.0000000000000000 | 250 + 0001 | 2250 | 13.0000000000000000 | 250 + 0002 | 2500 | 14.0000000000000000 | 250 + 0006 | 2500 | 12.0000000000000000 | 250 + 0007 | 2750 | 13.0000000000000000 | 250 + 0008 | 2000 | 14.0000000000000000 | 250 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b)), (avg(pagg_tab_p1.b)) + -> Finalize GroupAggregate + Group Key: pagg_tab_p1.a + Filter: (avg(pagg_tab_p1.d) < '15'::numeric) + -> Merge Append + Sort Key: pagg_tab_p1.a + -> Partial GroupAggregate + Group Key: pagg_tab_p1.a + -> Sort + Sort Key: pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + -> Partial GroupAggregate + Group Key: pagg_tab_p2.a + -> Sort + Sort Key: pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + -> Partial GroupAggregate + Group Key: pagg_tab_p3.a + -> Sort + Sort Key: pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 +(22 rows) + +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + a | sum | avg | count +----+------+---------------------+------- + 0 | 1500 | 10.0000000000000000 | 150 + 1 | 1650 | 11.0000000000000000 | 150 + 2 | 1800 | 12.0000000000000000 | 150 + 3 | 1950 | 13.0000000000000000 | 150 + 4 | 2100 | 14.0000000000000000 | 150 + 10 | 1500 | 10.0000000000000000 | 150 + 11 | 1650 | 11.0000000000000000 | 150 + 12 | 1800 | 12.0000000000000000 | 150 + 13 | 1950 | 13.0000000000000000 | 150 + 14 | 2100 | 14.0000000000000000 | 150 +(10 rows) + +-- Test partitionwise grouping without any aggregates +EXPLAIN (COSTS OFF) +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; + QUERY PLAN +------------------------------------------- + Merge Append + Sort Key: pagg_tab_p1.c + -> Group + Group Key: pagg_tab_p1.c + -> Sort + Sort Key: pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> Group + Group Key: pagg_tab_p2.c + -> Sort + Sort Key: pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> Group + Group Key: pagg_tab_p3.c + -> Sort + Sort Key: pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(17 rows) + +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; + c +------ + 0000 + 0001 + 0002 + 0003 + 0004 + 0005 + 0006 + 0007 + 0008 + 0009 + 0010 + 0011 +(12 rows) + +EXPLAIN (COSTS OFF) 
+SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; + QUERY PLAN +------------------------------------------------- + Group + Group Key: pagg_tab_p1.a + -> Merge Append + Sort Key: pagg_tab_p1.a + -> Group + Group Key: pagg_tab_p1.a + -> Sort + Sort Key: pagg_tab_p1.a + -> Seq Scan on pagg_tab_p1 + Filter: (a < 3) + -> Group + Group Key: pagg_tab_p2.a + -> Sort + Sort Key: pagg_tab_p2.a + -> Seq Scan on pagg_tab_p2 + Filter: (a < 3) + -> Group + Group Key: pagg_tab_p3.a + -> Sort + Sort Key: pagg_tab_p3.a + -> Seq Scan on pagg_tab_p3 + Filter: (a < 3) +(22 rows) + +SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; + a +--- + 0 + 1 + 2 +(3 rows) + +RESET enable_hashagg; +-- ROLLUP, partitionwise aggregation does not apply +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; + QUERY PLAN +------------------------------------------------- + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.a)) + -> MixedAggregate + Hash Key: pagg_tab_p1.c + Group Key: () + -> Append + -> Seq Scan on pagg_tab_p1 + -> Seq Scan on pagg_tab_p2 + -> Seq Scan on pagg_tab_p3 +(9 rows) + +-- ORDERED SET within the aggregate. +-- Full aggregation; since all the rows that belong to the same group come +-- from the same partition, having an ORDER BY within the aggregate doesn't +-- make any difference. +EXPLAIN (COSTS OFF) +SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; + QUERY PLAN +------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_p1.c, (sum(pagg_tab_p1.b ORDER BY pagg_tab_p1.a)) + -> Append + -> GroupAggregate + Group Key: pagg_tab_p1.c + -> Sort + Sort Key: pagg_tab_p1.c + -> Seq Scan on pagg_tab_p1 + -> GroupAggregate + Group Key: pagg_tab_p2.c + -> Sort + Sort Key: pagg_tab_p2.c + -> Seq Scan on pagg_tab_p2 + -> GroupAggregate + Group Key: pagg_tab_p3.c + -> Sort + Sort Key: pagg_tab_p3.c + -> Seq Scan on pagg_tab_p3 +(18 rows) + +-- Since GROUP BY clause does not match with PARTITION KEY; we need to do +-- partial aggregation. However, ORDERED SET are not partial safe and thus +-- partitionwise aggregation plan is not generated. +EXPLAIN (COSTS OFF) +SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; + QUERY PLAN +------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_p1.a, (sum(pagg_tab_p1.b ORDER BY pagg_tab_p1.a)) + -> GroupAggregate + Group Key: pagg_tab_p1.a + -> Sort + Sort Key: pagg_tab_p1.a + -> Append + -> Seq Scan on pagg_tab_p1 + -> Seq Scan on pagg_tab_p2 + -> Seq Scan on pagg_tab_p3 +(10 rows) + +-- JOIN query +CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); +CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; +ANALYZE pagg_tab1; +ANALYZE pagg_tab2; +-- When GROUP BY clause matches; full aggregation is performed for each partition. 
+EXPLAIN (COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t1.x, (sum(t1.y)), (count(*)) + -> Append + -> HashAggregate + Group Key: t1.x + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> HashAggregate + Group Key: t1_1.x + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> HashAggregate + Group Key: t1_2.x + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(24 rows) + +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + x | sum | count +----+------+------- + 0 | 500 | 100 + 6 | 1100 | 100 + 12 | 700 | 100 + 18 | 1300 | 100 + 24 | 900 | 100 +(5 rows) + +-- GROUP BY having other matching key +EXPLAIN (COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------- + Sort + Sort Key: t2.y, (sum(t1.y)), (count(*)) + -> Append + -> HashAggregate + Group Key: t2.y + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> HashAggregate + Group Key: t2_1.y + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> HashAggregate + Group Key: t2_2.y + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(24 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +-- Also test GroupAggregate paths by disabling hash aggregates. 
+SET enable_hashagg TO false; +EXPLAIN (COSTS OFF) +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------- + Sort + Sort Key: t1.y, (sum(t1.x)), (count(*)) + -> Finalize GroupAggregate + Group Key: t1.y + Filter: (avg(t1.x) > '10'::numeric) + -> Merge Append + Sort Key: t1.y + -> Partial GroupAggregate + Group Key: t1.y + -> Sort + Sort Key: t1.y + -> Hash Join + Hash Cond: (t1.x = t2.y) + -> Seq Scan on pagg_tab1_p1 t1 + -> Hash + -> Seq Scan on pagg_tab2_p1 t2 + -> Partial GroupAggregate + Group Key: t1_1.y + -> Sort + Sort Key: t1_1.y + -> Hash Join + Hash Cond: (t1_1.x = t2_1.y) + -> Seq Scan on pagg_tab1_p2 t1_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 t2_1 + -> Partial GroupAggregate + Group Key: t1_2.y + -> Sort + Sort Key: t1_2.y + -> Hash Join + Hash Cond: (t2_2.y = t1_2.x) + -> Seq Scan on pagg_tab2_p3 t2_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 t1_2 +(34 rows) + +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; + y | sum | count +----+------+------- + 2 | 600 | 50 + 4 | 1200 | 50 + 8 | 900 | 50 + 12 | 600 | 50 + 14 | 1200 | 50 + 18 | 900 | 50 +(6 rows) + +RESET enable_hashagg; +-- Check with LEFT/RIGHT/FULL OUTER JOINs which produces NULL values for +-- aggregation +-- LEFT JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: b.y + -> Sort + Sort Key: b.y + -> Append + -> Partial HashAggregate + Group Key: b.y + -> Hash Left Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> Partial HashAggregate + Group Key: b_1.y + -> Hash Left Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> Partial HashAggregate + Group Key: b_2.y + -> Hash Right Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(26 rows) + +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + y | sum +----+------ + 0 | 500 + 6 | 1100 + 12 | 700 + 18 | 1300 + 24 | 900 + | 900 +(6 rows) + +-- RIGHT JOIN, should produce full partitionwise aggregation plan as +-- GROUP BY is on non-nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------ + Sort + Sort Key: b.y + -> Append + -> HashAggregate + Group Key: b.y + -> Hash Right Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> HashAggregate + Group Key: b_1.y + -> Hash Right Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> HashAggregate + Group Key: b_2.y + -> Hash Left Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(24 rows) + +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + y | 
sum +----+------ + 0 | 500 + 3 | + 6 | 1100 + 9 | + 12 | 700 + 15 | + 18 | 1300 + 21 | + 24 | 900 + 27 | +(10 rows) + +-- FULL JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; + QUERY PLAN +------------------------------------------------------------------ + Finalize GroupAggregate + Group Key: a.x + -> Sort + Sort Key: a.x + -> Append + -> Partial HashAggregate + Group Key: a.x + -> Hash Full Join + Hash Cond: (a.x = b.y) + -> Seq Scan on pagg_tab1_p1 a + -> Hash + -> Seq Scan on pagg_tab2_p1 b + -> Partial HashAggregate + Group Key: a_1.x + -> Hash Full Join + Hash Cond: (a_1.x = b_1.y) + -> Seq Scan on pagg_tab1_p2 a_1 + -> Hash + -> Seq Scan on pagg_tab2_p2 b_1 + -> Partial HashAggregate + Group Key: a_2.x + -> Hash Full Join + Hash Cond: (b_2.y = a_2.x) + -> Seq Scan on pagg_tab2_p3 b_2 + -> Hash + -> Seq Scan on pagg_tab1_p3 a_2 +(26 rows) + +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; + x | sum +----+------ + 0 | 500 + 2 | + 4 | + 6 | 1100 + 8 | + 10 | + 12 | 700 + 14 | + 16 | + 18 | 1300 + 20 | + 22 | + 24 | 900 + 26 | + 28 | + | 500 +(16 rows) + +-- LEFT JOIN, with dummy relation on right side, +-- should produce full partitionwise aggregation plan as GROUP BY is on +-- non-nullable columns +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + QUERY PLAN +----------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab1_p1.x, y + -> Append + -> HashAggregate + Group Key: pagg_tab1_p1.x, y + -> Hash Left Join + Hash Cond: (pagg_tab1_p1.x = y) + Filter: ((pagg_tab1_p1.x > 5) OR (y < 20)) + -> Seq Scan on pagg_tab1_p1 + Filter: (x < 20) + -> Hash + -> Result + One-Time Filter: false + -> HashAggregate + Group Key: pagg_tab1_p2.x, pagg_tab2_p2.y + -> Hash Left Join + Hash Cond: (pagg_tab1_p2.x = pagg_tab2_p2.y) + Filter: ((pagg_tab1_p2.x > 5) OR (pagg_tab2_p2.y < 20)) + -> Seq Scan on pagg_tab1_p2 + Filter: (x < 20) + -> Hash + -> Seq Scan on pagg_tab2_p2 + Filter: (y > 10) +(23 rows) + +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + x | y | count +----+----+------- + 6 | | 10 + 8 | | 10 + 10 | | 10 + 12 | 12 | 100 + 14 | | 10 + 16 | | 10 + 18 | 18 | 100 +(7 rows) + +-- FULL JOIN, with dummy relations on both sides, +-- should produce partial partitionwise aggregation plan as GROUP BY is on +-- nullable columns +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize GroupAggregate + Group Key: pagg_tab1_p1.x, y + -> Sort + Sort Key: pagg_tab1_p1.x, y + -> Append + -> Partial HashAggregate + Group Key: pagg_tab1_p1.x, y + -> Hash Full Join + Hash Cond: (pagg_tab1_p1.x = y) + Filter: ((pagg_tab1_p1.x > 5) OR (y < 20)) + -> Seq Scan on pagg_tab1_p1 + Filter: (x < 20) + -> Hash + -> Result + One-Time Filter: false + -> 
Partial HashAggregate + Group Key: pagg_tab1_p2.x, pagg_tab2_p2.y + -> Hash Full Join + Hash Cond: (pagg_tab1_p2.x = pagg_tab2_p2.y) + Filter: ((pagg_tab1_p2.x > 5) OR (pagg_tab2_p2.y < 20)) + -> Seq Scan on pagg_tab1_p2 + Filter: (x < 20) + -> Hash + -> Seq Scan on pagg_tab2_p2 + Filter: (y > 10) + -> Partial HashAggregate + Group Key: x, pagg_tab2_p3.y + -> Hash Full Join + Hash Cond: (pagg_tab2_p3.y = x) + Filter: ((x > 5) OR (pagg_tab2_p3.y < 20)) + -> Seq Scan on pagg_tab2_p3 + Filter: (y > 10) + -> Hash + -> Result + One-Time Filter: false +(35 rows) + +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + x | y | count +----+----+------- + 6 | | 10 + 8 | | 10 + 10 | | 10 + 12 | 12 | 100 + 14 | | 10 + 16 | | 10 + 18 | 18 | 100 + | 15 | 10 +(8 rows) + +-- Empty join relation because of empty outer side, no partitionwise agg plan +EXPLAIN (COSTS OFF) +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; + QUERY PLAN +--------------------------------------- + GroupAggregate + Group Key: pagg_tab1.x, pagg_tab1.y + -> Sort + Sort Key: pagg_tab1.y + -> Result + One-Time Filter: false +(6 rows) + +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; + x | y | count +---+---+------- +(0 rows) + +-- Partition by multiple columns +CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); +CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (10, 10); +CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (10, 10) TO (20, 20); +CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (20, 20) TO (30, 30); +INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab_m; +-- Partial aggregation as GROUP BY clause does not match with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; + QUERY PLAN +----------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_m_p1.a, (sum(pagg_tab_m_p1.b)), (avg(pagg_tab_m_p1.c)) + -> Finalize HashAggregate + Group Key: pagg_tab_m_p1.a + Filter: (avg(pagg_tab_m_p1.c) < '22'::numeric) + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_m_p1.a + -> Seq Scan on pagg_tab_m_p1 + -> Partial HashAggregate + Group Key: pagg_tab_m_p2.a + -> Seq Scan on pagg_tab_m_p2 + -> Partial HashAggregate + Group Key: pagg_tab_m_p3.a + -> Seq Scan on pagg_tab_m_p3 +(15 rows) + +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; + a | sum | avg | count +----+------+---------------------+------- + 0 | 1500 | 20.0000000000000000 | 100 + 1 | 1600 | 21.0000000000000000 | 100 + 10 | 1500 | 20.0000000000000000 | 100 + 11 | 1600 | 21.0000000000000000 | 100 + 20 | 1500 | 20.0000000000000000 | 100 + 21 | 1600 | 21.0000000000000000 | 100 +(6 rows) + +-- Full aggregation as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_m_p1.a, 
(sum(pagg_tab_m_p1.b)), (avg(pagg_tab_m_p1.c)) + -> Append + -> HashAggregate + Group Key: pagg_tab_m_p1.a, ((pagg_tab_m_p1.a + pagg_tab_m_p1.b) / 2) + Filter: (sum(pagg_tab_m_p1.b) < 50) + -> Seq Scan on pagg_tab_m_p1 + -> HashAggregate + Group Key: pagg_tab_m_p2.a, ((pagg_tab_m_p2.a + pagg_tab_m_p2.b) / 2) + Filter: (sum(pagg_tab_m_p2.b) < 50) + -> Seq Scan on pagg_tab_m_p2 + -> HashAggregate + Group Key: pagg_tab_m_p3.a, ((pagg_tab_m_p3.a + pagg_tab_m_p3.b) / 2) + Filter: (sum(pagg_tab_m_p3.b) < 50) + -> Seq Scan on pagg_tab_m_p3 +(15 rows) + +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; + a | sum | avg | count +----+-----+---------------------+------- + 0 | 0 | 20.0000000000000000 | 25 + 1 | 25 | 21.0000000000000000 | 25 + 10 | 0 | 20.0000000000000000 | 25 + 11 | 25 | 21.0000000000000000 | 25 + 20 | 0 | 20.0000000000000000 | 25 + 21 | 25 | 21.0000000000000000 | 25 +(6 rows) + +-- Full aggregation as PARTITION KEY is part of GROUP BY clause +EXPLAIN (COSTS OFF) +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Sort + Sort Key: pagg_tab_m_p1.a, pagg_tab_m_p1.c, (sum(pagg_tab_m_p1.b)) + -> Append + -> HashAggregate + Group Key: ((pagg_tab_m_p1.a + pagg_tab_m_p1.b) / 2), pagg_tab_m_p1.c, pagg_tab_m_p1.a + Filter: ((sum(pagg_tab_m_p1.b) = 50) AND (avg(pagg_tab_m_p1.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p1 + -> HashAggregate + Group Key: ((pagg_tab_m_p2.a + pagg_tab_m_p2.b) / 2), pagg_tab_m_p2.c, pagg_tab_m_p2.a + Filter: ((sum(pagg_tab_m_p2.b) = 50) AND (avg(pagg_tab_m_p2.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p2 + -> HashAggregate + Group Key: ((pagg_tab_m_p3.a + pagg_tab_m_p3.b) / 2), pagg_tab_m_p3.c, pagg_tab_m_p3.a + Filter: ((sum(pagg_tab_m_p3.b) = 50) AND (avg(pagg_tab_m_p3.c) > '25'::numeric)) + -> Seq Scan on pagg_tab_m_p3 +(15 rows) + +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; + a | c | sum | avg | count +----+----+-----+---------------------+------- + 0 | 30 | 50 | 30.0000000000000000 | 5 + 0 | 40 | 50 | 40.0000000000000000 | 5 + 10 | 30 | 50 | 30.0000000000000000 | 5 + 10 | 40 | 50 | 40.0000000000000000 | 5 + 20 | 30 | 50 | 30.0000000000000000 | 5 + 20 | 40 | 50 | 40.0000000000000000 | 5 +(6 rows) + +-- Test with multi-level partitioning scheme +CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); +CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY LIST (c); +CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001'); +CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0002', '0003'); +-- This level of partitioning has different column positions than the parent +CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); +CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); +CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (5) TO (10); +ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (5); +ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab_ml SELECT i % 30, i % 10, to_char(i % 4, 
'FM0000') FROM generate_series(0, 29999) i; +ANALYZE pagg_tab_ml; +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, but still we do not see a partial aggregation as array_agg() +-- is not partial agg safe. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p2_s1.a, (sum(pagg_tab_ml_p2_s1.b)), (array_agg(DISTINCT pagg_tab_ml_p2_s1.c)) + -> Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Seq Scan on pagg_tab_ml_p2_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Seq Scan on pagg_tab_ml_p3_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p1.a + -> Seq Scan on pagg_tab_ml_p1 +(27 rows) + +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | array_agg | count +----+------+-------------+------- + 0 | 0 | {0000,0002} | 1000 + 1 | 1000 | {0001,0003} | 1000 + 2 | 2000 | {0000,0002} | 1000 + 10 | 0 | {0000,0002} | 1000 + 11 | 1000 | {0001,0003} | 1000 + 12 | 2000 | {0000,0002} | 1000 + 20 | 0 | {0000,0002} | 1000 + 21 | 1000 | {0001,0003} | 1000 + 22 | 2000 | {0000,0002} | 1000 +(9 rows) + +-- Without ORDER BY clause, to test Gather at top-most path +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; + QUERY PLAN +----------------------------------------------------------------- + Gather + Workers Planned: 2 + -> Parallel Append + -> GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Seq Scan on pagg_tab_ml_p2_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Append + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Seq Scan on pagg_tab_ml_p3_s2 + -> GroupAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p1.a + -> Seq Scan on pagg_tab_ml_p1 +(25 rows) + +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. 
+EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Append + -> HashAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Seq Scan on pagg_tab_ml_p1 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.a + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.a + -> Seq Scan on pagg_tab_ml_p2_s2 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.a + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.a + -> Seq Scan on pagg_tab_ml_p3_s2 +(31 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 0 | 0 | 1000 + 1 | 1000 | 1000 + 2 | 2000 | 1000 + 10 | 0 | 1000 + 11 | 1000 | 1000 + 12 | 2000 | 1000 + 20 | 0 | 1000 + 21 | 1000 | 1000 + 22 | 2000 | 1000 +(9 rows) + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; + QUERY PLAN +------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.b, (sum(pagg_tab_ml_p1.a)), (count(*)) + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p1.b + -> Sort + Sort Key: pagg_tab_ml_p1.b + -> Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p1.b + -> Seq Scan on pagg_tab_ml_p1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.b + -> Seq Scan on pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.b + -> Seq Scan on pagg_tab_ml_p2_s2 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.b + -> Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.b + -> Seq Scan on pagg_tab_ml_p3_s2 +(22 rows) + +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + b | sum | count +---+-------+------- + 0 | 30000 | 3000 + 1 | 33000 | 3000 + 2 | 36000 | 3000 + 3 | 39000 | 3000 + 4 | 42000 | 3000 +(5 rows) + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Append + -> HashAggregate + Group Key: pagg_tab_ml_p1.a, pagg_tab_ml_p1.b, pagg_tab_ml_p1.c + Filter: (avg(pagg_tab_ml_p1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s1.a, pagg_tab_ml_p2_s1.b, pagg_tab_ml_p2_s1.c + Filter: (avg(pagg_tab_ml_p2_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s2.a, pagg_tab_ml_p2_s2.b, pagg_tab_ml_p2_s2.c + Filter: (avg(pagg_tab_ml_p2_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s2 + -> HashAggregate + 
Group Key: pagg_tab_ml_p3_s1.a, pagg_tab_ml_p3_s1.b, pagg_tab_ml_p3_s1.c + Filter: (avg(pagg_tab_ml_p3_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p3_s2.a, pagg_tab_ml_p3_s2.b, pagg_tab_ml_p3_s2.c + Filter: (avg(pagg_tab_ml_p3_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s2 +(23 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 8 | 4000 | 500 + 8 | 4000 | 500 + 9 | 4500 | 500 + 9 | 4500 | 500 + 18 | 4000 | 500 + 18 | 4000 | 500 + 19 | 4500 | 500 + 19 | 4500 | 500 + 28 | 4000 | 500 + 28 | 4000 | 500 + 29 | 4500 | 500 + 29 | 4500 | 500 +(12 rows) + +-- Parallelism within partitionwise aggregates +SET max_parallel_workers_per_gather TO 2; +SET min_parallel_table_scan_size TO '8kB'; +SET parallel_setup_cost TO 0; +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Append + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p1.a + Filter: (avg(pagg_tab_ml_p1.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p1.a + -> Partial HashAggregate + Group Key: pagg_tab_ml_p1.a + -> Parallel Seq Scan on pagg_tab_ml_p1 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p2_s1.a + Filter: (avg(pagg_tab_ml_p2_s1.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p2_s1.a + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.a + -> Parallel Seq Scan on pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.a + -> Parallel Seq Scan on pagg_tab_ml_p2_s2 + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p3_s1.a + Filter: (avg(pagg_tab_ml_p3_s1.b) < '3'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p3_s1.a + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.a + -> Parallel Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.a + -> Parallel Seq Scan on pagg_tab_ml_p3_s2 +(41 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 0 | 0 | 1000 + 1 | 1000 | 1000 + 2 | 2000 | 1000 + 10 | 0 | 1000 + 11 | 1000 | 1000 + 12 | 2000 | 1000 + 20 | 0 | 1000 + 21 | 1000 | 1000 + 22 | 2000 | 1000 +(9 rows) + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_ml_p1.b, (sum(pagg_tab_ml_p1.a)), (count(*)) + -> Finalize GroupAggregate + Group Key: pagg_tab_ml_p1.b + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p1.b + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_ml_p1.b + -> Parallel Seq Scan on pagg_tab_ml_p1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s1.b + -> Parallel Seq Scan on 
pagg_tab_ml_p2_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p2_s2.b + -> Parallel Seq Scan on pagg_tab_ml_p2_s2 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s1.b + -> Parallel Seq Scan on pagg_tab_ml_p3_s1 + -> Partial HashAggregate + Group Key: pagg_tab_ml_p3_s2.b + -> Parallel Seq Scan on pagg_tab_ml_p3_s2 +(24 rows) + +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + b | sum | count +---+-------+------- + 0 | 30000 | 3000 + 1 | 33000 | 3000 + 2 | 36000 | 3000 + 3 | 39000 | 3000 + 4 | 42000 | 3000 +(5 rows) + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------------------------------------- + Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_ml_p1.a, (sum(pagg_tab_ml_p1.b)), (count(*)) + -> Parallel Append + -> HashAggregate + Group Key: pagg_tab_ml_p1.a, pagg_tab_ml_p1.b, pagg_tab_ml_p1.c + Filter: (avg(pagg_tab_ml_p1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s1.a, pagg_tab_ml_p2_s1.b, pagg_tab_ml_p2_s1.c + Filter: (avg(pagg_tab_ml_p2_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p2_s2.a, pagg_tab_ml_p2_s2.b, pagg_tab_ml_p2_s2.c + Filter: (avg(pagg_tab_ml_p2_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p2_s2 + -> HashAggregate + Group Key: pagg_tab_ml_p3_s1.a, pagg_tab_ml_p3_s1.b, pagg_tab_ml_p3_s1.c + Filter: (avg(pagg_tab_ml_p3_s1.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s1 + -> HashAggregate + Group Key: pagg_tab_ml_p3_s2.a, pagg_tab_ml_p3_s2.b, pagg_tab_ml_p3_s2.c + Filter: (avg(pagg_tab_ml_p3_s2.b) > '7'::numeric) + -> Seq Scan on pagg_tab_ml_p3_s2 +(25 rows) + +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + a | sum | count +----+------+------- + 8 | 4000 | 500 + 8 | 4000 | 500 + 9 | 4500 | 500 + 9 | 4500 | 500 + 18 | 4000 | 500 + 18 | 4000 | 500 + 19 | 4500 | 500 + 19 | 4500 | 500 + 28 | 4000 | 500 + 28 | 4000 | 500 + 29 | 4500 | 500 + 29 | 4500 | 500 +(12 rows) + +-- Parallelism within partitionwise aggregates (single level) +-- Add few parallel setup cost, so that we will see a plan which gathers +-- partially created paths even for full aggregation and sticks a single Gather +-- followed by finalization step. +-- Without this, the cost of doing partial aggregation + Gather + finalization +-- for each partition and then Append over it turns out to be same and this +-- wins as we add it first. This parallel_setup_cost plays a vital role in +-- costing such plans. +SET parallel_setup_cost TO 10; +CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (20) TO (30); +INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; +ANALYZE pagg_tab_para; +-- When GROUP BY clause matches; full aggregation is performed for each partition. 
+EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para_p1.x, (sum(pagg_tab_para_p1.y)), (avg(pagg_tab_para_p1.y)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para_p1.x + Filter: (avg(pagg_tab_para_p1.y) < '7'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para_p1.x + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para_p1.x + -> Parallel Seq Scan on pagg_tab_para_p1 + -> Partial HashAggregate + Group Key: pagg_tab_para_p2.x + -> Parallel Seq Scan on pagg_tab_para_p2 + -> Partial HashAggregate + Group Key: pagg_tab_para_p3.x + -> Parallel Seq Scan on pagg_tab_para_p3 +(19 rows) + +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + x | sum | avg | count +----+------+--------------------+------- + 0 | 5000 | 5.0000000000000000 | 1000 + 1 | 6000 | 6.0000000000000000 | 1000 + 10 | 5000 | 5.0000000000000000 | 1000 + 11 | 6000 | 6.0000000000000000 | 1000 + 20 | 5000 | 5.0000000000000000 | 1000 + 21 | 6000 | 6.0000000000000000 | 1000 +(6 rows) + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: pagg_tab_para_p1.y, (sum(pagg_tab_para_p1.x)), (avg(pagg_tab_para_p1.x)) + -> Finalize GroupAggregate + Group Key: pagg_tab_para_p1.y + Filter: (avg(pagg_tab_para_p1.x) < '12'::numeric) + -> Gather Merge + Workers Planned: 2 + -> Sort + Sort Key: pagg_tab_para_p1.y + -> Parallel Append + -> Partial HashAggregate + Group Key: pagg_tab_para_p1.y + -> Parallel Seq Scan on pagg_tab_para_p1 + -> Partial HashAggregate + Group Key: pagg_tab_para_p2.y + -> Parallel Seq Scan on pagg_tab_para_p2 + -> Partial HashAggregate + Group Key: pagg_tab_para_p3.y + -> Parallel Seq Scan on pagg_tab_para_p3 +(19 rows) + +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; + y | sum | avg | count +----+-------+---------------------+------- + 0 | 15000 | 10.0000000000000000 | 1500 + 1 | 16500 | 11.0000000000000000 | 1500 + 10 | 15000 | 10.0000000000000000 | 1500 + 11 | 16500 | 11.0000000000000000 | 1500 +(4 rows) + diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 759f7d9d59..a19ee08749 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -70,24 +70,25 @@ select count(*) >= 0 as ok from pg_prepared_xacts; -- This is to record the prevailing planner enable_foo settings during -- a regression test run. 
select name, setting from pg_settings where name like 'enable%'; - name | setting ----------------------------+--------- - enable_bitmapscan | on - enable_gathermerge | on - enable_hashagg | on - enable_hashjoin | on - enable_indexonlyscan | on - enable_indexscan | on - enable_material | on - enable_mergejoin | on - enable_nestloop | on - enable_parallel_append | on - enable_parallel_hash | on - enable_partitionwise_join | off - enable_seqscan | on - enable_sort | on - enable_tidscan | on -(15 rows) + name | setting +--------------------------------+--------- + enable_bitmapscan | on + enable_gathermerge | on + enable_hashagg | on + enable_hashjoin | on + enable_indexonlyscan | on + enable_indexscan | on + enable_material | on + enable_mergejoin | on + enable_nestloop | on + enable_parallel_append | on + enable_parallel_hash | on + enable_partitionwise_aggregate | off + enable_partitionwise_join | off + enable_seqscan | on + enable_sort | on + enable_tidscan | on +(16 rows) -- Test that the pg_timezone_names and pg_timezone_abbrevs views are -- more-or-less working. We can't test their contents in any great detail diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index ad9434fb87..d308a05117 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -116,7 +116,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid c # ---------- # Another group of parallel tests # ---------- -test: identity partition_join partition_prune reloptions hash_part indexing +test: identity partition_join partition_prune reloptions hash_part indexing partition_aggregate # event triggers cannot run concurrently with any test that runs DDL test: event_trigger diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 27cd49845e..45147e9328 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -185,5 +185,6 @@ test: partition_prune test: reloptions test: hash_part test: indexing +test: partition_aggregate test: event_trigger test: stats diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql new file mode 100644 index 0000000000..f7b5f5a397 --- /dev/null +++ b/src/test/regress/sql/partition_aggregate.sql @@ -0,0 +1,292 @@ +-- +-- PARTITION_AGGREGATE +-- Test partitionwise aggregation on partitioned tables +-- + +-- Enable partitionwise aggregate, which by default is disabled. +SET enable_partitionwise_aggregate TO true; +-- Enable partitionwise join, which by default is disabled. +SET enable_partitionwise_join TO true; + +-- +-- Tests for list partitioned tables. +-- +CREATE TABLE pagg_tab (a int, b int, c text, d int) PARTITION BY LIST(c); +CREATE TABLE pagg_tab_p1 PARTITION OF pagg_tab FOR VALUES IN ('0000', '0001', '0002', '0003'); +CREATE TABLE pagg_tab_p2 PARTITION OF pagg_tab FOR VALUES IN ('0004', '0005', '0006', '0007'); +CREATE TABLE pagg_tab_p3 PARTITION OF pagg_tab FOR VALUES IN ('0008', '0009', '0010', '0011'); +INSERT INTO pagg_tab SELECT i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab; + +-- When GROUP BY clause matches; full aggregation is performed for each partition. 
+EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; +SELECT c, sum(a), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY c HAVING avg(d) < 15 ORDER BY 1, 2, 3; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; +SELECT a, sum(b), avg(b), count(*), min(a), max(b) FROM pagg_tab GROUP BY a HAVING avg(d) < 15 ORDER BY 1, 2, 3; + +-- Check with multiple columns in GROUP BY +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY a, c; +-- Check with multiple columns in GROUP BY, order in GROUP BY is reversed +EXPLAIN (COSTS OFF) +SELECT a, c, count(*) FROM pagg_tab GROUP BY c, a; +-- Check with multiple columns in GROUP BY, order in target-list is reversed +EXPLAIN (COSTS OFF) +SELECT c, a, count(*) FROM pagg_tab GROUP BY a, c; + +-- Test when input relation for grouping is dummy +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; +SELECT c, sum(a) FROM pagg_tab WHERE 1 = 2 GROUP BY c; +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; +SELECT c, sum(a) FROM pagg_tab WHERE c = 'x' GROUP BY c; + +-- Test GroupAggregate paths by disabling hash aggregates. +SET enable_hashagg TO false; + +-- When GROUP BY clause matches full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; +SELECT c, sum(a), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; +SELECT a, sum(b), avg(b), count(*) FROM pagg_tab GROUP BY 1 HAVING avg(d) < 15 ORDER BY 1, 2, 3; + +-- Test partitionwise grouping without any aggregates +EXPLAIN (COSTS OFF) +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; +SELECT c FROM pagg_tab GROUP BY c ORDER BY 1; +EXPLAIN (COSTS OFF) +SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; +SELECT a FROM pagg_tab WHERE a < 3 GROUP BY a ORDER BY 1; + +RESET enable_hashagg; + +-- ROLLUP, partitionwise aggregation does not apply +EXPLAIN (COSTS OFF) +SELECT c, sum(a) FROM pagg_tab GROUP BY rollup(c) ORDER BY 1, 2; + +-- ORDERED SET within the aggregate. +-- Full aggregation; since all the rows that belong to the same group come +-- from the same partition, having an ORDER BY within the aggregate doesn't +-- make any difference. +EXPLAIN (COSTS OFF) +SELECT c, sum(b order by a) FROM pagg_tab GROUP BY c ORDER BY 1, 2; +-- Since GROUP BY clause does not match with PARTITION KEY; we need to do +-- partial aggregation. However, ORDERED SET are not partial safe and thus +-- partitionwise aggregation plan is not generated. 
+EXPLAIN (COSTS OFF) +SELECT a, sum(b order by a) FROM pagg_tab GROUP BY a ORDER BY 1, 2; + + +-- JOIN query + +CREATE TABLE pagg_tab1(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab1_p1 PARTITION OF pagg_tab1 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab1_p2 PARTITION OF pagg_tab1 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab1_p3 PARTITION OF pagg_tab1 FOR VALUES FROM (20) TO (30); + +CREATE TABLE pagg_tab2(x int, y int) PARTITION BY RANGE(y); +CREATE TABLE pagg_tab2_p1 PARTITION OF pagg_tab2 FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab2_p2 PARTITION OF pagg_tab2 FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab2_p3 PARTITION OF pagg_tab2 FOR VALUES FROM (20) TO (30); + +INSERT INTO pagg_tab1 SELECT i % 30, i % 20 FROM generate_series(0, 299, 2) i; +INSERT INTO pagg_tab2 SELECT i % 20, i % 30 FROM generate_series(0, 299, 3) i; + +ANALYZE pagg_tab1; +ANALYZE pagg_tab2; + +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; +SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3; + +-- GROUP BY having other matching key +EXPLAIN (COSTS OFF) +SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +-- Also test GroupAggregate paths by disabling hash aggregates. +SET enable_hashagg TO false; +EXPLAIN (COSTS OFF) +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; +SELECT t1.y, sum(t1.x), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.y HAVING avg(t1.x) > 10 ORDER BY 1, 2, 3; +RESET enable_hashagg; + +-- Check with LEFT/RIGHT/FULL OUTER JOINs which produces NULL values for +-- aggregation + +-- LEFT JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; +SELECT b.y, sum(a.y) FROM pagg_tab1 a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + +-- RIGHT JOIN, should produce full partitionwise aggregation plan as +-- GROUP BY is on non-nullable column +EXPLAIN (COSTS OFF) +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; +SELECT b.y, sum(a.y) FROM pagg_tab1 a RIGHT JOIN pagg_tab2 b ON a.x = b.y GROUP BY b.y ORDER BY 1 NULLS LAST; + +-- FULL JOIN, should produce partial partitionwise aggregation plan as +-- GROUP BY is on nullable column +EXPLAIN (COSTS OFF) +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; +SELECT a.x, sum(b.x) FROM pagg_tab1 a FULL OUTER JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x ORDER BY 1 NULLS LAST; + +-- LEFT JOIN, with dummy relation on right side, +-- should produce full partitionwise aggregation plan as GROUP BY is on +-- non-nullable columns +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN 
(SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + +-- FULL JOIN, with dummy relations on both sides, +-- should produce partial partitionwise aggregation plan as GROUP BY is on +-- nullable columns +EXPLAIN (COSTS OFF) +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; +SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a FULL JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2; + +-- Empty join relation because of empty outer side, no partitionwise agg plan +EXPLAIN (COSTS OFF) +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; +SELECT a.x, a.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x = 1 AND x = 2) a LEFT JOIN pagg_tab2 b ON a.x = b.y GROUP BY a.x, a.y ORDER BY 1, 2; + + +-- Partition by multiple columns + +CREATE TABLE pagg_tab_m (a int, b int, c int) PARTITION BY RANGE(a, ((a+b)/2)); +CREATE TABLE pagg_tab_m_p1 PARTITION OF pagg_tab_m FOR VALUES FROM (0, 0) TO (10, 10); +CREATE TABLE pagg_tab_m_p2 PARTITION OF pagg_tab_m FOR VALUES FROM (10, 10) TO (20, 20); +CREATE TABLE pagg_tab_m_p3 PARTITION OF pagg_tab_m FOR VALUES FROM (20, 20) TO (30, 30); +INSERT INTO pagg_tab_m SELECT i % 30, i % 40, i % 50 FROM generate_series(0, 2999) i; +ANALYZE pagg_tab_m; + +-- Partial aggregation as GROUP BY clause does not match with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a HAVING avg(c) < 22 ORDER BY 1, 2, 3; + +-- Full aggregation as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; +SELECT a, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY a, (a+b)/2 HAVING sum(b) < 50 ORDER BY 1, 2, 3; + +-- Full aggregation as PARTITION KEY is part of GROUP BY clause +EXPLAIN (COSTS OFF) +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; +SELECT a, c, sum(b), avg(c), count(*) FROM pagg_tab_m GROUP BY (a+b)/2, 2, 1 HAVING sum(b) = 50 AND avg(c) > 25 ORDER BY 1, 2, 3; + + +-- Test with multi-level partitioning scheme + +CREATE TABLE pagg_tab_ml (a int, b int, c text) PARTITION BY RANGE(a); +CREATE TABLE pagg_tab_ml_p1 PARTITION OF pagg_tab_ml FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab_ml_p2 PARTITION OF pagg_tab_ml FOR VALUES FROM (10) TO (20) PARTITION BY LIST (c); +CREATE TABLE pagg_tab_ml_p2_s1 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0000', '0001'); +CREATE TABLE pagg_tab_ml_p2_s2 PARTITION OF pagg_tab_ml_p2 FOR VALUES IN ('0002', '0003'); + +-- This level of partitioning has different column positions than the parent +CREATE TABLE pagg_tab_ml_p3(b int, c text, a int) PARTITION BY RANGE (b); +CREATE TABLE pagg_tab_ml_p3_s1(c text, a int, b int); +CREATE TABLE pagg_tab_ml_p3_s2 PARTITION OF pagg_tab_ml_p3 FOR VALUES FROM (5) TO (10); + +ALTER TABLE pagg_tab_ml_p3 ATTACH PARTITION pagg_tab_ml_p3_s1 FOR VALUES FROM (0) TO (5); +ALTER TABLE pagg_tab_ml ATTACH PARTITION pagg_tab_ml_p3 FOR VALUES FROM (20) TO (30); + +INSERT INTO pagg_tab_ml SELECT i % 30, i % 
10, to_char(i % 4, 'FM0000') FROM generate_series(0, 29999) i; +ANALYZE pagg_tab_ml; + +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, but still we do not see a partial aggregation as array_agg() +-- is not partial agg safe. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + +-- Without ORDER BY clause, to test Gather at top-most path +EXPLAIN (COSTS OFF) +SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3; + +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + +-- Parallelism within partitionwise aggregates + +SET max_parallel_workers_per_gather TO 2; +SET min_parallel_table_scan_size TO '8kB'; +SET parallel_setup_cost TO 0; + +-- Full aggregation at level 1 as GROUP BY clause matches with PARTITION KEY +-- for level 1 only. For subpartitions, GROUP BY clause does not match with +-- PARTITION KEY, thus we will have a partial aggregation for them. +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3; + +-- Partial aggregation at all levels as GROUP BY clause does not match with +-- PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b ORDER BY 1, 2, 3; +SELECT b, sum(a), count(*) FROM pagg_tab_ml GROUP BY b HAVING avg(a) < 15 ORDER BY 1, 2, 3; + +-- Full aggregation at all levels as GROUP BY clause matches with PARTITION KEY +EXPLAIN (COSTS OFF) +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; +SELECT a, sum(b), count(*) FROM pagg_tab_ml GROUP BY a, b, c HAVING avg(b) > 7 ORDER BY 1, 2, 3; + + +-- Parallelism within partitionwise aggregates (single level) + +-- Add few parallel setup cost, so that we will see a plan which gathers +-- partially created paths even for full aggregation and sticks a single Gather +-- followed by finalization step. +-- Without this, the cost of doing partial aggregation + Gather + finalization +-- for each partition and then Append over it turns out to be same and this +-- wins as we add it first. This parallel_setup_cost plays a vital role in +-- costing such plans. 
+SET parallel_setup_cost TO 10; + +CREATE TABLE pagg_tab_para(x int, y int) PARTITION BY RANGE(x); +CREATE TABLE pagg_tab_para_p1 PARTITION OF pagg_tab_para FOR VALUES FROM (0) TO (10); +CREATE TABLE pagg_tab_para_p2 PARTITION OF pagg_tab_para FOR VALUES FROM (10) TO (20); +CREATE TABLE pagg_tab_para_p3 PARTITION OF pagg_tab_para FOR VALUES FROM (20) TO (30); + +INSERT INTO pagg_tab_para SELECT i % 30, i % 20 FROM generate_series(0, 29999) i; + +ANALYZE pagg_tab_para; + +-- When GROUP BY clause matches; full aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; +SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) < 7 ORDER BY 1, 2, 3; + +-- When GROUP BY clause does not match; partial aggregation is performed for each partition. +EXPLAIN (COSTS OFF) +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; +SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 543cb17e41..b7c0efef8f 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -884,6 +884,7 @@ GrantStmt GrantTargetType Group GroupPath +GroupPathExtraData GroupState GroupVarInfo GroupingFunc @@ -1597,6 +1598,7 @@ PartitionScheme PartitionSpec PartitionTupleRouting PartitionedChildRelInfo +PartitionwiseAggregateType PasswordType Path PathClauseUsage -- 2.40.0