if (parent && IsA(parent, AggState))
{
AggState *aggstate = (AggState *) parent;
+ Aggref *aggref = (Aggref *) node;
+
+ if (aggstate->finalizeAggs &&
+ aggref->aggoutputtype != aggref->aggtype)
+ {
+ /* planner messed up */
+ elog(ERROR, "Aggref aggoutputtype must match aggtype");
+ }
aggstate->aggs = lcons(astate, aggstate->aggs);
aggstate->numaggs++;
COPY_SCALAR_FIELD(aggfnoid);
COPY_SCALAR_FIELD(aggtype);
+ COPY_SCALAR_FIELD(aggoutputtype);
COPY_SCALAR_FIELD(aggcollid);
COPY_SCALAR_FIELD(inputcollid);
COPY_NODE_FIELD(aggdirectargs);
{
COMPARE_SCALAR_FIELD(aggfnoid);
COMPARE_SCALAR_FIELD(aggtype);
+ COMPARE_SCALAR_FIELD(aggoutputtype);
COMPARE_SCALAR_FIELD(aggcollid);
COMPARE_SCALAR_FIELD(inputcollid);
COMPARE_NODE_FIELD(aggdirectargs);
type = ((const Param *) expr)->paramtype;
break;
case T_Aggref:
- type = ((const Aggref *) expr)->aggtype;
+ type = ((const Aggref *) expr)->aggoutputtype;
break;
case T_GroupingFunc:
type = INT4OID;
WRITE_OID_FIELD(aggfnoid);
WRITE_OID_FIELD(aggtype);
+ WRITE_OID_FIELD(aggoutputtype);
WRITE_OID_FIELD(aggcollid);
WRITE_OID_FIELD(inputcollid);
WRITE_NODE_FIELD(aggdirectargs);
READ_OID_FIELD(aggfnoid);
READ_OID_FIELD(aggtype);
+ READ_OID_FIELD(aggoutputtype);
READ_OID_FIELD(aggcollid);
READ_OID_FIELD(inputcollid);
READ_NODE_FIELD(aggdirectargs);
*/
cheapest_partial_path = linitial(rel->partial_pathlist);
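+ /* rel's own target and row estimates suffice here; no overrides needed */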
simple_gather_path = (Path *)
- create_gather_path(root, rel, cheapest_partial_path, NULL);
+ create_gather_path(root, rel, cheapest_partial_path, rel->reltarget,
+ NULL, NULL);
add_path(rel, simple_gather_path);
}
*
* 'rel' is the relation to be operated upon
* 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ * 'rows' may be used to point to a row estimate; if non-NULL, it overrides
+ * both 'rel' and 'param_info'. This is useful when the path doesn't exactly
+ * correspond to any particular RelOptInfo.
*/
void
cost_gather(GatherPath *path, PlannerInfo *root,
- RelOptInfo *rel, ParamPathInfo *param_info)
+ RelOptInfo *rel, ParamPathInfo *param_info,
+ double *rows)
{
Cost startup_cost = 0;
Cost run_cost = 0;
/* Mark the path with the correct row estimate */
- if (param_info)
+ if (rows)
+ path->path.rows = *rows;
+ else if (param_info)
path->path.rows = param_info->ppi_rows;
else
path->path.rows = rel->rows;
{
/* must be AGG_HASHED */
startup_cost = input_total_cost;
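+ /*
+ * Apply disable_cost rather than skipping hashing entirely, so that
+ * a hashed path can still be chosen when sorting isn't possible.
+ */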
+ if (!enable_hashagg)
+ startup_cost += disable_cost;
startup_cost += aggcosts->transCost.startup;
startup_cost += aggcosts->transCost.per_tuple * input_tuples;
startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
plan = make_agg(tlist, quals,
best_path->aggstrategy,
- false,
- true,
+ best_path->combineStates,
+ best_path->finalizeAggs,
list_length(best_path->groupClause),
extract_grouping_cols(best_path->groupClause,
subplan->targetlist),
double path_rows,
List *rollup_lists,
List *rollup_groupclauses);
+static void set_grouped_rel_consider_parallel(PlannerInfo *root,
+ RelOptInfo *grouped_rel,
+ PathTarget *target);
+static Size estimate_hashagg_tablesize(Path *path, AggClauseCosts *agg_costs,
+ double dNumGroups);
static RelOptInfo *create_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
PathTarget *target,
double limit_tuples);
static PathTarget *make_group_input_target(PlannerInfo *root,
PathTarget *final_target);
+static PathTarget *make_partialgroup_input_target(PlannerInfo *root,
+ PathTarget *final_target);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
static List *select_active_windows(PlannerInfo *root, WindowFuncLists *wflists);
static PathTarget *make_window_input_target(PlannerInfo *root,
}
}
+ /*
+ * Likewise for any partial paths, although this case is simpler, since
+ * we don't track the cheapest path.
+ */
+ foreach(lc, current_rel->partial_pathlist)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+
+ Assert(subpath->param_info == NULL);
+ lfirst(lc) = apply_projection_to_path(root, current_rel,
+ subpath, scanjoin_target);
+ }
+
/*
* Save the various upper-rel PathTargets we just computed into
* root->upper_targets[]. The core code doesn't use this, but it
return dNumGroups;
}
+/*
+ * set_grouped_rel_consider_parallel
+ * Determine if it's safe to generate partial paths for grouping.
+ */
+static void
+set_grouped_rel_consider_parallel(PlannerInfo *root, RelOptInfo *grouped_rel,
+ PathTarget *target)
+{
+ Query *parse = root->parse;
+
+ Assert(grouped_rel->reloptkind == RELOPT_UPPER_REL);
+
+ /*
+ * If there are no aggregates or GROUP BY clause, then no parallel
+ * aggregation is possible. At present, it doesn't matter whether
+ * consider_parallel gets set in this case, because none of the upper rels
+ * on top of this one try to set the flag or examine it, so we just bail
+ * out as quickly as possible. We might need to be more clever here in
+ * the future.
+ */
+ if (!parse->hasAggs && parse->groupClause == NIL)
+ return;
+
+ /*
+ * Similarly, bail out quickly if GROUPING SETS are present; we can't
+ * support those at present.
+ */
+ if (parse->groupingSets)
+ return;
+
+ /*
+ * If parallel-restricted functions are present in the target list or the
+ * HAVING clause, we cannot safely go parallel.
+ */
+ if (has_parallel_hazard((Node *) target->exprs, false) ||
+ has_parallel_hazard((Node *) parse->havingQual, false))
+ return;
+
+ /*
+ * All that's left now is to check that all aggregate functions support
+ * partial mode. If there are no aggregates, we can skip that check.
+ */
+ if (!parse->hasAggs)
+ grouped_rel->consider_parallel = true;
+ else if (aggregates_allow_partial((Node *) target->exprs) == PAT_ANY &&
+ aggregates_allow_partial(parse->havingQual) == PAT_ANY)
+ grouped_rel->consider_parallel = true;
+}
+
+/*
+ * estimate_hashagg_tablesize
+ * estimate the number of bytes that a hash aggregate hashtable will
+ * require based on the agg_costs, path width and dNumGroups.
+ */
+static Size
+estimate_hashagg_tablesize(Path *path, AggClauseCosts *agg_costs,
+ double dNumGroups)
+{
+ Size hashentrysize;
+
+ /* Estimate per-hash-entry space at tuple width... */
+ hashentrysize = MAXALIGN(path->pathtarget->width) +
+ MAXALIGN(SizeofMinimalTupleHeader);
+
+ /* plus space for pass-by-ref transition values... */
+ hashentrysize += agg_costs->transitionSpace;
+ /* plus the per-hash-entry overhead */
+ hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
+
+ return hashentrysize * dNumGroups;
+}
+
/*
* create_grouping_paths
*
*
* We need to consider sorted and hashed aggregation in the same function,
* because otherwise (1) it would be harder to throw an appropriate error
- * message if neither way works, and (2) we should not allow enable_hashagg or
- * hashtable size considerations to dissuade us from using hashing if sorting
- * is not possible.
+ * message if neither way works, and (2) we should not allow hashtable size
+ * considerations to dissuade us from using hashing if sorting is not possible.
*/
static RelOptInfo *
create_grouping_paths(PlannerInfo *root,
Query *parse = root->parse;
Path *cheapest_path = input_rel->cheapest_total_path;
RelOptInfo *grouped_rel;
+ PathTarget *partial_grouping_target = NULL;
AggClauseCosts agg_costs;
+ Size hashaggtablesize;
double dNumGroups;
- bool allow_hash;
+ double dNumPartialGroups = 0;
+ bool can_hash;
+ bool can_sort;
+
ListCell *lc;
/* For now, do all work in the (GROUP_AGG, NULL) upperrel */
rollup_groupclauses);
/*
- * Consider sort-based implementations of grouping, if possible. (Note
- * that if groupClause is empty, grouping_is_sortable() is trivially true,
- * and all the pathkeys_contained_in() tests will succeed too, so that
- * we'll consider every surviving input path.)
+ * Partial paths in the input rel could allow us to perform aggregation in
+ * parallel. set_grouped_rel_consider_parallel() will determine if it's
+ * going to be safe to do so.
+ */
+ if (input_rel->partial_pathlist != NIL)
+ set_grouped_rel_consider_parallel(root, grouped_rel, target);
+
+ /*
+ * Determine whether it's possible to perform sort-based implementations
+ * of grouping. (Note that if groupClause is empty, grouping_is_sortable()
+ * is trivially true, and all the pathkeys_contained_in() tests will
+ * succeed too, so that we'll consider every surviving input path.)
*/
- if (grouping_is_sortable(parse->groupClause))
+ can_sort = grouping_is_sortable(parse->groupClause);
+
+ /*
+ * Determine whether we should consider hash-based implementations of
+ * grouping.
+ *
+ * Hashed aggregation only applies if we're grouping. We currently can't
+ * hash if there are grouping sets, though.
+ *
+ * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
+ * aggregates. (Doing so would imply storing *all* the input values in
+ * the hash table, and/or running many sorts in parallel, either of which
+ * seems like a certain loser.) We similarly don't support ordered-set
+ * aggregates in hashed aggregation, but that case is also included in the
+ * numOrderedAggs count.
+ *
+ * Note: grouping_is_hashable() is much more expensive to check than the
+ * other gating conditions, so we want to do it last.
+ */
+ can_hash = (parse->groupClause != NIL &&
+ parse->groupingSets == NIL &&
+ agg_costs.numOrderedAggs == 0 &&
+ grouping_is_hashable(parse->groupClause));
+
+ /*
+ * Before generating paths for grouped_rel, we first generate any possible
+ * partial paths; that way, later code can easily consider both parallel
+ * and non-parallel approaches to grouping. Note that the partial paths
+ * we generate here are also partially aggregated, so simply pushing a
+ * Gather node on top is insufficient to create a final path, as would be
+ * the case for a scan/join rel.
+ */
+ if (grouped_rel->consider_parallel)
+ {
+ Path *cheapest_partial_path = linitial(input_rel->partial_pathlist);
+
+ /*
+ * Build the target list for partial aggregate paths. We cannot reuse
+ * the final target, since its Aggrefs must be set in partial mode, and
+ * we must also include any Aggrefs from the HAVING clause, as these
+ * may not be present in the final target.
+ */
+ partial_grouping_target = make_partialgroup_input_target(root, target);
+
+ /*
+ * Estimate the number of partial groups, i.e. the number of groups
+ * each worker is expected to emit, based on the per-worker row count
+ * of the cheapest partial path.
+ */
+ dNumPartialGroups = get_number_of_groups(root,
+ clamp_row_est(cheapest_partial_path->rows),
+ NIL,
+ NIL);
+
+ if (can_sort)
+ {
+ /* Checked in set_grouped_rel_consider_parallel() */
+ Assert(parse->hasAggs || parse->groupClause);
+
+ /*
+ * Use any available suitably-sorted path as input, and also
+ * consider sorting the cheapest partial path.
+ */
+ foreach(lc, input_rel->partial_pathlist)
+ {
+ Path *path = (Path *) lfirst(lc);
+ bool is_sorted;
+
+ is_sorted = pathkeys_contained_in(root->group_pathkeys,
+ path->pathkeys);
+ if (path == cheapest_partial_path || is_sorted)
+ {
+ /* Sort the cheapest partial path, if it isn't already */
+ if (!is_sorted)
+ path = (Path *) create_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
+
+ if (parse->hasAggs)
+ add_partial_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ parse->groupClause,
+ NIL,
+ &agg_costs,
+ dNumPartialGroups,
+ false,
+ false));
+ else
+ add_partial_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ parse->groupClause,
+ NIL,
+ dNumPartialGroups));
+ }
+ }
+ }
+
+ if (can_hash)
+ {
+ /* Checked above */
+ Assert(parse->hasAggs || parse->groupClause);
+
+ hashaggtablesize =
+ estimate_hashagg_tablesize(cheapest_partial_path,
+ &agg_costs,
+ dNumPartialGroups);
+
+ /*
+ * Tentatively produce a partial HashAgg Path, but only if it looks
+ * as if the hash table will fit in work_mem.
+ */
+ if (hashaggtablesize < work_mem * 1024L)
+ {
+ add_partial_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ cheapest_partial_path,
+ partial_grouping_target,
+ AGG_HASHED,
+ parse->groupClause,
+ NIL,
+ &agg_costs,
+ dNumPartialGroups,
+ false,
+ false));
+ }
+ }
+ }
+
+ /* Build final grouping paths */
+ if (can_sort)
{
/*
* Use any available suitably-sorted path as input, and also consider
parse->groupClause,
(List *) parse->havingQual,
&agg_costs,
- dNumGroups));
+ dNumGroups,
+ false,
+ true));
}
else if (parse->groupClause)
{
}
}
}
- }
- /*
- * Consider hash-based implementations of grouping, if possible.
- *
- * Hashed aggregation only applies if we're grouping. We currently can't
- * hash if there are grouping sets, though.
- *
- * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY
- * aggregates. (Doing so would imply storing *all* the input values in
- * the hash table, and/or running many sorts in parallel, either of which
- * seems like a certain loser.) We similarly don't support ordered-set
- * aggregates in hashed aggregation, but that case is also included in the
- * numOrderedAggs count.
- *
- * Note: grouping_is_hashable() is much more expensive to check than the
- * other gating conditions, so we want to do it last.
- */
- allow_hash = (parse->groupClause != NIL &&
- parse->groupingSets == NIL &&
- agg_costs.numOrderedAggs == 0);
-
- /* Consider reasons to disable hashing, but only if we can sort instead */
- if (allow_hash && grouped_rel->pathlist != NIL)
- {
- if (!enable_hashagg)
- allow_hash = false;
- else
+ /*
+ * Now generate a complete GroupAgg Path atop the cheapest partial
+ * path. We need only bother with the cheapest path here, as the output
+ * of Gather is never sorted.
+ */
+ if (grouped_rel->partial_pathlist)
{
+ Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
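+ /* path->rows is per-worker, so scale by the number of workers */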
+ double total_groups = path->rows * path->parallel_degree;
+
+ path = (Path *) create_gather_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ NULL,
+ &total_groups);
+
/*
- * Don't hash if it doesn't look like the hashtable will fit into
- * work_mem.
+ * Gather is always unsorted, so we'll need to sort, unless there's
+ * no GROUP BY clause, in which case there will only be a single
+ * group.
*/
- Size hashentrysize;
-
- /* Estimate per-hash-entry space at tuple width... */
- hashentrysize = MAXALIGN(cheapest_path->pathtarget->width) +
- MAXALIGN(SizeofMinimalTupleHeader);
- /* plus space for pass-by-ref transition values... */
- hashentrysize += agg_costs.transitionSpace;
- /* plus the per-hash-entry overhead */
- hashentrysize += hash_agg_entry_size(agg_costs.numAggs);
-
- if (hashentrysize * dNumGroups > work_mem * 1024L)
- allow_hash = false;
+ if (parse->groupClause)
+ path = (Path *) create_sort_path(root,
+ grouped_rel,
+ path,
+ root->group_pathkeys,
+ -1.0);
+
+ if (parse->hasAggs)
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ target,
+ parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ &agg_costs,
+ dNumGroups,
+ true,
+ true));
+ else
+ add_path(grouped_rel, (Path *)
+ create_group_path(root,
+ grouped_rel,
+ path,
+ target,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ dNumGroups));
}
}
- if (allow_hash && grouping_is_hashable(parse->groupClause))
+ if (can_hash)
{
+ hashaggtablesize = estimate_hashagg_tablesize(cheapest_path,
+ &agg_costs,
+ dNumGroups);
+
/*
- * We just need an Agg over the cheapest-total input path, since input
- * order won't matter.
+ * Generate a HashAgg Path, provided that the estimated size of the
+ * hash table does not exceed work_mem; but if we were unable to sort
+ * above, we'd better generate a Path regardless, so that we at least
+ * have one.
*/
- add_path(grouped_rel, (Path *)
- create_agg_path(root, grouped_rel,
- cheapest_path,
- target,
- AGG_HASHED,
- parse->groupClause,
- (List *) parse->havingQual,
- &agg_costs,
- dNumGroups));
+ if (hashaggtablesize < work_mem * 1024L ||
+ grouped_rel->pathlist == NIL)
+ {
+ /*
+ * We just need an Agg over the cheapest-total input path, since input
+ * order won't matter.
+ */
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root, grouped_rel,
+ cheapest_path,
+ target,
+ AGG_HASHED,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ &agg_costs,
+ dNumGroups,
+ false,
+ true));
+ }
+
+ /*
+ * Generate a HashAgg Path atop the cheapest partial path. Once
+ * again, we'll only do this if it looks as though the hash table won't
+ * exceed work_mem.
+ */
+ if (grouped_rel->partial_pathlist)
+ {
+ Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
+
+ hashaggtablesize = estimate_hashagg_tablesize(path,
+ &agg_costs,
+ dNumGroups);
+
+ if (hashaggtablesize < work_mem * 1024L)
+ {
+ double total_groups = path->rows * path->parallel_degree;
+
+ path = (Path *) create_gather_path(root,
+ grouped_rel,
+ path,
+ partial_grouping_target,
+ NULL,
+ &total_groups);
+
+ add_path(grouped_rel, (Path *)
+ create_agg_path(root,
+ grouped_rel,
+ path,
+ target,
+ AGG_HASHED,
+ parse->groupClause,
+ (List *) parse->havingQual,
+ &agg_costs,
+ dNumGroups,
+ true,
+ true));
+ }
+ }
}
/* Give a helpful error if we failed to find any implementation */
parse->distinctClause,
NIL,
NULL,
- numDistinctRows));
+ numDistinctRows,
+ false,
+ true));
}
/* Give a helpful error if we failed to find any implementation */
return set_pathtarget_cost_width(root, input_target);
}
+/*
+ * make_partialgroup_input_target
+ * Generate appropriate PathTarget for input to Partial Aggregate nodes.
+ *
+ * Similar to make_group_input_target(), except that we don't recurse into
+ * Aggrefs; we need these to remain intact so that they can be found later
+ * in Combine Aggregate nodes during set_combineagg_references(). Vars will
+ * still be pulled out of non-Aggref nodes, as these are still required by
+ * the combine aggregate phase.
+ *
+ * We also convert any Aggrefs we do find into partial mode; this adjusts
+ * each Aggref's return type so that the partially computed aggregate value
+ * can make its way up the execution tree to the Finalize Aggregate node.
+ */
+static PathTarget *
+make_partialgroup_input_target(PlannerInfo *root, PathTarget *final_target)
+{
+ Query *parse = root->parse;
+ PathTarget *input_target;
+ List *non_group_cols;
+ List *non_group_exprs;
+ int i;
+ ListCell *lc;
+
+ input_target = create_empty_pathtarget();
+ non_group_cols = NIL;
+
+ i = 0;
+ foreach(lc, final_target->exprs)
+ {
+ Expr *expr = (Expr *) lfirst(lc);
+ Index sgref = final_target->sortgrouprefs[i];
+
+ if (sgref && parse->groupClause &&
+ get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+ {
+ /*
+ * It's a grouping column, so add it to the input target as-is.
+ */
+ add_column_to_pathtarget(input_target, expr, sgref);
+ }
+ else
+ {
+ /*
+ * Non-grouping column, so just remember the expression for later
+ * call to pull_var_clause.
+ */
+ non_group_cols = lappend(non_group_cols, expr);
+ }
+
+ i++;
+ }
+
+ /*
+ * If there's a HAVING clause, we'll need the Aggrefs it uses, too.
+ */
+ if (parse->havingQual)
+ non_group_cols = lappend(non_group_cols, parse->havingQual);
+
+ /*
+ * Pull out all the Vars mentioned in non-group cols (plus HAVING), and
+ * add them to the input target if not already present. (A Var used
+ * directly as a GROUP BY item will be present already.) Note this
+ * includes Vars used in resjunk items, so we are covering the needs of
+ * ORDER BY and window specifications. Vars used within Aggrefs will be
+ * ignored and the Aggrefs themselves will be added to the PathTarget.
+ */
+ non_group_exprs = pull_var_clause((Node *) non_group_cols,
+ PVC_INCLUDE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_INCLUDE_PLACEHOLDERS);
+
+ add_new_columns_to_pathtarget(input_target, non_group_exprs);
+
+ /* clean up cruft */
+ list_free(non_group_exprs);
+ list_free(non_group_cols);
+
+ /* Adjust Aggrefs to put them in partial mode. */
+ apply_partialaggref_adjustment(input_target);
+
+ /* XXX this causes some redundant cost calculation ... */
+ return set_pathtarget_cost_width(root, input_target);
+}
+
/*
* postprocess_setop_tlist
* Fix up targetlist returned by plan_set_operations().
static bool fix_scan_expr_walker(Node *node, fix_scan_expr_context *context);
static void set_join_references(PlannerInfo *root, Join *join, int rtoffset);
static void set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset);
+static void set_combineagg_references(PlannerInfo *root, Plan *plan,
+ int rtoffset);
static void set_dummy_tlist_references(Plan *plan, int rtoffset);
static indexed_tlist *build_tlist_index(List *tlist);
static Var *search_indexed_tlist_for_var(Var *var,
Index sortgroupref,
indexed_tlist *itlist,
Index newvarno);
+static Var *search_indexed_tlist_for_partial_aggref(Aggref *aggref,
+ indexed_tlist *itlist, Index newvarno);
static List *fix_join_expr(PlannerInfo *root,
List *clauses,
indexed_tlist *outer_itlist,
int rtoffset);
static Node *fix_upper_expr_mutator(Node *node,
fix_upper_expr_context *context);
+static Node *fix_combine_agg_expr(PlannerInfo *root,
+ Node *node,
+ indexed_tlist *subplan_itlist,
+ Index newvarno,
+ int rtoffset);
+static Node *fix_combine_agg_expr_mutator(Node *node,
+ fix_upper_expr_context *context);
static List *set_returning_clause_references(PlannerInfo *root,
List *rlist,
Plan *topplan,
}
break;
case T_Agg:
- set_upper_references(root, plan, rtoffset);
- break;
+ {
+ Agg *aggplan = (Agg *) plan;
+
+ if (aggplan->combineStates)
+ set_combineagg_references(root, plan, rtoffset);
+ else
+ set_upper_references(root, plan, rtoffset);
+
+ break;
+ }
case T_Group:
set_upper_references(root, plan, rtoffset);
break;
pfree(subplan_itlist);
}
+/*
+ * set_combineagg_references
+ * This serves the same function as set_upper_references(), but treats
+ * Aggrefs differently. Here we transform each Aggref's args to suit the
+ * combine aggregate phase: the args are converted to reference the
+ * corresponding partial Aggref in the subplan, rather than simple Var(s),
+ * as would be the case for a non-combine aggregate node.
+ */
+static void
+set_combineagg_references(PlannerInfo *root, Plan *plan, int rtoffset)
+{
+ Plan *subplan = plan->lefttree;
+ indexed_tlist *subplan_itlist;
+ List *output_targetlist;
+ ListCell *l;
+
+ Assert(IsA(plan, Agg));
+ Assert(((Agg *) plan)->combineStates);
+
+ subplan_itlist = build_tlist_index(subplan->targetlist);
+
+ output_targetlist = NIL;
+
+ foreach(l, plan->targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+ Node *newexpr;
+
+ /* If it's a non-Var sort/group item, first try to match by sortref */
+ if (tle->ressortgroupref != 0 && !IsA(tle->expr, Var))
+ {
+ newexpr = (Node *)
+ search_indexed_tlist_for_sortgroupref((Node *) tle->expr,
+ tle->ressortgroupref,
+ subplan_itlist,
+ OUTER_VAR);
+ if (!newexpr)
+ newexpr = fix_combine_agg_expr(root,
+ (Node *) tle->expr,
+ subplan_itlist,
+ OUTER_VAR,
+ rtoffset);
+ }
+ else
+ newexpr = fix_combine_agg_expr(root,
+ (Node *) tle->expr,
+ subplan_itlist,
+ OUTER_VAR,
+ rtoffset);
+ tle = flatCopyTargetEntry(tle);
+ tle->expr = (Expr *) newexpr;
+ output_targetlist = lappend(output_targetlist, tle);
+ }
+
+ plan->targetlist = output_targetlist;
+
+ plan->qual = (List *)
+ fix_combine_agg_expr(root,
+ (Node *) plan->qual,
+ subplan_itlist,
+ OUTER_VAR,
+ rtoffset);
+
+ pfree(subplan_itlist);
+}
+
/*
* set_dummy_tlist_references
* Replace the targetlist of an upper-level plan node with a simple
return NULL; /* no match */
}
+/*
+ * search_indexed_tlist_for_partial_aggref - find an Aggref in an indexed tlist
+ *
+ * Aggrefs for partial aggregates have their aggoutputtype set to the
+ * aggregate state's type. This means that a standard equal() comparison
+ * won't match when comparing an Aggref which is in partial mode with an Aggref
+ * which is not. Here we manually compare all of the fields apart from
+ * aggoutputtype.
+ */
+static Var *
+search_indexed_tlist_for_partial_aggref(Aggref *aggref, indexed_tlist *itlist,
+ Index newvarno)
+{
+ ListCell *lc;
+
+ foreach(lc, itlist->tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+ if (IsA(tle->expr, Aggref))
+ {
+ Aggref *tlistaggref = (Aggref *) tle->expr;
+ Var *newvar;
+
+ if (aggref->aggfnoid != tlistaggref->aggfnoid)
+ continue;
+ if (aggref->aggtype != tlistaggref->aggtype)
+ continue;
+ /* ignore aggoutputtype */
+ if (aggref->aggcollid != tlistaggref->aggcollid)
+ continue;
+ if (aggref->inputcollid != tlistaggref->inputcollid)
+ continue;
+ if (!equal(aggref->aggdirectargs, tlistaggref->aggdirectargs))
+ continue;
+ if (!equal(aggref->args, tlistaggref->args))
+ continue;
+ if (!equal(aggref->aggorder, tlistaggref->aggorder))
+ continue;
+ if (!equal(aggref->aggdistinct, tlistaggref->aggdistinct))
+ continue;
+ if (!equal(aggref->aggfilter, tlistaggref->aggfilter))
+ continue;
+ if (aggref->aggstar != tlistaggref->aggstar)
+ continue;
+ if (aggref->aggvariadic != tlistaggref->aggvariadic)
+ continue;
+ if (aggref->aggkind != tlistaggref->aggkind)
+ continue;
+ if (aggref->agglevelsup != tlistaggref->agglevelsup)
+ continue;
+
+ newvar = makeVarFromTargetEntry(newvarno, tle);
+ newvar->varnoold = 0; /* wasn't ever a plain Var */
+ newvar->varoattno = 0;
+
+ return newvar;
+ }
+ }
+ return NULL;
+}
+
/*
* fix_join_expr
* Create a new set of targetlist entries or join qual clauses by
(void *) context);
}
+/*
+ * fix_combine_agg_expr
+ * Like fix_upper_expr(), but additionally adjusts the args of any Aggrefs
+ * so that they reference the corresponding Aggref in the subplan.
+ */
+static Node *
+fix_combine_agg_expr(PlannerInfo *root,
+ Node *node,
+ indexed_tlist *subplan_itlist,
+ Index newvarno,
+ int rtoffset)
+{
+ fix_upper_expr_context context;
+
+ context.root = root;
+ context.subplan_itlist = subplan_itlist;
+ context.newvarno = newvarno;
+ context.rtoffset = rtoffset;
+ return fix_combine_agg_expr_mutator(node, &context);
+}
+
+static Node *
+fix_combine_agg_expr_mutator(Node *node, fix_upper_expr_context *context)
+{
+ Var *newvar;
+
+ if (node == NULL)
+ return NULL;
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ newvar = search_indexed_tlist_for_var(var,
+ context->subplan_itlist,
+ context->newvarno,
+ context->rtoffset);
+ if (!newvar)
+ elog(ERROR, "variable not found in subplan target list");
+ return (Node *) newvar;
+ }
+ if (IsA(node, PlaceHolderVar))
+ {
+ PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+ /* See if the PlaceHolderVar has bubbled up from a lower plan node */
+ if (context->subplan_itlist->has_ph_vars)
+ {
+ newvar = search_indexed_tlist_for_non_var((Node *) phv,
+ context->subplan_itlist,
+ context->newvarno);
+ if (newvar)
+ return (Node *) newvar;
+ }
+ /* If not supplied by input plan, evaluate the contained expr */
+ return fix_upper_expr_mutator((Node *) phv->phexpr, context);
+ }
+ if (IsA(node, Param))
+ return fix_param_node(context->root, (Param *) node);
+ if (IsA(node, Aggref))
+ {
+ Aggref *aggref = (Aggref *) node;
+
+ newvar = search_indexed_tlist_for_partial_aggref(aggref,
+ context->subplan_itlist,
+ context->newvarno);
+ if (newvar)
+ {
+ Aggref *newaggref;
+ TargetEntry *newtle;
+
+ /*
+ * Now build a new TargetEntry for the Aggref's arguments: a single
+ * Var referencing the corresponding Aggref in the node below.
+ */
+ newtle = makeTargetEntry((Expr *) newvar, 1, NULL, false);
+ newaggref = (Aggref *) copyObject(aggref);
+ newaggref->args = list_make1(newtle);
+
+ return (Node *) newaggref;
+ }
+ else
+ elog(ERROR, "Aggref not found in subplan target list");
+ }
+ /* Try matching more complex expressions too, if tlist has any */
+ if (context->subplan_itlist->has_non_vars)
+ {
+ newvar = search_indexed_tlist_for_non_var(node,
+ context->subplan_itlist,
+ context->newvarno);
+ if (newvar)
+ return (Node *) newvar;
+ }
+ fix_expr_common(context->root, node);
+ return expression_tree_mutator(node,
+ fix_combine_agg_expr_mutator,
+ (void *) context);
+}
+
/*
* set_returning_clause_references
* Perform setrefs.c's work on a RETURNING targetlist
groupList,
NIL,
NULL,
- dNumGroups);
+ dNumGroups,
+ false,
+ true);
}
else
{
#include "utils/syscache.h"
#include "utils/typcache.h"
+typedef struct
+{
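+ /* strictest level of partial aggregation found so far */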
+ PartialAggType allowedtype;
+} partial_agg_context;
typedef struct
{
bool allow_restricted;
} has_parallel_hazard_arg;
+static bool aggregates_allow_partial_walker(Node *node,
+ partial_agg_context *context);
static bool contain_agg_clause_walker(Node *node, void *context);
static bool count_agg_clauses_walker(Node *node,
count_agg_clauses_context *context);
* Aggregate-function clause manipulation
*****************************************************************************/
+/*
+ * aggregates_allow_partial
+ * Recursively search for Aggref clauses and determine the maximum
+ * level of partial aggregation which can be supported.
+ */
+PartialAggType
+aggregates_allow_partial(Node *clause)
+{
+ partial_agg_context context;
+
+ /* initially any type is okay, until we find Aggrefs which say otherwise */
+ context.allowedtype = PAT_ANY;
+
+ (void) aggregates_allow_partial_walker(clause, &context);
+ return context.allowedtype;
+}
+
+static bool
+aggregates_allow_partial_walker(Node *node, partial_agg_context *context)
+{
+ if (node == NULL)
+ return false;
+ if (IsA(node, Aggref))
+ {
+ Aggref *aggref = (Aggref *) node;
+ HeapTuple aggTuple;
+ Form_pg_aggregate aggform;
+
+ Assert(aggref->agglevelsup == 0);
+
+ /*
+ * We can't perform partial aggregation with Aggrefs containing a
+ * DISTINCT or ORDER BY clause.
+ */
+ if (aggref->aggdistinct || aggref->aggorder)
+ {
+ context->allowedtype = PAT_DISABLED;
+ return true; /* abort search */
+ }
+ aggTuple = SearchSysCache1(AGGFNOID,
+ ObjectIdGetDatum(aggref->aggfnoid));
+ if (!HeapTupleIsValid(aggTuple))
+ elog(ERROR, "cache lookup failed for aggregate %u",
+ aggref->aggfnoid);
+ aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+ /*
+ * If there is no combine function, then partial aggregation is not
+ * possible.
+ */
+ if (!OidIsValid(aggform->aggcombinefn))
+ {
+ ReleaseSysCache(aggTuple);
+ context->allowedtype = PAT_DISABLED;
+ return true; /* abort search */
+ }
+
+ /*
+ * If we find any aggs with an internal transtype then we must ensure
+ * that pointers to aggregate states are not passed to other processes;
+ * therefore, we set the maximum allowed type to PAT_INTERNAL_ONLY.
+ */
+ if (aggform->aggtranstype == INTERNALOID)
+ context->allowedtype = PAT_INTERNAL_ONLY;
+
+ ReleaseSysCache(aggTuple);
+ return false; /* continue searching */
+ }
+ return expression_tree_walker(node, aggregates_allow_partial_walker,
+ (void *) context);
+}
+
/*
* contain_agg_clause
* Recursively search for Aggref/GroupingFunc nodes within a clause.
* create_gather_path
* Creates a path corresponding to a gather scan, returning the
* pathnode.
+ *
+ * 'rows' may optionally be set to override row estimates from other sources.
*/
GatherPath *
create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
- Relids required_outer)
+ PathTarget *target, Relids required_outer, double *rows)
{
GatherPath *pathnode = makeNode(GatherPath);
pathnode->path.pathtype = T_Gather;
pathnode->path.parent = rel;
- pathnode->path.pathtarget = rel->reltarget;
+ pathnode->path.pathtarget = target;
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);
pathnode->path.parallel_aware = false;
pathnode->single_copy = true;
}
- cost_gather(pathnode, root, rel, pathnode->path.param_info);
+ cost_gather(pathnode, root, rel, pathnode->path.param_info, rows);
return pathnode;
}
* 'qual' is the HAVING quals if any
* 'aggcosts' contains cost info about the aggregate functions to be computed
* 'numGroups' is the estimated number of groups (1 if not grouping)
+ * 'combineStates' is set to true if the Agg node should combine agg states
+ * 'finalizeAggs' is set to false if the Agg node should not call the finalfn
*/
AggPath *
create_agg_path(PlannerInfo *root,
List *groupClause,
List *qual,
const AggClauseCosts *aggcosts,
- double numGroups)
+ double numGroups,
+ bool combineStates,
+ bool finalizeAggs)
{
AggPath *pathnode = makeNode(AggPath);
pathnode->numGroups = numGroups;
pathnode->groupClause = groupClause;
pathnode->qual = qual;
+ pathnode->finalizeAggs = finalizeAggs;
+ pathnode->combineStates = combineStates;
cost_agg(&pathnode->path, root,
aggstrategy, aggcosts,
*/
#include "postgres.h"
+#include "access/htup_details.h"
+#include "catalog/pg_aggregate.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/tlist.h"
+#include "utils/syscache.h"
/*****************************************************************************
i++;
}
}
+
+/*
+ * apply_partialaggref_adjustment
+ * Convert PathTarget to be suitable for a partial aggregate node. We simply
+ * adjust any Aggref nodes found in the target and set the aggoutputtype to
+ * the aggtranstype. This allows exprType() to return the actual type that
+ * will be produced.
+ *
+ * Note: We expect 'target' to be a flat target list, with no Aggrefs buried
+ * within other expressions.
+ */
+void
+apply_partialaggref_adjustment(PathTarget *target)
+{
+ ListCell *lc;
+
+ foreach(lc, target->exprs)
+ {
+ Aggref *aggref = (Aggref *) lfirst(lc);
+
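+ /* leave anything that's not an Aggref (e.g. a grouping column) alone */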
+ if (IsA(aggref, Aggref))
+ {
+ HeapTuple aggTuple;
+ Form_pg_aggregate aggform;
+ Aggref *newaggref;
+
+ aggTuple = SearchSysCache1(AGGFNOID,
+ ObjectIdGetDatum(aggref->aggfnoid));
+ if (!HeapTupleIsValid(aggTuple))
+ elog(ERROR, "cache lookup failed for aggregate %u",
+ aggref->aggfnoid);
+ aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+ newaggref = (Aggref *) copyObject(aggref);
+ newaggref->aggoutputtype = aggform->aggtranstype;
+
+ lfirst(lc) = newaggref;
+
+ ReleaseSysCache(aggTuple);
+ }
+ }
+}
Aggref *aggref = makeNode(Aggref);
aggref->aggfnoid = funcid;
- aggref->aggtype = rettype;
+ /* by default, the output type is the same as aggtype */
+ aggref->aggtype = aggref->aggoutputtype = rettype;
/* aggcollid and inputcollid will be set by parse_collate.c */
/* aggdirectargs and args will be set by transformAggregateCall */
/* aggorder and aggdistinct will be set by transformAggregateCall */
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201603181
+#define CATALOG_VERSION_NO 201603211
#endif
* DISTINCT is not supported in this case, so aggdistinct will be NIL.
* The direct arguments appear in aggdirectargs (as a list of plain
* expressions, not TargetEntry nodes).
+ *
+ * 'aggtype' and 'aggoutputtype' are the same except when we're performing
+ * partial aggregation; in that case, we output transition states. Nothing
+ * interesting happens in the Aggref itself, but we must set the output data
+ * type to whatever type is used for transition values.
+ *
+ * Note: If you are adding fields here you may also need to add a comparison
+ * in search_indexed_tlist_for_partial_aggref()
*/
typedef struct Aggref
{
Expr xpr;
Oid aggfnoid; /* pg_proc Oid of the aggregate */
- Oid aggtype; /* type Oid of result of the aggregate */
+ Oid aggtype; /* type Oid of final result of the aggregate */
+ Oid aggoutputtype; /* type Oid of result of this aggregate */
Oid aggcollid; /* OID of collation of result */
Oid inputcollid; /* OID of collation that function should use */
List *aggdirectargs; /* direct arguments, if an ordered-set agg */
double numGroups; /* estimated number of groups in input */
List *groupClause; /* a list of SortGroupClause's */
List *qual; /* quals (HAVING quals), if any */
+ bool combineStates; /* input is partially aggregated agg states */
+ bool finalizeAggs; /* should the executor call the finalfn? */
} AggPath;
/*
List **windowFuncs; /* lists of WindowFuncs for each winref */
} WindowFuncLists;
+/*
+ * PartialAggType
+ * PartialAggType stores whether partial aggregation is allowed and
+ * which context it is allowed in. We require three states here as there are
+ * two different contexts in which partial aggregation is safe. For aggregates
+ * which have an 'stype' of INTERNAL, it is okay to pass a pointer to the
+ * aggregate state within a single process, since the datum is just a
+ * pointer. In cases where the aggregate state must be passed between
+ * different processes, for example during parallel aggregation, passing
+ * pointers directly is not going to work.
+ */
+typedef enum
+{
+ PAT_ANY = 0, /* Any type of partial aggregation is okay. */
+ PAT_INTERNAL_ONLY, /* Some aggregates support only internal mode. */
+ PAT_DISABLED /* Some aggregates don't support partial mode at all */
+} PartialAggType;
extern Expr *make_opclause(Oid opno, Oid opresulttype, bool opretset,
Expr *leftop, Expr *rightop,
extern Expr *make_ands_explicit(List *andclauses);
extern List *make_ands_implicit(Expr *clause);
+extern PartialAggType aggregates_allow_partial(Node *clause);
extern bool contain_agg_clause(Node *clause);
extern void count_agg_clauses(PlannerInfo *root, Node *clause,
AggClauseCosts *costs);
SpecialJoinInfo *sjinfo,
SemiAntiJoinFactors *semifactors);
extern void cost_gather(GatherPath *path, PlannerInfo *root,
- RelOptInfo *baserel, ParamPathInfo *param_info);
+ RelOptInfo *baserel, ParamPathInfo *param_info, double *rows);
extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
Path *subpath, SpecialJoinInfo *sjinfo);
extern GatherPath *create_gather_path(PlannerInfo *root,
- RelOptInfo *rel, Path *subpath, Relids required_outer);
+ RelOptInfo *rel, Path *subpath, PathTarget *target,
+ Relids required_outer, double *rows);
extern SubqueryScanPath *create_subqueryscan_path(PlannerInfo *root,
RelOptInfo *rel, Path *subpath,
List *pathkeys, Relids required_outer);
List *groupClause,
List *qual,
const AggClauseCosts *aggcosts,
- double numGroups);
+ double numGroups,
+ bool combineStates,
+ bool finalizeAggs);
extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
RelOptInfo *rel,
Path *subpath,
extern void add_new_column_to_pathtarget(PathTarget *target, Expr *expr);
extern void add_new_columns_to_pathtarget(PathTarget *target, List *exprs);
extern void apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target);
+extern void apply_partialaggref_adjustment(PathTarget *target);
/* Convenience macro to get a PathTarget with valid cost/width fields */
#define create_pathtarget(root, tlist) \