From b5635948ab165b6070e7d05d111f966e07570d81 Mon Sep 17 00:00:00 2001 From: Andrew Gierth Date: Mon, 27 Mar 2017 04:20:54 +0100 Subject: [PATCH] Support hashed aggregation with grouping sets. This extends the Aggregate node with two new features: HashAggregate can now run multiple hashtables concurrently, and a new strategy MixedAggregate populates hashtables while doing sorted grouping. The planner will now attempt to save as many sorts as possible when planning grouping sets queries, while not exceeding work_mem for the estimated combined sizes of all hashtables used. No SQL-level changes are required. There should be no user-visible impact other than the new EXPLAIN output and possible changes to result ordering when ORDER BY was not used (which affected a few regression tests). The enable_hashagg option is respected. Author: Andrew Gierth Reviewers: Mark Dilger, Andres Freund Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk --- .../postgres_fdw/expected/postgres_fdw.out | 61 +- src/backend/commands/explain.c | 25 +- src/backend/executor/nodeAgg.c | 861 +++++++++++++----- src/backend/lib/Makefile | 4 +- src/backend/lib/knapsack.c | 114 +++ src/backend/nodes/bitmapset.c | 30 + src/backend/nodes/outfuncs.c | 34 +- src/backend/optimizer/path/costsize.c | 7 +- src/backend/optimizer/plan/createplan.c | 83 +- src/backend/optimizer/plan/planner.c | 861 ++++++++++++++---- src/backend/optimizer/util/pathnode.c | 150 +-- src/include/lib/knapsack.h | 17 + src/include/nodes/bitmapset.h | 6 + src/include/nodes/execnodes.h | 21 +- src/include/nodes/nodes.h | 5 +- src/include/nodes/plannodes.h | 2 +- src/include/nodes/relation.h | 30 +- src/include/optimizer/pathnode.h | 4 +- src/test/regress/expected/groupingsets.out | 616 ++++++++++++- src/test/regress/expected/tsrf.out | 50 +- src/test/regress/sql/groupingsets.sql | 155 +++- src/test/regress/sql/tsrf.sql | 2 + 22 files changed, 2544 insertions(+), 594 deletions(-) create mode 100644 src/backend/lib/knapsack.c create mode 100644 src/include/lib/knapsack.h diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 059c5c309a..a466bf2079 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -3276,16 +3276,19 @@ select sum(q.a), count(q.b) from ft4 left join (select 13, avg(ft1.c1), sum(ft2. 
-- Grouping sets explain (verbose, costs off) select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last; - QUERY PLAN ---------------------------------------------------------------------------------------------------- - GroupAggregate - Output: c2, sum(c1) - Group Key: ft1.c2 - Group Key: () - -> Foreign Scan on public.ft1 - Output: c2, c1 - Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST -(7 rows) + QUERY PLAN +------------------------------------------------------------------------------ + Sort + Output: c2, (sum(c1)) + Sort Key: ft1.c2 + -> MixedAggregate + Output: c2, sum(c1) + Hash Key: ft1.c2 + Group Key: () + -> Foreign Scan on public.ft1 + Output: c2, c1 + Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) +(10 rows) select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last; c2 | sum @@ -3298,16 +3301,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la explain (verbose, costs off) select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last; - QUERY PLAN ---------------------------------------------------------------------------------------------------- - GroupAggregate - Output: c2, sum(c1) - Group Key: ft1.c2 - Group Key: () - -> Foreign Scan on public.ft1 - Output: c2, c1 - Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST -(7 rows) + QUERY PLAN +------------------------------------------------------------------------------ + Sort + Output: c2, (sum(c1)) + Sort Key: ft1.c2 + -> MixedAggregate + Output: c2, sum(c1) + Hash Key: ft1.c2 + Group Key: () + -> Foreign Scan on public.ft1 + Output: c2, c1 + Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3)) +(10 rows) select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last; c2 | sum @@ -3320,20 +3326,19 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last explain (verbose, costs off) select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last; - QUERY PLAN -------------------------------------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------------- Sort Output: c2, c6, (sum(c1)) Sort Key: ft1.c2, ft1.c6 - -> GroupAggregate + -> HashAggregate Output: c2, c6, sum(c1) - Group Key: ft1.c2 - Sort Key: ft1.c6 - Group Key: ft1.c6 + Hash Key: ft1.c2 + Hash Key: ft1.c6 -> Foreign Scan on public.ft1 Output: c2, c6, c1 - Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) ORDER BY c2 ASC NULLS LAST -(11 rows) + Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3)) +(10 rows) select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last; c2 | c6 | sum diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 1036b96aae..ea19ba60c5 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1015,6 +1015,10 @@ ExplainNode(PlanState *planstate, List *ancestors, pname = "HashAggregate"; strategy = "Hashed"; break; + case AGG_MIXED: + pname = "MixedAggregate"; + strategy = "Mixed"; + break; default: pname = "Aggregate ???"; strategy = "???"; @@ -1978,6 +1982,19 @@ show_grouping_set_keys(PlanState *planstate, ListCell *lc; List *gsets = aggnode->groupingSets; AttrNumber *keycols = aggnode->grpColIdx; + 
const char *keyname; + const char *keysetname; + + if (aggnode->aggstrategy == AGG_HASHED || aggnode->aggstrategy == AGG_MIXED) + { + keyname = "Hash Key"; + keysetname = "Hash Keys"; + } + else + { + keyname = "Group Key"; + keysetname = "Group Keys"; + } ExplainOpenGroup("Grouping Set", NULL, true, es); @@ -1992,7 +2009,7 @@ show_grouping_set_keys(PlanState *planstate, es->indent++; } - ExplainOpenGroup("Group Keys", "Group Keys", false, es); + ExplainOpenGroup(keysetname, keysetname, false, es); foreach(lc, gsets) { @@ -2016,12 +2033,12 @@ show_grouping_set_keys(PlanState *planstate, } if (!result && es->format == EXPLAIN_FORMAT_TEXT) - ExplainPropertyText("Group Key", "()", es); + ExplainPropertyText(keyname, "()", es); else - ExplainPropertyListNested("Group Key", result, es); + ExplainPropertyListNested(keyname, result, es); } - ExplainCloseGroup("Group Keys", "Group Keys", false, es); + ExplainCloseGroup(keysetname, keysetname, false, es); if (sortnode && es->format == EXPLAIN_FORMAT_TEXT) es->indent--; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 471acc4b3e..ef35da6ade 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -122,12 +122,19 @@ * specific). * * Where more complex grouping sets are used, we break them down into - * "phases", where each phase has a different sort order. During each - * phase but the last, the input tuples are additionally stored in a - * tuplesort which is keyed to the next phase's sort order; during each - * phase but the first, the input tuples are drawn from the previously - * sorted data. (The sorting of the data for the first phase is handled by - * the planner, as it might be satisfied by underlying nodes.) + * "phases", where each phase has a different sort order (except phase 0 + * which is reserved for hashing). During each phase but the last, the + * input tuples are additionally stored in a tuplesort which is keyed to the + * next phase's sort order; during each phase but the first, the input + * tuples are drawn from the previously sorted data. (The sorting of the + * data for the first phase is handled by the planner, as it might be + * satisfied by underlying nodes.) + * + * Hashing can be mixed with sorted grouping. To do this, we have an + * AGG_MIXED strategy that populates the hashtables during the first sorted + * phase, and switches to reading them out after completing all sort phases. + * We can also support AGG_HASHED with multiple hash tables and no sorting + * at all. * * From the perspective of aggregate transition and final functions, the * only issue regarding grouping sets is this: a single call site (flinfo) @@ -139,7 +146,54 @@ * sensitive to the grouping set for which the aggregate function is * currently being called. * - * TODO: AGG_HASHED doesn't support multiple grouping sets yet. + * Plan structure: + * + * What we get from the planner is actually one "real" Agg node which is + * part of the plan tree proper, but which optionally has an additional list + * of Agg nodes hung off the side via the "chain" field. This is because an + * Agg node happens to be a convenient representation of all the data we + * need for grouping sets. + * + * For many purposes, we treat the "real" node as if it were just the first + * node in the chain. 
The chain must be ordered such that hashed entries + * come before sorted/plain entries; the real node is marked AGG_MIXED if + * there are both types present (in which case the real node describes one + * of the hashed groupings, other AGG_HASHED nodes may optionally follow in + * the chain, followed in turn by AGG_SORTED or (one) AGG_PLAIN node). If + * the real node is marked AGG_HASHED or AGG_SORTED, then all the chained + * nodes must be of the same type; if it is AGG_PLAIN, there can be no + * chained nodes. + * + * We collect all hashed nodes into a single "phase", numbered 0, and create + * a sorted phase (numbered 1..n) for each AGG_SORTED or AGG_PLAIN node. + * Phase 0 is allocated even if there are no hashes, but remains unused in + * that case. + * + * AGG_HASHED nodes actually refer to only a single grouping set each, + * because for each hashed grouping we need a separate grpColIdx and + * numGroups estimate. AGG_SORTED nodes represent a "rollup", a list of + * grouping sets that share a sort order. Each AGG_SORTED node other than + * the first one has an associated Sort node which describes the sort order + * to be used; the first sorted node takes its input from the outer subtree, + * which the planner has already arranged to provide ordered data. + * + * Memory and ExprContext usage: + * + * Because we're accumulating aggregate values across input rows, we need to + * use more memory contexts than just simple input/output tuple contexts. + * In fact, for a rollup, we need a separate context for each grouping set + * so that we can reset the inner (finer-grained) aggregates on their group + * boundaries while continuing to accumulate values for outer + * (coarser-grained) groupings. On top of this, we might be simultaneously + * populating hashtables; however, we only need one context for all the + * hashtables. + * + * So we create an array, aggcontexts, with an ExprContext for each grouping + * set in the largest rollup that we're going to process, and use the + * per-tuple memory context of those ExprContexts to store the aggregate + * transition values. hashcontext is the single context created to support + * all hash tables. + * * * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -432,6 +486,7 @@ typedef struct AggStatePerGroupData */ typedef struct AggStatePerPhaseData { + AggStrategy aggstrategy; /* strategy for this phase */ int numsets; /* number of grouping sets (or 0) */ int *gset_lengths; /* lengths of grouping sets */ Bitmapset **grouped_cols; /* column groupings for rollup */ @@ -440,7 +495,30 @@ typedef struct AggStatePerPhaseData Sort *sortnode; /* Sort node for input ordering for phase */ } AggStatePerPhaseData; +/* + * AggStatePerHashData - per-hashtable state + * + * When doing grouping sets with hashing, we have one of these for each + * grouping set. (When doing hashing without grouping sets, we have just one of + * them.) 
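+ *
+ * For example (hypothetical query): GROUP BY GROUPING SETS ((a), (b)),
+ * executed entirely by hashing, results in two of these structs, one
+ * whose hashtable is keyed on (a) and the other keyed on (b).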
+ */ +typedef struct AggStatePerHashData +{ + TupleHashTable hashtable; /* hash table with one entry per group */ + TupleHashIterator hashiter; /* for iterating through hash table */ + TupleTableSlot *hashslot; /* slot for loading hash table */ + FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ + FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ + int numCols; /* number of hash key columns */ + int numhashGrpCols; /* number of columns in hash table */ + int largestGrpColIdx; /* largest col required for hashing */ + AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */ + AttrNumber *hashGrpColIdxHash; /* indices in hashtbl tuples */ + Agg *aggnode; /* original Agg node, for numGroups etc. */ +} AggStatePerHashData; + +static void select_current_set(AggState *aggstate, int setno, bool is_hash); static void initialize_phase(AggState *aggstate, int newphase); static TupleTableSlot *fetch_input_tuple(AggState *aggstate); static void initialize_aggregates(AggState *aggstate, @@ -449,7 +527,8 @@ static void initialize_aggregates(AggState *aggstate, static void advance_transition_function(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroupstate); -static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup); +static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, + AggStatePerGroup *pergroups); static void advance_combine_function(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroupstate); @@ -473,14 +552,13 @@ static void prepare_projection_slot(AggState *aggstate, int currentSet); static void finalize_aggregates(AggState *aggstate, AggStatePerAgg peragg, - AggStatePerGroup pergroup, - int currentSet); + AggStatePerGroup pergroup); static TupleTableSlot *project_aggregates(AggState *aggstate); static Bitmapset *find_unaggregated_cols(AggState *aggstate); static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos); static void build_hash_table(AggState *aggstate); -static TupleHashEntryData *lookup_hash_entry(AggState *aggstate, - TupleTableSlot *inputslot); +static TupleHashEntryData *lookup_hash_entry(AggState *aggstate); +static AggStatePerGroup *lookup_hash_entries(AggState *aggstate); static TupleTableSlot *agg_retrieve_direct(AggState *aggstate); static void agg_fill_hash_table(AggState *aggstate); static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate); @@ -501,13 +579,31 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg, /* - * Switch to phase "newphase", which must either be 0 (to reset) or + * Select the current grouping set; affects current_set and + * curaggcontext. + */ +static void +select_current_set(AggState *aggstate, int setno, bool is_hash) +{ + if (is_hash) + aggstate->curaggcontext = aggstate->hashcontext; + else + aggstate->curaggcontext = aggstate->aggcontexts[setno]; + + aggstate->current_set = setno; +} + +/* + * Switch to phase "newphase", which must either be 0 or 1 (to reset) or * current_phase + 1. Juggle the tuplesorts accordingly. + * + * Phase 0 is for hashing, which we currently handle last in the AGG_MIXED + * case, so when entering phase 0, all we need to do is drop open sorts. 
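+ *
+ * For instance (hypothetical plan), an AGG_MIXED aggregate with two sort
+ * phases runs phases 1 and 2 in order and then enters phase 0 to drain
+ * its hashtables, while a pure AGG_HASHED aggregate only sees phase 0.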
*/ static void initialize_phase(AggState *aggstate, int newphase) { - Assert(newphase == 0 || newphase == aggstate->current_phase + 1); + Assert(newphase <= 1 || newphase == aggstate->current_phase + 1); /* * Whatever the previous state, we're now done with whatever input @@ -519,7 +615,7 @@ initialize_phase(AggState *aggstate, int newphase) aggstate->sort_in = NULL; } - if (newphase == 0) + if (newphase <= 1) { /* * Discard any existing output tuplesort. @@ -546,7 +642,7 @@ initialize_phase(AggState *aggstate, int newphase) * If this isn't the last phase, we need to sort appropriately for the * next phase in sequence. */ - if (newphase < aggstate->numphases - 1) + if (newphase > 0 && newphase < aggstate->numphases - 1) { Sort *sortnode = aggstate->phases[newphase + 1].sortnode; PlanState *outerNode = outerPlanState(aggstate); @@ -567,7 +663,7 @@ initialize_phase(AggState *aggstate, int newphase) } /* - * Fetch a tuple from either the outer plan (for phase 0) or from the sorter + * Fetch a tuple from either the outer plan (for phase 1) or from the sorter * populated by the previous phase. Copy it to the sorter for the next phase * if any. */ @@ -595,8 +691,8 @@ fetch_input_tuple(AggState *aggstate) /* * (Re)Initialize an individual aggregate. * - * This function handles only one grouping set (already set in - * aggstate->current_set). + * This function handles only one grouping set, already set in + * aggstate->current_set. * * When called, CurrentMemoryContext should be the per-query context. */ @@ -653,7 +749,7 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans, MemoryContext oldContext; oldContext = MemoryContextSwitchTo( - aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + aggstate->curaggcontext->ecxt_per_tuple_memory); pergroupstate->transValue = datumCopy(pertrans->initValue, pertrans->transtypeByVal, pertrans->transtypeLen); @@ -676,8 +772,9 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans, * * If there are multiple grouping sets, we initialize only the first numReset * of them (the grouping sets are ordered so that the most specific one, which - * is reset most often, is first). As a convenience, if numReset is < 1, we - * reinitialize all sets. + * is reset most often, is first). As a convenience, if numReset is 0, we + * reinitialize all sets. numReset is -1 to initialize a hashtable entry, in + * which case the caller must have used select_current_set appropriately. * * When called, CurrentMemoryContext should be the per-query context. 
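+ *
+ * For example, numReset == 2 within a rollup reinitializes only the two
+ * most specific grouping sets, leaving coarser ones still accumulating.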
*/ @@ -689,25 +786,37 @@ initialize_aggregates(AggState *aggstate, int transno; int numGroupingSets = Max(aggstate->phase->numsets, 1); int setno = 0; + int numTrans = aggstate->numtrans; AggStatePerTrans transstates = aggstate->pertrans; - if (numReset < 1) + if (numReset == 0) numReset = numGroupingSets; - for (transno = 0; transno < aggstate->numtrans; transno++) + for (transno = 0; transno < numTrans; transno++) { AggStatePerTrans pertrans = &transstates[transno]; - for (setno = 0; setno < numReset; setno++) + if (numReset < 0) { AggStatePerGroup pergroupstate; - pergroupstate = &pergroup[transno + (setno * (aggstate->numtrans))]; - - aggstate->current_set = setno; + pergroupstate = &pergroup[transno]; initialize_aggregate(aggstate, pertrans, pergroupstate); } + else + { + for (setno = 0; setno < numReset; setno++) + { + AggStatePerGroup pergroupstate; + + pergroupstate = &pergroup[transno + (setno * numTrans)]; + + select_current_set(aggstate, setno, false); + + initialize_aggregate(aggstate, pertrans, pergroupstate); + } + } } } @@ -757,7 +866,7 @@ advance_transition_function(AggState *aggstate, * do not need to pfree the old transValue, since it's NULL. */ oldContext = MemoryContextSwitchTo( - aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + aggstate->curaggcontext->ecxt_per_tuple_memory); pergroupstate->transValue = datumCopy(fcinfo->arg[1], pertrans->transtypeByVal, pertrans->transtypeLen); @@ -807,7 +916,7 @@ advance_transition_function(AggState *aggstate, { if (!fcinfo->isnull) { - MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory); if (DatumIsReadWriteExpandedObject(newVal, false, pertrans->transtypeLen) && @@ -838,17 +947,21 @@ advance_transition_function(AggState *aggstate, /* * Advance each aggregate transition state for one input tuple. The input * tuple has been stored in tmpcontext->ecxt_outertuple, so that it is - * accessible to ExecEvalExpr. pergroup is the array of per-group structs to - * use (this might be in a hashtable entry). + * accessible to ExecEvalExpr. + * + * We have two sets of transition states to handle: one for sorted aggregation + * and one for hashed; we do them both here, to avoid multiple evaluation of + * the inputs. * * When called, CurrentMemoryContext should be the per-query context. 
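+ *
+ * pergroup covers the sorted grouping sets of the current phase and may be
+ * NULL when only hashing is in use; pergroups has one entry per hashtable
+ * and is NULL when no hashtable updates are wanted for this tuple.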
*/ static void -advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) +advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup, AggStatePerGroup *pergroups) { int transno; int setno = 0; int numGroupingSets = Max(aggstate->phase->numsets, 1); + int numHashes = aggstate->num_hashes; int numTrans = aggstate->numtrans; TupleTableSlot *slot = aggstate->evalslot; @@ -880,6 +993,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) { /* DISTINCT and/or ORDER BY case */ Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff)); + Assert(!pergroups); /* * If the transfn is strict, we want to check for nullity before @@ -940,13 +1054,36 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff]; } - for (setno = 0; setno < numGroupingSets; setno++) + if (pergroup) { - AggStatePerGroup pergroupstate = &pergroup[transno + (setno * numTrans)]; + /* advance transition states for ordered grouping */ + + for (setno = 0; setno < numGroupingSets; setno++) + { + AggStatePerGroup pergroupstate; - aggstate->current_set = setno; + select_current_set(aggstate, setno, false); - advance_transition_function(aggstate, pertrans, pergroupstate); + pergroupstate = &pergroup[transno + (setno * numTrans)]; + + advance_transition_function(aggstate, pertrans, pergroupstate); + } + } + + if (pergroups) + { + /* advance transition states for hashed grouping */ + + for (setno = 0; setno < numHashes; setno++) + { + AggStatePerGroup pergroupstate; + + select_current_set(aggstate, setno, true); + + pergroupstate = &pergroups[setno][transno]; + + advance_transition_function(aggstate, pertrans, pergroupstate); + } } } } @@ -967,7 +1104,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup) TupleTableSlot *slot; /* combine not supported with grouping sets */ - Assert(aggstate->phase->numsets == 0); + Assert(aggstate->phase->numsets <= 1); /* compute input for all aggregates */ slot = ExecProject(aggstate->evalproj); @@ -1060,7 +1197,7 @@ advance_combine_function(AggState *aggstate, if (!pertrans->transtypeByVal) { oldContext = MemoryContextSwitchTo( - aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + aggstate->curaggcontext->ecxt_per_tuple_memory); pergroupstate->transValue = datumCopy(fcinfo->arg[1], pertrans->transtypeByVal, pertrans->transtypeLen); @@ -1105,7 +1242,7 @@ advance_combine_function(AggState *aggstate, { if (!fcinfo->isnull) { - MemoryContextSwitchTo(aggstate->aggcontexts[aggstate->current_set]->ecxt_per_tuple_memory); + MemoryContextSwitchTo(aggstate->curaggcontext->ecxt_per_tuple_memory); if (DatumIsReadWriteExpandedObject(newVal, false, pertrans->transtypeLen) && @@ -1559,15 +1696,16 @@ prepare_projection_slot(AggState *aggstate, TupleTableSlot *slot, int currentSet /* * Compute the final value of all aggregates for one group. * - * This function handles only one grouping set at a time. + * This function handles only one grouping set at a time, which the caller must + * have selected. It's also the caller's responsibility to adjust the supplied + * pergroup parameter to point to the current set's transvalues. * * Results are stored in the output econtext aggvalues/aggnulls. 
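+ *
+ * A typical call for sorted grouping set n (cf. agg_retrieve_direct):
+ *
+ *		select_current_set(aggstate, n, false);
+ *		finalize_aggregates(aggstate, peragg,
+ *							pergroup + (n * aggstate->numtrans));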
*/ static void finalize_aggregates(AggState *aggstate, AggStatePerAgg peraggs, - AggStatePerGroup pergroup, - int currentSet) + AggStatePerGroup pergroup) { ExprContext *econtext = aggstate->ss.ps.ps_ExprContext; Datum *aggvalues = econtext->ecxt_aggvalues; @@ -1575,11 +1713,6 @@ finalize_aggregates(AggState *aggstate, int aggno; int transno; - Assert(currentSet == 0 || - ((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); - - aggstate->current_set = currentSet; - /* * If there were any DISTINCT and/or ORDER BY aggregates, sort their * inputs and run the transition functions. @@ -1589,11 +1722,12 @@ finalize_aggregates(AggState *aggstate, AggStatePerTrans pertrans = &aggstate->pertrans[transno]; AggStatePerGroup pergroupstate; - pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))]; + pergroupstate = &pergroup[transno]; if (pertrans->numSortCols > 0) { - Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); + Assert(aggstate->aggstrategy != AGG_HASHED && + aggstate->aggstrategy != AGG_MIXED); if (pertrans->numInputs == 1) process_ordered_aggregate_single(aggstate, @@ -1615,7 +1749,7 @@ finalize_aggregates(AggState *aggstate, int transno = peragg->transno; AggStatePerGroup pergroupstate; - pergroupstate = &pergroup[transno + (currentSet * (aggstate->numtrans))]; + pergroupstate = &pergroup[transno]; if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit)) finalize_partialaggregate(aggstate, peragg, pergroupstate, @@ -1697,7 +1831,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) } /* - * Initialize the hash table to empty. + * Initialize the hash table(s) to empty. * * To implement hashed aggregation, we need a hashtable that stores a * representative tuple and an array of AggStatePerGroup structs for each @@ -1705,29 +1839,40 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) * GROUP BY columns. The per-group data is allocated in lookup_hash_entry(), * for each entry. * - * The hash table always lives in the aggcontext memory context. + * We have a separate hashtable and associated perhash data structure for each + * grouping set for which we're doing hashing. + * + * The hash tables always live in the hashcontext's per-tuple memory context + * (there is only one of these for all tables together, since they are all + * reset at the same time). 
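+ *
+ * Each entry's "additional" space holds the group's transition values, so
+ * additionalsize below is numtrans AggStatePerGroupData structs per entry.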
*/ static void build_hash_table(AggState *aggstate) { - Agg *node = (Agg *) aggstate->ss.ps.plan; MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory; Size additionalsize; + int i; - Assert(node->aggstrategy == AGG_HASHED); - Assert(node->numGroups > 0); + Assert(aggstate->aggstrategy == AGG_HASHED || aggstate->aggstrategy == AGG_MIXED); - additionalsize = aggstate->numaggs * sizeof(AggStatePerGroupData); + additionalsize = aggstate->numtrans * sizeof(AggStatePerGroupData); - aggstate->hashtable = BuildTupleHashTable(node->numCols, - aggstate->hashGrpColIdxHash, - aggstate->phase->eqfunctions, - aggstate->hashfunctions, - node->numGroups, - additionalsize, - aggstate->aggcontexts[0]->ecxt_per_tuple_memory, - tmpmem, + for (i = 0; i < aggstate->num_hashes; ++i) + { + AggStatePerHash perhash = &aggstate->perhash[i]; + + Assert(perhash->aggnode->numGroups > 0); + + perhash->hashtable = BuildTupleHashTable(perhash->numCols, + perhash->hashGrpColIdxHash, + perhash->eqfunctions, + perhash->hashfunctions, + perhash->aggnode->numGroups, + additionalsize, + aggstate->hashcontext->ecxt_per_tuple_memory, + tmpmem, DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit)); + } } /* @@ -1750,72 +1895,98 @@ build_hash_table(AggState *aggstate) * the array is preserved over ExecReScanAgg, so we allocate it in the * per-query context (unlike the hash table itself). */ -static List * +static void find_hash_columns(AggState *aggstate) { - Agg *node = (Agg *) aggstate->ss.ps.plan; - Bitmapset *colnos; - List *collist; - TupleDesc hashDesc; + Bitmapset *base_colnos; List *outerTlist = outerPlanState(aggstate)->plan->targetlist; - List *hashTlist = NIL; - int i; - - aggstate->largestGrpColIdx = 0; + int numHashes = aggstate->num_hashes; + int j; /* Find Vars that will be needed in tlist and qual */ - colnos = find_unaggregated_cols(aggstate); - /* Add in all the grouping columns */ - for (i = 0; i < node->numCols; i++) - colnos = bms_add_member(colnos, node->grpColIdx[i]); - /* Convert to list, using lcons so largest element ends up first */ - collist = NIL; + base_colnos = find_unaggregated_cols(aggstate); - aggstate->hashGrpColIdxInput = - palloc(bms_num_members(colnos) * sizeof(AttrNumber)); - aggstate->hashGrpColIdxHash = - palloc(node->numCols * sizeof(AttrNumber)); - - /* - * First build mapping for columns directly hashed. These are the first, - * because they'll be accessed when computing hash values and comparing - * tuples for exact matches. We also build simple mapping for - * execGrouping, so it knows where to find the to-be-hashed / compared - * columns in the input. 
- */ - for (i = 0; i < node->numCols; i++) + for (j = 0; j < numHashes; ++j) { - aggstate->hashGrpColIdxInput[i] = node->grpColIdx[i]; - aggstate->hashGrpColIdxHash[i] = i + 1; - aggstate->numhashGrpCols++; - /* delete already mapped columns */ - bms_del_member(colnos, node->grpColIdx[i]); - } + AggStatePerHash perhash = &aggstate->perhash[j]; + Bitmapset *colnos = bms_copy(base_colnos); + AttrNumber *grpColIdx = perhash->aggnode->grpColIdx; + List *hashTlist = NIL; + TupleDesc hashDesc; + int i; - /* and add the remaining columns */ - while ((i = bms_first_member(colnos)) >= 0) - { - aggstate->hashGrpColIdxInput[aggstate->numhashGrpCols] = i; - aggstate->numhashGrpCols++; - } + perhash->largestGrpColIdx = 0; - /* and build a tuple descriptor for the hashtable */ - for (i = 0; i < aggstate->numhashGrpCols; i++) - { - int varNumber = aggstate->hashGrpColIdxInput[i] - 1; + /* + * If we're doing grouping sets, then some Vars might be referenced in + * tlist/qual for the benefit of other grouping sets, but not needed + * when hashing; i.e. prepare_projection_slot will null them out, so + * there'd be no point storing them. Use prepare_projection_slot's + * logic to determine which. + */ + if (aggstate->phases[0].grouped_cols) + { + Bitmapset *grouped_cols = aggstate->phases[0].grouped_cols[j]; + ListCell *lc; - hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber)); - aggstate->largestGrpColIdx = - Max(varNumber + 1, aggstate->largestGrpColIdx); - } + foreach(lc, aggstate->all_grouped_cols) + { + int attnum = lfirst_int(lc); - hashDesc = ExecTypeFromTL(hashTlist, false); - ExecSetSlotDescriptor(aggstate->hashslot, hashDesc); + if (!bms_is_member(attnum, grouped_cols)) + colnos = bms_del_member(colnos, attnum); + } + } + /* Add in all the grouping columns */ + for (i = 0; i < perhash->numCols; i++) + colnos = bms_add_member(colnos, grpColIdx[i]); - list_free(hashTlist); - bms_free(colnos); + perhash->hashGrpColIdxInput = + palloc(bms_num_members(colnos) * sizeof(AttrNumber)); + perhash->hashGrpColIdxHash = + palloc(perhash->numCols * sizeof(AttrNumber)); + + /* + * First build mapping for columns directly hashed. These are the + * first, because they'll be accessed when computing hash values and + * comparing tuples for exact matches. We also build simple mapping + * for execGrouping, so it knows where to find the to-be-hashed / + * compared columns in the input. + */ + for (i = 0; i < perhash->numCols; i++) + { + perhash->hashGrpColIdxInput[i] = grpColIdx[i]; + perhash->hashGrpColIdxHash[i] = i + 1; + perhash->numhashGrpCols++; + /* delete already mapped columns */ + bms_del_member(colnos, grpColIdx[i]); + } - return collist; + /* and add the remaining columns */ + while ((i = bms_first_member(colnos)) >= 0) + { + perhash->hashGrpColIdxInput[perhash->numhashGrpCols] = i; + perhash->numhashGrpCols++; + } + + /* and build a tuple descriptor for the hashtable */ + for (i = 0; i < perhash->numhashGrpCols; i++) + { + int varNumber = perhash->hashGrpColIdxInput[i] - 1; + + hashTlist = lappend(hashTlist, list_nth(outerTlist, varNumber)); + perhash->largestGrpColIdx = + Max(varNumber + 1, perhash->largestGrpColIdx); + } + + hashDesc = ExecTypeFromTL(hashTlist, false); + ExecSetSlotDescriptor(perhash->hashslot, hashDesc); + + list_free(hashTlist); + bms_free(colnos); + } + + bms_free(base_colnos); } /* @@ -1840,26 +2011,30 @@ hash_agg_entry_size(int numAggs) } /* - * Find or create a hashtable entry for the tuple group containing the - * given tuple. 
+ * Find or create a hashtable entry for the tuple group containing the current + * tuple (already set in tmpcontext's outertuple slot), in the current grouping + * set (which the caller must have selected - note that initialize_aggregate + * depends on this). * * When called, CurrentMemoryContext should be the per-query context. */ static TupleHashEntryData * -lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) +lookup_hash_entry(AggState *aggstate) { - TupleTableSlot *hashslot = aggstate->hashslot; + TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple; + AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set]; + TupleTableSlot *hashslot = perhash->hashslot; TupleHashEntryData *entry; bool isnew; - int i; + int i; /* transfer just the needed columns into hashslot */ - slot_getsomeattrs(inputslot, aggstate->largestGrpColIdx); + slot_getsomeattrs(inputslot, perhash->largestGrpColIdx); ExecClearTuple(hashslot); - for (i = 0; i < aggstate->numhashGrpCols; i++) + for (i = 0; i < perhash->numhashGrpCols; i++) { - int varNumber = aggstate->hashGrpColIdxInput[i] - 1; + int varNumber = perhash->hashGrpColIdxInput[i] - 1; hashslot->tts_values[i] = inputslot->tts_values[varNumber]; hashslot->tts_isnull[i] = inputslot->tts_isnull[varNumber]; @@ -1867,21 +2042,43 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) ExecStoreVirtualTuple(hashslot); /* find or create the hashtable entry using the filtered tuple */ - entry = LookupTupleHashEntry(aggstate->hashtable, hashslot, &isnew); + entry = LookupTupleHashEntry(perhash->hashtable, hashslot, &isnew); if (isnew) { entry->additional = (AggStatePerGroup) - MemoryContextAlloc(aggstate->hashtable->tablecxt, + MemoryContextAlloc(perhash->hashtable->tablecxt, sizeof(AggStatePerGroupData) * aggstate->numtrans); /* initialize aggregates for new tuple group */ initialize_aggregates(aggstate, (AggStatePerGroup) entry->additional, - 0); + -1); } return entry; } +/* + * Look up hash entries for the current tuple in all hashed grouping sets, + * returning an array of pergroup pointers suitable for advance_aggregates. + * + * Be aware that lookup_hash_entry can reset the tmpcontext. 
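+ *
+ * Typical usage, as in agg_fill_hash_table:
+ *
+ *		tmpcontext->ecxt_outertuple = outerslot;
+ *		pergroups = lookup_hash_entries(aggstate);
+ *		advance_aggregates(aggstate, NULL, pergroups);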
+ */ +static AggStatePerGroup * +lookup_hash_entries(AggState *aggstate) +{ + int numHashes = aggstate->num_hashes; + AggStatePerGroup *pergroup = aggstate->hash_pergroup; + int setno; + + for (setno = 0; setno < numHashes; setno++) + { + select_current_set(aggstate, setno, true); + pergroup[setno] = lookup_hash_entry(aggstate)->additional; + } + + return pergroup; +} + /* * ExecAgg - * @@ -1898,19 +2095,22 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) TupleTableSlot * ExecAgg(AggState *node) { - TupleTableSlot *result; + TupleTableSlot *result = NULL; if (!node->agg_done) { /* Dispatch based on strategy */ - switch (node->phase->aggnode->aggstrategy) + switch (node->phase->aggstrategy) { case AGG_HASHED: if (!node->table_filled) agg_fill_hash_table(node); + /* FALLTHROUGH */ + case AGG_MIXED: result = agg_retrieve_hash_table(node); break; - default: + case AGG_PLAIN: + case AGG_SORTED: result = agg_retrieve_direct(node); break; } @@ -1933,6 +2133,7 @@ agg_retrieve_direct(AggState *aggstate) ExprContext *tmpcontext; AggStatePerAgg peragg; AggStatePerGroup pergroup; + AggStatePerGroup *hash_pergroups = NULL; TupleTableSlot *outerslot; TupleTableSlot *firstSlot; TupleTableSlot *result; @@ -2019,6 +2220,19 @@ agg_retrieve_direct(AggState *aggstate) node = aggstate->phase->aggnode; numReset = numGroupingSets; } + else if (aggstate->aggstrategy == AGG_MIXED) + { + /* + * Mixed mode; we've output all the grouped stuff and have + * full hashtables, so switch to outputting those. + */ + initialize_phase(aggstate, 0); + aggstate->table_filled = true; + ResetTupleHashIterator(aggstate->perhash[0].hashtable, + &aggstate->perhash[0].hashiter); + select_current_set(aggstate, 0, true); + return agg_retrieve_hash_table(aggstate); + } else { aggstate->agg_done = true; @@ -2055,7 +2269,7 @@ agg_retrieve_direct(AggState *aggstate) *---------- */ if (aggstate->input_done || - (node->aggstrategy == AGG_SORTED && + (node->aggstrategy != AGG_PLAIN && aggstate->projected_set != -1 && aggstate->projected_set < (numGroupingSets - 1) && nextSetSize > 0 && @@ -2168,10 +2382,22 @@ agg_retrieve_direct(AggState *aggstate) */ for (;;) { + /* + * During phase 1 only of a mixed agg, we need to update + * hashtables as well in advance_aggregates. + */ + if (aggstate->aggstrategy == AGG_MIXED && + aggstate->current_phase == 1) + { + hash_pergroups = lookup_hash_entries(aggstate); + } + else + hash_pergroups = NULL; + if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit)) combine_aggregates(aggstate, pergroup); else - advance_aggregates(aggstate, pergroup); + advance_aggregates(aggstate, pergroup, hash_pergroups); /* Reset per-input-tuple context after each tuple */ ResetExprContext(tmpcontext); @@ -2198,7 +2424,7 @@ agg_retrieve_direct(AggState *aggstate) * If we are grouping, check whether we've crossed a group * boundary. 
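+		 * (The test below now fires for AGG_MIXED as well as AGG_SORTED,
+		 * since a mixed aggregate also reads sorted input during its
+		 * initial phases.)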
*/ - if (node->aggstrategy == AGG_SORTED) + if (node->aggstrategy != AGG_PLAIN) { if (!execTuplesMatch(firstSlot, outerslot, @@ -2231,7 +2457,11 @@ agg_retrieve_direct(AggState *aggstate) prepare_projection_slot(aggstate, econtext->ecxt_outertuple, currentSet); - finalize_aggregates(aggstate, peragg, pergroup, currentSet); + select_current_set(aggstate, currentSet, false); + + finalize_aggregates(aggstate, + peragg, + pergroup + (currentSet * aggstate->numtrans)); /* * If there's no row to project right now, we must continue rather @@ -2247,21 +2477,13 @@ agg_retrieve_direct(AggState *aggstate) } /* - * ExecAgg for hashed case: phase 1, read input and build hash table + * ExecAgg for hashed case: read input and build hash table */ static void agg_fill_hash_table(AggState *aggstate) { - ExprContext *tmpcontext; - TupleHashEntryData *entry; TupleTableSlot *outerslot; - - /* - * get state info from node - * - * tmpcontext is the per-input-tuple expression context - */ - tmpcontext = aggstate->tmpcontext; + ExprContext *tmpcontext = aggstate->tmpcontext; /* * Process each outer-plan tuple, and then fetch the next one, until we @@ -2269,32 +2491,40 @@ agg_fill_hash_table(AggState *aggstate) */ for (;;) { + AggStatePerGroup *pergroups; + outerslot = fetch_input_tuple(aggstate); if (TupIsNull(outerslot)) break; - /* set up for advance_aggregates call */ + + /* set up for lookup_hash_entries and advance_aggregates */ tmpcontext->ecxt_outertuple = outerslot; - /* Find or build hashtable entry for this tuple's group */ - entry = lookup_hash_entry(aggstate, outerslot); + /* Find or build hashtable entries */ + pergroups = lookup_hash_entries(aggstate); /* Advance the aggregates */ if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit)) - combine_aggregates(aggstate, (AggStatePerGroup) entry->additional); + combine_aggregates(aggstate, pergroups[0]); else - advance_aggregates(aggstate, (AggStatePerGroup) entry->additional); + advance_aggregates(aggstate, NULL, pergroups); - /* Reset per-input-tuple context after each tuple */ - ResetExprContext(tmpcontext); + /* + * Reset per-input-tuple context after each tuple, but note that the + * hash lookups do this too + */ + ResetExprContext(aggstate->tmpcontext); } aggstate->table_filled = true; - /* Initialize to walk the hash table */ - ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter); + /* Initialize to walk the first hash table */ + select_current_set(aggstate, 0, true); + ResetTupleHashIterator(aggstate->perhash[0].hashtable, + &aggstate->perhash[0].hashiter); } /* - * ExecAgg for hashed case: phase 2, retrieving groups from hash table + * ExecAgg for hashed case: retrieving groups from hash table */ static TupleTableSlot * agg_retrieve_hash_table(AggState *aggstate) @@ -2305,17 +2535,22 @@ agg_retrieve_hash_table(AggState *aggstate) TupleHashEntryData *entry; TupleTableSlot *firstSlot; TupleTableSlot *result; - TupleTableSlot *hashslot; + AggStatePerHash perhash; /* - * get state info from node + * get state info from node. + * + * econtext is the per-output-tuple expression context. */ - /* econtext is the per-output-tuple expression context */ econtext = aggstate->ss.ps.ps_ExprContext; peragg = aggstate->peragg; firstSlot = aggstate->ss.ss_ScanTupleSlot; - hashslot = aggstate->hashslot; + /* + * Note that perhash (and therefore anything accessed through it) can + * change inside the loop, as we change between grouping sets. 
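+ * When one hashtable is exhausted, the loop below advances current_set
+ * and restarts the scan with the next table's iterator, so hashed sets
+ * are returned in perhash[] order.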
+ */ + perhash = &aggstate->perhash[aggstate->current_set]; /* * We loop retrieving groups until we find one satisfying @@ -2323,17 +2558,37 @@ agg_retrieve_hash_table(AggState *aggstate) */ while (!aggstate->agg_done) { - int i; + TupleTableSlot *hashslot = perhash->hashslot; + int i; /* * Find the next entry in the hash table */ - entry = ScanTupleHashTable(aggstate->hashtable, &aggstate->hashiter); + entry = ScanTupleHashTable(perhash->hashtable, &perhash->hashiter); if (entry == NULL) { - /* No more entries in hashtable, so done */ - aggstate->agg_done = TRUE; - return NULL; + int nextset = aggstate->current_set + 1; + + if (nextset < aggstate->num_hashes) + { + /* + * Switch to next grouping set, reinitialize, and restart the + * loop. + */ + select_current_set(aggstate, nextset, true); + + perhash = &aggstate->perhash[aggstate->current_set]; + + ResetTupleHashIterator(perhash->hashtable, &perhash->hashiter); + + continue; + } + else + { + /* No more hashtables, so done */ + aggstate->agg_done = TRUE; + return NULL; + } } /* @@ -2356,9 +2611,9 @@ agg_retrieve_hash_table(AggState *aggstate) memset(firstSlot->tts_isnull, true, firstSlot->tts_tupleDescriptor->natts * sizeof(bool)); - for (i = 0; i < aggstate->numhashGrpCols; i++) + for (i = 0; i < perhash->numhashGrpCols; i++) { - int varNumber = aggstate->hashGrpColIdxInput[i] - 1; + int varNumber = perhash->hashGrpColIdxInput[i] - 1; firstSlot->tts_values[varNumber] = hashslot->tts_values[i]; firstSlot->tts_isnull[varNumber] = hashslot->tts_isnull[i]; @@ -2367,14 +2622,18 @@ agg_retrieve_hash_table(AggState *aggstate) pergroup = (AggStatePerGroup) entry->additional; - finalize_aggregates(aggstate, peragg, pergroup, 0); - /* * Use the representative input tuple for any references to * non-aggregated input columns in the qual and tlist. */ econtext->ecxt_outertuple = firstSlot; + prepare_projection_slot(aggstate, + econtext->ecxt_outertuple, + aggstate->current_set); + + finalize_aggregates(aggstate, peragg, pergroup); + result = project_aggregates(aggstate); if (result) return result; @@ -2388,7 +2647,8 @@ agg_retrieve_hash_table(AggState *aggstate) * ExecInitAgg * * Creates the run-time information for the agg node produced by the - * planner and initializes its outer subtree + * planner and initializes its outer subtree. + * * ----------------- */ AggState * @@ -2403,14 +2663,18 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) transno, aggno; int phase; + int phaseidx; List *combined_inputeval; ListCell *l; Bitmapset *all_grouped_cols = NULL; int numGroupingSets = 1; int numPhases; + int numHashes; int column_offset; int i = 0; int j = 0; + bool use_hashing = (node->aggstrategy == AGG_HASHED || + node->aggstrategy == AGG_MIXED); /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2425,9 +2689,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->aggs = NIL; aggstate->numaggs = 0; aggstate->numtrans = 0; + aggstate->aggstrategy = node->aggstrategy; aggstate->aggsplit = node->aggsplit; aggstate->maxsets = 0; - aggstate->hashfunctions = NULL; aggstate->projected_set = -1; aggstate->current_set = 0; aggstate->peragg = NULL; @@ -2437,18 +2701,22 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->agg_done = false; aggstate->pergroup = NULL; aggstate->grp_firstTuple = NULL; - aggstate->hashtable = NULL; aggstate->sort_in = NULL; aggstate->sort_out = NULL; + /* + * phases[0] always exists, but is dummy in sorted/plain mode + */ + numPhases = (use_hashing ? 
1 : 2); + numHashes = (use_hashing ? 1 : 0); + /* * Calculate the maximum number of grouping sets in any phase; this - * determines the size of some allocations. + * determines the size of some allocations. Also calculate the number of + * phases, since all hashed/mixed nodes contribute to only a single phase. */ if (node->groupingSets) { - Assert(node->aggstrategy != AGG_HASHED); - numGroupingSets = list_length(node->groupingSets); foreach(l, node->chain) @@ -2457,22 +2725,32 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) numGroupingSets = Max(numGroupingSets, list_length(agg->groupingSets)); + + /* + * additional AGG_HASHED aggs become part of phase 0, but all + * others add an extra phase. + */ + if (agg->aggstrategy != AGG_HASHED) + ++numPhases; + else + ++numHashes; } } aggstate->maxsets = numGroupingSets; - aggstate->numphases = numPhases = 1 + list_length(node->chain); + aggstate->numphases = numPhases; aggstate->aggcontexts = (ExprContext **) palloc0(sizeof(ExprContext *) * numGroupingSets); /* * Create expression contexts. We need three or more, one for - * per-input-tuple processing, one for per-output-tuple processing, and - * one for each grouping set. The per-tuple memory context of the - * per-grouping-set ExprContexts (aggcontexts) replaces the standalone - * memory context formerly used to hold transition values. We cheat a - * little by using ExecAssignExprContext() to build all of them. + * per-input-tuple processing, one for per-output-tuple processing, one + * for all the hashtables, and one for each grouping set. The per-tuple + * memory context of the per-grouping-set ExprContexts (aggcontexts) + * replaces the standalone memory context formerly used to hold transition + * values. We cheat a little by using ExecAssignExprContext() to build + * all of them. * * NOTE: the details of what is stored in aggcontexts and what is stored * in the regular per-query memory context are driven by a simple @@ -2488,14 +2766,21 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext; } + if (use_hashing) + { + ExecAssignExprContext(estate, &aggstate->ss.ps); + aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext; + } + ExecAssignExprContext(estate, &aggstate->ss.ps); /* - * tuple table initialization + * tuple table initialization. + * + * For hashtables, we create some additional slots below. */ ExecInitScanTupleSlot(estate, &aggstate->ss); ExecInitResultTupleSlot(estate, &aggstate->ss.ps); - aggstate->hashslot = ExecInitExtraTupleSlot(estate); aggstate->sort_slot = ExecInitExtraTupleSlot(estate); /* @@ -2559,19 +2844,26 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) * For each phase, prepare grouping set data and fmgr lookup data for * compare functions. Accumulate all_grouped_cols in passing. 
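+ *
+ * For example (hypothetical plan): an AGG_MIXED node with two chained
+ * AGG_HASHED nodes and one chained AGG_SORTED rollup yields phase 0
+ * holding all three hashed grouping sets, plus phase 1 for the rollup.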
*/ - aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData)); - for (phase = 0; phase < numPhases; ++phase) + aggstate->num_hashes = numHashes; + if (numHashes) + { + aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes); + aggstate->phases[0].numsets = 0; + aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int)); + aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *)); + } + + phase = 0; + for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx) { - AggStatePerPhase phasedata = &aggstate->phases[phase]; Agg *aggnode; Sort *sortnode; - int num_sets; - if (phase > 0) + if (phaseidx > 0) { - aggnode = castNode(Agg, list_nth(node->chain, phase - 1)); + aggnode = castNode(Agg, list_nth(node->chain, phaseidx - 1)); sortnode = castNode(Sort, aggnode->plan.lefttree); } else @@ -2580,53 +2872,91 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) sortnode = NULL; } - phasedata->numsets = num_sets = list_length(aggnode->groupingSets); + Assert(phase <= 1 || sortnode); - if (num_sets) + if (aggnode->aggstrategy == AGG_HASHED + || aggnode->aggstrategy == AGG_MIXED) { - phasedata->gset_lengths = palloc(num_sets * sizeof(int)); - phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *)); + AggStatePerPhase phasedata = &aggstate->phases[0]; + AggStatePerHash perhash; + Bitmapset *cols = NULL; - i = 0; - foreach(l, aggnode->groupingSets) - { - int current_length = list_length(lfirst(l)); - Bitmapset *cols = NULL; + Assert(phase == 0); + i = phasedata->numsets++; + perhash = &aggstate->perhash[i]; - /* planner forces this to be correct */ - for (j = 0; j < current_length; ++j) - cols = bms_add_member(cols, aggnode->grpColIdx[j]); + /* phase 0 always points to the "real" Agg in the hash case */ + phasedata->aggnode = node; + phasedata->aggstrategy = node->aggstrategy; - phasedata->grouped_cols[i] = cols; - phasedata->gset_lengths[i] = current_length; - ++i; - } + /* but the actual Agg node representing this hash is saved here */ + perhash->aggnode = aggnode; + + phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols; + + for (j = 0; j < aggnode->numCols; ++j) + cols = bms_add_member(cols, aggnode->grpColIdx[j]); + + phasedata->grouped_cols[i] = cols; - all_grouped_cols = bms_add_members(all_grouped_cols, - phasedata->grouped_cols[0]); + all_grouped_cols = bms_add_members(all_grouped_cols, cols); + continue; } else { - Assert(phase == 0); + AggStatePerPhase phasedata = &aggstate->phases[++phase]; + int num_sets; - phasedata->gset_lengths = NULL; - phasedata->grouped_cols = NULL; - } + phasedata->numsets = num_sets = list_length(aggnode->groupingSets); - /* - * If we are grouping, precompute fmgr lookup data for inner loop. 
- */ - if (aggnode->aggstrategy == AGG_SORTED) - { - Assert(aggnode->numCols > 0); + if (num_sets) + { + phasedata->gset_lengths = palloc(num_sets * sizeof(int)); + phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *)); - phasedata->eqfunctions = - execTuplesMatchPrepare(aggnode->numCols, - aggnode->grpOperators); - } + i = 0; + foreach(l, aggnode->groupingSets) + { + int current_length = list_length(lfirst(l)); + Bitmapset *cols = NULL; + + /* planner forces this to be correct */ + for (j = 0; j < current_length; ++j) + cols = bms_add_member(cols, aggnode->grpColIdx[j]); + + phasedata->grouped_cols[i] = cols; + phasedata->gset_lengths[i] = current_length; + + ++i; + } + + all_grouped_cols = bms_add_members(all_grouped_cols, + phasedata->grouped_cols[0]); + } + else + { + Assert(phaseidx == 0); + + phasedata->gset_lengths = NULL; + phasedata->grouped_cols = NULL; + } + + /* + * If we are grouping, precompute fmgr lookup data for inner loop. + */ + if (aggnode->aggstrategy == AGG_SORTED) + { + Assert(aggnode->numCols > 0); + + phasedata->eqfunctions = + execTuplesMatchPrepare(aggnode->numCols, + aggnode->grpOperators); + } - phasedata->aggnode = aggnode; - phasedata->sortnode = sortnode; + phasedata->aggnode = aggnode; + phasedata->aggstrategy = aggnode->aggstrategy; + phasedata->sortnode = sortnode; + } } /* @@ -2636,13 +2966,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) while ((i = bms_next_member(all_grouped_cols, i)) >= 0) aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols); - /* - * Initialize current phase-dependent values to initial phase - */ - - aggstate->current_phase = 0; - initialize_phase(aggstate, 0); - /* * Set up aggregate-result storage in the output expr context, and also * allocate my private per-agg working storage @@ -2657,23 +2980,30 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->peragg = peraggs; aggstate->pertrans = pertransstates; - /* * Hashing can only appear in the initial phase. */ - if (node->aggstrategy == AGG_HASHED) + if (use_hashing) { - find_hash_columns(aggstate); + for (i = 0; i < numHashes; ++i) + { + aggstate->perhash[i].hashslot = ExecInitExtraTupleSlot(estate); + + execTuplesHashPrepare(aggstate->perhash[i].numCols, + aggstate->perhash[i].aggnode->grpOperators, + &aggstate->perhash[i].eqfunctions, + &aggstate->perhash[i].hashfunctions); + } - execTuplesHashPrepare(node->numCols, - node->grpOperators, - &aggstate->phases[0].eqfunctions, - &aggstate->hashfunctions); + /* this is an array of pointers, not structures */ + aggstate->hash_pergroup = palloc0(sizeof(AggStatePerGroup) * numHashes); + find_hash_columns(aggstate); build_hash_table(aggstate); aggstate->table_filled = false; } - else + + if (node->aggstrategy != AGG_HASHED) { AggStatePerGroup pergroup; @@ -2684,6 +3014,25 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->pergroup = pergroup; } + /* + * Initialize current phase-dependent values to initial phase. The initial + * phase is 1 (first sort pass) for all strategies that use sorting (if + * hashing is being done too, then phase 0 is processed last); but if only + * hashing is being done, then phase 0 is all there is. 
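+ *
+ * (A MixedAggregate therefore also starts in phase 1; agg_retrieve_direct
+ * switches it to phase 0 once the sorted phases are exhausted.)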
+ */ + if (node->aggstrategy == AGG_HASHED) + { + aggstate->current_phase = 0; + initialize_phase(aggstate, 0); + select_current_set(aggstate, 0, true); + } + else + { + aggstate->current_phase = 1; + initialize_phase(aggstate, 1); + select_current_set(aggstate, 0, false); + } + /* ----------------- * Perform lookups of aggregate function info, and initialize the * unchanging fields of the per-agg and per-trans data. @@ -3261,7 +3610,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans, * We don't implement DISTINCT or ORDER BY aggs in the HASHED case * (yet) */ - Assert(((Agg *) aggstate->ss.ps.plan)->aggstrategy != AGG_HASHED); + Assert(aggstate->aggstrategy != AGG_HASHED && aggstate->aggstrategy != AGG_MIXED); /* If we have only one input, we need its len/byval info. */ if (numInputs == 1) @@ -3510,6 +3859,8 @@ ExecEndAgg(AggState *node) /* And ensure any agg shutdown callbacks have been called */ for (setno = 0; setno < numGroupingSets; setno++) ReScanExprContext(node->aggcontexts[setno]); + if (node->hashcontext) + ReScanExprContext(node->hashcontext); /* * We don't actually free any ExprContexts here (see comment in @@ -3537,7 +3888,7 @@ ExecReScanAgg(AggState *node) node->agg_done = false; - if (aggnode->aggstrategy == AGG_HASHED) + if (node->aggstrategy == AGG_HASHED) { /* * In the hashed case, if we haven't yet built the hash table then we @@ -3557,7 +3908,9 @@ ExecReScanAgg(AggState *node) if (outerPlan->chgParam == NULL && !bms_overlap(node->ss.ps.chgParam, aggnode->aggParams)) { - ResetTupleHashIterator(node->hashtable, &node->hashiter); + ResetTupleHashIterator(node->perhash[0].hashtable, + &node->perhash[0].hashiter); + select_current_set(node, 0, true); return; } } @@ -3582,11 +3935,7 @@ ExecReScanAgg(AggState *node) * ExecReScan already did it. But we do need to reset our per-grouping-set * contexts, which may have transvalues stored in them. (We use rescan * rather than just reset because transfns may have registered callbacks - * that need to be run now.) - * - * Note that with AGG_HASHED, the hash table is allocated in a sub-context - * of the aggcontext. This used to be an issue, but now, resetting a - * context automatically deletes sub-contexts too. + * that need to be run now.) For the AGG_HASHED case, see below. */ for (setno = 0; setno < numGroupingSets; setno++) @@ -3606,13 +3955,21 @@ ExecReScanAgg(AggState *node) MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs); MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs); - if (aggnode->aggstrategy == AGG_HASHED) + /* + * With AGG_HASHED/MIXED, the hash table is allocated in a sub-context of + * the hashcontext. This used to be an issue, but now, resetting a context + * automatically deletes sub-contexts too. 
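+ * Hence the ReScanExprContext call on the hashcontext below frees all the
+ * hashtables at once, after which build_hash_table recreates them empty.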
+	 */
+	if (node->aggstrategy == AGG_HASHED || node->aggstrategy == AGG_MIXED)
 	{
+		ReScanExprContext(node->hashcontext);
 		/* Rebuild an empty hash table */
 		build_hash_table(node);
 		node->table_filled = false;
+		/* iterator will be reset when the table is filled */
 	}
-	else
+
+	if (node->aggstrategy != AGG_HASHED)
 	{
 		/*
 		 * Reset the per-group state (in particular, mark transvalues null)
 		 */
 		MemSet(node->pergroup, 0,
 			   sizeof(AggStatePerGroupData) * node->numaggs * numGroupingSets);
 
-		/* reset to phase 0 */
-		initialize_phase(node, 0);
+		/* reset to phase 1 */
+		initialize_phase(node, 1);
 
 		node->input_done = false;
 		node->projected_set = -1;
@@ -3662,7 +4019,7 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext)
 	if (aggcontext)
 	{
 		AggState   *aggstate = ((AggState *) fcinfo->context);
-		ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+		ExprContext *cxt = aggstate->curaggcontext;
 
 		*aggcontext = cxt->ecxt_per_tuple_memory;
 	}
@@ -3751,7 +4108,7 @@ AggRegisterCallback(FunctionCallInfo fcinfo,
 	if (fcinfo->context && IsA(fcinfo->context, AggState))
 	{
 		AggState   *aggstate = (AggState *) fcinfo->context;
-		ExprContext *cxt = aggstate->aggcontexts[aggstate->current_set];
+		ExprContext *cxt = aggstate->curaggcontext;
 
 		RegisterExprContextCallback(cxt, func, arg);
diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 2d2ba84fe9..f222c6c20d 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -12,7 +12,7 @@ subdir = src/backend/lib
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o pairingheap.o \
-	rbtree.o stringinfo.o
+OBJS = binaryheap.o bipartite_match.o hyperloglog.o ilist.o knapsack.o \
+	pairingheap.o rbtree.o stringinfo.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/knapsack.c b/src/backend/lib/knapsack.c
new file mode 100644
index 0000000000..ddf2b9afa3
--- /dev/null
+++ b/src/backend/lib/knapsack.c
@@ -0,0 +1,114 @@
+/*-------------------------------------------------------------------------
+ *
+ * knapsack.c
+ *	  Knapsack problem solver
+ *
+ * Given input vectors of integral item weights (must be >= 0) and values
+ * (double >= 0), compute the set of items which produces the greatest total
+ * value without exceeding a specified total weight; each item is included at
+ * most once (this is the 0/1 knapsack problem). Weight 0 items will always be
+ * included.
+ *
+ * The performance of this algorithm is pseudo-polynomial, O(nW) where W is the
+ * weight limit. To use with non-integral weights or approximate solutions,
+ * the caller should pre-scale the input weights to a suitable range. This
+ * allows approximate solutions in polynomial time (the general case of the
+ * exact problem is NP-hard).
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/lib/knapsack.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+#include "lib/knapsack.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+
+/*
+ * DiscreteKnapsack
+ *
+ * The item_values input is optional; if omitted, all the items are assumed to
+ * have value 1.
+ *
+ * Returns a Bitmapset of the 0..(n-1) indexes of the items chosen for
+ * inclusion in the solution.
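+ *
+ * Small worked example (hypothetical inputs): with max_weight = 2,
+ * item_weights = {1, 1, 2} and item_values = {1.0, 1.0, 2.5}, items 0
+ * and 1 together are worth 2.0, but item 2 alone is worth 2.5 at the
+ * same total weight, so the returned set contains only index 2.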
+ * + * This uses the usual dynamic-programming algorithm, adapted to reuse the + * memory on each pass (by working from larger weights to smaller). At the + * start of pass number i, the values[w] array contains the largest value + * computed with total weight <= w, using only items with indices < i; and + * sets[w] contains the bitmap of items actually used for that value. (The + * bitmapsets are all pre-initialized with an unused high bit so that memory + * allocation is done only once.) + */ +Bitmapset * +DiscreteKnapsack(int max_weight, int num_items, + int *item_weights, double *item_values) +{ + MemoryContext local_ctx = AllocSetContextCreate(CurrentMemoryContext, + "Knapsack", + ALLOCSET_SMALL_MINSIZE, + ALLOCSET_SMALL_INITSIZE, + ALLOCSET_SMALL_MAXSIZE); + MemoryContext oldctx = MemoryContextSwitchTo(local_ctx); + double *values; + Bitmapset **sets; + Bitmapset *result; + int i, + j; + + Assert(max_weight >= 0); + Assert(num_items > 0 && item_weights); + + values = palloc((1 + max_weight) * sizeof(double)); + sets = palloc((1 + max_weight) * sizeof(Bitmapset *)); + + for (i = 0; i <= max_weight; ++i) + { + values[i] = 0; + sets[i] = bms_make_singleton(num_items); + } + + for (i = 0; i < num_items; ++i) + { + int iw = item_weights[i]; + double iv = item_values ? item_values[i] : 1; + + for (j = max_weight; j >= iw; --j) + { + int ow = j - iw; + + if (values[j] <= values[ow] + iv) + { + /* copy sets[ow] to sets[j] without realloc */ + if (j != ow) + { + sets[j] = bms_del_members(sets[j], sets[j]); + sets[j] = bms_add_members(sets[j], sets[ow]); + } + + sets[j] = bms_add_member(sets[j], i); + + values[j] = values[ow] + iv; + } + } + } + + MemoryContextSwitchTo(oldctx); + + result = bms_del_member(bms_copy(sets[max_weight]), num_items); + + MemoryContextDelete(local_ctx); + + return result; +} diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index 252af5c870..bf8545d437 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -21,6 +21,7 @@ #include "postgres.h" #include "access/hash.h" +#include "nodes/pg_list.h" #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) @@ -457,6 +458,35 @@ bms_overlap(const Bitmapset *a, const Bitmapset *b) return false; } +/* + * bms_overlap_list - does a set overlap an integer list? + */ +bool +bms_overlap_list(const Bitmapset *a, const List *b) +{ + ListCell *lc; + int wordnum, + bitnum; + + if (a == NULL || b == NIL) + return false; + + foreach(lc, b) + { + int x = lfirst_int(lc); + + if (x < 0) + elog(ERROR, "negative bitmapset member not allowed"); + wordnum = WORDNUM(x); + bitnum = BITNUM(x); + if (wordnum < a->nwords) + if ((a->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0) + return true; + } + + return false; +} + /* * bms_nonempty_difference - do sets have a nonempty difference? 
*/ diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 541af02935..bbb63a4bfa 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1941,6 +1941,28 @@ _outAggPath(StringInfo str, const AggPath *node) WRITE_NODE_FIELD(qual); } +static void +_outRollupData(StringInfo str, const RollupData *node) +{ + WRITE_NODE_TYPE("ROLLUP"); + + WRITE_NODE_FIELD(groupClause); + WRITE_NODE_FIELD(gsets); + WRITE_NODE_FIELD(gsets_data); + WRITE_FLOAT_FIELD(numGroups, "%.0f"); + WRITE_BOOL_FIELD(hashable); + WRITE_BOOL_FIELD(is_hashed); +} + +static void +_outGroupingSetData(StringInfo str, const GroupingSetData *node) +{ + WRITE_NODE_TYPE("GSDATA"); + + WRITE_NODE_FIELD(set); + WRITE_FLOAT_FIELD(numGroups, "%.0f"); +} + static void _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node) { @@ -1949,8 +1971,8 @@ _outGroupingSetsPath(StringInfo str, const GroupingSetsPath *node) _outPathInfo(str, (const Path *) node); WRITE_NODE_FIELD(subpath); - WRITE_NODE_FIELD(rollup_groupclauses); - WRITE_NODE_FIELD(rollup_lists); + WRITE_ENUM_FIELD(aggstrategy, AggStrategy); + WRITE_NODE_FIELD(rollups); WRITE_NODE_FIELD(qual); } @@ -3961,14 +3983,18 @@ outNode(StringInfo str, const void *obj) case T_PlannerParamItem: _outPlannerParamItem(str, obj); break; + case T_RollupData: + _outRollupData(str, obj); + break; + case T_GroupingSetData: + _outGroupingSetData(str, obj); + break; case T_StatisticExtInfo: _outStatisticExtInfo(str, obj); break; - case T_ExtensibleNode: _outExtensibleNode(str, obj); break; - case T_CreateStmt: _outCreateStmt(str, obj); break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 57229059bd..92de2b7d48 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1884,11 +1884,16 @@ cost_agg(Path *path, PlannerInfo *root, total_cost = startup_cost + cpu_tuple_cost; output_tuples = 1; } - else if (aggstrategy == AGG_SORTED) + else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED) { /* Here we are able to deliver output on-the-fly */ startup_cost = input_startup_cost; total_cost = input_total_cost; + if (aggstrategy == AGG_MIXED && !enable_hashagg) + { + startup_cost += disable_cost; + total_cost += disable_cost; + } /* calcs phrased this way to match HASHED case, see note above */ total_cost += aggcosts->transCost.startup; total_cost += aggcosts->transCost.per_tuple * input_tuples; diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index c80c9992c9..aafec58281 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1783,18 +1783,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) { Agg *plan; Plan *subplan; - List *rollup_groupclauses = best_path->rollup_groupclauses; - List *rollup_lists = best_path->rollup_lists; + List *rollups = best_path->rollups; AttrNumber *grouping_map; int maxref; List *chain; - ListCell *lc, - *lc2; + ListCell *lc; /* Shouldn't get here without grouping sets */ Assert(root->parse->groupingSets); - Assert(rollup_lists != NIL); - Assert(list_length(rollup_lists) == list_length(rollup_groupclauses)); + Assert(rollups != NIL); /* * Agg can project, so no need to be terribly picky about child tlist, but @@ -1846,72 +1843,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) * costs will be shown by EXPLAIN. 
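+	 * (Each element of "chain" is itself an Agg plan node; at executor
+	 * startup the parent Agg absorbs them as additional grouping-set
+	 * phases rather than running them as independent subplans.)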
*/ chain = NIL; - if (list_length(rollup_groupclauses) > 1) + if (list_length(rollups) > 1) { - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + ListCell *lc2 = lnext(list_head(rollups)); + bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed; + + for_each_cell(lc, lc2) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); + RollupData *rollup = lfirst(lc); AttrNumber *new_grpColIdx; - Plan *sort_plan; + Plan *sort_plan = NULL; Plan *agg_plan; + AggStrategy strat; - /* We want to iterate over all but the last rollup list elements */ - if (lnext(lc) == NULL) - break; + new_grpColIdx = remap_groupColIdx(root, rollup->groupClause); + + if (!rollup->is_hashed && !is_first_sort) + { + sort_plan = (Plan *) + make_sort_from_groupcols(rollup->groupClause, + new_grpColIdx, + subplan); + } - new_grpColIdx = remap_groupColIdx(root, groupClause); + if (!rollup->is_hashed) + is_first_sort = false; - sort_plan = (Plan *) - make_sort_from_groupcols(groupClause, - new_grpColIdx, - subplan); + if (rollup->is_hashed) + strat = AGG_HASHED; + else if (list_length(linitial(rollup->gsets)) == 0) + strat = AGG_PLAIN; + else + strat = AGG_SORTED; agg_plan = (Plan *) make_agg(NIL, NIL, - AGG_SORTED, + strat, AGGSPLIT_SIMPLE, - list_length((List *) linitial(gsets)), + list_length((List *) linitial(rollup->gsets)), new_grpColIdx, - extract_grouping_ops(groupClause), - gsets, + extract_grouping_ops(rollup->groupClause), + rollup->gsets, NIL, - 0, /* numGroups not needed */ + rollup->numGroups, sort_plan); /* - * Nuke stuff we don't need to avoid bloating debug output. + * Remove stuff we don't need to avoid bloating debug output. */ - sort_plan->targetlist = NIL; - sort_plan->lefttree = NULL; + if (sort_plan) + { + sort_plan->targetlist = NIL; + sort_plan->lefttree = NULL; + } chain = lappend(chain, agg_plan); } } /* - * Now make the final Agg node + * Now make the real Agg node */ { - List *groupClause = (List *) llast(rollup_groupclauses); - List *gsets = (List *) llast(rollup_lists); + RollupData *rollup = linitial(rollups); AttrNumber *top_grpColIdx; int numGroupCols; - top_grpColIdx = remap_groupColIdx(root, groupClause); + top_grpColIdx = remap_groupColIdx(root, rollup->groupClause); - numGroupCols = list_length((List *) linitial(gsets)); + numGroupCols = list_length((List *) linitial(rollup->gsets)); plan = make_agg(build_path_tlist(root, &best_path->path), best_path->qual, - (numGroupCols > 0) ? 
AGG_SORTED : AGG_PLAIN, + best_path->aggstrategy, AGGSPLIT_SIMPLE, numGroupCols, top_grpColIdx, - extract_grouping_ops(groupClause), - gsets, + extract_grouping_ops(rollup->groupClause), + rollup->gsets, chain, - 0, /* numGroups not needed */ + rollup->numGroups, subplan); /* Copy cost data from Path to Plan */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 90619509a2..fa7a5f8427 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -30,6 +30,7 @@ #include "foreign/fdwapi.h" #include "miscadmin.h" #include "lib/bipartite_match.h" +#include "lib/knapsack.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #ifdef OPTIMIZER_DEBUG @@ -91,12 +92,31 @@ typedef struct List *groupClause; /* overrides parse->groupClause */ } standard_qp_extra; +/* + * Data specific to grouping sets + */ + +typedef struct +{ + List *rollups; + List *hash_sets_idx; + double dNumHashGroups; + bool any_hashable; + Bitmapset *unsortable_refs; + Bitmapset *unhashable_refs; + List *unsortable_sets; + int *tleref_to_colnum_map; +} grouping_sets_data; + /* Local functions */ static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind); static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode); static void inheritance_planner(PlannerInfo *root); static void grouping_planner(PlannerInfo *root, bool inheritance_update, double tuple_fraction); +static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root); +static List *remap_to_groupclause_idx(List *groupClause, List *gsets, + int *tleref_to_colnum_map); static void preprocess_rowmarks(PlannerInfo *root); static double preprocess_limit(PlannerInfo *root, double tuple_fraction, @@ -109,8 +129,7 @@ static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); static double get_number_of_groups(PlannerInfo *root, double path_rows, - List *rollup_lists, - List *rollup_groupclauses); + grouping_sets_data *gd); static Size estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, double dNumGroups); @@ -118,8 +137,16 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - List *rollup_lists, - List *rollup_groupclauses); + grouping_sets_data *gd); +static void consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + PathTarget *target, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups); static RelOptInfo *create_window_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *input_target, @@ -1540,8 +1567,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, AggClauseCosts agg_costs; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; - List *rollup_lists = NIL; - List *rollup_groupclauses = NIL; + grouping_sets_data *gset_data = NULL; standard_qp_extra qp_extra; /* A recursive query should always have setOperations */ @@ -1550,84 +1576,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Preprocess grouping sets and GROUP BY clause, if any */ if (parse->groupingSets) { - int *tleref_to_colnum_map; - List *sets; - int maxref; - ListCell *lc; - ListCell *lc2; - ListCell *lc_set; - - parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); - - /* Identify max SortGroupRef in groupClause, for array sizing */ - maxref = 0; - 
foreach(lc, parse->groupClause) - { - SortGroupClause *gc = lfirst(lc); - - if (gc->tleSortGroupRef > maxref) - maxref = gc->tleSortGroupRef; - } - - /* Allocate workspace array for remapping */ - tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int)); - - /* Examine the rollup sets */ - sets = extract_rollup_sets(parse->groupingSets); - - foreach(lc_set, sets) - { - List *current_sets = (List *) lfirst(lc_set); - List *groupclause; - int ref; - - /* - * Reorder the current list of grouping sets into correct - * prefix order. If only one aggregation pass is needed, try - * to make the list match the ORDER BY clause; if more than - * one pass is needed, we don't bother with that. - */ - current_sets = reorder_grouping_sets(current_sets, - (list_length(sets) == 1 - ? parse->sortClause - : NIL)); - - /* - * Order the groupClause appropriately. If the first grouping - * set is empty, this can match regular GROUP BY - * preprocessing, otherwise we have to force the groupClause - * to match that grouping set's order. - */ - groupclause = preprocess_groupclause(root, - linitial(current_sets)); - - /* - * Now that we've pinned down an order for the groupClause for - * this list of grouping sets, we need to remap the entries in - * the grouping sets from sortgrouprefs to plain indices - * (0-based) into the groupClause for this collection of - * grouping sets. - */ - ref = 0; - foreach(lc, groupclause) - { - SortGroupClause *gc = lfirst(lc); - - tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; - } - - foreach(lc, current_sets) - { - foreach(lc2, (List *) lfirst(lc)) - { - lfirst_int(lc2) = tleref_to_colnum_map[lfirst_int(lc2)]; - } - } - - /* Save the reordered sets and corresponding groupclauses */ - rollup_lists = lcons(current_sets, rollup_lists); - rollup_groupclauses = lcons(groupclause, rollup_groupclauses); - } + gset_data = preprocess_grouping_sets(root); } else { @@ -1721,8 +1670,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Set up data needed by standard_qp_callback */ qp_extra.tlist = tlist; qp_extra.activeWindows = activeWindows; - qp_extra.groupClause = - parse->groupingSets ? llast(rollup_groupclauses) : parse->groupClause; + qp_extra.groupClause = (gset_data + ? (gset_data->rollups ? ((RollupData *) linitial(gset_data->rollups))->groupClause : NIL) + : parse->groupClause); /* * Generate the best unsorted and presorted paths for the scan/join @@ -1922,8 +1872,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel, grouping_target, &agg_costs, - rollup_lists, - rollup_groupclauses); + gset_data); /* Fix things up if grouping_target contains SRFs */ if (parse->hasTargetSRFs) adjust_paths_for_srfs(root, current_rel, @@ -1960,7 +1909,6 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel = create_distinct_paths(root, current_rel); } - } /* end of if (setOperations) */ /* @@ -2113,6 +2061,221 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Note: currently, we leave it to callers to do set_cheapest() */ } +/* + * Do preprocessing for groupingSets clause and related data. This handles the + * preliminary steps of expanding the grouping sets, organizing them into lists + * of rollups, and preparing annotations which will later be filled in with + * size estimates. 
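+ *
+ * The returned grouping_sets_data also records which sortgrouprefs are
+ * unsortable or unhashable, so that later planning can choose between
+ * sorting and hashing for each rollup.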
+ */ +static grouping_sets_data * +preprocess_grouping_sets(PlannerInfo *root) +{ + Query *parse = root->parse; + List *sets; + int maxref = 0; + ListCell *lc; + ListCell *lc_set; + grouping_sets_data *gd = palloc0(sizeof(grouping_sets_data)); + + parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); + + gd->any_hashable = false; + gd->unhashable_refs = NULL; + gd->unsortable_refs = NULL; + gd->unsortable_sets = NIL; + + if (parse->groupClause) + { + ListCell *lc; + + foreach(lc, parse->groupClause) + { + SortGroupClause *gc = lfirst(lc); + Index ref = gc->tleSortGroupRef; + + if (ref > maxref) + maxref = ref; + + if (!gc->hashable) + gd->unhashable_refs = bms_add_member(gd->unhashable_refs, ref); + + if (!OidIsValid(gc->sortop)) + gd->unsortable_refs = bms_add_member(gd->unsortable_refs, ref); + } + } + + /* Allocate workspace array for remapping */ + gd->tleref_to_colnum_map = (int *) palloc((maxref + 1) * sizeof(int)); + + /* + * If we have any unsortable sets, we must extract them before trying to + * prepare rollups. Unsortable sets don't go through + * reorder_grouping_sets, so we must apply the GroupingSetData annotation + * here. + */ + if (!bms_is_empty(gd->unsortable_refs)) + { + List *sortable_sets = NIL; + + foreach(lc, parse->groupingSets) + { + List *gset = lfirst(lc); + + if (bms_overlap_list(gd->unsortable_refs, gset)) + { + GroupingSetData *gs = makeNode(GroupingSetData); + + gs->set = gset; + gd->unsortable_sets = lappend(gd->unsortable_sets, gs); + + /* + * We must enforce here that an unsortable set is hashable; + * later code assumes this. Parse analysis only checks that + * every individual column is either hashable or sortable. + * + * Note that passing this test doesn't guarantee we can + * generate a plan; there might be other showstoppers. + */ + if (bms_overlap_list(gd->unhashable_refs, gset)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("could not implement GROUP BY"), + errdetail("Some of the datatypes only support hashing, while others only support sorting."))); + } + else + sortable_sets = lappend(sortable_sets, gset); + } + + if (sortable_sets) + sets = extract_rollup_sets(sortable_sets); + else + sets = NIL; + } + else + sets = extract_rollup_sets(parse->groupingSets); + + foreach(lc_set, sets) + { + List *current_sets = (List *) lfirst(lc_set); + RollupData *rollup = makeNode(RollupData); + GroupingSetData *gs; + + /* + * Reorder the current list of grouping sets into correct prefix + * order. If only one aggregation pass is needed, try to make the + * list match the ORDER BY clause; if more than one pass is needed, we + * don't bother with that. + * + * Note that this reorders the sets from smallest-member-first to + * largest-member-first, and applies the GroupingSetData annotations, + * though the data will be filled in later. + */ + current_sets = reorder_grouping_sets(current_sets, + (list_length(sets) == 1 + ? parse->sortClause + : NIL)); + + /* + * Get the initial (and therefore largest) grouping set. + */ + gs = linitial(current_sets); + + /* + * Order the groupClause appropriately. If the first grouping set is + * empty, then the groupClause must also be empty; otherwise we have + * to force the groupClause to match that grouping set's order. + * + * (The first grouping set can be empty even though parse->groupClause + * is not empty only if all non-empty grouping sets are unsortable. + * The groupClauses for hashed grouping sets are built later on.) 
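+		 *
+		 * (For example, in a hypothetical "group by grouping sets
+		 * (unsortable_col, ())", the unsortable set was split off above,
+		 * leaving only the empty set here even though parse->groupClause
+		 * is non-empty.)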
+ */ + if (gs->set) + rollup->groupClause = preprocess_groupclause(root, gs->set); + else + rollup->groupClause = NIL; + + /* + * Is it hashable? We pretend empty sets are hashable even though we + * actually force them not to be hashed later. But don't bother if + * there's nothing but empty sets (since in that case we can't hash + * anything). + */ + if (gs->set && + !bms_overlap_list(gd->unhashable_refs, gs->set)) + { + rollup->hashable = true; + gd->any_hashable = true; + } + + /* + * Now that we've pinned down an order for the groupClause for this + * list of grouping sets, we need to remap the entries in the grouping + * sets from sortgrouprefs to plain indices (0-based) into the + * groupClause for this collection of grouping sets. We keep the + * original form for later use, though. + */ + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + current_sets, + gd->tleref_to_colnum_map); + rollup->gsets_data = current_sets; + + gd->rollups = lappend(gd->rollups, rollup); + } + + if (gd->unsortable_sets) + { + /* + * We have not yet pinned down a groupclause for this, but we will + * need index-based lists for estimation purposes. Construct + * hash_sets_idx based on the entire original groupclause for now. + */ + gd->hash_sets_idx = remap_to_groupclause_idx(parse->groupClause, + gd->unsortable_sets, + gd->tleref_to_colnum_map); + gd->any_hashable = true; + } + + return gd; +} + +/* + * Given a groupclause and a list of GroupingSetData, return equivalent sets + * (without annotation) mapped to indexes into the given groupclause. + */ +static List * +remap_to_groupclause_idx(List *groupClause, + List *gsets, + int *tleref_to_colnum_map) +{ + int ref = 0; + List *result = NIL; + ListCell *lc; + + foreach(lc, groupClause) + { + SortGroupClause *gc = lfirst(lc); + + tleref_to_colnum_map[gc->tleSortGroupRef] = ref++; + } + + foreach(lc, gsets) + { + List *set = NIL; + ListCell *lc2; + GroupingSetData *gs = lfirst(lc); + + foreach(lc2, gs->set) + { + set = lappend_int(set, tleref_to_colnum_map[lfirst_int(lc2)]); + } + + result = lappend(result, set); + } + + return result; +} + + /* * Detect whether a plan node is a "dummy" plan created when a relation @@ -3028,7 +3191,7 @@ extract_rollup_sets(List *groupingSets) /* * Reorder the elements of a list of grouping sets such that they have correct - * prefix relationships. + * prefix relationships. Also inserts the GroupingSetData annotations. * * The input must be ordered with smallest sets first; the result is returned * with largest sets first. 
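(For example, the sets (a,b,c), (a,b) and () have the required prefix relationship once ordered largest-first: each later set is a prefix of the one before it, so a single sort order serves the whole rollup.)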
Note that the result shares no list substructure @@ -3051,6 +3214,7 @@ reorder_grouping_sets(List *groupingsets, List *sortclause) { List *candidate = lfirst(lc); List *new_elems = list_difference_int(candidate, previous); + GroupingSetData *gs = makeNode(GroupingSetData); if (list_length(new_elems) > 0) { @@ -3078,7 +3242,8 @@ reorder_grouping_sets(List *groupingsets, List *sortclause) } } - result = lcons(list_copy(previous), result); + gs->set = list_copy(previous); + result = lcons(gs, result); list_free(new_elems); } @@ -3173,15 +3338,16 @@ standard_qp_callback(PlannerInfo *root, void *extra) * Estimate number of groups produced by grouping clauses (1 if not grouping) * * path_rows: number of output rows from scan/join step - * rollup_lists: list of grouping sets, or NIL if not doing grouping sets - * rollup_groupclauses: list of grouping clauses for grouping sets, - * or NIL if not doing grouping sets + * gsets: grouping set data, or NULL if not doing grouping sets + * + * If doing grouping sets, we also annotate the gsets data with the estimates + * for each set and each individual rollup list, with a view to later + * determining whether some combination of them could be hashed instead. */ static double get_number_of_groups(PlannerInfo *root, double path_rows, - List *rollup_lists, - List *rollup_groupclauses) + grouping_sets_data *gd) { Query *parse = root->parse; double dNumGroups; @@ -3193,28 +3359,60 @@ get_number_of_groups(PlannerInfo *root, if (parse->groupingSets) { /* Add up the estimates for each grouping set */ - ListCell *lc, - *lc2; + ListCell *lc; + ListCell *lc2; dNumGroups = 0; - forboth(lc, rollup_groupclauses, lc2, rollup_lists) + + foreach(lc, gd->rollups) { - List *groupClause = (List *) lfirst(lc); - List *gsets = (List *) lfirst(lc2); - ListCell *lc3; + RollupData *rollup = lfirst(lc); + ListCell *lc; - groupExprs = get_sortgrouplist_exprs(groupClause, + groupExprs = get_sortgrouplist_exprs(rollup->groupClause, parse->targetList); - foreach(lc3, gsets) + rollup->numGroups = 0.0; + + forboth(lc, rollup->gsets, lc2, rollup->gsets_data) { - List *gset = (List *) lfirst(lc3); + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst(lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + + gs->numGroups = numGroups; + rollup->numGroups += numGroups; + } + + dNumGroups += rollup->numGroups; + } + + if (gd->hash_sets_idx) + { + ListCell *lc; + + gd->dNumHashGroups = 0; - dNumGroups += estimate_num_groups(root, - groupExprs, - path_rows, - &gset); + groupExprs = get_sortgrouplist_exprs(parse->groupClause, + parse->targetList); + + forboth(lc, gd->hash_sets_idx, lc2, gd->unsortable_sets) + { + List *gset = (List *) lfirst(lc); + GroupingSetData *gs = lfirst(lc2); + double numGroups = estimate_num_groups(root, + groupExprs, + path_rows, + &gset); + + gs->numGroups = numGroups; + gd->dNumHashGroups += numGroups; } + + dNumGroups += gd->dNumHashGroups; } } else @@ -3250,6 +3448,11 @@ get_number_of_groups(PlannerInfo *root, * estimate_hashagg_tablesize * estimate the number of bytes that a hash aggregate hashtable will * require based on the agg_costs, path width and dNumGroups. + * + * XXX this may be over-estimating the size now that hashagg knows to omit + * unneeded columns from the hashtable. Also for mixed-mode grouping sets, + * grouping columns not in the hashed set are counted here even though hashagg + * won't store them. Is this a problem? 
*/ static Size estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs, @@ -3300,8 +3503,7 @@ create_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, PathTarget *target, const AggClauseCosts *agg_costs, - List *rollup_lists, - List *rollup_groupclauses) + grouping_sets_data *gd) { Query *parse = root->parse; Path *cheapest_path = input_rel->cheapest_total_path; @@ -3410,8 +3612,7 @@ create_grouping_paths(PlannerInfo *root, */ dNumGroups = get_number_of_groups(root, cheapest_path->rows, - rollup_lists, - rollup_groupclauses); + gd); /* * Determine whether it's possible to perform sort-based implementations @@ -3419,15 +3620,22 @@ create_grouping_paths(PlannerInfo *root, * grouping_is_sortable() is trivially true, and all the * pathkeys_contained_in() tests will succeed too, so that we'll consider * every surviving input path.) + * + * If we have grouping sets, we might be able to sort some but not all of + * them; in this case, we need can_sort to be true as long as we must + * consider any sorted-input plan. */ - can_sort = grouping_is_sortable(parse->groupClause); + can_sort = (gd && gd->rollups != NIL) + || grouping_is_sortable(parse->groupClause); /* * Determine whether we should consider hash-based implementations of * grouping. * - * Hashed aggregation only applies if we're grouping. We currently can't - * hash if there are grouping sets, though. + * Hashed aggregation only applies if we're grouping. If we have grouping + * sets, some groups might be hashable but others not; in this case we set + * can_hash true as long as there is nothing globally preventing us from + * hashing (and we should therefore consider plans with hashes). * * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY * aggregates. (Doing so would imply storing *all* the input values in @@ -3440,9 +3648,8 @@ create_grouping_paths(PlannerInfo *root, * other gating conditions, so we want to do it last. */ can_hash = (parse->groupClause != NIL && - parse->groupingSets == NIL && agg_costs->numOrderedAggs == 0 && - grouping_is_hashable(parse->groupClause)); + (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))); /* * If grouped_rel->consider_parallel is true, then paths that we generate @@ -3508,8 +3715,7 @@ create_grouping_paths(PlannerInfo *root, /* Estimate number of partial groups. */ dNumPartialGroups = get_number_of_groups(root, cheapest_partial_path->rows, - NIL, - NIL); + gd); /* * Collect statistics about aggregates for estimating costs of @@ -3642,20 +3848,9 @@ create_grouping_paths(PlannerInfo *root, /* Now decide what to stick atop it */ if (parse->groupingSets) { - /* - * We have grouping sets, possibly with aggregation. Make - * a GroupingSetsPath. - */ - add_path(grouped_rel, (Path *) - create_groupingsets_path(root, - grouped_rel, - path, - target, - (List *) parse->havingQual, - rollup_lists, - rollup_groupclauses, - agg_costs, - dNumGroups)); + consider_groupingsets_paths(root, grouped_rel, + path, true, can_hash, target, + gd, agg_costs, dNumGroups); } else if (parse->hasAggs) { @@ -3816,33 +4011,45 @@ create_grouping_paths(PlannerInfo *root, if (can_hash) { - hashaggtablesize = estimate_hashagg_tablesize(cheapest_path, - agg_costs, - dNumGroups); - - /* - * Provided that the estimated size of the hashtable does not exceed - * work_mem, we'll generate a HashAgg Path, although if we were unable - * to sort above, then we'd better generate a Path, so that we at - * least have one. 
- */ - if (hashaggtablesize < work_mem * 1024L || - grouped_rel->pathlist == NIL) + if (parse->groupingSets) { /* - * We just need an Agg over the cheapest-total input path, since - * input order won't matter. + * Try for a hash-only groupingsets path over unsorted input. */ - add_path(grouped_rel, (Path *) - create_agg_path(root, grouped_rel, - cheapest_path, - target, - AGG_HASHED, - AGGSPLIT_SIMPLE, - parse->groupClause, - (List *) parse->havingQual, - agg_costs, - dNumGroups)); + consider_groupingsets_paths(root, grouped_rel, + cheapest_path, false, true, target, + gd, agg_costs, dNumGroups); + } + else + { + hashaggtablesize = estimate_hashagg_tablesize(cheapest_path, + agg_costs, + dNumGroups); + + /* + * Provided that the estimated size of the hashtable does not + * exceed work_mem, we'll generate a HashAgg Path, although if we + * were unable to sort above, then we'd better generate a Path, so + * that we at least have one. + */ + if (hashaggtablesize < work_mem * 1024L || + grouped_rel->pathlist == NIL) + { + /* + * We just need an Agg over the cheapest-total input path, + * since input order won't matter. + */ + add_path(grouped_rel, (Path *) + create_agg_path(root, grouped_rel, + cheapest_path, + target, + AGG_HASHED, + AGGSPLIT_SIMPLE, + parse->groupClause, + (List *) parse->havingQual, + agg_costs, + dNumGroups)); + } } /* @@ -3921,6 +4128,344 @@ create_grouping_paths(PlannerInfo *root, return grouped_rel; } + +/* + * For a given input path, consider the possible ways of doing grouping sets on + * it, by combinations of hashing and sorting. This can be called multiple + * times, so it's important that it not scribble on input. No result is + * returned, but any generated paths are added to grouped_rel. + */ +static void +consider_groupingsets_paths(PlannerInfo *root, + RelOptInfo *grouped_rel, + Path *path, + bool is_sorted, + bool can_hash, + PathTarget *target, + grouping_sets_data *gd, + const AggClauseCosts *agg_costs, + double dNumGroups) +{ + Query *parse = root->parse; + + /* + * If we're not being offered sorted input, then only consider plans that + * can be done entirely by hashing. + * + * We can hash everything if it looks like it'll fit in work_mem. But if + * the input is actually sorted despite not being advertised as such, we + * prefer to make use of that in order to use less memory. + * + * If none of the grouping sets are sortable, then ignore the work_mem + * limit and generate a path anyway, since otherwise we'll just fail. + */ + if (!is_sorted) + { + List *new_rollups = NIL; + RollupData *unhashed_rollup = NULL; + List *sets_data; + List *empty_sets_data = NIL; + List *empty_sets = NIL; + ListCell *lc; + ListCell *l_start = list_head(gd->rollups); + AggStrategy strat = AGG_HASHED; + Size hashsize; + double exclude_groups = 0.0; + + Assert(can_hash); + + if (pathkeys_contained_in(root->group_pathkeys, path->pathkeys)) + { + unhashed_rollup = lfirst(l_start); + exclude_groups = unhashed_rollup->numGroups; + l_start = lnext(l_start); + } + + hashsize = estimate_hashagg_tablesize(path, + agg_costs, + dNumGroups - exclude_groups); + + /* + * gd->rollups is empty if we have only unsortable columns to work + * with. Override work_mem in that case; otherwise, we'll rely on the + * sorted-input case to generate usable mixed paths. + */ + if (hashsize > work_mem * 1024L && gd->rollups) + return; /* nope, won't fit */ + + /* + * We need to burst the existing rollups list into individual grouping + * sets and recompute a groupClause for each set. 
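+	 * (Each hashed set becomes its own single-set RollupData below; hashed
+	 * sets need no common sort order, so nothing requires keeping them
+	 * grouped into rollups.)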
+	 */
+	sets_data = list_copy(gd->unsortable_sets);
+
+	for_each_cell(lc, l_start)
+	{
+		RollupData *rollup = lfirst(lc);
+
+		/*
+		 * If we find an unhashable rollup that's not been skipped by the
+		 * "actually sorted" check above, we can't cope; we'd need sorted
+		 * input (with a different sort order) but we can't get that here.
+		 * So bail out; we'll get a valid path from the is_sorted case
+		 * instead.
+		 *
+		 * The mere presence of empty grouping sets doesn't make a rollup
+		 * unhashable (see preprocess_grouping_sets); we handle those
+		 * specially below.
+		 */
+		if (!rollup->hashable)
+			return;
+		else
+			sets_data = list_concat(sets_data, list_copy(rollup->gsets_data));
+	}
+	foreach(lc, sets_data)
+	{
+		GroupingSetData *gs = lfirst(lc);
+		List	   *gset = gs->set;
+		RollupData *rollup;
+
+		if (gset == NIL)
+		{
+			/* Empty grouping sets can't be hashed. */
+			empty_sets_data = lappend(empty_sets_data, gs);
+			empty_sets = lappend(empty_sets, NIL);
+		}
+		else
+		{
+			rollup = makeNode(RollupData);
+
+			rollup->groupClause = preprocess_groupclause(root, gset);
+			rollup->gsets_data = list_make1(gs);
+			rollup->gsets = remap_to_groupclause_idx(rollup->groupClause,
+													 rollup->gsets_data,
+													 gd->tleref_to_colnum_map);
+			rollup->numGroups = gs->numGroups;
+			rollup->hashable = true;
+			rollup->is_hashed = true;
+			new_rollups = lappend(new_rollups, rollup);
+		}
+	}
+
+	/*
+	 * If we didn't find anything nonempty to hash, then bail. We'll
+	 * generate a path from the is_sorted case.
+	 */
+	if (new_rollups == NIL)
+		return;
+
+	/*
+	 * If there were empty grouping sets they should have been in the
+	 * first rollup.
+	 */
+	Assert(!unhashed_rollup || !empty_sets);
+
+	if (unhashed_rollup)
+	{
+		new_rollups = lappend(new_rollups, unhashed_rollup);
+		strat = AGG_MIXED;
+	}
+	else if (empty_sets)
+	{
+		RollupData *rollup = makeNode(RollupData);
+
+		rollup->groupClause = NIL;
+		rollup->gsets_data = empty_sets_data;
+		rollup->gsets = empty_sets;
+		rollup->numGroups = list_length(empty_sets);
+		rollup->hashable = false;
+		rollup->is_hashed = false;
+		new_rollups = lappend(new_rollups, rollup);
+		strat = AGG_MIXED;
+	}
+
+	add_path(grouped_rel, (Path *)
+			 create_groupingsets_path(root,
+									  grouped_rel,
+									  path,
+									  target,
+									  (List *) parse->havingQual,
+									  strat,
+									  new_rollups,
+									  agg_costs,
+									  dNumGroups));
+		return;
+	}
+
+	/*
+	 * If we have sorted input but nothing we can do with it, bail.
+	 */
+	if (list_length(gd->rollups) == 0)
+		return;
+
+	/*
+	 * Given sorted input, we try to make two paths: one sorted and one mixed
+	 * sort/hash. (We need to try both because hashagg might be disabled, or
+	 * some columns might not be sortable.)
+	 *
+	 * can_hash is passed in as false if some obstacle elsewhere (such as
+	 * ordered aggs) means that we shouldn't consider hashing at all.
+	 */
+	if (can_hash && gd->any_hashable)
+	{
+		List	   *rollups = NIL;
+		List	   *hash_sets = list_copy(gd->unsortable_sets);
+		double		availspace = (work_mem * 1024.0);
+		ListCell   *lc;
+
+		/*
+		 * Account first for space needed for groups we can't sort at all.
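+		 * (These sets must be hashed no matter what, so their estimated
+		 * hashtable size comes off the work_mem budget before the knapsack
+		 * choice below.)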
+ */ + availspace -= (double) estimate_hashagg_tablesize(path, + agg_costs, + gd->dNumHashGroups); + + if (availspace > 0 && list_length(gd->rollups) > 1) + { + double scale; + int num_rollups = list_length(gd->rollups); + int k_capacity; + int *k_weights = palloc(num_rollups * sizeof(int)); + Bitmapset *hash_items = NULL; + int i; + + /* + * We treat this as a knapsack problem: the knapsack capacity + * represents work_mem, the item weights are the estimated memory + * usage of the hashtables needed to implement a single rollup, and + * we really ought to use the cost saving as the item value; + * however, currently the costs assigned to sort nodes don't + * reflect the comparison costs well, and so we treat all items as + * of equal value (each rollup we hash instead saves us one sort). + * + * To use the discrete knapsack, we need to scale the values to a + * reasonably small bounded range. We choose to allow a 5% error + * margin; we have no more than 4096 rollups in the worst possible + * case, which with a 5% error margin will require a bit over 42MB + * of workspace. (Anyone wanting to plan queries that complex had + * better have the memory for it. In more reasonable cases, with + * no more than a couple of dozen rollups, the memory usage will + * be negligible.) + * + * k_capacity is naturally bounded, but we clamp the values for + * scale and weight (below) to avoid overflows or underflows (or + * uselessly trying to use a scale factor less than 1 byte). + */ + scale = Max(availspace / (20.0 * num_rollups), 1.0); + k_capacity = (int) floor(availspace / scale); + + /* + * We leave the first rollup out of consideration since it's the + * one that matches the input sort order. We assign indexes "i" + * to only those entries considered for hashing; the second loop, + * below, must use the same condition. + */ + i = 0; + for_each_cell(lc, lnext(list_head(gd->rollups))) + { + RollupData *rollup = lfirst(lc); + + if (rollup->hashable) + { + double sz = estimate_hashagg_tablesize(path, + agg_costs, + rollup->numGroups); + + /* + * If sz is enormous, but work_mem (and hence scale) is + * small, avoid integer overflow here. + */ + k_weights[i] = (int) Min(floor(sz / scale), + k_capacity + 1.0); + ++i; + } + } + + /* + * Apply knapsack algorithm; compute the set of items which + * maximizes the value stored (in this case the number of sorts + * saved) while keeping the total size (approximately) within + * capacity. 
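+		 *
+		 * (Passing NULL for item_values makes DiscreteKnapsack treat every
+		 * item as having value 1, i.e. each rollup we hash saves us exactly
+		 * one sort.)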
+ */ + if (i > 0) + hash_items = DiscreteKnapsack(k_capacity, i, k_weights, NULL); + + if (!bms_is_empty(hash_items)) + { + rollups = list_make1(linitial(gd->rollups)); + + i = 0; + for_each_cell(lc, lnext(list_head(gd->rollups))) + { + RollupData *rollup = lfirst(lc); + + if (rollup->hashable) + { + if (bms_is_member(i, hash_items)) + hash_sets = list_concat(hash_sets, + list_copy(rollup->gsets_data)); + else + rollups = lappend(rollups, rollup); + ++i; + } + else + rollups = lappend(rollups, rollup); + } + } + } + + if (!rollups && hash_sets) + rollups = list_copy(gd->rollups); + + foreach(lc, hash_sets) + { + GroupingSetData *gs = lfirst(lc); + RollupData *rollup = makeNode(RollupData); + + Assert(gs->set != NIL); + + rollup->groupClause = preprocess_groupclause(root, gs->set); + rollup->gsets_data = list_make1(gs); + rollup->gsets = remap_to_groupclause_idx(rollup->groupClause, + rollup->gsets_data, + gd->tleref_to_colnum_map); + rollup->numGroups = gs->numGroups; + rollup->hashable = true; + rollup->is_hashed = true; + rollups = lcons(rollup, rollups); + } + + if (rollups) + { + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + AGG_MIXED, + rollups, + agg_costs, + dNumGroups)); + } + } + + /* + * Now try the simple sorted case. + */ + if (!gd->unsortable_sets) + add_path(grouped_rel, (Path *) + create_groupingsets_path(root, + grouped_rel, + path, + target, + (List *) parse->havingQual, + AGG_SORTED, + gd->rollups, + agg_costs, + dNumGroups)); +} + /* * create_window_paths * diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index fca96eb001..999ebcee70 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2697,10 +2697,9 @@ create_agg_path(PlannerInfo *root, * 'subpath' is the path representing the source of data * 'target' is the PathTarget to be computed * 'having_qual' is the HAVING quals if any - * 'rollup_lists' is a list of grouping sets - * 'rollup_groupclauses' is a list of grouping clauses for grouping sets + * 'rollups' is a list of RollupData nodes * 'agg_costs' contains cost info about the aggregate functions to be computed - * 'numGroups' is the estimated number of groups + * 'numGroups' is the estimated total number of groups */ GroupingSetsPath * create_groupingsets_path(PlannerInfo *root, @@ -2708,13 +2707,15 @@ create_groupingsets_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *having_qual, - List *rollup_lists, - List *rollup_groupclauses, + AggStrategy aggstrategy, + List *rollups, const AggClauseCosts *agg_costs, double numGroups) { GroupingSetsPath *pathnode = makeNode(GroupingSetsPath); - int numGroupCols; + ListCell *lc; + bool is_first = true; + bool is_first_sort = true; /* The topmost generated Plan node will be an Agg */ pathnode->path.pathtype = T_Agg; @@ -2727,75 +2728,110 @@ create_groupingsets_path(PlannerInfo *root, pathnode->path.parallel_workers = subpath->parallel_workers; pathnode->subpath = subpath; + /* + * Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED + * to AGG_HASHED, here if possible. 
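+	 *
+	 * (A single rollup with an empty groupClause is just a plain aggregate
+	 * over all input; a single rollup under AGG_MIXED can only be a hashed
+	 * one, so plain AGG_HASHED suffices.)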
+ */ + if (aggstrategy == AGG_SORTED && + list_length(rollups) == 1 && + ((RollupData *) linitial(rollups))->groupClause == NIL) + aggstrategy = AGG_PLAIN; + + if (aggstrategy == AGG_MIXED && + list_length(rollups) == 1) + aggstrategy = AGG_HASHED; + /* * Output will be in sorted order by group_pathkeys if, and only if, there * is a single rollup operation on a non-empty list of grouping * expressions. */ - if (list_length(rollup_groupclauses) == 1 && - ((List *) linitial(rollup_groupclauses)) != NIL) + if (aggstrategy == AGG_SORTED && list_length(rollups) == 1) pathnode->path.pathkeys = root->group_pathkeys; else pathnode->path.pathkeys = NIL; - pathnode->rollup_groupclauses = rollup_groupclauses; - pathnode->rollup_lists = rollup_lists; + pathnode->aggstrategy = aggstrategy; + pathnode->rollups = rollups; pathnode->qual = having_qual; - Assert(rollup_lists != NIL); - Assert(list_length(rollup_lists) == list_length(rollup_groupclauses)); - - /* Account for cost of the topmost Agg node */ - numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists))); - - cost_agg(&pathnode->path, root, - (numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN, - agg_costs, - numGroupCols, - numGroups, - subpath->startup_cost, - subpath->total_cost, - subpath->rows); + Assert(rollups != NIL); + Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1); + Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1); - /* - * Add in the costs and output rows of the additional sorting/aggregation - * steps, if any. Only total costs count, since the extra sorts aren't - * run on startup. - */ - if (list_length(rollup_lists) > 1) + foreach(lc, rollups) { - ListCell *lc; + RollupData *rollup = lfirst(lc); + List *gsets = rollup->gsets; + int numGroupCols = list_length(linitial(gsets)); - foreach(lc, rollup_lists) + /* + * In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the + * (already-sorted) input, and following ones do their own sort. + * + * In AGG_HASHED mode, there is one rollup for each grouping set. + * + * In AGG_MIXED mode, the first rollups are hashed, the first + * non-hashed one takes the (already-sorted) input, and following ones + * do their own sort. + */ + if (is_first) + { + cost_agg(&pathnode->path, root, + aggstrategy, + agg_costs, + numGroupCols, + rollup->numGroups, + subpath->startup_cost, + subpath->total_cost, + subpath->rows); + is_first = false; + if (!rollup->is_hashed) + is_first_sort = false; + } + else { - List *gsets = (List *) lfirst(lc); Path sort_path; /* dummy for result of cost_sort */ Path agg_path; /* dummy for result of cost_agg */ - /* We must iterate over all but the last rollup_lists element */ - if (lnext(lc) == NULL) - break; - - /* Account for cost of sort, but don't charge input cost again */ - cost_sort(&sort_path, root, NIL, - 0.0, - subpath->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - -1.0); - - /* Account for cost of aggregation */ - numGroupCols = list_length((List *) linitial(gsets)); - - cost_agg(&agg_path, root, - AGG_SORTED, - agg_costs, - numGroupCols, - numGroups, /* XXX surely not right for all steps? */ - sort_path.startup_cost, - sort_path.total_cost, - sort_path.rows); + if (rollup->is_hashed || is_first_sort) + { + /* + * Account for cost of aggregation, but don't charge input + * cost again + */ + cost_agg(&agg_path, root, + rollup->is_hashed ? 
AGG_HASHED : AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + 0.0, 0.0, + subpath->rows); + if (!rollup->is_hashed) + is_first_sort = false; + } + else + { + /* Account for cost of sort, but don't charge input cost again */ + cost_sort(&sort_path, root, NIL, + 0.0, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + -1.0); + + /* Account for cost of aggregation */ + + cost_agg(&agg_path, root, + AGG_SORTED, + agg_costs, + numGroupCols, + rollup->numGroups, + sort_path.startup_cost, + sort_path.total_cost, + sort_path.rows); + } pathnode->path.total_cost += agg_path.total_cost; pathnode->path.rows += agg_path.rows; diff --git a/src/include/lib/knapsack.h b/src/include/lib/knapsack.h new file mode 100644 index 0000000000..8d1e6d0aa0 --- /dev/null +++ b/src/include/lib/knapsack.h @@ -0,0 +1,17 @@ +/* + * knapsack.h + * + * Copyright (c) 2017, PostgreSQL Global Development Group + * + * src/include/lib/knapsack.h + */ +#ifndef KNAPSACK_H +#define KNAPSACK_H + +#include "postgres.h" +#include "nodes/bitmapset.h" + +extern Bitmapset *DiscreteKnapsack(int max_weight, int num_items, + int *item_weights, double *item_values); + +#endif /* KNAPSACK_H */ diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 4f1910e5a9..109f7b0c14 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -20,6 +20,11 @@ #ifndef BITMAPSET_H #define BITMAPSET_H +/* + * Forward decl to save including pg_list.h + */ +struct List; + /* * Data representation */ @@ -70,6 +75,7 @@ extern bool bms_is_subset(const Bitmapset *a, const Bitmapset *b); extern BMS_Comparison bms_subset_compare(const Bitmapset *a, const Bitmapset *b); extern bool bms_is_member(int x, const Bitmapset *a); extern bool bms_overlap(const Bitmapset *a, const Bitmapset *b); +extern bool bms_overlap_list(const Bitmapset *a, const struct List *b); extern bool bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b); extern int bms_singleton_member(const Bitmapset *a); extern bool bms_get_singleton_member(const Bitmapset *a, int *member); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ff42895118..11a68500ee 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1699,6 +1699,7 @@ typedef struct AggStatePerAggData *AggStatePerAgg; typedef struct AggStatePerTransData *AggStatePerTrans; typedef struct AggStatePerGroupData *AggStatePerGroup; typedef struct AggStatePerPhaseData *AggStatePerPhase; +typedef struct AggStatePerHashData *AggStatePerHash; typedef struct AggState { @@ -1706,15 +1707,17 @@ typedef struct AggState List *aggs; /* all Aggref nodes in targetlist & quals */ int numaggs; /* length of list (could be zero!) 
*/ int numtrans; /* number of pertrans items */ + AggStrategy aggstrategy; /* strategy mode */ AggSplit aggsplit; /* agg-splitting mode, see nodes.h */ AggStatePerPhase phase; /* pointer to current phase data */ - int numphases; /* number of phases */ + int numphases; /* number of phases (including phase 0) */ int current_phase; /* current phase number */ - FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ AggStatePerAgg peragg; /* per-Aggref information */ AggStatePerTrans pertrans; /* per-Trans state information */ + ExprContext *hashcontext; /* econtexts for long-lived data (hashtable) */ ExprContext **aggcontexts; /* econtexts for long-lived data (per GS) */ ExprContext *tmpcontext; /* econtext for input expressions */ + ExprContext *curaggcontext; /* currently active aggcontext */ AggStatePerTrans curpertrans; /* currently active trans state */ bool input_done; /* indicates end of input */ bool agg_done; /* indicates completion of Agg scan */ @@ -1726,21 +1729,17 @@ typedef struct AggState /* These fields are for grouping set phase data */ int maxsets; /* The max number of sets in any phase */ AggStatePerPhase phases; /* array of all phases */ - Tuplesortstate *sort_in; /* sorted input to phases > 0 */ + Tuplesortstate *sort_in; /* sorted input to phases > 1 */ Tuplesortstate *sort_out; /* input is copied here for next phase */ TupleTableSlot *sort_slot; /* slot for sort results */ /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ HeapTuple grp_firstTuple; /* copy of first tuple of current group */ - /* these fields are used in AGG_HASHED mode: */ - TupleHashTable hashtable; /* hash table with one entry per group */ - TupleTableSlot *hashslot; /* slot for loading hash table */ - int numhashGrpCols; /* number of columns in hash table */ - int largestGrpColIdx; /* largest column required for hashing */ - AttrNumber *hashGrpColIdxInput; /* and their indices in input slot */ - AttrNumber *hashGrpColIdxHash; /* indices for execGrouping in hashtbl */ + /* these fields are used in AGG_HASHED and AGG_MIXED modes: */ bool table_filled; /* hash table filled yet? 
*/ - TupleHashIterator hashiter; /* for iterating through hash table */ + int num_hashes; + AggStatePerHash perhash; + AggStatePerGroup *hash_pergroup; /* array of per-group pointers */ /* support for evaluation of agg inputs */ TupleTableSlot *evalslot; /* slot for agg inputs */ ProjectionInfo *evalproj; /* projection machinery */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index c83216943c..b9369ac275 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -261,6 +261,8 @@ typedef enum NodeTag T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, + T_RollupData, + T_GroupingSetData, T_StatisticExtInfo, /* @@ -724,7 +726,8 @@ typedef enum AggStrategy { AGG_PLAIN, /* simple agg across all input rows */ AGG_SORTED, /* grouped agg, input must be sorted */ - AGG_HASHED /* grouped agg, use internal hashtable */ + AGG_HASHED, /* grouped agg, use internal hashtable */ + AGG_MIXED /* grouped agg, hash and sort both used */ } AggStrategy; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 4a95e16b69..6e531b6238 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -758,7 +758,7 @@ typedef struct Agg Oid *grpOperators; /* equality operators to compare with */ long numGroups; /* estimated number of groups in input */ Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */ - /* Note: planner provides numGroups & aggParams only in AGG_HASHED case */ + /* Note: planner provides numGroups & aggParams only in HASHED/MIXED case */ List *groupingSets; /* grouping sets to use */ List *chain; /* chained Agg/Sort nodes */ } Agg; diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 0a5187cef3..8930edf826 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1417,18 +1417,38 @@ typedef struct AggPath List *qual; /* quals (HAVING quals), if any */ } AggPath; +/* + * Various annotations used for grouping sets in the planner. + */ + +typedef struct GroupingSetData +{ + NodeTag type; + List *set; /* grouping set as list of sortgrouprefs */ + double numGroups; /* est. number of result groups */ +} GroupingSetData; + +typedef struct RollupData +{ + NodeTag type; + List *groupClause; /* applicable subset of parse->groupClause */ + List *gsets; /* lists of integer indexes into groupClause */ + List *gsets_data; /* list of GroupingSetData */ + double numGroups; /* est. number of result groups */ + bool hashable; /* can be hashed */ + bool is_hashed; /* to be implemented as a hashagg */ +} RollupData; + /* * GroupingSetsPath represents a GROUPING SETS aggregation - * - * Currently we only support this in sorted not hashed form, so the input - * must always be appropriately presorted. 
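+ *
+ * Can be implemented by hashing alone, sorting alone, or a mixture of the
+ * two (see the aggstrategy field and the per-rollup is_hashed flags).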
*/ + typedef struct GroupingSetsPath { Path path; Path *subpath; /* path representing input source */ - List *rollup_groupclauses; /* list of lists of SortGroupClause's */ - List *rollup_lists; /* parallel list of lists of grouping sets */ + AggStrategy aggstrategy; /* basic strategy */ + List *rollups; /* list of RollupData */ List *qual; /* quals (HAVING quals), if any */ } GroupingSetsPath; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 81640de7ab..c72c7e02cb 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -195,8 +195,8 @@ extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root, Path *subpath, PathTarget *target, List *having_qual, - List *rollup_lists, - List *rollup_groupclauses, + AggStrategy aggstrategy, + List *rollups, const AggClauseCosts *agg_costs, double numGroups); extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root, diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index b0886da8d7..1374710087 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -13,6 +13,13 @@ copy gstest2 from stdin; create temp table gstest3 (a integer, b integer, c integer, d integer); copy gstest3 from stdin; alter table gstest3 add primary key (a); +create temp table gstest4(id integer, v integer, + unhashable_col bit(4), unsortable_col xid); +insert into gstest4 +values (1,1,b'0000','1'), (2,2,b'0001','1'), + (3,4,b'0010','2'), (4,8,b'0011','2'), + (5,16,b'0000','2'), (6,32,b'0001','2'), + (7,64,b'0010','1'), (8,128,b'0011','1'); create temp table gstest_empty (a integer, b integer, v integer); create function gstest_data(v integer, out a integer, out b integer) returns setof record @@ -22,6 +29,7 @@ create function gstest_data(v integer, out a integer, out b integer) end; $f$ language plpgsql; -- basic functionality +set enable_hashagg = false; -- test hashing explicitly later -- simple rollup with multiple plain aggregates, with and without ordering -- (and with ordering differing from grouping) select a, b, grouping(a,b), sum(v), count(*), max(v) @@ -462,7 +470,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); -- Tests for chained aggregates select a, b, grouping(a,b), sum(v), count(*), max(v) - from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; a | b | grouping | sum | count | max ---+---+----------+-----+-------+----- 1 | 1 | 0 | 21 | 2 | 11 @@ -473,19 +481,19 @@ select a, b, grouping(a,b), sum(v), count(*), max(v) 3 | 4 | 0 | 17 | 1 | 17 4 | 1 | 0 | 37 | 2 | 19 | | 3 | 21 | 2 | 11 - | | 3 | 25 | 2 | 13 - | | 3 | 14 | 1 | 14 - | | 3 | 15 | 1 | 15 - | | 3 | 16 | 1 | 16 - | | 3 | 17 | 1 | 17 - | | 3 | 37 | 2 | 19 | | 3 | 21 | 2 | 11 | | 3 | 25 | 2 | 13 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 | | 3 | 14 | 1 | 14 | | 3 | 15 | 1 | 15 + | | 3 | 15 | 1 | 15 | | 3 | 16 | 1 | 16 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 | | 3 | 17 | 1 | 17 | | 3 | 37 | 2 | 19 + | | 3 | 37 | 2 | 19 (21 rows) select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); @@ -847,4 +855,598 @@ select sum(ten) from onek group by rollup(four::text), two order by 1; 2500 (6 rows) +-- hashing support +set enable_hashagg = true; +-- failure cases +select count(*) from gstest4 group by 
rollup(unhashable_col,unsortable_col); +ERROR: could not implement GROUP BY +DETAIL: Some of the datatypes only support hashing, while others only support sorting. +select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id)); +ERROR: could not implement GROUP BY +DETAIL: Some of the datatypes only support hashing, while others only support sorting. +-- simple cases +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | | 1 | 60 | 5 | 14 + 2 | | 1 | 15 | 1 | 15 + 3 | | 1 | 33 | 2 | 17 + 4 | | 1 | 37 | 2 | 19 + | 1 | 2 | 58 | 4 | 19 + | 2 | 2 | 25 | 2 | 13 + | 3 | 2 | 45 | 3 | 16 + | 4 | 2 | 17 | 1 | 17 +(8 rows) + +explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Sort + Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2 + -> HashAggregate + Hash Key: "*VALUES*".column1 + Hash Key: "*VALUES*".column2 + -> Values Scan on "*VALUES*" +(6 rows) + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + 1 | | 1 | 60 | 5 | 14 + 2 | | 1 | 15 | 1 | 15 + 3 | | 1 | 33 | 2 | 17 + 4 | | 1 | 37 | 2 | 19 + | 1 | 2 | 58 | 4 | 19 + | 2 | 2 | 25 | 2 | 13 + | 3 | 2 | 45 | 3 | 16 + | 4 | 2 | 17 | 1 | 17 + | | 3 | 145 | 10 | 19 +(16 rows) + +explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Sort + Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), "*VALUES*".column1, "*VALUES*".column2 + -> MixedAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: "*VALUES*".column1 + Hash Key: "*VALUES*".column2 + Group Key: () + -> Values Scan on "*VALUES*" +(8 rows) + +-- shouldn't try and hash +explain (costs off) + select a, b, grouping(a,b), array_agg(v order by v) + from gstest1 group by cube(a,b); + QUERY PLAN +---------------------------------------------------------- + GroupAggregate + Group Key: "*VALUES*".column1, "*VALUES*".column2 + Group Key: "*VALUES*".column1 + Group Key: () + Sort Key: "*VALUES*".column2 + Group Key: "*VALUES*".column2 + -> Sort + Sort Key: "*VALUES*".column1, "*VALUES*".column2 + -> Values Scan on "*VALUES*" +(9 rows) + +-- mixed hashable/sortable cases +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3, 5; + unhashable_col | unsortable_col | grouping | count | sum +----------------+----------------+----------+-------+----- + 0000 | | 1 | 2 | 17 + 0001 | | 1 | 2 | 34 + 0010 | | 1 | 2 | 68 + 0011 | | 1 | 2 | 136 + | 2 | 2 | 4 | 60 + | 1 | 2 | 4 | 195 +(6 rows) + +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 
group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------ + Sort + Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v)) + -> MixedAggregate + Hash Key: unsortable_col + Group Key: unhashable_col + -> Sort + Sort Key: unhashable_col + -> Seq Scan on gstest4 +(8 rows) + +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; + unhashable_col | unsortable_col | grouping | count | sum +----------------+----------------+----------+-------+----- + 0000 | | 1 | 1 | 1 + 0001 | | 1 | 1 | 2 + 0010 | | 1 | 1 | 4 + 0011 | | 1 | 1 | 8 + 0000 | | 1 | 1 | 16 + 0001 | | 1 | 1 | 32 + 0010 | | 1 | 1 | 64 + 0011 | | 1 | 1 | 128 + | 1 | 2 | 1 | 1 + | 1 | 2 | 1 | 2 + | 2 | 2 | 1 | 4 + | 2 | 2 | 1 | 8 + | 2 | 2 | 1 | 16 + | 2 | 2 | 1 | 32 + | 1 | 2 | 1 | 64 + | 1 | 2 | 1 | 128 +(16 rows) + +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; + QUERY PLAN +------------------------------------------------------------------ + Sort + Sort Key: (GROUPING(unhashable_col, unsortable_col)), (sum(v)) + -> MixedAggregate + Hash Key: v, unsortable_col + Group Key: v, unhashable_col + -> Sort + Sort Key: v, unhashable_col + -> Seq Scan on gstest4 +(8 rows) + +-- empty input: first is 0 rows, second 1, third 3 etc. +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); + a | b | sum | count +---+---+-----+------- +(0 rows) + +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); + QUERY PLAN +-------------------------------- + HashAggregate + Hash Key: a, b + Hash Key: a + -> Seq Scan on gstest_empty +(4 rows) + +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); + a | b | sum | count +---+---+-----+------- + | | | 0 +(1 row) + +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); + a | b | sum | count +---+---+-----+------- + | | | 0 + | | | 0 + | | | 0 +(3 rows) + +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); + QUERY PLAN +-------------------------------- + MixedAggregate + Hash Key: a, b + Group Key: () + Group Key: () + Group Key: () + -> Seq Scan on gstest_empty +(6 rows) + +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + sum | count +-----+------- + | 0 + | 0 + | 0 +(3 rows) + +explain (costs off) + select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + QUERY PLAN +-------------------------------- + Aggregate + Group Key: () + Group Key: () + Group Key: () + -> Seq Scan on gstest_empty +(5 rows) + +-- check that functionally dependent cols are not nulled +select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); + a | d | grouping +---+---+---------- + 1 | 1 | 1 + 2 | 2 | 1 + 1 | 1 | 2 + 2 | 2 | 2 +(4 rows) + +explain (costs off) + select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); + QUERY PLAN +--------------------------- + HashAggregate + Hash Key: a, b + Hash Key: a, c + -> Seq Scan on gstest3 +(4 rows) + +-- simple rescan tests +select a, b, sum(v.x) + from (values (1),(2)) v(x), 
gstest_data(v.x) + group by grouping sets (a,b); + a | b | sum +---+---+----- + 2 | | 6 + 1 | | 3 + | 2 | 3 + | 3 | 3 + | 1 | 3 +(5 rows) + +explain (costs off) + select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by grouping sets (a,b); + QUERY PLAN +------------------------------------------ + HashAggregate + Hash Key: gstest_data.a + Hash Key: gstest_data.b + -> Nested Loop + -> Values Scan on "*VALUES*" + -> Function Scan on gstest_data +(6 rows) + +select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; +ERROR: aggregate functions are not allowed in FROM clause of their own query level +LINE 3: lateral (select a, b, sum(v.x) from gstest_data(v.x) ... + ^ +explain (costs off) + select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; +ERROR: aggregate functions are not allowed in FROM clause of their own query level +LINE 4: lateral (select a, b, sum(v.x) from gstest_data(v.x... + ^ +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + | | 3 | 21 | 2 | 11 + | | 3 | 21 | 2 | 11 + | | 3 | 25 | 2 | 13 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 + | | 3 | 14 | 1 | 14 + | | 3 | 15 | 1 | 15 + | | 3 | 15 | 1 | 15 + | | 3 | 16 | 1 | 16 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 + | | 3 | 17 | 1 | 17 + | | 3 | 37 | 2 | 19 + | | 3 | 37 | 2 | 19 +(21 rows) + +explain (costs off) + select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; + QUERY PLAN +------------------------------------------------------------------------------------------- + Sort + Sort Key: (GROUPING("*VALUES*".column1, "*VALUES*".column2)), (max("*VALUES*".column3)) + -> HashAggregate + Hash Key: "*VALUES*".column1, "*VALUES*".column2 + Hash Key: ("*VALUES*".column1 + 1), ("*VALUES*".column2 + 1) + Hash Key: ("*VALUES*".column1 + 2), ("*VALUES*".column2 + 2) + -> Values Scan on "*VALUES*" +(7 rows) + +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; + a | b | sum | rsum +---+---+-----+------ + 1 | 1 | 8 | 8 + 1 | 2 | 2 | 10 + 1 | | 10 | 20 + 2 | 2 | 2 | 22 + 2 | | 2 | 24 + | 1 | 8 | 32 + | 2 | 4 | 36 + | | 12 | 48 +(8 rows) + +explain (costs off) + select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; + QUERY PLAN +--------------------------------------------- + Sort + Sort Key: (sum((sum(c))) OVER (?)), a, b + -> WindowAgg + -> Sort + Sort Key: a, b + -> MixedAggregate + Hash Key: a, b + Hash Key: a + Hash Key: b + Group Key: () + -> Seq Scan on gstest2 +(11 rows) + +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + a | b | sum +---+---+----- + 1 | 1 | 1 + 1 | 2 | 1 + 1 | 3 | 1 + 1 | | 3 + 2 | 1 | 2 + 2 | 2 | 2 + 2 | 3 | 2 + 2 | | 6 + | 1 | 3 + | 2 | 3 + | 3 | 3 + | | 9 +(12 rows) + +explain (costs off) + select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + QUERY PLAN 
+------------------------------------------------ + Sort + Sort Key: gstest_data.a, gstest_data.b + -> MixedAggregate + Hash Key: gstest_data.a, gstest_data.b + Hash Key: gstest_data.a + Hash Key: gstest_data.b + Group Key: () + -> Nested Loop + -> Values Scan on "*VALUES*" + -> Function Scan on gstest_data +(10 rows) + +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; + a | a | four | ten | count +---+---+------+-----+------- + 1 | 1 | 0 | 0 | 50 + 1 | 1 | 0 | 2 | 50 + 1 | 1 | 0 | 4 | 50 + 1 | 1 | 0 | 6 | 50 + 1 | 1 | 0 | 8 | 50 + 1 | 1 | 0 | | 250 + 1 | 1 | 1 | 1 | 50 + 1 | 1 | 1 | 3 | 50 + 1 | 1 | 1 | 5 | 50 + 1 | 1 | 1 | 7 | 50 + 1 | 1 | 1 | 9 | 50 + 1 | 1 | 1 | | 250 + 1 | 1 | 2 | 0 | 50 + 1 | 1 | 2 | 2 | 50 + 1 | 1 | 2 | 4 | 50 + 1 | 1 | 2 | 6 | 50 + 1 | 1 | 2 | 8 | 50 + 1 | 1 | 2 | | 250 + 1 | 1 | 3 | 1 | 50 + 1 | 1 | 3 | 3 | 50 + 1 | 1 | 3 | 5 | 50 + 1 | 1 | 3 | 7 | 50 + 1 | 1 | 3 | 9 | 50 + 1 | 1 | 3 | | 250 + 1 | 1 | | 0 | 100 + 1 | 1 | | 1 | 100 + 1 | 1 | | 2 | 100 + 1 | 1 | | 3 | 100 + 1 | 1 | | 4 | 100 + 1 | 1 | | 5 | 100 + 1 | 1 | | 6 | 100 + 1 | 1 | | 7 | 100 + 1 | 1 | | 8 | 100 + 1 | 1 | | 9 | 100 + 1 | 1 | | | 1000 + 2 | 2 | 0 | 0 | 50 + 2 | 2 | 0 | 2 | 50 + 2 | 2 | 0 | 4 | 50 + 2 | 2 | 0 | 6 | 50 + 2 | 2 | 0 | 8 | 50 + 2 | 2 | 0 | | 250 + 2 | 2 | 1 | 1 | 50 + 2 | 2 | 1 | 3 | 50 + 2 | 2 | 1 | 5 | 50 + 2 | 2 | 1 | 7 | 50 + 2 | 2 | 1 | 9 | 50 + 2 | 2 | 1 | | 250 + 2 | 2 | 2 | 0 | 50 + 2 | 2 | 2 | 2 | 50 + 2 | 2 | 2 | 4 | 50 + 2 | 2 | 2 | 6 | 50 + 2 | 2 | 2 | 8 | 50 + 2 | 2 | 2 | | 250 + 2 | 2 | 3 | 1 | 50 + 2 | 2 | 3 | 3 | 50 + 2 | 2 | 3 | 5 | 50 + 2 | 2 | 3 | 7 | 50 + 2 | 2 | 3 | 9 | 50 + 2 | 2 | 3 | | 250 + 2 | 2 | | 0 | 100 + 2 | 2 | | 1 | 100 + 2 | 2 | | 2 | 100 + 2 | 2 | | 3 | 100 + 2 | 2 | | 4 | 100 + 2 | 2 | | 5 | 100 + 2 | 2 | | 6 | 100 + 2 | 2 | | 7 | 100 + 2 | 2 | | 8 | 100 + 2 | 2 | | 9 | 100 + 2 | 2 | | | 1000 +(70 rows) + +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + array +------------------------------------------------------------------------------------------------------------------------------------------------------ + {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"} + {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"} +(2 rows) + +-- Rescan logic changes when there are no empty grouping sets, so test +-- that too: +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten; + a | a | four | ten | count +---+---+------+-----+------- + 1 | 1 | 0 | | 250 + 1 | 1 | 1 | | 250 + 1 | 1 | 2 | | 250 + 1 | 1 | 3 | | 250 + 1 | 1 | | 0 | 100 + 1 | 1 | | 1 | 100 + 1 | 1 | | 2 | 100 + 1 | 1 | | 3 | 100 + 1 | 1 | | 4 | 100 + 1 | 1 | | 5 | 100 + 1 | 1 | | 6 | 100 + 1 | 1 | | 7 | 100 + 1 | 1 | | 8 | 100 + 1 | 1 | | 9 | 100 + 2 | 2 | 0 | | 250 + 2 | 2 | 1 | | 250 + 2 | 2 | 2 | | 250 + 2 | 2 | 3 | | 250 + 2 | 2 | | 0 | 100 + 2 | 2 | | 1 | 100 + 2 | 2 | | 2 | 100 + 2 | 2 | | 3 | 100 + 2 | 2 | | 4 | 100 + 2 | 2 | | 5 | 100 + 2 | 2 | | 6 | 100 + 2 | 2 | | 7 | 100 + 2 | 2 | | 8 | 100 + 2 | 2 | | 9 | 100 +(28 rows) + +select array(select row(v.a,s1.*) 
from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a); + array +--------------------------------------------------------------------------------- + {"(1,0,,500)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)"} + {"(2,0,,500)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)"} +(2 rows) + +-- test the knapsack +set work_mem = '64kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); + QUERY PLAN +------------------------------- + MixedAggregate + Hash Key: two + Hash Key: four + Hash Key: ten + Hash Key: hundred + Group Key: unique1 + Sort Key: twothousand + Group Key: twothousand + Sort Key: thousand + Group Key: thousand + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(13 rows) + +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,hundred,ten,four,two); + QUERY PLAN +------------------------------- + MixedAggregate + Hash Key: two + Hash Key: four + Hash Key: ten + Hash Key: hundred + Group Key: unique1 + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(9 rows) + +set work_mem = '384kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); + QUERY PLAN +------------------------------- + MixedAggregate + Hash Key: two + Hash Key: four + Hash Key: ten + Hash Key: hundred + Hash Key: thousand + Group Key: unique1 + Sort Key: twothousand + Group Key: twothousand + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(12 rows) + -- end diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out index 0eeaf9e830..33f370b494 100644 --- a/src/test/regress/expected/tsrf.out +++ b/src/test/regress/expected/tsrf.out @@ -233,6 +233,7 @@ SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROU (6 rows) -- grouping sets are a bit special, they produce NULLs in columns not actually NULL +set enable_hashagg = false; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab); dataa | b | g | count -------+-----+---+------- @@ -311,46 +312,46 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d b | bar | | 2 b | | | 2 | | | 6 - a | | 1 | 2 - b | | 1 | 1 - | | 1 | 3 - a | | 2 | 2 - b | | 2 | 1 - | | 2 | 3 | bar | 1 | 2 | bar | 2 | 2 | bar | | 4 | foo | 1 | 1 | foo | 2 | 1 | foo | | 2 + a | | 1 | 2 + b | | 1 | 1 + | | 1 | 3 + a | | 2 | 2 + b | | 2 | 1 + | | 2 | 3 (24 rows) SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa; dataa | b | g | count -------+-----+---+------- + a | foo | | 2 + a | | | 4 + a | | 2 | 2 a | bar | 1 | 1 a | bar | 2 | 1 a | bar | | 2 a | foo | 1 | 1 a | foo | 2 | 1 - a | foo | | 2 - a | | | 4 a | | 1 | 2 - a | | 2 | 2 - b | bar | 2 | 1 + b | bar | 1 | 1 b | | | 2 b | | 1 | 1 - b | | 2 | 1 - b | bar | 1 | 1 + b | bar | 2 | 1 b | bar | | 2 - | foo | | 2 - | foo | 1 | 1 + b | | 2 | 1 | | 2 | 3 + | | | 6 | bar | 1 | 2 | bar | 2 | 2 - | | | 6 - | foo | 2 | 1 | bar | | 4 + | foo | 1 | 1 + | foo | 2 | 1 + | foo | | 2 | 
| 1 | 3 (24 rows) @@ -360,29 +361,30 @@ SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(d a | bar | 1 | 1 a | foo | 1 | 1 b | bar | 1 | 1 + | bar | 1 | 2 + | foo | 1 | 1 a | | 1 | 2 b | | 1 | 1 | | 1 | 3 - | bar | 1 | 2 - | foo | 1 | 1 - | foo | 2 | 1 - | bar | 2 | 2 a | | 2 | 2 b | | 2 | 1 - a | bar | 2 | 1 + | bar | 2 | 2 | | 2 | 3 + | foo | 2 | 1 + a | bar | 2 | 1 a | foo | 2 | 1 b | bar | 2 | 1 - a | foo | | 2 + a | | | 4 b | bar | | 2 b | | | 2 | | | 6 - a | | | 4 + a | foo | | 2 + a | bar | | 2 | bar | | 4 | foo | | 2 - a | bar | | 2 (24 rows) +reset enable_hashagg; -- data modification CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data; INSERT INTO fewmore VALUES(generate_series(4,5)); diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index cc557cc552..0e023d9ddc 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -31,6 +31,14 @@ copy gstest3 from stdin; \. alter table gstest3 add primary key (a); +create temp table gstest4(id integer, v integer, + unhashable_col bit(4), unsortable_col xid); +insert into gstest4 +values (1,1,b'0000','1'), (2,2,b'0001','1'), + (3,4,b'0010','2'), (4,8,b'0011','2'), + (5,16,b'0000','2'), (6,32,b'0001','2'), + (7,64,b'0010','1'), (8,128,b'0011','1'); + create temp table gstest_empty (a integer, b integer, v integer); create function gstest_data(v integer, out a integer, out b integer) @@ -43,8 +51,11 @@ create function gstest_data(v integer, out a integer, out b integer) -- basic functionality +set enable_hashagg = false; -- test hashing explicitly later + -- simple rollup with multiple plain aggregates, with and without ordering -- (and with ordering differing from grouping) + select a, b, grouping(a,b), sum(v), count(*), max(v) from gstest1 group by rollup (a,b); select a, b, grouping(a,b), sum(v), count(*), max(v) @@ -161,7 +172,7 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); -- Tests for chained aggregates select a, b, grouping(a,b), sum(v), count(*), max(v) - from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1); select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum @@ -224,4 +235,146 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou select sum(ten) from onek group by two, rollup(four::text) order by 1; select sum(ten) from onek group by rollup(four::text), two order by 1; +-- hashing support + +set enable_hashagg = true; + +-- failure cases + +select count(*) from gstest4 group by rollup(unhashable_col,unsortable_col); +select array_agg(v order by v) from gstest4 group by grouping sets ((id,unsortable_col),(id)); + +-- simple cases + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; +explain (costs off) select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a),(b)) order by 3,1,2; + +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; +explain (costs off) select a, b, 
grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by cube(a,b) order by 3,1,2; + +-- shouldn't try and hash +explain (costs off) + select a, b, grouping(a,b), array_agg(v order by v) + from gstest1 group by cube(a,b); + +-- mixed hashable/sortable cases +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3, 5; +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((unhashable_col),(unsortable_col)) + order by 3,5; + +select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; +explain (costs off) + select unhashable_col, unsortable_col, + grouping(unhashable_col, unsortable_col), + count(*), sum(v) + from gstest4 group by grouping sets ((v,unhashable_col),(v,unsortable_col)) + order by 3,5; + +-- empty input: first is 0 rows, second 1, third 3 etc. +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); +select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +explain (costs off) + select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); +select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); +explain (costs off) + select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); + +-- check that functionally dependent cols are not nulled +select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); +explain (costs off) + select a, d, grouping(a,b,c) + from gstest3 + group by grouping sets ((a,b), (a,c)); + +-- simple rescan tests + +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by grouping sets (a,b); +explain (costs off) + select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by grouping sets (a,b); + +select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; +explain (costs off) + select * + from (values (1),(2)) v(x), + lateral (select a, b, sum(v.x) from gstest_data(v.x) group by grouping sets (a,b)) s; + +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; +explain (costs off) + select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)) order by 3,6; +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; +explain (costs off) + select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; +explain (costs off) + select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) 
s on true order by v.a,four,ten; +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- Rescan logic changes when there are no empty grouping sets, so test +-- that too: +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by grouping sets(four,ten)) s on true order by v.a,four,ten; +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by grouping sets(two,four) order by two,four) s1) from (values (1),(2)) v(a); + +-- test the knapsack + +set work_mem = '64kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,hundred,ten,four,two); + +set work_mem = '384kB'; +explain (costs off) + select unique1, + count(two), count(four), count(ten), + count(hundred), count(thousand), count(twothousand), + count(*) + from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); + -- end diff --git a/src/test/regress/sql/tsrf.sql b/src/test/regress/sql/tsrf.sql index e627bb99ed..417e78c53d 100644 --- a/src/test/regress/sql/tsrf.sql +++ b/src/test/regress/sql/tsrf.sql @@ -66,12 +66,14 @@ SELECT SUM(count(*)) OVER(PARTITION BY generate_series(1,3) ORDER BY generate_se SELECT few.dataa, count(*), min(id), max(id), generate_series(1,3) FROM few GROUP BY few.dataa ORDER BY 5, 1; -- grouping sets are a bit special, they produce NULLs in columns not actually NULL +set enable_hashagg = false; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab); SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY dataa; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab) ORDER BY g; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g); SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY dataa; SELECT dataa, datab b, generate_series(1,2) g, count(*) FROM few GROUP BY CUBE(dataa, datab, g) ORDER BY g; +reset enable_hashagg; -- data modification CREATE TABLE fewmore AS SELECT generate_series(1,3) AS data; -- 2.40.0
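
Editor's addendum (not part of the patch):

A note on reading the "grouping" column in the expected outputs above: GROUPING(a,b) is a bit mask with one bit per argument, most significant bit first, where a bit is set when that argument is absent from (and therefore nulled by) the current grouping set. That is why the cube(a,b) output shows 0 for rows from the (a,b) set, 1 for (a), 2 for (b), and 3 for the empty set (). The following self-contained C sketch reproduces those values; it is illustrative only, not the backend's implementation.

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Illustrative sketch of how the "grouping" column in the outputs
	 * above is formed: one bit per GROUPING() argument, most significant
	 * first, set when that argument is not grouped in the current set.
	 */
	static int
	grouping_value(int nargs, const bool grouped[])
	{
		int		result = 0;

		for (int i = 0; i < nargs; i++)
		{
			result <<= 1;
			if (!grouped[i])	/* column nulled in this grouping set */
				result |= 1;
		}
		return result;
	}

	int
	main(void)
	{
		bool	set_ab[] = {true, true};		/* grouping set (a,b) */
		bool	set_a[] = {true, false};		/* grouping set (a)   */
		bool	set_b[] = {false, true};		/* grouping set (b)   */
		bool	set_none[] = {false, false};	/* grouping set ()    */

		/* prints "0 1 2 3", matching grouping(a,b) in the cube output */
		printf("%d %d %d %d\n",
			   grouping_value(2, set_ab), grouping_value(2, set_a),
			   grouping_value(2, set_b), grouping_value(2, set_none));
		return 0;
	}

Compiled and run, this prints "0 1 2 3", matching the grouping column ordering used by the "order by 3,1,2" queries above.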
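The work_mem-sensitive plans in the "-- test the knapsack" cases come from the planner pass this patch adds: among the grouping sets that could be hashed, choose a subset whose combined estimated hashtable size fits within work_mem while saving as many sorts as possible; the remaining sets fall back to sorted grouping under MixedAggregate. The sketch below shows the underlying 0/1 knapsack dynamic program. It is a minimal illustration, not src/backend/lib/knapsack.c, and the per-set weights are invented stand-ins for the planner's hashtable size estimates.

	#include <stdio.h>
	#include <string.h>

	#define MAX_CAP 1024	/* largest budget this sketch supports, in kB */

	/*
	 * Classic 0/1 knapsack by dynamic programming over capacity.
	 * Returns a bit mask of the chosen items.  Weights are in kB;
	 * values are the benefit (here: sorts avoided) of hashing a set.
	 */
	static unsigned int
	knapsack(int capacity, int nitems, const int weights[], const double values[])
	{
		double			best[MAX_CAP + 1];
		unsigned int	chosen[MAX_CAP + 1];

		if (capacity > MAX_CAP)
			capacity = MAX_CAP;
		memset(best, 0, sizeof(best));
		memset(chosen, 0, sizeof(chosen));

		for (int i = 0; i < nitems; i++)
		{
			/* scan capacities downward so each item is used at most once */
			for (int c = capacity; c >= weights[i]; c--)
			{
				if (best[c - weights[i]] + values[i] > best[c])
				{
					best[c] = best[c - weights[i]] + values[i];
					chosen[c] = chosen[c - weights[i]] | (1u << i);
				}
			}
		}
		return chosen[capacity];
	}

	int
	main(void)
	{
		/* invented per-set size estimates in kB -- not planner output */
		const char *names[] = {"unique1", "twothousand", "thousand",
							   "hundred", "ten", "four", "two"};
		int			weights[] = {300, 180, 90, 40, 8, 2, 1};
		double		values[] = {1, 1, 1, 1, 1, 1, 1};	/* one sort saved each */
		int			nitems = 7;
		unsigned int picked = knapsack(64, nitems, weights, values);

		for (int i = 0; i < nitems; i++)
			if (picked & (1u << i))
				printf("hash: %s\n", names[i]);
		return 0;
	}

With these made-up numbers a 64 kB budget selects hundred, ten, four and two, loosely mirroring the first 64kB plan above, where those four sets get Hash Keys while the larger sets are grouped by sorting. The real planner differs in the details: it uses estimated hashtable sizes and per-set sort savings rather than the uniform values shown here.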