1 /*-------------------------------------------------------------------------
4 * Routines to handle aggregate nodes.
6 * ExecAgg evaluates each aggregate in the following steps:
8 * transvalue = initcond
9 * foreach input_value do
10 * transvalue = transfunc(transvalue, input_value)
11 * result = finalfunc(transvalue)
13 * If a finalfunc is not supplied then the result is just the ending
14 * value of transvalue.
16 * If transfunc is marked "strict" in pg_proc and initcond is NULL,
17 * then the first non-NULL input_value is assigned directly to transvalue,
18 * and transfunc isn't applied until the second non-NULL input_value.
19 * The agg's input type and transtype must be the same in this case!
21 * If transfunc is marked "strict" then NULL input_values are skipped,
22 * keeping the previous transvalue. If transfunc is not strict then it
23 * is called for every input tuple and must deal with NULL initcond
24 * or NULL input_value for itself.
26 * If finalfunc is marked "strict" then it is not called when the
27 * ending transvalue is NULL, instead a NULL result is created
28 * automatically (this is just the usual handling of strict functions,
29 * of course). A non-strict finalfunc can make its own choice of
30 * what to return for a NULL ending transvalue.
32 * We compute aggregate input expressions and run the transition functions
33 * in a temporary econtext (aggstate->tmpcontext). This is reset at
34 * least once per input tuple, so when the transvalue datatype is
35 * pass-by-reference, we have to be careful to copy it into a longer-lived
36 * memory context, and free the prior value to avoid memory leakage.
37 * We store transvalues in the memory context aggstate->aggcontext,
38 * which is also used for the hashtable structures in AGG_HASHED mode.
39 * The node's regular econtext (aggstate->csstate.cstate.cs_ExprContext)
40 * is used to run finalize functions and compute the output tuple;
41 * this context can be reset once per output tuple.
44 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
45 * Portions Copyright (c) 1994, Regents of the University of California
48 * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.118 2004/02/03 17:34:02 tgl Exp $
50 *-------------------------------------------------------------------------
55 #include "access/heapam.h"
56 #include "catalog/pg_aggregate.h"
57 #include "catalog/pg_operator.h"
58 #include "executor/executor.h"
59 #include "executor/nodeAgg.h"
60 #include "miscadmin.h"
61 #include "optimizer/clauses.h"
62 #include "parser/parse_agg.h"
63 #include "parser/parse_coerce.h"
64 #include "parser/parse_expr.h"
65 #include "parser/parse_oper.h"
66 #include "utils/acl.h"
67 #include "utils/builtins.h"
68 #include "utils/lsyscache.h"
69 #include "utils/syscache.h"
70 #include "utils/tuplesort.h"
71 #include "utils/datum.h"
75 * AggStatePerAggData - per-aggregate working state for the Agg scan
77 typedef struct AggStatePerAggData
80 * These values are set up during ExecInitAgg() and do not change
84 /* Links to Aggref expr and state nodes this working state is for */
85 AggrefExprState *aggrefstate;
88 /* Oids of transfer functions */
90 Oid finalfn_oid; /* may be InvalidOid */
93 * fmgr lookup data for transfer functions --- only valid when
94 * corresponding oid is not InvalidOid. Note in particular that
95 * fn_strict flags are kept here.
101 * Type of input data and Oid of sort operator to use for it; only
102 * set/used when aggregate has DISTINCT flag. (These are not used
103 * directly by nodeAgg, but must be passed to the Tuplesort object.)
109 * fmgr lookup data for input type's equality operator --- only
110 * set/used when aggregate has DISTINCT flag.
115 * initial value from pg_aggregate entry
118 bool initValueIsNull;
121 * We need the len and byval info for the agg's input, result, and
122 * transition data types in order to know how to copy/delete values.
132 * These values are working state that is initialized at the start of
133 * an input tuple group and updated for each input tuple.
135 * For a simple (non DISTINCT) aggregate, we just feed the input values
136 * straight to the transition function. If it's DISTINCT, we pass the
137 * input values into a Tuplesort object; then at completion of the
138 * input tuple group, we scan the sorted values, eliminate duplicates,
139 * and run the transition function on the rest.
142 Tuplesortstate *sortstate; /* sort object, if a DISTINCT agg */
143 } AggStatePerAggData;
146 * AggStatePerGroupData - per-aggregate-per-group working state
148 * These values are working state that is initialized at the start of
149 * an input tuple group and updated for each input tuple.
151 * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
152 * structs (pointed to by aggstate->pergroup); we re-use the array for
153 * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the
154 * hash table contains an array of these structs for each tuple group.
156 * Logically, the sortstate field belongs in this struct, but we do not
157 * keep it here for space reasons: we don't support DISTINCT aggregates
158 * in AGG_HASHED mode, so there's no reason to use up a pointer field
159 * in every entry of the hashtable.
161 typedef struct AggStatePerGroupData
163 Datum transValue; /* current transition value */
164 bool transValueIsNull;
166 bool noTransValue; /* true if transValue not set yet */
169 * Note: noTransValue initially has the same value as
170 * transValueIsNull, and if true both are cleared to false at the same
171 * time. They are not the same though: if transfn later returns a
172 * NULL, we want to keep that NULL and not auto-replace it with a
173 * later input value. Only the first non-NULL input will be
176 } AggStatePerGroupData;
179 * To implement hashed aggregation, we need a hashtable that stores a
180 * representative tuple and an array of AggStatePerGroup structs for each
181 * distinct set of GROUP BY column values. We compute the hash key from
182 * the GROUP BY columns.
184 typedef struct AggHashEntryData *AggHashEntry;
186 typedef struct AggHashEntryData
188 TupleHashEntryData shared; /* common header for hash table entries */
189 /* per-aggregate transition status array - must be last! */
190 AggStatePerGroupData pergroup[1]; /* VARIABLE LENGTH ARRAY */
191 } AggHashEntryData; /* VARIABLE LENGTH STRUCT */
194 static void initialize_aggregates(AggState *aggstate,
195 AggStatePerAgg peragg,
196 AggStatePerGroup pergroup);
197 static void advance_transition_function(AggState *aggstate,
198 AggStatePerAgg peraggstate,
199 AggStatePerGroup pergroupstate,
200 Datum newVal, bool isNull);
201 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
202 static void process_sorted_aggregate(AggState *aggstate,
203 AggStatePerAgg peraggstate,
204 AggStatePerGroup pergroupstate);
205 static void finalize_aggregate(AggState *aggstate,
206 AggStatePerAgg peraggstate,
207 AggStatePerGroup pergroupstate,
208 Datum *resultVal, bool *resultIsNull);
209 static void build_hash_table(AggState *aggstate);
210 static AggHashEntry lookup_hash_entry(AggState *aggstate,
211 TupleTableSlot *slot);
212 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
213 static void agg_fill_hash_table(AggState *aggstate);
214 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
215 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
219 * Initialize all aggregates for a new group of input values.
221 * When called, CurrentMemoryContext should be the per-query context.
224 initialize_aggregates(AggState *aggstate,
225 AggStatePerAgg peragg,
226 AggStatePerGroup pergroup)
230 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
232 AggStatePerAgg peraggstate = &peragg[aggno];
233 AggStatePerGroup pergroupstate = &pergroup[aggno];
234 Aggref *aggref = peraggstate->aggref;
237 * Start a fresh sort operation for each DISTINCT aggregate.
239 if (aggref->aggdistinct)
242 * In case of rescan, maybe there could be an uncompleted sort
243 * operation? Clean it up if so.
245 if (peraggstate->sortstate)
246 tuplesort_end(peraggstate->sortstate);
248 peraggstate->sortstate =
249 tuplesort_begin_datum(peraggstate->inputType,
250 peraggstate->sortOperator,
255 * (Re)set transValue to the initial value.
257 * Note that when the initial value is pass-by-ref, we must copy it
258 * (into the aggcontext) since we will pfree the transValue later.
260 if (peraggstate->initValueIsNull)
261 pergroupstate->transValue = peraggstate->initValue;
264 MemoryContext oldContext;
266 oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
267 pergroupstate->transValue = datumCopy(peraggstate->initValue,
268 peraggstate->transtypeByVal,
269 peraggstate->transtypeLen);
270 MemoryContextSwitchTo(oldContext);
272 pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
275 * If the initial value for the transition state doesn't exist in
276 * the pg_aggregate table then we will let the first non-NULL
277 * value returned from the outer procNode become the initial
278 * value. (This is useful for aggregates like max() and min().)
279 * The noTransValue flag signals that we still need to do this.
281 pergroupstate->noTransValue = peraggstate->initValueIsNull;
286 * Given a new input value, advance the transition function of an aggregate.
288 * It doesn't matter which memory context this is called in.
291 advance_transition_function(AggState *aggstate,
292 AggStatePerAgg peraggstate,
293 AggStatePerGroup pergroupstate,
294 Datum newVal, bool isNull)
296 FunctionCallInfoData fcinfo;
297 MemoryContext oldContext;
299 if (peraggstate->transfn.fn_strict)
302 * For a strict transfn, nothing happens at a NULL input tuple; we
303 * just keep the prior transValue.
307 if (pergroupstate->noTransValue)
310 * transValue has not been initialized. This is the first
311 * non-NULL input value. We use it as the initial value for
312 * transValue. (We already checked that the agg's input type
313 * is binary-compatible with its transtype, so straight copy
316 * We must copy the datum into aggcontext if it is pass-by-ref.
317 * We do not need to pfree the old transValue, since it's
320 oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
321 pergroupstate->transValue = datumCopy(newVal,
322 peraggstate->transtypeByVal,
323 peraggstate->transtypeLen);
324 pergroupstate->transValueIsNull = false;
325 pergroupstate->noTransValue = false;
326 MemoryContextSwitchTo(oldContext);
329 if (pergroupstate->transValueIsNull)
332 * Don't call a strict function with NULL inputs. Note it is
333 * possible to get here despite the above tests, if the
334 * transfn is strict *and* returned a NULL on a prior cycle.
335 * If that happens we will propagate the NULL all the way to
342 /* We run the transition functions in per-input-tuple memory context */
343 oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
346 * OK to call the transition function
348 * This is heavily-used code, so manually zero just the necessary fields
349 * instead of using MemSet(). Compare FunctionCall2().
352 /* MemSet(&fcinfo, 0, sizeof(fcinfo)); */
353 fcinfo.context = NULL;
354 fcinfo.resultinfo = NULL;
355 fcinfo.isnull = false;
357 fcinfo.flinfo = &peraggstate->transfn;
359 fcinfo.arg[0] = pergroupstate->transValue;
360 fcinfo.argnull[0] = pergroupstate->transValueIsNull;
361 fcinfo.arg[1] = newVal;
362 fcinfo.argnull[1] = isNull;
364 newVal = FunctionCallInvoke(&fcinfo);
367 * If pass-by-ref datatype, must copy the new value into aggcontext
368 * and pfree the prior transValue. But if transfn returned a pointer
369 * to its first input, we don't need to do anything.
371 if (!peraggstate->transtypeByVal &&
372 DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
376 MemoryContextSwitchTo(aggstate->aggcontext);
377 newVal = datumCopy(newVal,
378 peraggstate->transtypeByVal,
379 peraggstate->transtypeLen);
381 if (!pergroupstate->transValueIsNull)
382 pfree(DatumGetPointer(pergroupstate->transValue));
385 pergroupstate->transValue = newVal;
386 pergroupstate->transValueIsNull = fcinfo.isnull;
388 MemoryContextSwitchTo(oldContext);
392 * Advance all the aggregates for one input tuple. The input tuple
393 * has been stored in tmpcontext->ecxt_scantuple, so that it is accessible
394 * to ExecEvalExpr. pergroup is the array of per-group structs to use
395 * (this might be in a hashtable entry).
397 * When called, CurrentMemoryContext should be the per-query context.
400 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
402 ExprContext *econtext = aggstate->tmpcontext;
405 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
407 AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
408 AggStatePerGroup pergroupstate = &pergroup[aggno];
409 AggrefExprState *aggrefstate = peraggstate->aggrefstate;
410 Aggref *aggref = peraggstate->aggref;
414 newVal = ExecEvalExprSwitchContext(aggrefstate->target, econtext,
417 if (aggref->aggdistinct)
419 /* in DISTINCT mode, we may ignore nulls */
422 tuplesort_putdatum(peraggstate->sortstate, newVal, isNull);
426 advance_transition_function(aggstate, peraggstate, pergroupstate,
433 * Run the transition function for a DISTINCT aggregate. This is called
434 * after we have completed entering all the input values into the sort
435 * object. We complete the sort, read out the values in sorted order,
436 * and run the transition function on each non-duplicate value.
438 * When called, CurrentMemoryContext should be the per-query context.
441 process_sorted_aggregate(AggState *aggstate,
442 AggStatePerAgg peraggstate,
443 AggStatePerGroup pergroupstate)
445 Datum oldVal = (Datum) 0;
446 bool haveOldVal = false;
447 MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
448 MemoryContext oldContext;
452 tuplesort_performsort(peraggstate->sortstate);
455 * Note: if input type is pass-by-ref, the datums returned by the sort
456 * are freshly palloc'd in the per-query context, so we must be
457 * careful to pfree them when they are no longer needed.
460 while (tuplesort_getdatum(peraggstate->sortstate, true,
464 * DISTINCT always suppresses nulls, per SQL spec, regardless of
465 * the transition function's strictness.
471 * Clear and select the working context for evaluation of the
472 * equality function and transition function.
474 MemoryContextReset(workcontext);
475 oldContext = MemoryContextSwitchTo(workcontext);
478 DatumGetBool(FunctionCall2(&peraggstate->equalfn,
481 /* equal to prior, so forget this one */
482 if (!peraggstate->inputtypeByVal)
483 pfree(DatumGetPointer(newVal));
487 advance_transition_function(aggstate, peraggstate, pergroupstate,
489 /* forget the old value, if any */
490 if (haveOldVal && !peraggstate->inputtypeByVal)
491 pfree(DatumGetPointer(oldVal));
492 /* and remember the new one for subsequent equality checks */
497 MemoryContextSwitchTo(oldContext);
500 if (haveOldVal && !peraggstate->inputtypeByVal)
501 pfree(DatumGetPointer(oldVal));
503 tuplesort_end(peraggstate->sortstate);
504 peraggstate->sortstate = NULL;
508 * Compute the final value of one aggregate.
510 * The finalfunction will be run, and the result delivered, in the
511 * output-tuple context; caller's CurrentMemoryContext does not matter.
514 finalize_aggregate(AggState *aggstate,
515 AggStatePerAgg peraggstate,
516 AggStatePerGroup pergroupstate,
517 Datum *resultVal, bool *resultIsNull)
519 MemoryContext oldContext;
521 oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
524 * Apply the agg's finalfn if one is provided, else return transValue.
526 if (OidIsValid(peraggstate->finalfn_oid))
528 FunctionCallInfoData fcinfo;
530 MemSet(&fcinfo, 0, sizeof(fcinfo));
531 fcinfo.flinfo = &peraggstate->finalfn;
533 fcinfo.arg[0] = pergroupstate->transValue;
534 fcinfo.argnull[0] = pergroupstate->transValueIsNull;
535 if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull)
537 /* don't call a strict function with NULL inputs */
538 *resultVal = (Datum) 0;
539 *resultIsNull = true;
543 *resultVal = FunctionCallInvoke(&fcinfo);
544 *resultIsNull = fcinfo.isnull;
549 *resultVal = pergroupstate->transValue;
550 *resultIsNull = pergroupstate->transValueIsNull;
554 * If result is pass-by-ref, make sure it is in the right context.
556 if (!peraggstate->resulttypeByVal && !*resultIsNull &&
557 !MemoryContextContains(CurrentMemoryContext,
558 DatumGetPointer(*resultVal)))
559 *resultVal = datumCopy(*resultVal,
560 peraggstate->resulttypeByVal,
561 peraggstate->resulttypeLen);
563 MemoryContextSwitchTo(oldContext);
567 * Initialize the hash table to empty.
569 * The hash table always lives in the aggcontext memory context.
572 build_hash_table(AggState *aggstate)
574 Agg *node = (Agg *) aggstate->ss.ps.plan;
575 MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
578 Assert(node->aggstrategy == AGG_HASHED);
579 Assert(node->numGroups > 0);
581 entrysize = sizeof(AggHashEntryData) +
582 (aggstate->numaggs - 1) *sizeof(AggStatePerGroupData);
584 aggstate->hashtable = BuildTupleHashTable(node->numCols,
586 aggstate->eqfunctions,
587 aggstate->hashfunctions,
590 aggstate->aggcontext,
595 * Find or create a hashtable entry for the tuple group containing the
598 * When called, CurrentMemoryContext should be the per-query context.
601 lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
606 entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
612 /* initialize aggregates for new tuple group */
613 initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
622 * ExecAgg receives tuples from its outer subplan and aggregates over
623 * the appropriate attribute for each aggregate function use (Aggref
624 * node) appearing in the targetlist or qual of the node. The number
625 * of tuples to aggregate over depends on whether grouped or plain
626 * aggregation is selected. In grouped aggregation, we produce a result
627 * row for each group; in plain aggregation there's a single result row
628 * for the whole query. In either case, the value of each aggregate is
629 * stored in the expression context to be used when ExecProject evaluates
633 ExecAgg(AggState *node)
638 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
640 if (!node->table_filled)
641 agg_fill_hash_table(node);
642 return agg_retrieve_hash_table(node);
645 return agg_retrieve_direct(node);
649 * ExecAgg for non-hashed case
651 static TupleTableSlot *
652 agg_retrieve_direct(AggState *aggstate)
654 Agg *node = (Agg *) aggstate->ss.ps.plan;
655 PlanState *outerPlan;
656 ExprContext *econtext;
657 ExprContext *tmpcontext;
658 ProjectionInfo *projInfo;
661 AggStatePerAgg peragg;
662 AggStatePerGroup pergroup;
663 TupleTableSlot *outerslot;
664 TupleTableSlot *firstSlot;
665 TupleTableSlot *resultSlot;
669 * get state info from node
671 outerPlan = outerPlanState(aggstate);
672 /* econtext is the per-output-tuple expression context */
673 econtext = aggstate->ss.ps.ps_ExprContext;
674 aggvalues = econtext->ecxt_aggvalues;
675 aggnulls = econtext->ecxt_aggnulls;
676 /* tmpcontext is the per-input-tuple expression context */
677 tmpcontext = aggstate->tmpcontext;
678 projInfo = aggstate->ss.ps.ps_ProjInfo;
679 peragg = aggstate->peragg;
680 pergroup = aggstate->pergroup;
681 firstSlot = aggstate->ss.ss_ScanTupleSlot;
684 * We loop retrieving groups until we find one matching
685 * aggstate->ss.ps.qual
689 if (aggstate->agg_done)
693 * If we don't already have the first tuple of the new group,
694 * fetch it from the outer plan.
696 if (aggstate->grp_firstTuple == NULL)
698 outerslot = ExecProcNode(outerPlan);
699 if (!TupIsNull(outerslot))
702 * Make a copy of the first input tuple; we will use this
703 * for comparisons (in group mode) and for projection.
705 aggstate->grp_firstTuple = heap_copytuple(outerslot->val);
709 /* outer plan produced no tuples at all */
710 aggstate->agg_done = true;
711 /* If we are grouping, we should produce no tuples too */
712 if (node->aggstrategy != AGG_PLAIN)
718 * Clear the per-output-tuple context for each group
720 ResetExprContext(econtext);
723 * Initialize working state for a new input tuple group
725 initialize_aggregates(aggstate, peragg, pergroup);
727 if (aggstate->grp_firstTuple != NULL)
730 * Store the copied first input tuple in the tuple table slot
731 * reserved for it. The tuple will be deleted when it is
732 * cleared from the slot.
734 ExecStoreTuple(aggstate->grp_firstTuple,
738 aggstate->grp_firstTuple = NULL; /* don't keep two pointers */
740 /* set up for first advance_aggregates call */
741 tmpcontext->ecxt_scantuple = firstSlot;
744 * Process each outer-plan tuple, and then fetch the next one,
745 * until we exhaust the outer plan or cross a group boundary.
749 advance_aggregates(aggstate, pergroup);
751 /* Reset per-input-tuple context after each tuple */
752 ResetExprContext(tmpcontext);
754 outerslot = ExecProcNode(outerPlan);
755 if (TupIsNull(outerslot))
757 /* no more outer-plan tuples available */
758 aggstate->agg_done = true;
761 /* set up for next advance_aggregates call */
762 tmpcontext->ecxt_scantuple = outerslot;
765 * If we are grouping, check whether we've crossed a group
768 if (node->aggstrategy == AGG_SORTED)
770 if (!execTuplesMatch(firstSlot->val,
772 firstSlot->ttc_tupleDescriptor,
773 node->numCols, node->grpColIdx,
774 aggstate->eqfunctions,
775 tmpcontext->ecxt_per_tuple_memory))
778 * Save the first input tuple of the next group.
780 aggstate->grp_firstTuple = heap_copytuple(outerslot->val);
788 * Done scanning input tuple group. Finalize each aggregate
789 * calculation, and stash results in the per-output-tuple context.
791 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
793 AggStatePerAgg peraggstate = &peragg[aggno];
794 AggStatePerGroup pergroupstate = &pergroup[aggno];
796 if (peraggstate->aggref->aggdistinct)
797 process_sorted_aggregate(aggstate, peraggstate, pergroupstate);
799 finalize_aggregate(aggstate, peraggstate, pergroupstate,
800 &aggvalues[aggno], &aggnulls[aggno]);
804 * If we have no first tuple (ie, the outerPlan didn't return
805 * anything), create a dummy all-nulls input tuple for use by
806 * ExecProject. 99.44% of the time this is a waste of cycles,
807 * because ordinarily the projected output tuple's targetlist
808 * cannot contain any direct (non-aggregated) references to input
809 * columns, so the dummy tuple will not be referenced. However
810 * there are special cases where this isn't so --- in particular
811 * an UPDATE involving an aggregate will have a targetlist
812 * reference to ctid. We need to return a null for ctid in that
813 * situation, not coredump.
815 * The values returned for the aggregates will be the initial values
816 * of the transition functions.
818 if (TupIsNull(firstSlot))
822 /* Should only happen in non-grouped mode */
823 Assert(node->aggstrategy == AGG_PLAIN);
824 Assert(aggstate->agg_done);
826 tupType = firstSlot->ttc_tupleDescriptor;
827 /* watch out for zero-column input tuples, though... */
828 if (tupType && tupType->natts > 0)
830 HeapTuple nullsTuple;
834 dvalues = (Datum *) palloc0(sizeof(Datum) * tupType->natts);
835 dnulls = (char *) palloc(sizeof(char) * tupType->natts);
836 MemSet(dnulls, 'n', sizeof(char) * tupType->natts);
837 nullsTuple = heap_formtuple(tupType, dvalues, dnulls);
838 ExecStoreTuple(nullsTuple,
848 * Form a projection tuple using the aggregate results and the
849 * representative input tuple. Store it in the result tuple slot.
850 * Note we do not support aggregates returning sets ...
852 econtext->ecxt_scantuple = firstSlot;
853 resultSlot = ExecProject(projInfo, NULL);
856 * If the completed tuple does not match the qualifications, it is
857 * ignored and we loop back to try to process another group.
858 * Otherwise, return the tuple.
861 while (!ExecQual(aggstate->ss.ps.qual, econtext, false));
867 * ExecAgg for hashed case: phase 1, read input and build hash table
870 agg_fill_hash_table(AggState *aggstate)
872 PlanState *outerPlan;
873 ExprContext *tmpcontext;
875 TupleTableSlot *outerslot;
878 * get state info from node
880 outerPlan = outerPlanState(aggstate);
881 /* tmpcontext is the per-input-tuple expression context */
882 tmpcontext = aggstate->tmpcontext;
885 * Process each outer-plan tuple, and then fetch the next one, until
886 * we exhaust the outer plan.
890 outerslot = ExecProcNode(outerPlan);
891 if (TupIsNull(outerslot))
893 /* set up for advance_aggregates call */
894 tmpcontext->ecxt_scantuple = outerslot;
896 /* Find or build hashtable entry for this tuple's group */
897 entry = lookup_hash_entry(aggstate, outerslot);
899 /* Advance the aggregates */
900 advance_aggregates(aggstate, entry->pergroup);
902 /* Reset per-input-tuple context after each tuple */
903 ResetExprContext(tmpcontext);
906 aggstate->table_filled = true;
907 /* Initialize to walk the hash table */
908 ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
912 * ExecAgg for hashed case: phase 2, retrieving groups from hash table
914 static TupleTableSlot *
915 agg_retrieve_hash_table(AggState *aggstate)
917 ExprContext *econtext;
918 ProjectionInfo *projInfo;
921 AggStatePerAgg peragg;
922 AggStatePerGroup pergroup;
924 TupleTableSlot *firstSlot;
925 TupleTableSlot *resultSlot;
929 * get state info from node
931 /* econtext is the per-output-tuple expression context */
932 econtext = aggstate->ss.ps.ps_ExprContext;
933 aggvalues = econtext->ecxt_aggvalues;
934 aggnulls = econtext->ecxt_aggnulls;
935 projInfo = aggstate->ss.ps.ps_ProjInfo;
936 peragg = aggstate->peragg;
937 firstSlot = aggstate->ss.ss_ScanTupleSlot;
940 * We loop retrieving groups until we find one satisfying
941 * aggstate->ss.ps.qual
945 if (aggstate->agg_done)
949 * Find the next entry in the hash table
951 entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
954 /* No more entries in hashtable, so done */
955 aggstate->agg_done = TRUE;
960 * Clear the per-output-tuple context for each group
962 ResetExprContext(econtext);
965 * Store the copied first input tuple in the tuple table slot
966 * reserved for it, so that it can be used in ExecProject.
968 ExecStoreTuple(entry->shared.firstTuple,
973 pergroup = entry->pergroup;
976 * Finalize each aggregate calculation, and stash results in the
977 * per-output-tuple context.
979 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
981 AggStatePerAgg peraggstate = &peragg[aggno];
982 AggStatePerGroup pergroupstate = &pergroup[aggno];
984 Assert(!peraggstate->aggref->aggdistinct);
985 finalize_aggregate(aggstate, peraggstate, pergroupstate,
986 &aggvalues[aggno], &aggnulls[aggno]);
990 * Form a projection tuple using the aggregate results and the
991 * representative input tuple. Store it in the result tuple slot.
992 * Note we do not support aggregates returning sets ...
994 econtext->ecxt_scantuple = firstSlot;
995 resultSlot = ExecProject(projInfo, NULL);
998 * If the completed tuple does not match the qualifications, it is
999 * ignored and we loop back to try to process another group.
1000 * Otherwise, return the tuple.
1003 while (!ExecQual(aggstate->ss.ps.qual, econtext, false));
1008 /* -----------------
1011 * Creates the run-time information for the agg node produced by the
1012 * planner and initializes its outer subtree
1016 ExecInitAgg(Agg *node, EState *estate)
1019 AggStatePerAgg peragg;
1021 ExprContext *econtext;
1027 * create state structure
1029 aggstate = makeNode(AggState);
1030 aggstate->ss.ps.plan = (Plan *) node;
1031 aggstate->ss.ps.state = estate;
1033 aggstate->aggs = NIL;
1034 aggstate->numaggs = 0;
1035 aggstate->eqfunctions = NULL;
1036 aggstate->hashfunctions = NULL;
1037 aggstate->peragg = NULL;
1038 aggstate->agg_done = false;
1039 aggstate->pergroup = NULL;
1040 aggstate->grp_firstTuple = NULL;
1041 aggstate->hashtable = NULL;
1044 * Create expression contexts. We need two, one for per-input-tuple
1045 * processing and one for per-output-tuple processing. We cheat a
1046 * little by using ExecAssignExprContext() to build both.
1048 ExecAssignExprContext(estate, &aggstate->ss.ps);
1049 aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
1050 ExecAssignExprContext(estate, &aggstate->ss.ps);
1053 * We also need a long-lived memory context for holding hashtable data
1054 * structures and transition values. NOTE: the details of what is
1055 * stored in aggcontext and what is stored in the regular per-query
1056 * memory context are driven by a simple decision: we want to reset
1057 * the aggcontext in ExecReScanAgg to recover no-longer-wanted space.
1059 aggstate->aggcontext =
1060 AllocSetContextCreate(CurrentMemoryContext,
1062 ALLOCSET_DEFAULT_MINSIZE,
1063 ALLOCSET_DEFAULT_INITSIZE,
1064 ALLOCSET_DEFAULT_MAXSIZE);
1066 #define AGG_NSLOTS 2
1069 * tuple table initialization
1071 ExecInitScanTupleSlot(estate, &aggstate->ss);
1072 ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
1075 * initialize child expressions
1077 * Note: ExecInitExpr finds Aggrefs for us, and also checks that no aggs
1078 * contain other agg calls in their arguments. This would make no
1079 * sense under SQL semantics anyway (and it's forbidden by the spec).
1080 * Because that is true, we don't need to worry about evaluating the
1081 * aggs in any particular order.
1083 aggstate->ss.ps.targetlist = (List *)
1084 ExecInitExpr((Expr *) node->plan.targetlist,
1085 (PlanState *) aggstate);
1086 aggstate->ss.ps.qual = (List *)
1087 ExecInitExpr((Expr *) node->plan.qual,
1088 (PlanState *) aggstate);
1091 * initialize child nodes
1093 outerPlan = outerPlan(node);
1094 outerPlanState(aggstate) = ExecInitNode(outerPlan, estate);
1097 * initialize source tuple type.
1099 ExecAssignScanTypeFromOuterPlan(&aggstate->ss);
1102 * Initialize result tuple type and projection info.
1104 ExecAssignResultTypeFromTL(&aggstate->ss.ps);
1105 ExecAssignProjectionInfo(&aggstate->ss.ps);
1108 * get the count of aggregates in targetlist and quals
1110 numaggs = aggstate->numaggs;
1111 Assert(numaggs == length(aggstate->aggs));
1115 * This is not an error condition: we might be using the Agg node
1116 * just to do hash-based grouping. Even in the regular case,
1117 * constant-expression simplification could optimize away all of
1118 * the Aggrefs in the targetlist and qual. So keep going, but
1119 * force local copy of numaggs positive so that palloc()s below
1126 * If we are grouping, precompute fmgr lookup data for inner loop. We
1127 * need both equality and hashing functions to do it by hashing, but
1128 * only equality if not hashing.
1130 if (node->numCols > 0)
1132 if (node->aggstrategy == AGG_HASHED)
1133 execTuplesHashPrepare(ExecGetScanType(&aggstate->ss),
1136 &aggstate->eqfunctions,
1137 &aggstate->hashfunctions);
1139 aggstate->eqfunctions =
1140 execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
1146 * Set up aggregate-result storage in the output expr context, and
1147 * also allocate my private per-agg working storage
1149 econtext = aggstate->ss.ps.ps_ExprContext;
1150 econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
1151 econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
1153 peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
1154 aggstate->peragg = peragg;
1156 if (node->aggstrategy == AGG_HASHED)
1158 build_hash_table(aggstate);
1159 aggstate->table_filled = false;
1163 AggStatePerGroup pergroup;
1165 pergroup = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData) * numaggs);
1166 aggstate->pergroup = pergroup;
1170 * Perform lookups of aggregate function info, and initialize the
1171 * unchanging fields of the per-agg data. We also detect duplicate
1172 * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0").
1173 * When duplicates are detected, we only make an AggStatePerAgg struct
1174 * for the first one. The clones are simply pointed at the same
1175 * result entry by giving them duplicate aggno values.
1178 foreach(alist, aggstate->aggs)
1180 AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(alist);
1181 Aggref *aggref = (Aggref *) aggrefstate->xprstate.expr;
1182 AggStatePerAgg peraggstate;
1185 Form_pg_aggregate aggform;
1187 AclResult aclresult;
1195 /* Planner should have assigned aggregate to correct level */
1196 Assert(aggref->agglevelsup == 0);
1198 /* Look for a previous duplicate aggregate */
1199 for (i = 0; i <= aggno; i++)
1201 if (equal(aggref, peragg[i].aggref) &&
1202 !contain_volatile_functions((Node *) aggref))
1207 /* Found a match to an existing entry, so just mark it */
1208 aggrefstate->aggno = i;
1212 /* Nope, so assign a new PerAgg record */
1213 peraggstate = &peragg[++aggno];
1215 /* Mark Aggref state node with assigned index in the result array */
1216 aggrefstate->aggno = aggno;
1218 /* Fill in the peraggstate data */
1219 peraggstate->aggrefstate = aggrefstate;
1220 peraggstate->aggref = aggref;
1223 * Get actual datatype of the input. We need this because it may
1224 * be different from the agg's declared input type, when the agg
1225 * accepts ANY (eg, COUNT(*)) or ANYARRAY or ANYELEMENT.
1227 inputType = exprType((Node *) aggref->target);
1229 aggTuple = SearchSysCache(AGGFNOID,
1230 ObjectIdGetDatum(aggref->aggfnoid),
1232 if (!HeapTupleIsValid(aggTuple))
1233 elog(ERROR, "cache lookup failed for aggregate %u",
1235 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
1237 /* Check permission to call aggregate function */
1238 aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
1240 if (aclresult != ACLCHECK_OK)
1241 aclcheck_error(aclresult, ACL_KIND_PROC,
1242 get_func_name(aggref->aggfnoid));
1244 peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
1245 peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
1247 /* resolve actual type of transition state, if polymorphic */
1248 aggtranstype = aggform->aggtranstype;
1249 if (aggtranstype == ANYARRAYOID || aggtranstype == ANYELEMENTOID)
1251 /* have to fetch the agg's declared input type... */
1252 Oid agg_arg_types[FUNC_MAX_ARGS];
1255 (void) get_func_signature(aggref->aggfnoid,
1256 agg_arg_types, &agg_nargs);
1257 Assert(agg_nargs == 1);
1258 aggtranstype = resolve_generic_type(aggtranstype,
1263 /* build expression trees using actual argument & result types */
1264 build_aggregate_fnexprs(inputType,
1272 fmgr_info(transfn_oid, &peraggstate->transfn);
1273 peraggstate->transfn.fn_expr = (Node *) transfnexpr;
1275 if (OidIsValid(finalfn_oid))
1277 fmgr_info(finalfn_oid, &peraggstate->finalfn);
1278 peraggstate->finalfn.fn_expr = (Node *) finalfnexpr;
1281 get_typlenbyval(aggref->aggtype,
1282 &peraggstate->resulttypeLen,
1283 &peraggstate->resulttypeByVal);
1284 get_typlenbyval(aggtranstype,
1285 &peraggstate->transtypeLen,
1286 &peraggstate->transtypeByVal);
1289 * initval is potentially null, so don't try to access it as a
1290 * struct field. Must do it the hard way with SysCacheGetAttr.
1292 textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
1293 Anum_pg_aggregate_agginitval,
1294 &peraggstate->initValueIsNull);
1296 if (peraggstate->initValueIsNull)
1297 peraggstate->initValue = (Datum) 0;
1299 peraggstate->initValue = GetAggInitVal(textInitVal,
1303 * If the transfn is strict and the initval is NULL, make sure
1304 * input type and transtype are the same (or at least binary-
1305 * compatible), so that it's OK to use the first input value as
1306 * the initial transValue. This should have been checked at agg
1307 * definition time, but just in case...
1309 if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
1311 if (!IsBinaryCoercible(inputType, aggtranstype))
1313 (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
1314 errmsg("aggregate %u needs to have compatible input type and transition type",
1315 aggref->aggfnoid)));
1318 if (aggref->aggdistinct)
1322 /* We don't implement DISTINCT aggs in the HASHED case */
1323 Assert(node->aggstrategy != AGG_HASHED);
1325 peraggstate->inputType = inputType;
1326 get_typlenbyval(inputType,
1327 &peraggstate->inputtypeLen,
1328 &peraggstate->inputtypeByVal);
1330 eq_function = equality_oper_funcid(inputType);
1331 fmgr_info(eq_function, &(peraggstate->equalfn));
1332 peraggstate->sortOperator = ordering_oper_opid(inputType);
1333 peraggstate->sortstate = NULL;
1336 ReleaseSysCache(aggTuple);
1339 /* Update numaggs to match number of unique aggregates found */
1340 aggstate->numaggs = aggno + 1;
1346 GetAggInitVal(Datum textInitVal, Oid transtype)
1354 strInitVal = DatumGetCString(DirectFunctionCall1(textout, textInitVal));
1356 tup = SearchSysCache(TYPEOID,
1357 ObjectIdGetDatum(transtype),
1359 if (!HeapTupleIsValid(tup))
1360 elog(ERROR, "cache lookup failed for type %u", transtype);
1362 typinput = ((Form_pg_type) GETSTRUCT(tup))->typinput;
1363 typelem = ((Form_pg_type) GETSTRUCT(tup))->typelem;
1364 ReleaseSysCache(tup);
1366 initVal = OidFunctionCall3(typinput,
1367 CStringGetDatum(strInitVal),
1368 ObjectIdGetDatum(typelem),
1376 ExecCountSlotsAgg(Agg *node)
1378 return ExecCountSlotsNode(outerPlan(node)) +
1379 ExecCountSlotsNode(innerPlan(node)) +
1384 ExecEndAgg(AggState *node)
1386 PlanState *outerPlan;
1389 /* Make sure we have closed any open tuplesorts */
1390 for (aggno = 0; aggno < node->numaggs; aggno++)
1392 AggStatePerAgg peraggstate = &node->peragg[aggno];
1394 if (peraggstate->sortstate)
1395 tuplesort_end(peraggstate->sortstate);
1399 * Free both the expr contexts.
1401 ExecFreeExprContext(&node->ss.ps);
1402 node->ss.ps.ps_ExprContext = node->tmpcontext;
1403 ExecFreeExprContext(&node->ss.ps);
1405 /* clean up tuple table */
1406 ExecClearTuple(node->ss.ss_ScanTupleSlot);
1408 MemoryContextDelete(node->aggcontext);
1410 outerPlan = outerPlanState(node);
1411 ExecEndNode(outerPlan);
1415 ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
1417 ExprContext *econtext = node->ss.ps.ps_ExprContext;
1420 node->agg_done = false;
1422 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1425 * In the hashed case, if we haven't yet built the hash table then
1426 * we can just return; nothing done yet, so nothing to undo. If
1427 * subnode's chgParam is not NULL then it will be re-scanned by
1428 * ExecProcNode, else no reason to re-scan it at all.
1430 if (!node->table_filled)
1434 * If we do have the hash table and the subplan does not have any
1435 * parameter changes, then we can just rescan the existing hash
1436 * table; no need to build it again.
1438 if (((PlanState *) node)->lefttree->chgParam == NULL)
1440 ResetTupleHashIterator(node->hashtable, &node->hashiter);
1445 /* Make sure we have closed any open tuplesorts */
1446 for (aggno = 0; aggno < node->numaggs; aggno++)
1448 AggStatePerAgg peraggstate = &node->peragg[aggno];
1450 if (peraggstate->sortstate)
1451 tuplesort_end(peraggstate->sortstate);
1452 peraggstate->sortstate = NULL;
1455 /* Release first tuple of group, if we have made a copy */
1456 if (node->grp_firstTuple != NULL)
1458 heap_freetuple(node->grp_firstTuple);
1459 node->grp_firstTuple = NULL;
1462 /* Forget current agg values */
1463 MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
1464 MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
1466 /* Release all temp storage */
1467 MemoryContextReset(node->aggcontext);
1469 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1471 /* Rebuild an empty hash table */
1472 build_hash_table(node);
1473 node->table_filled = false;
1477 * if chgParam of subnode is not null then plan will be re-scanned by
1478 * first ExecProcNode.
1480 if (((PlanState *) node)->lefttree->chgParam == NULL)
1481 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
1485 * aggregate_dummy - dummy execution routine for aggregate functions
1487 * This function is listed as the implementation (prosrc field) of pg_proc
1488 * entries for aggregate functions. Its only purpose is to throw an error
1489 * if someone mistakenly executes such a function in the normal way.
1491 * Perhaps someday we could assign real meaning to the prosrc field of
1495 aggregate_dummy(PG_FUNCTION_ARGS)
1497 elog(ERROR, "aggregate function %u called as normal function",
1498 fcinfo->flinfo->fn_oid);
1499 return (Datum) 0; /* keep compiler quiet */