1 /*-------------------------------------------------------------------------
4 * Routines to handle aggregate nodes.
6 * ExecAgg evaluates each aggregate in the following steps:
8 * transvalue = initcond
9 * foreach input_value do
10 * transvalue = transfunc(transvalue, input_value)
11 * result = finalfunc(transvalue)
13 * If a finalfunc is not supplied then the result is just the ending
14 * value of transvalue.
16 * If transfunc is marked "strict" in pg_proc and initcond is NULL,
17 * then the first non-NULL input_value is assigned directly to transvalue,
18 * and transfunc isn't applied until the second non-NULL input_value.
19 * The agg's input type and transtype must be the same in this case!
21 * If transfunc is marked "strict" then NULL input_values are skipped,
22 * keeping the previous transvalue. If transfunc is not strict then it
23 * is called for every input tuple and must deal with NULL initcond
24 * or NULL input_value for itself.
26 * If finalfunc is marked "strict" then it is not called when the
27 * ending transvalue is NULL, instead a NULL result is created
28 * automatically (this is just the usual handling of strict functions,
29 * of course). A non-strict finalfunc can make its own choice of
30 * what to return for a NULL ending transvalue.
32 * We compute aggregate input expressions and run the transition functions
33 * in a temporary econtext (aggstate->tmpcontext). This is reset at
34 * least once per input tuple, so when the transvalue datatype is
35 * pass-by-reference, we have to be careful to copy it into a longer-lived
36 * memory context, and free the prior value to avoid memory leakage.
37 * We store transvalues in the memory context aggstate->aggcontext,
38 * which is also used for the hashtable structures in AGG_HASHED mode.
39 * The node's regular econtext (aggstate->csstate.cstate.cs_ExprContext)
40 * is used to run finalize functions and compute the output tuple;
41 * this context can be reset once per output tuple.
43 * Beginning in PostgreSQL 8.1, the executor's AggState node is passed as
44 * the fmgr "context" value in all transfunc and finalfunc calls. It is
45 * not really intended that the transition functions will look into the
46 * AggState node, but they can use code like
47 * if (fcinfo->context && IsA(fcinfo->context, AggState))
48 * to verify that they are being called by nodeAgg.c and not as ordinary
49 * SQL functions. The main reason a transition function might want to know
50 * that is that it can avoid palloc'ing a fixed-size pass-by-ref transition
51 * value on every call: it can instead just scribble on and return its left
52 * input. Ordinarily it is completely forbidden for functions to modify
53 * pass-by-ref inputs, but in the aggregate case we know the left input is
54 * either the initial transition value or a previous function result, and
55 * in either case its value need not be preserved. See int8inc() for an
56 * example. Notice that advance_transition_function() is coded to avoid a
57 * data copy step when the previous transition value pointer is returned.
60 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
61 * Portions Copyright (c) 1994, Regents of the University of California
64 * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.139 2006/04/04 19:35:34 tgl Exp $
66 *-------------------------------------------------------------------------
71 #include "access/heapam.h"
72 #include "catalog/pg_aggregate.h"
73 #include "catalog/pg_operator.h"
74 #include "catalog/pg_proc.h"
75 #include "executor/executor.h"
76 #include "executor/nodeAgg.h"
77 #include "miscadmin.h"
78 #include "optimizer/clauses.h"
79 #include "parser/parse_agg.h"
80 #include "parser/parse_coerce.h"
81 #include "parser/parse_expr.h"
82 #include "parser/parse_oper.h"
83 #include "utils/acl.h"
84 #include "utils/builtins.h"
85 #include "utils/lsyscache.h"
86 #include "utils/memutils.h"
87 #include "utils/syscache.h"
88 #include "utils/tuplesort.h"
89 #include "utils/datum.h"
93 * AggStatePerAggData - per-aggregate working state for the Agg scan
95 typedef struct AggStatePerAggData
98 * These values are set up during ExecInitAgg() and do not change
102 /* Links to Aggref expr and state nodes this working state is for */
103 AggrefExprState *aggrefstate;
106 /* Oids of transfer functions */
108 Oid finalfn_oid; /* may be InvalidOid */
111 * fmgr lookup data for transfer functions --- only valid when
112 * corresponding oid is not InvalidOid. Note in particular that fn_strict
113 * flags are kept here.
119 * Type of input data and Oid of sort operator to use for it; only
120 * set/used when aggregate has DISTINCT flag. (These are not used
121 * directly by nodeAgg, but must be passed to the Tuplesort object.)
127 * fmgr lookup data for input type's equality operator --- only set/used
128 * when aggregate has DISTINCT flag.
133 * initial value from pg_aggregate entry
136 bool initValueIsNull;
139 * We need the len and byval info for the agg's input, result, and
140 * transition data types in order to know how to copy/delete values.
150 * These values are working state that is initialized at the start of an
151 * input tuple group and updated for each input tuple.
153 * For a simple (non DISTINCT) aggregate, we just feed the input values
154 * straight to the transition function. If it's DISTINCT, we pass the
155 * input values into a Tuplesort object; then at completion of the input
156 * tuple group, we scan the sorted values, eliminate duplicates, and run
157 * the transition function on the rest.
160 Tuplesortstate *sortstate; /* sort object, if a DISTINCT agg */
161 } AggStatePerAggData;
164 * AggStatePerGroupData - per-aggregate-per-group working state
166 * These values are working state that is initialized at the start of
167 * an input tuple group and updated for each input tuple.
169 * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
170 * structs (pointed to by aggstate->pergroup); we re-use the array for
171 * each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the
172 * hash table contains an array of these structs for each tuple group.
174 * Logically, the sortstate field belongs in this struct, but we do not
175 * keep it here for space reasons: we don't support DISTINCT aggregates
176 * in AGG_HASHED mode, so there's no reason to use up a pointer field
177 * in every entry of the hashtable.
179 typedef struct AggStatePerGroupData
181 Datum transValue; /* current transition value */
182 bool transValueIsNull;
184 bool noTransValue; /* true if transValue not set yet */
187 * Note: noTransValue initially has the same value as transValueIsNull,
188 * and if true both are cleared to false at the same time. They are not
189 * the same though: if transfn later returns a NULL, we want to keep that
190 * NULL and not auto-replace it with a later input value. Only the first
191 * non-NULL input will be auto-substituted.
193 } AggStatePerGroupData;
196 * To implement hashed aggregation, we need a hashtable that stores a
197 * representative tuple and an array of AggStatePerGroup structs for each
198 * distinct set of GROUP BY column values. We compute the hash key from
199 * the GROUP BY columns.
201 typedef struct AggHashEntryData *AggHashEntry;
203 typedef struct AggHashEntryData
205 TupleHashEntryData shared; /* common header for hash table entries */
206 /* per-aggregate transition status array - must be last! */
207 AggStatePerGroupData pergroup[1]; /* VARIABLE LENGTH ARRAY */
208 } AggHashEntryData; /* VARIABLE LENGTH STRUCT */
211 static void initialize_aggregates(AggState *aggstate,
212 AggStatePerAgg peragg,
213 AggStatePerGroup pergroup);
214 static void advance_transition_function(AggState *aggstate,
215 AggStatePerAgg peraggstate,
216 AggStatePerGroup pergroupstate,
217 Datum newVal, bool isNull);
218 static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
219 static void process_sorted_aggregate(AggState *aggstate,
220 AggStatePerAgg peraggstate,
221 AggStatePerGroup pergroupstate);
222 static void finalize_aggregate(AggState *aggstate,
223 AggStatePerAgg peraggstate,
224 AggStatePerGroup pergroupstate,
225 Datum *resultVal, bool *resultIsNull);
226 static void build_hash_table(AggState *aggstate);
227 static AggHashEntry lookup_hash_entry(AggState *aggstate,
228 TupleTableSlot *slot);
229 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
230 static void agg_fill_hash_table(AggState *aggstate);
231 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
232 static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
236 * Initialize all aggregates for a new group of input values.
238 * When called, CurrentMemoryContext should be the per-query context.
241 initialize_aggregates(AggState *aggstate,
242 AggStatePerAgg peragg,
243 AggStatePerGroup pergroup)
247 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
249 AggStatePerAgg peraggstate = &peragg[aggno];
250 AggStatePerGroup pergroupstate = &pergroup[aggno];
251 Aggref *aggref = peraggstate->aggref;
254 * Start a fresh sort operation for each DISTINCT aggregate.
256 if (aggref->aggdistinct)
259 * In case of rescan, maybe there could be an uncompleted sort
260 * operation? Clean it up if so.
262 if (peraggstate->sortstate)
263 tuplesort_end(peraggstate->sortstate);
265 peraggstate->sortstate =
266 tuplesort_begin_datum(peraggstate->inputType,
267 peraggstate->sortOperator,
272 * If we are reinitializing after a group boundary, we have to free
273 * any prior transValue to avoid memory leakage. We must check not
274 * only the isnull flag but whether the pointer is NULL; since
275 * pergroupstate is initialized with palloc0, the initial condition
276 * has isnull = 0 and null pointer.
278 if (!peraggstate->transtypeByVal &&
279 !pergroupstate->transValueIsNull &&
280 DatumGetPointer(pergroupstate->transValue) != NULL)
281 pfree(DatumGetPointer(pergroupstate->transValue));
284 * (Re)set transValue to the initial value.
286 * Note that when the initial value is pass-by-ref, we must copy it
287 * (into the aggcontext) since we will pfree the transValue later.
289 if (peraggstate->initValueIsNull)
290 pergroupstate->transValue = peraggstate->initValue;
293 MemoryContext oldContext;
295 oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
296 pergroupstate->transValue = datumCopy(peraggstate->initValue,
297 peraggstate->transtypeByVal,
298 peraggstate->transtypeLen);
299 MemoryContextSwitchTo(oldContext);
301 pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
304 * If the initial value for the transition state doesn't exist in the
305 * pg_aggregate table then we will let the first non-NULL value
306 * returned from the outer procNode become the initial value. (This is
307 * useful for aggregates like max() and min().) The noTransValue flag
308 * signals that we still need to do this.
310 pergroupstate->noTransValue = peraggstate->initValueIsNull;
315 * Given a new input value, advance the transition function of an aggregate.
317 * It doesn't matter which memory context this is called in.
320 advance_transition_function(AggState *aggstate,
321 AggStatePerAgg peraggstate,
322 AggStatePerGroup pergroupstate,
323 Datum newVal, bool isNull)
325 FunctionCallInfoData fcinfo;
326 MemoryContext oldContext;
328 if (peraggstate->transfn.fn_strict)
331 * For a strict transfn, nothing happens at a NULL input tuple; we
332 * just keep the prior transValue.
336 if (pergroupstate->noTransValue)
339 * transValue has not been initialized. This is the first non-NULL
340 * input value. We use it as the initial value for transValue. (We
341 * already checked that the agg's input type is binary-compatible
342 * with its transtype, so straight copy here is OK.)
344 * We must copy the datum into aggcontext if it is pass-by-ref. We
345 * do not need to pfree the old transValue, since it's NULL.
347 oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
348 pergroupstate->transValue = datumCopy(newVal,
349 peraggstate->transtypeByVal,
350 peraggstate->transtypeLen);
351 pergroupstate->transValueIsNull = false;
352 pergroupstate->noTransValue = false;
353 MemoryContextSwitchTo(oldContext);
356 if (pergroupstate->transValueIsNull)
359 * Don't call a strict function with NULL inputs. Note it is
360 * possible to get here despite the above tests, if the transfn is
361 * strict *and* returned a NULL on a prior cycle. If that happens
362 * we will propagate the NULL all the way to the end.
368 /* We run the transition functions in per-input-tuple memory context */
369 oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
372 * OK to call the transition function
374 InitFunctionCallInfoData(fcinfo, &(peraggstate->transfn), 2,
375 (void *) aggstate, NULL);
376 fcinfo.arg[0] = pergroupstate->transValue;
377 fcinfo.argnull[0] = pergroupstate->transValueIsNull;
378 fcinfo.arg[1] = newVal;
379 fcinfo.argnull[1] = isNull;
381 newVal = FunctionCallInvoke(&fcinfo);
384 * If pass-by-ref datatype, must copy the new value into aggcontext and
385 * pfree the prior transValue. But if transfn returned a pointer to its
386 * first input, we don't need to do anything.
388 if (!peraggstate->transtypeByVal &&
389 DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
393 MemoryContextSwitchTo(aggstate->aggcontext);
394 newVal = datumCopy(newVal,
395 peraggstate->transtypeByVal,
396 peraggstate->transtypeLen);
398 if (!pergroupstate->transValueIsNull)
399 pfree(DatumGetPointer(pergroupstate->transValue));
402 pergroupstate->transValue = newVal;
403 pergroupstate->transValueIsNull = fcinfo.isnull;
405 MemoryContextSwitchTo(oldContext);
409 * Advance all the aggregates for one input tuple. The input tuple
410 * has been stored in tmpcontext->ecxt_scantuple, so that it is accessible
411 * to ExecEvalExpr. pergroup is the array of per-group structs to use
412 * (this might be in a hashtable entry).
414 * When called, CurrentMemoryContext should be the per-query context.
417 advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
419 ExprContext *econtext = aggstate->tmpcontext;
422 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
424 AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
425 AggStatePerGroup pergroupstate = &pergroup[aggno];
426 AggrefExprState *aggrefstate = peraggstate->aggrefstate;
427 Aggref *aggref = peraggstate->aggref;
431 newVal = ExecEvalExprSwitchContext(aggrefstate->target, econtext,
434 if (aggref->aggdistinct)
436 /* in DISTINCT mode, we may ignore nulls */
439 tuplesort_putdatum(peraggstate->sortstate, newVal, isNull);
443 advance_transition_function(aggstate, peraggstate, pergroupstate,
450 * Run the transition function for a DISTINCT aggregate. This is called
451 * after we have completed entering all the input values into the sort
452 * object. We complete the sort, read out the values in sorted order,
453 * and run the transition function on each non-duplicate value.
455 * When called, CurrentMemoryContext should be the per-query context.
458 process_sorted_aggregate(AggState *aggstate,
459 AggStatePerAgg peraggstate,
460 AggStatePerGroup pergroupstate)
462 Datum oldVal = (Datum) 0;
463 bool haveOldVal = false;
464 MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
465 MemoryContext oldContext;
469 tuplesort_performsort(peraggstate->sortstate);
472 * Note: if input type is pass-by-ref, the datums returned by the sort are
473 * freshly palloc'd in the per-query context, so we must be careful to
474 * pfree them when they are no longer needed.
477 while (tuplesort_getdatum(peraggstate->sortstate, true,
481 * DISTINCT always suppresses nulls, per SQL spec, regardless of the
482 * transition function's strictness.
488 * Clear and select the working context for evaluation of the equality
489 * function and transition function.
491 MemoryContextReset(workcontext);
492 oldContext = MemoryContextSwitchTo(workcontext);
495 DatumGetBool(FunctionCall2(&peraggstate->equalfn,
498 /* equal to prior, so forget this one */
499 if (!peraggstate->inputtypeByVal)
500 pfree(DatumGetPointer(newVal));
504 advance_transition_function(aggstate, peraggstate, pergroupstate,
506 /* forget the old value, if any */
507 if (haveOldVal && !peraggstate->inputtypeByVal)
508 pfree(DatumGetPointer(oldVal));
509 /* and remember the new one for subsequent equality checks */
514 MemoryContextSwitchTo(oldContext);
517 if (haveOldVal && !peraggstate->inputtypeByVal)
518 pfree(DatumGetPointer(oldVal));
520 tuplesort_end(peraggstate->sortstate);
521 peraggstate->sortstate = NULL;
525 * Compute the final value of one aggregate.
527 * The finalfunction will be run, and the result delivered, in the
528 * output-tuple context; caller's CurrentMemoryContext does not matter.
531 finalize_aggregate(AggState *aggstate,
532 AggStatePerAgg peraggstate,
533 AggStatePerGroup pergroupstate,
534 Datum *resultVal, bool *resultIsNull)
536 MemoryContext oldContext;
538 oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
541 * Apply the agg's finalfn if one is provided, else return transValue.
543 if (OidIsValid(peraggstate->finalfn_oid))
545 FunctionCallInfoData fcinfo;
547 InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), 1,
548 (void *) aggstate, NULL);
549 fcinfo.arg[0] = pergroupstate->transValue;
550 fcinfo.argnull[0] = pergroupstate->transValueIsNull;
551 if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull)
553 /* don't call a strict function with NULL inputs */
554 *resultVal = (Datum) 0;
555 *resultIsNull = true;
559 *resultVal = FunctionCallInvoke(&fcinfo);
560 *resultIsNull = fcinfo.isnull;
565 *resultVal = pergroupstate->transValue;
566 *resultIsNull = pergroupstate->transValueIsNull;
570 * If result is pass-by-ref, make sure it is in the right context.
572 if (!peraggstate->resulttypeByVal && !*resultIsNull &&
573 !MemoryContextContains(CurrentMemoryContext,
574 DatumGetPointer(*resultVal)))
575 *resultVal = datumCopy(*resultVal,
576 peraggstate->resulttypeByVal,
577 peraggstate->resulttypeLen);
579 MemoryContextSwitchTo(oldContext);
583 * Initialize the hash table to empty.
585 * The hash table always lives in the aggcontext memory context.
588 build_hash_table(AggState *aggstate)
590 Agg *node = (Agg *) aggstate->ss.ps.plan;
591 MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
594 Assert(node->aggstrategy == AGG_HASHED);
595 Assert(node->numGroups > 0);
597 entrysize = sizeof(AggHashEntryData) +
598 (aggstate->numaggs - 1) *sizeof(AggStatePerGroupData);
600 aggstate->hashtable = BuildTupleHashTable(node->numCols,
602 aggstate->eqfunctions,
603 aggstate->hashfunctions,
606 aggstate->aggcontext,
611 * Estimate per-hash-table-entry overhead for the planner.
613 * Note that the estimate does not include space for pass-by-reference
614 * transition data values.
617 hash_agg_entry_size(int numAggs)
621 /* This must match build_hash_table */
622 entrysize = sizeof(AggHashEntryData) +
623 (numAggs - 1) *sizeof(AggStatePerGroupData);
624 /* Account for hashtable overhead */
625 entrysize += 2 * sizeof(void *);
626 entrysize = MAXALIGN(entrysize);
631 * Find or create a hashtable entry for the tuple group containing the
634 * When called, CurrentMemoryContext should be the per-query context.
637 lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
642 entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
648 /* initialize aggregates for new tuple group */
649 initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
658 * ExecAgg receives tuples from its outer subplan and aggregates over
659 * the appropriate attribute for each aggregate function use (Aggref
660 * node) appearing in the targetlist or qual of the node. The number
661 * of tuples to aggregate over depends on whether grouped or plain
662 * aggregation is selected. In grouped aggregation, we produce a result
663 * row for each group; in plain aggregation there's a single result row
664 * for the whole query. In either case, the value of each aggregate is
665 * stored in the expression context to be used when ExecProject evaluates
669 ExecAgg(AggState *node)
674 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
676 if (!node->table_filled)
677 agg_fill_hash_table(node);
678 return agg_retrieve_hash_table(node);
681 return agg_retrieve_direct(node);
685 * ExecAgg for non-hashed case
687 static TupleTableSlot *
688 agg_retrieve_direct(AggState *aggstate)
690 Agg *node = (Agg *) aggstate->ss.ps.plan;
691 PlanState *outerPlan;
692 ExprContext *econtext;
693 ExprContext *tmpcontext;
694 ProjectionInfo *projInfo;
697 AggStatePerAgg peragg;
698 AggStatePerGroup pergroup;
699 TupleTableSlot *outerslot;
700 TupleTableSlot *firstSlot;
704 * get state info from node
706 outerPlan = outerPlanState(aggstate);
707 /* econtext is the per-output-tuple expression context */
708 econtext = aggstate->ss.ps.ps_ExprContext;
709 aggvalues = econtext->ecxt_aggvalues;
710 aggnulls = econtext->ecxt_aggnulls;
711 /* tmpcontext is the per-input-tuple expression context */
712 tmpcontext = aggstate->tmpcontext;
713 projInfo = aggstate->ss.ps.ps_ProjInfo;
714 peragg = aggstate->peragg;
715 pergroup = aggstate->pergroup;
716 firstSlot = aggstate->ss.ss_ScanTupleSlot;
719 * We loop retrieving groups until we find one matching
720 * aggstate->ss.ps.qual
722 while (!aggstate->agg_done)
725 * If we don't already have the first tuple of the new group, fetch it
726 * from the outer plan.
728 if (aggstate->grp_firstTuple == NULL)
730 outerslot = ExecProcNode(outerPlan);
731 if (!TupIsNull(outerslot))
734 * Make a copy of the first input tuple; we will use this for
735 * comparisons (in group mode) and for projection.
737 aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
741 /* outer plan produced no tuples at all */
742 aggstate->agg_done = true;
743 /* If we are grouping, we should produce no tuples too */
744 if (node->aggstrategy != AGG_PLAIN)
750 * Clear the per-output-tuple context for each group
752 ResetExprContext(econtext);
755 * Initialize working state for a new input tuple group
757 initialize_aggregates(aggstate, peragg, pergroup);
759 if (aggstate->grp_firstTuple != NULL)
762 * Store the copied first input tuple in the tuple table slot
763 * reserved for it. The tuple will be deleted when it is cleared
766 ExecStoreTuple(aggstate->grp_firstTuple,
770 aggstate->grp_firstTuple = NULL; /* don't keep two pointers */
772 /* set up for first advance_aggregates call */
773 tmpcontext->ecxt_scantuple = firstSlot;
776 * Process each outer-plan tuple, and then fetch the next one,
777 * until we exhaust the outer plan or cross a group boundary.
781 advance_aggregates(aggstate, pergroup);
783 /* Reset per-input-tuple context after each tuple */
784 ResetExprContext(tmpcontext);
786 outerslot = ExecProcNode(outerPlan);
787 if (TupIsNull(outerslot))
789 /* no more outer-plan tuples available */
790 aggstate->agg_done = true;
793 /* set up for next advance_aggregates call */
794 tmpcontext->ecxt_scantuple = outerslot;
797 * If we are grouping, check whether we've crossed a group
800 if (node->aggstrategy == AGG_SORTED)
802 if (!execTuplesMatch(firstSlot,
804 node->numCols, node->grpColIdx,
805 aggstate->eqfunctions,
806 tmpcontext->ecxt_per_tuple_memory))
809 * Save the first input tuple of the next group.
811 aggstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
819 * Done scanning input tuple group. Finalize each aggregate
820 * calculation, and stash results in the per-output-tuple context.
822 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
824 AggStatePerAgg peraggstate = &peragg[aggno];
825 AggStatePerGroup pergroupstate = &pergroup[aggno];
827 if (peraggstate->aggref->aggdistinct)
828 process_sorted_aggregate(aggstate, peraggstate, pergroupstate);
830 finalize_aggregate(aggstate, peraggstate, pergroupstate,
831 &aggvalues[aggno], &aggnulls[aggno]);
835 * If we have no first tuple (ie, the outerPlan didn't return
836 * anything), create a dummy all-nulls input tuple for use by
837 * ExecQual/ExecProject. 99.44% of the time this is a waste of cycles,
838 * because ordinarily the projected output tuple's targetlist cannot
839 * contain any direct (non-aggregated) references to input columns, so
840 * the dummy tuple will not be referenced. However there are special
841 * cases where this isn't so --- in particular an UPDATE involving an
842 * aggregate will have a targetlist reference to ctid. We need to
843 * return a null for ctid in that situation, not coredump.
845 * The values returned for the aggregates will be the initial values
846 * of the transition functions.
848 if (TupIsNull(firstSlot))
850 /* Should only happen in non-grouped mode */
851 Assert(node->aggstrategy == AGG_PLAIN);
852 Assert(aggstate->agg_done);
854 ExecStoreAllNullTuple(firstSlot);
858 * Use the representative input tuple for any references to
859 * non-aggregated input columns in the qual and tlist.
861 econtext->ecxt_scantuple = firstSlot;
864 * Check the qual (HAVING clause); if the group does not match, ignore
865 * it and loop back to try to process another group.
867 if (ExecQual(aggstate->ss.ps.qual, econtext, false))
870 * Form and return a projection tuple using the aggregate results
871 * and the representative input tuple. Note we do not support
872 * aggregates returning sets ...
874 return ExecProject(projInfo, NULL);
883 * ExecAgg for hashed case: phase 1, read input and build hash table
886 agg_fill_hash_table(AggState *aggstate)
888 PlanState *outerPlan;
889 ExprContext *tmpcontext;
891 TupleTableSlot *outerslot;
894 * get state info from node
896 outerPlan = outerPlanState(aggstate);
897 /* tmpcontext is the per-input-tuple expression context */
898 tmpcontext = aggstate->tmpcontext;
901 * Process each outer-plan tuple, and then fetch the next one, until we
902 * exhaust the outer plan.
906 outerslot = ExecProcNode(outerPlan);
907 if (TupIsNull(outerslot))
909 /* set up for advance_aggregates call */
910 tmpcontext->ecxt_scantuple = outerslot;
912 /* Find or build hashtable entry for this tuple's group */
913 entry = lookup_hash_entry(aggstate, outerslot);
915 /* Advance the aggregates */
916 advance_aggregates(aggstate, entry->pergroup);
918 /* Reset per-input-tuple context after each tuple */
919 ResetExprContext(tmpcontext);
922 aggstate->table_filled = true;
923 /* Initialize to walk the hash table */
924 ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
928 * ExecAgg for hashed case: phase 2, retrieving groups from hash table
930 static TupleTableSlot *
931 agg_retrieve_hash_table(AggState *aggstate)
933 ExprContext *econtext;
934 ProjectionInfo *projInfo;
937 AggStatePerAgg peragg;
938 AggStatePerGroup pergroup;
940 TupleTableSlot *firstSlot;
944 * get state info from node
946 /* econtext is the per-output-tuple expression context */
947 econtext = aggstate->ss.ps.ps_ExprContext;
948 aggvalues = econtext->ecxt_aggvalues;
949 aggnulls = econtext->ecxt_aggnulls;
950 projInfo = aggstate->ss.ps.ps_ProjInfo;
951 peragg = aggstate->peragg;
952 firstSlot = aggstate->ss.ss_ScanTupleSlot;
955 * We loop retrieving groups until we find one satisfying
956 * aggstate->ss.ps.qual
958 while (!aggstate->agg_done)
961 * Find the next entry in the hash table
963 entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
966 /* No more entries in hashtable, so done */
967 aggstate->agg_done = TRUE;
972 * Clear the per-output-tuple context for each group
974 ResetExprContext(econtext);
977 * Store the copied first input tuple in the tuple table slot reserved
978 * for it, so that it can be used in ExecProject.
980 ExecStoreTuple(entry->shared.firstTuple,
985 pergroup = entry->pergroup;
988 * Finalize each aggregate calculation, and stash results in the
989 * per-output-tuple context.
991 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
993 AggStatePerAgg peraggstate = &peragg[aggno];
994 AggStatePerGroup pergroupstate = &pergroup[aggno];
996 Assert(!peraggstate->aggref->aggdistinct);
997 finalize_aggregate(aggstate, peraggstate, pergroupstate,
998 &aggvalues[aggno], &aggnulls[aggno]);
1002 * Use the representative input tuple for any references to
1003 * non-aggregated input columns in the qual and tlist.
1005 econtext->ecxt_scantuple = firstSlot;
1008 * Check the qual (HAVING clause); if the group does not match, ignore
1009 * it and loop back to try to process another group.
1011 if (ExecQual(aggstate->ss.ps.qual, econtext, false))
1014 * Form and return a projection tuple using the aggregate results
1015 * and the representative input tuple. Note we do not support
1016 * aggregates returning sets ...
1018 return ExecProject(projInfo, NULL);
1022 /* No more groups */
1026 /* -----------------
1029 * Creates the run-time information for the agg node produced by the
1030 * planner and initializes its outer subtree
1034 ExecInitAgg(Agg *node, EState *estate, int eflags)
1037 AggStatePerAgg peragg;
1039 ExprContext *econtext;
1044 /* check for unsupported flags */
1045 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
1048 * create state structure
1050 aggstate = makeNode(AggState);
1051 aggstate->ss.ps.plan = (Plan *) node;
1052 aggstate->ss.ps.state = estate;
1054 aggstate->aggs = NIL;
1055 aggstate->numaggs = 0;
1056 aggstate->eqfunctions = NULL;
1057 aggstate->hashfunctions = NULL;
1058 aggstate->peragg = NULL;
1059 aggstate->agg_done = false;
1060 aggstate->pergroup = NULL;
1061 aggstate->grp_firstTuple = NULL;
1062 aggstate->hashtable = NULL;
1065 * Create expression contexts. We need two, one for per-input-tuple
1066 * processing and one for per-output-tuple processing. We cheat a little
1067 * by using ExecAssignExprContext() to build both.
1069 ExecAssignExprContext(estate, &aggstate->ss.ps);
1070 aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
1071 ExecAssignExprContext(estate, &aggstate->ss.ps);
1074 * We also need a long-lived memory context for holding hashtable data
1075 * structures and transition values. NOTE: the details of what is stored
1076 * in aggcontext and what is stored in the regular per-query memory
1077 * context are driven by a simple decision: we want to reset the
1078 * aggcontext in ExecReScanAgg to recover no-longer-wanted space.
1080 aggstate->aggcontext =
1081 AllocSetContextCreate(CurrentMemoryContext,
1083 ALLOCSET_DEFAULT_MINSIZE,
1084 ALLOCSET_DEFAULT_INITSIZE,
1085 ALLOCSET_DEFAULT_MAXSIZE);
1087 #define AGG_NSLOTS 2
1090 * tuple table initialization
1092 ExecInitScanTupleSlot(estate, &aggstate->ss);
1093 ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
1096 * initialize child expressions
1098 * Note: ExecInitExpr finds Aggrefs for us, and also checks that no aggs
1099 * contain other agg calls in their arguments. This would make no sense
1100 * under SQL semantics anyway (and it's forbidden by the spec). Because
1101 * that is true, we don't need to worry about evaluating the aggs in any
1104 aggstate->ss.ps.targetlist = (List *)
1105 ExecInitExpr((Expr *) node->plan.targetlist,
1106 (PlanState *) aggstate);
1107 aggstate->ss.ps.qual = (List *)
1108 ExecInitExpr((Expr *) node->plan.qual,
1109 (PlanState *) aggstate);
1112 * initialize child nodes
1114 * If we are doing a hashed aggregation then the child plan does not
1115 * need to handle REWIND efficiently; see ExecReScanAgg.
1117 if (node->aggstrategy == AGG_HASHED)
1118 eflags &= ~EXEC_FLAG_REWIND;
1119 outerPlan = outerPlan(node);
1120 outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags);
1123 * initialize source tuple type.
1125 ExecAssignScanTypeFromOuterPlan(&aggstate->ss);
1128 * Initialize result tuple type and projection info.
1130 ExecAssignResultTypeFromTL(&aggstate->ss.ps);
1131 ExecAssignProjectionInfo(&aggstate->ss.ps);
1134 * get the count of aggregates in targetlist and quals
1136 numaggs = aggstate->numaggs;
1137 Assert(numaggs == list_length(aggstate->aggs));
1141 * This is not an error condition: we might be using the Agg node just
1142 * to do hash-based grouping. Even in the regular case,
1143 * constant-expression simplification could optimize away all of the
1144 * Aggrefs in the targetlist and qual. So keep going, but force local
1145 * copy of numaggs positive so that palloc()s below don't choke.
1151 * If we are grouping, precompute fmgr lookup data for inner loop. We need
1152 * both equality and hashing functions to do it by hashing, but only
1153 * equality if not hashing.
1155 if (node->numCols > 0)
1157 if (node->aggstrategy == AGG_HASHED)
1158 execTuplesHashPrepare(ExecGetScanType(&aggstate->ss),
1161 &aggstate->eqfunctions,
1162 &aggstate->hashfunctions);
1164 aggstate->eqfunctions =
1165 execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
1171 * Set up aggregate-result storage in the output expr context, and also
1172 * allocate my private per-agg working storage
1174 econtext = aggstate->ss.ps.ps_ExprContext;
1175 econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
1176 econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
1178 peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
1179 aggstate->peragg = peragg;
1181 if (node->aggstrategy == AGG_HASHED)
1183 build_hash_table(aggstate);
1184 aggstate->table_filled = false;
1188 AggStatePerGroup pergroup;
1190 pergroup = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData) * numaggs);
1191 aggstate->pergroup = pergroup;
1195 * Perform lookups of aggregate function info, and initialize the
1196 * unchanging fields of the per-agg data. We also detect duplicate
1197 * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0"). When
1198 * duplicates are detected, we only make an AggStatePerAgg struct for the
1199 * first one. The clones are simply pointed at the same result entry by
1200 * giving them duplicate aggno values.
1203 foreach(l, aggstate->aggs)
1205 AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
1206 Aggref *aggref = (Aggref *) aggrefstate->xprstate.expr;
1207 AggStatePerAgg peraggstate;
1210 Form_pg_aggregate aggform;
1212 AclResult aclresult;
1220 /* Planner should have assigned aggregate to correct level */
1221 Assert(aggref->agglevelsup == 0);
1223 /* Look for a previous duplicate aggregate */
1224 for (i = 0; i <= aggno; i++)
1226 if (equal(aggref, peragg[i].aggref) &&
1227 !contain_volatile_functions((Node *) aggref))
1232 /* Found a match to an existing entry, so just mark it */
1233 aggrefstate->aggno = i;
1237 /* Nope, so assign a new PerAgg record */
1238 peraggstate = &peragg[++aggno];
1240 /* Mark Aggref state node with assigned index in the result array */
1241 aggrefstate->aggno = aggno;
1243 /* Fill in the peraggstate data */
1244 peraggstate->aggrefstate = aggrefstate;
1245 peraggstate->aggref = aggref;
1248 * Get actual datatype of the input. We need this because it may be
1249 * different from the agg's declared input type, when the agg accepts
1250 * ANY (eg, COUNT(*)) or ANYARRAY or ANYELEMENT.
1252 inputType = exprType((Node *) aggref->target);
1254 aggTuple = SearchSysCache(AGGFNOID,
1255 ObjectIdGetDatum(aggref->aggfnoid),
1257 if (!HeapTupleIsValid(aggTuple))
1258 elog(ERROR, "cache lookup failed for aggregate %u",
1260 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
1262 /* Check permission to call aggregate function */
1263 aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
1265 if (aclresult != ACLCHECK_OK)
1266 aclcheck_error(aclresult, ACL_KIND_PROC,
1267 get_func_name(aggref->aggfnoid));
1269 peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
1270 peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
1272 /* Check that aggregate owner has permission to call component fns */
1274 HeapTuple procTuple;
1277 procTuple = SearchSysCache(PROCOID,
1278 ObjectIdGetDatum(aggref->aggfnoid),
1280 if (!HeapTupleIsValid(procTuple))
1281 elog(ERROR, "cache lookup failed for function %u",
1283 aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
1284 ReleaseSysCache(procTuple);
1286 aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
1288 if (aclresult != ACLCHECK_OK)
1289 aclcheck_error(aclresult, ACL_KIND_PROC,
1290 get_func_name(transfn_oid));
1291 if (OidIsValid(finalfn_oid))
1293 aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
1295 if (aclresult != ACLCHECK_OK)
1296 aclcheck_error(aclresult, ACL_KIND_PROC,
1297 get_func_name(finalfn_oid));
1301 /* resolve actual type of transition state, if polymorphic */
1302 aggtranstype = aggform->aggtranstype;
1303 if (aggtranstype == ANYARRAYOID || aggtranstype == ANYELEMENTOID)
1305 /* have to fetch the agg's declared input type... */
1309 (void) get_func_signature(aggref->aggfnoid,
1310 &agg_arg_types, &agg_nargs);
1311 Assert(agg_nargs == 1);
1312 aggtranstype = resolve_generic_type(aggtranstype,
1315 pfree(agg_arg_types);
1318 /* build expression trees using actual argument & result types */
1319 build_aggregate_fnexprs(inputType,
1327 fmgr_info(transfn_oid, &peraggstate->transfn);
1328 peraggstate->transfn.fn_expr = (Node *) transfnexpr;
1330 if (OidIsValid(finalfn_oid))
1332 fmgr_info(finalfn_oid, &peraggstate->finalfn);
1333 peraggstate->finalfn.fn_expr = (Node *) finalfnexpr;
1336 get_typlenbyval(aggref->aggtype,
1337 &peraggstate->resulttypeLen,
1338 &peraggstate->resulttypeByVal);
1339 get_typlenbyval(aggtranstype,
1340 &peraggstate->transtypeLen,
1341 &peraggstate->transtypeByVal);
1344 * initval is potentially null, so don't try to access it as a struct
1345 * field. Must do it the hard way with SysCacheGetAttr.
1347 textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
1348 Anum_pg_aggregate_agginitval,
1349 &peraggstate->initValueIsNull);
1351 if (peraggstate->initValueIsNull)
1352 peraggstate->initValue = (Datum) 0;
1354 peraggstate->initValue = GetAggInitVal(textInitVal,
1358 * If the transfn is strict and the initval is NULL, make sure input
1359 * type and transtype are the same (or at least binary- compatible),
1360 * so that it's OK to use the first input value as the initial
1361 * transValue. This should have been checked at agg definition time,
1362 * but just in case...
1364 if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
1366 if (!IsBinaryCoercible(inputType, aggtranstype))
1368 (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
1369 errmsg("aggregate %u needs to have compatible input type and transition type",
1370 aggref->aggfnoid)));
1373 if (aggref->aggdistinct)
1377 /* We don't implement DISTINCT aggs in the HASHED case */
1378 Assert(node->aggstrategy != AGG_HASHED);
1380 peraggstate->inputType = inputType;
1381 get_typlenbyval(inputType,
1382 &peraggstate->inputtypeLen,
1383 &peraggstate->inputtypeByVal);
1385 eq_function = equality_oper_funcid(inputType);
1386 fmgr_info(eq_function, &(peraggstate->equalfn));
1387 peraggstate->sortOperator = ordering_oper_opid(inputType);
1388 peraggstate->sortstate = NULL;
1391 ReleaseSysCache(aggTuple);
1394 /* Update numaggs to match number of unique aggregates found */
1395 aggstate->numaggs = aggno + 1;
1401 GetAggInitVal(Datum textInitVal, Oid transtype)
1408 getTypeInputInfo(transtype, &typinput, &typioparam);
1409 strInitVal = DatumGetCString(DirectFunctionCall1(textout, textInitVal));
1410 initVal = OidInputFunctionCall(typinput, strInitVal,
1417 ExecCountSlotsAgg(Agg *node)
1419 return ExecCountSlotsNode(outerPlan(node)) +
1420 ExecCountSlotsNode(innerPlan(node)) +
1425 ExecEndAgg(AggState *node)
1427 PlanState *outerPlan;
1430 /* Make sure we have closed any open tuplesorts */
1431 for (aggno = 0; aggno < node->numaggs; aggno++)
1433 AggStatePerAgg peraggstate = &node->peragg[aggno];
1435 if (peraggstate->sortstate)
1436 tuplesort_end(peraggstate->sortstate);
1440 * Free both the expr contexts.
1442 ExecFreeExprContext(&node->ss.ps);
1443 node->ss.ps.ps_ExprContext = node->tmpcontext;
1444 ExecFreeExprContext(&node->ss.ps);
1446 /* clean up tuple table */
1447 ExecClearTuple(node->ss.ss_ScanTupleSlot);
1449 MemoryContextDelete(node->aggcontext);
1451 outerPlan = outerPlanState(node);
1452 ExecEndNode(outerPlan);
1456 ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
1458 ExprContext *econtext = node->ss.ps.ps_ExprContext;
1461 node->agg_done = false;
1463 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1466 * In the hashed case, if we haven't yet built the hash table then we
1467 * can just return; nothing done yet, so nothing to undo. If subnode's
1468 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
1469 * else no reason to re-scan it at all.
1471 if (!node->table_filled)
1475 * If we do have the hash table and the subplan does not have any
1476 * parameter changes, then we can just rescan the existing hash table;
1477 * no need to build it again.
1479 if (((PlanState *) node)->lefttree->chgParam == NULL)
1481 ResetTupleHashIterator(node->hashtable, &node->hashiter);
1486 /* Make sure we have closed any open tuplesorts */
1487 for (aggno = 0; aggno < node->numaggs; aggno++)
1489 AggStatePerAgg peraggstate = &node->peragg[aggno];
1491 if (peraggstate->sortstate)
1492 tuplesort_end(peraggstate->sortstate);
1493 peraggstate->sortstate = NULL;
1496 /* Release first tuple of group, if we have made a copy */
1497 if (node->grp_firstTuple != NULL)
1499 heap_freetuple(node->grp_firstTuple);
1500 node->grp_firstTuple = NULL;
1503 /* Forget current agg values */
1504 MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
1505 MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
1507 /* Release all temp storage */
1508 MemoryContextReset(node->aggcontext);
1510 if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1512 /* Rebuild an empty hash table */
1513 build_hash_table(node);
1514 node->table_filled = false;
1519 * Reset the per-group state (in particular, mark transvalues null)
1521 MemSet(node->pergroup, 0,
1522 sizeof(AggStatePerGroupData) * node->numaggs);
1526 * if chgParam of subnode is not null then plan will be re-scanned by
1527 * first ExecProcNode.
1529 if (((PlanState *) node)->lefttree->chgParam == NULL)
1530 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
1534 * aggregate_dummy - dummy execution routine for aggregate functions
1536 * This function is listed as the implementation (prosrc field) of pg_proc
1537 * entries for aggregate functions. Its only purpose is to throw an error
1538 * if someone mistakenly executes such a function in the normal way.
1540 * Perhaps someday we could assign real meaning to the prosrc field of
1544 aggregate_dummy(PG_FUNCTION_ARGS)
1546 elog(ERROR, "aggregate function %u called as normal function",
1547 fcinfo->flinfo->fn_oid);
1548 return (Datum) 0; /* keep compiler quiet */