Rename SortMem and VacuumMem to work_mem and maintenance_work_mem.

[postgresql] / src / backend / executor / nodeAgg.c
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c

index 1ac5c3c9e21e53c88e048afaeb2d50c39ccfbf38..cb0a64c42771beb660c1f30a4c2aec1f39dd1711 100644 (file)
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -3,36 +3,49 @@
   * nodeAgg.c
   *       Routines to handle aggregate nodes.
   *
- *       ExecAgg evaluates each aggregate in the following steps: (initcond1,
- *       initcond2 are the initial values and sfunc1, sfunc2, and finalfunc are
- *       the transition functions.)
+ *       ExecAgg evaluates each aggregate in the following steps:
   *
- *              value1 = initcond1
- *              value2 = initcond2
+ *              transvalue = initcond
   *              foreach input_value do
- *                     value1 = sfunc1(value1, input_value)
- *                     value2 = sfunc2(value2)
- *              value1 = finalfunc(value1, value2)
+ *                     transvalue = transfunc(transvalue, input_value)
+ *              result = finalfunc(transvalue)
   *
- *       If initcond1 is NULL then the first non-NULL input_value is
- *       assigned directly to value1.  sfunc1 isn't applied until value1
- *       is non-NULL.
+ *       If a finalfunc is not supplied then the result is just the ending
+ *       value of transvalue.
   *
- *       sfunc1 is never applied when the current tuple's input_value is NULL.
- *       sfunc2 is applied for each tuple if the aggref is marked 'usenulls',
- *       otherwise it is only applied when input_value is not NULL.
- *       (usenulls was formerly used for COUNT(*), but is no longer needed for
- *       that purpose; as of 10/1999 the support for usenulls is dead code.
- *       I have not removed it because it seems like a potentially useful
- *       feature for user-defined aggregates.  We'd just need to add a
- *       flag column to pg_aggregate and a parameter to CREATE AGGREGATE...)
+ *       If transfunc is marked "strict" in pg_proc and initcond is NULL,
+ *       then the first non-NULL input_value is assigned directly to transvalue,
+ *       and transfunc isn't applied until the second non-NULL input_value.
+ *       The agg's input type and transtype must be the same in this case!
   *
+ *       If transfunc is marked "strict" then NULL input_values are skipped,
+ *       keeping the previous transvalue.  If transfunc is not strict then it
+ *       is called for every input tuple and must deal with NULL initcond
+ *       or NULL input_value for itself.
   *
- * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
+ *       If finalfunc is marked "strict" then it is not called when the
+ *       ending transvalue is NULL; instead a NULL result is created
+ *       automatically (this is just the usual handling of strict functions,
+ *       of course).  A non-strict finalfunc can make its own choice of
+ *       what to return for a NULL ending transvalue.
+ *
+ *       We compute aggregate input expressions and run the transition functions
+ *       in a temporary econtext (aggstate->tmpcontext).  This is reset at
+ *       least once per input tuple, so when the transvalue datatype is
+ *       pass-by-reference, we have to be careful to copy it into a longer-lived
+ *       memory context, and free the prior value to avoid memory leakage.
+ *       We store transvalues in the memory context aggstate->aggcontext,
+ *       which is also used for the hashtable structures in AGG_HASHED mode.
+ *       The node's regular econtext (aggstate->csstate.cstate.cs_ExprContext)
+ *       is used to run finalize functions and compute the output tuple;
+ *       this context can be reset once per output tuple.
+ *
+ *
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.69 2000/07/12 02:37:03 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.118 2004/02/03 17:34:02 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -44,39 +57,44 @@
  #include "catalog/pg_operator.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
+#include "miscadmin.h"
  #include "optimizer/clauses.h"
+#include "parser/parse_agg.h"
+#include "parser/parse_coerce.h"
  #include "parser/parse_expr.h"
  #include "parser/parse_oper.h"
-#include "parser/parse_type.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
  #include "utils/syscache.h"
  #include "utils/tuplesort.h"
  #include "utils/datum.h"
  
+
  /*
   * AggStatePerAggData - per-aggregate working state for the Agg scan
   */
  typedef struct AggStatePerAggData
  {
-
         /*
          * These values are set up during ExecInitAgg() and do not change
          * thereafter:
          */
  
-       /* Link to Aggref node this working state is for */
+       /* Links to Aggref expr and state nodes this working state is for */
+       AggrefExprState *aggrefstate;
         Aggref     *aggref;
  
         /* Oids of transfer functions */
-       Oid                     xfn1_oid;
-       Oid                     xfn2_oid;
-       Oid                     finalfn_oid;
+       Oid                     transfn_oid;
+       Oid                     finalfn_oid;    /* may be InvalidOid */
  
         /*
          * fmgr lookup data for transfer functions --- only valid when
-        * corresponding oid is not InvalidOid
+        * corresponding oid is not InvalidOid.  Note in particular that
+        * fn_strict flags are kept here.
          */
-       FmgrInfo        xfn1;
-       FmgrInfo        xfn2;
+       FmgrInfo        transfn;
         FmgrInfo        finalfn;
  
         /*
@@ -94,241 +112,339 @@ typedef struct AggStatePerAggData
         FmgrInfo        equalfn;
  
         /*
-        * initial values from pg_aggregate entry
+        * initial value from pg_aggregate entry
          */
-       Datum           initValue1;             /* for transtype1 */
-       Datum           initValue2;             /* for transtype2 */
-       bool            initValue1IsNull,
-                               initValue2IsNull;
+       Datum           initValue;
+       bool            initValueIsNull;
  
         /*
          * We need the len and byval info for the agg's input, result, and
          * transition data types in order to know how to copy/delete values.
          */
-       int                     inputtypeLen,
+       int16           inputtypeLen,
                                 resulttypeLen,
-                               transtype1Len,
-                               transtype2Len;
+                               transtypeLen;
         bool            inputtypeByVal,
                                 resulttypeByVal,
-                               transtype1ByVal,
-                               transtype2ByVal;
+                               transtypeByVal;
  
         /*
          * These values are working state that is initialized at the start of
          * an input tuple group and updated for each input tuple.
          *
          * For a simple (non DISTINCT) aggregate, we just feed the input values
-        * straight to the transition functions.  If it's DISTINCT, we pass
-        * the input values into a Tuplesort object; then at completion of the
+        * straight to the transition function.  If it's DISTINCT, we pass the
+        * input values into a Tuplesort object; then at completion of the
          * input tuple group, we scan the sorted values, eliminate duplicates,
-        * and run the transition functions on the rest.
+        * and run the transition function on the rest.
          */
  
         Tuplesortstate *sortstate;      /* sort object, if a DISTINCT agg */
+} AggStatePerAggData;
+
+/*
+ * AggStatePerGroupData - per-aggregate-per-group working state
+ *
+ * These values are working state that is initialized at the start of
+ * an input tuple group and updated for each input tuple.
+ *
+ * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
+ * structs (pointed to by aggstate->pergroup); we re-use the array for
+ * each input group, if it's AGG_SORTED mode.  In AGG_HASHED mode, the
+ * hash table contains an array of these structs for each tuple group.
+ *
+ * Logically, the sortstate field belongs in this struct, but we do not
+ * keep it here for space reasons: we don't support DISTINCT aggregates
+ * in AGG_HASHED mode, so there's no reason to use up a pointer field
+ * in every entry of the hashtable.
+ */
+typedef struct AggStatePerGroupData
+{
+       Datum           transValue;             /* current transition value */
+       bool            transValueIsNull;
  
-       Datum           value1,                 /* current transfer values 1 and 2 */
-                               value2;
-       bool            value1IsNull,
-                               value2IsNull;
-       bool            noInitValue;    /* true if value1 not set yet */
+       bool            noTransValue;   /* true if transValue not set yet */
  
         /*
-        * Note: right now, noInitValue always has the same value as
-        * value1IsNull. But we should keep them separate because once the
-        * fmgr interface is fixed, we'll need to distinguish a null returned
-        * by transfn1 from a null we haven't yet replaced with an input
-        * value.
+        * Note: noTransValue initially has the same value as
+        * transValueIsNull, and if true both are cleared to false at the same
+        * time.  They are not the same though: if transfn later returns a
+        * NULL, we want to keep that NULL and not auto-replace it with a
+        * later input value. Only the first non-NULL input will be
+        * auto-substituted.
          */
-} AggStatePerAggData;
+} AggStatePerGroupData;
  
+/*
+ * To implement hashed aggregation, we need a hashtable that stores a
+ * representative tuple and an array of AggStatePerGroup structs for each
+ * distinct set of GROUP BY column values.  We compute the hash key from
+ * the GROUP BY columns.
+ */
+typedef struct AggHashEntryData *AggHashEntry;
  
-static void initialize_aggregate(AggStatePerAgg peraggstate);
-static void advance_transition_functions(AggStatePerAgg peraggstate,
-                                                        Datum newVal, bool isNull);
+typedef struct AggHashEntryData
+{
+       TupleHashEntryData shared;      /* common header for hash table entries */
+       /* per-aggregate transition status array - must be last! */
+       AggStatePerGroupData pergroup[1];       /* VARIABLE LENGTH ARRAY */
+} AggHashEntryData;                            /* VARIABLE LENGTH STRUCT */
+
+
+static void initialize_aggregates(AggState *aggstate,
+                                         AggStatePerAgg peragg,
+                                         AggStatePerGroup pergroup);
+static void advance_transition_function(AggState *aggstate,
+                                                       AggStatePerAgg peraggstate,
+                                                       AggStatePerGroup pergroupstate,
+                                                       Datum newVal, bool isNull);
+static void advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup);
  static void process_sorted_aggregate(AggState *aggstate,
-                                                                        AggStatePerAgg peraggstate);
-static void finalize_aggregate(AggStatePerAgg peraggstate,
+                                                AggStatePerAgg peraggstate,
+                                                AggStatePerGroup pergroupstate);
+static void finalize_aggregate(AggState *aggstate,
+                                  AggStatePerAgg peraggstate,
+                                  AggStatePerGroup pergroupstate,
                                    Datum *resultVal, bool *resultIsNull);
+static void build_hash_table(AggState *aggstate);
+static AggHashEntry lookup_hash_entry(AggState *aggstate,
+                                 TupleTableSlot *slot);
+static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
+static void agg_fill_hash_table(AggState *aggstate);
+static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
+static Datum GetAggInitVal(Datum textInitVal, Oid transtype);
  
  
  /*
- * Initialize one aggregate for a new set of input values.
+ * Initialize all aggregates for a new group of input values.
   *
   * When called, CurrentMemoryContext should be the per-query context.
   */
  static void
-initialize_aggregate(AggStatePerAgg peraggstate)
+initialize_aggregates(AggState *aggstate,
+                                         AggStatePerAgg peragg,
+                                         AggStatePerGroup pergroup)
  {
-       Aggref     *aggref = peraggstate->aggref;
+       int                     aggno;
  
-       /*
-        * Start a fresh sort operation for each DISTINCT aggregate.
-        */
-       if (aggref->aggdistinct)
+       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
         {
+               AggStatePerAgg peraggstate = &peragg[aggno];
+               AggStatePerGroup pergroupstate = &pergroup[aggno];
+               Aggref     *aggref = peraggstate->aggref;
  
                 /*
-                * In case of rescan, maybe there could be an uncompleted sort
-                * operation?  Clean it up if so.
+                * Start a fresh sort operation for each DISTINCT aggregate.
                  */
-               if (peraggstate->sortstate)
-                       tuplesort_end(peraggstate->sortstate);
+               if (aggref->aggdistinct)
+               {
+                       /*
+                        * In case of rescan, maybe there could be an uncompleted sort
+                        * operation?  Clean it up if so.
+                        */
+                       if (peraggstate->sortstate)
+                               tuplesort_end(peraggstate->sortstate);
  
-               peraggstate->sortstate =
-                       tuplesort_begin_datum(peraggstate->inputType,
-                                                                 peraggstate->sortOperator,
-                                                                 false);
-       }
+                       peraggstate->sortstate =
+                               tuplesort_begin_datum(peraggstate->inputType,
+                                                                         peraggstate->sortOperator,
+                                                                         work_mem, false);
+               }
  
-       /*
-        * (Re)set value1 and value2 to their initial values.
-        *
-        * Note that when the initial values are pass-by-ref, we just reuse
-        * them without copying for each group.  Hence, transition function
-        * had better not scribble on its input!
-        */
-       peraggstate->value1 = peraggstate->initValue1;
-       peraggstate->value1IsNull = peraggstate->initValue1IsNull;
-       peraggstate->value2 = peraggstate->initValue2;
-       peraggstate->value2IsNull = peraggstate->initValue2IsNull;
-
-       /* ------------------------------------------
-        * If the initial value for the first transition function
-        * doesn't exist in the pg_aggregate table then we will let
-        * the first value returned from the outer procNode become
-        * the initial value. (This is useful for aggregates like
-        * max{} and min{}.)  The noInitValue flag signals that we
-        * still need to do this.
-        * ------------------------------------------
-        */
-       peraggstate->noInitValue = peraggstate->initValue1IsNull;
+               /*
+                * (Re)set transValue to the initial value.
+                *
+                * Note that when the initial value is pass-by-ref, we must copy it
+                * (into the aggcontext) since we will pfree the transValue later.
+                */
+               if (peraggstate->initValueIsNull)
+                       pergroupstate->transValue = peraggstate->initValue;
+               else
+               {
+                       MemoryContext oldContext;
+
+                       oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
+                       pergroupstate->transValue = datumCopy(peraggstate->initValue,
+                                                                                        peraggstate->transtypeByVal,
+                                                                                         peraggstate->transtypeLen);
+                       MemoryContextSwitchTo(oldContext);
+               }
+               pergroupstate->transValueIsNull = peraggstate->initValueIsNull;
+
+               /*
+                * If the initial value for the transition state doesn't exist in
+                * the pg_aggregate table then we will let the first non-NULL
+                * value returned from the outer procNode become the initial
+                * value. (This is useful for aggregates like max() and min().)
+                * The noTransValue flag signals that we still need to do this.
+                */
+               pergroupstate->noTransValue = peraggstate->initValueIsNull;
+       }
  }
  
  /*
- * Given a new input value, advance the transition functions of an aggregate.
- *
- * When called, CurrentMemoryContext should be the context we want transition
- * function results to be delivered into on this cycle.
+ * Given a new input value, advance the transition function of an aggregate.
   *
- * Note: if the agg does not have usenulls set, null inputs will be filtered
- * out before reaching here.
+ * It doesn't matter which memory context this is called in.
   */
  static void
-advance_transition_functions(AggStatePerAgg peraggstate,
-                                                        Datum newVal, bool isNull)
+advance_transition_function(AggState *aggstate,
+                                                       AggStatePerAgg peraggstate,
+                                                       AggStatePerGroup pergroupstate,
+                                                       Datum newVal, bool isNull)
  {
-       FunctionCallInfoData    fcinfo;
-
-       MemSet(&fcinfo, 0, sizeof(fcinfo));
+       FunctionCallInfoData fcinfo;
+       MemoryContext oldContext;
  
-       /*
-        * XXX reconsider isNULL handling here
-        */
-       if (OidIsValid(peraggstate->xfn1_oid) && !isNull)
+       if (peraggstate->transfn.fn_strict)
         {
-               if (peraggstate->noInitValue)
+               /*
+                * For a strict transfn, nothing happens at a NULL input tuple; we
+                * just keep the prior transValue.
+                */
+               if (isNull)
+                       return;
+               if (pergroupstate->noTransValue)
                 {
-
                         /*
-                        * value1 has not been initialized. This is the first non-NULL
-                        * input value. We use it as the initial value for value1.
+                        * transValue has not been initialized. This is the first
+                        * non-NULL input value. We use it as the initial value for
+                        * transValue. (We already checked that the agg's input type
+                        * is binary-compatible with its transtype, so straight copy
+                        * here is OK.)
                          *
-                        * XXX We assume, without having checked, that the agg's input
-                        * type is binary-compatible with its transtype1!
-                        *
-                        * We had better copy the datum if it is pass-by-ref, since
-                        * the given pointer may be pointing into a scan tuple that
-                        * will be freed on the next iteration of the scan.
+                        * We must copy the datum into aggcontext if it is pass-by-ref.
+                        * We do not need to pfree the old transValue, since it's
+                        * NULL.
                          */
-                       peraggstate->value1 = datumCopy(newVal,
-                                                                                       peraggstate->transtype1ByVal,
-                                                                                       peraggstate->transtype1Len);
-                       peraggstate->value1IsNull = false;
-                       peraggstate->noInitValue = false;
+                       oldContext = MemoryContextSwitchTo(aggstate->aggcontext);
+                       pergroupstate->transValue = datumCopy(newVal,
+                                                                                        peraggstate->transtypeByVal,
+                                                                                         peraggstate->transtypeLen);
+                       pergroupstate->transValueIsNull = false;
+                       pergroupstate->noTransValue = false;
+                       MemoryContextSwitchTo(oldContext);
+                       return;
                 }
-               else
+               if (pergroupstate->transValueIsNull)
                 {
-                       /* apply transition function 1 */
-                       fcinfo.flinfo = &peraggstate->xfn1;
-                       fcinfo.nargs = 2;
-                       fcinfo.arg[0] = peraggstate->value1;
-                       fcinfo.argnull[0] = peraggstate->value1IsNull;
-                       fcinfo.arg[1] = newVal;
-                       fcinfo.argnull[1] = isNull;
-                       if (fcinfo.flinfo->fn_strict &&
-                               (peraggstate->value1IsNull || isNull))
-                       {
-                               /* don't call a strict function with NULL inputs */
-                               newVal = (Datum) 0;
-                               fcinfo.isnull = true;
-                       }
-                       else
-                               newVal = FunctionCallInvoke(&fcinfo);
                         /*
-                        * If the transition function was uncooperative, it may have
-                        * given us a pass-by-ref result that points at the scan tuple
-                        * or the prior-cycle working memory.  Copy it into the active
-                        * context if it doesn't look right.
+                        * Don't call a strict function with NULL inputs.  Note it is
+                        * possible to get here despite the above tests, if the
+                        * transfn is strict *and* returned a NULL on a prior cycle.
+                        * If that happens we will propagate the NULL all the way to
+                        * the end.
                          */
-                       if (!peraggstate->transtype1ByVal && !fcinfo.isnull &&
-                               ! MemoryContextContains(CurrentMemoryContext,
-                                                                               DatumGetPointer(newVal)))
-                               newVal = datumCopy(newVal,
-                                                                  peraggstate->transtype1ByVal,
-                                                                  peraggstate->transtype1Len);
-                       peraggstate->value1 = newVal;
-                       peraggstate->value1IsNull = fcinfo.isnull;
+                       return;
                 }
         }
  
-       if (OidIsValid(peraggstate->xfn2_oid))
+       /* We run the transition function in the per-input-tuple memory context */
+       oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory);
+
+       /*
+        * OK to call the transition function
+        *
+        * This is heavily-used code, so manually zero just the necessary fields
+        * instead of using MemSet().  Compare FunctionCall2().
+        */
+
+       /* MemSet(&fcinfo, 0, sizeof(fcinfo)); */
+       fcinfo.context = NULL;
+       fcinfo.resultinfo = NULL;
+       fcinfo.isnull = false;
+
+       fcinfo.flinfo = &peraggstate->transfn;
+       fcinfo.nargs = 2;
+       fcinfo.arg[0] = pergroupstate->transValue;
+       fcinfo.argnull[0] = pergroupstate->transValueIsNull;
+       fcinfo.arg[1] = newVal;
+       fcinfo.argnull[1] = isNull;
+
+       newVal = FunctionCallInvoke(&fcinfo);
+
+       /*
+        * If pass-by-ref datatype, must copy the new value into aggcontext
+        * and pfree the prior transValue.  But if transfn returned a pointer
+        * to its first input, we don't need to do anything.
+        */
+       if (!peraggstate->transtypeByVal &&
+       DatumGetPointer(newVal) != DatumGetPointer(pergroupstate->transValue))
         {
-               /* apply transition function 2 */
-               fcinfo.flinfo = &peraggstate->xfn2;
-               fcinfo.nargs = 1;
-               fcinfo.arg[0] = peraggstate->value2;
-               fcinfo.argnull[0] = peraggstate->value2IsNull;
-               fcinfo.isnull = false;  /* must reset after use by xfn1 */
-               if (fcinfo.flinfo->fn_strict && peraggstate->value2IsNull)
+               if (!fcinfo.isnull)
                 {
-                       /* don't call a strict function with NULL inputs */
-                       newVal = (Datum) 0;
-                       fcinfo.isnull = true;
+                       MemoryContextSwitchTo(aggstate->aggcontext);
+                       newVal = datumCopy(newVal,
+                                                          peraggstate->transtypeByVal,
+                                                          peraggstate->transtypeLen);
+               }
+               if (!pergroupstate->transValueIsNull)
+                       pfree(DatumGetPointer(pergroupstate->transValue));
+       }
+
+       pergroupstate->transValue = newVal;
+       pergroupstate->transValueIsNull = fcinfo.isnull;
+
+       MemoryContextSwitchTo(oldContext);
+}
+
+/*
+ * Advance all the aggregates for one input tuple.  The input tuple
+ * has been stored in tmpcontext->ecxt_scantuple, so that it is accessible
+ * to ExecEvalExpr.  pergroup is the array of per-group structs to use
+ * (this might be in a hashtable entry).
+ *
+ * When called, CurrentMemoryContext should be the per-query context.
+ */
+static void
+advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
+{
+       ExprContext *econtext = aggstate->tmpcontext;
+       int                     aggno;
+
+       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+       {
+               AggStatePerAgg peraggstate = &aggstate->peragg[aggno];
+               AggStatePerGroup pergroupstate = &pergroup[aggno];
+               AggrefExprState *aggrefstate = peraggstate->aggrefstate;
+               Aggref     *aggref = peraggstate->aggref;
+               Datum           newVal;
+               bool            isNull;
+
+               newVal = ExecEvalExprSwitchContext(aggrefstate->target, econtext,
+                                                                                  &isNull, NULL);
+
+               if (aggref->aggdistinct)
+               {
+                       /* in DISTINCT mode, we may ignore nulls */
+                       if (isNull)
+                               continue;
+                       tuplesort_putdatum(peraggstate->sortstate, newVal, isNull);
                 }
                 else
-                       newVal = FunctionCallInvoke(&fcinfo);
-               /*
-                * If the transition function was uncooperative, it may have
-                * given us a pass-by-ref result that points at the scan tuple
-                * or the prior-cycle working memory.  Copy it into the active
-                * context if it doesn't look right.
-                */
-               if (!peraggstate->transtype2ByVal && !fcinfo.isnull &&
-                       ! MemoryContextContains(CurrentMemoryContext,
-                                                                       DatumGetPointer(newVal)))
-                       newVal = datumCopy(newVal,
-                                                          peraggstate->transtype2ByVal,
-                                                          peraggstate->transtype2Len);
-               peraggstate->value2 = newVal;
-               peraggstate->value2IsNull = fcinfo.isnull;
+               {
+                       advance_transition_function(aggstate, peraggstate, pergroupstate,
+                                                                               newVal, isNull);
+               }
         }
  }
  
  /*
- * Run the transition functions for a DISTINCT aggregate.  This is called
+ * Run the transition function for a DISTINCT aggregate.  This is called
   * after we have completed entering all the input values into the sort
- * object.  We complete the sort, read out the value in sorted order, and
- * run the transition functions on each non-duplicate value.
+ * object.  We complete the sort, read out the values in sorted order,
+ * and run the transition function on each non-duplicate value.
   *
   * When called, CurrentMemoryContext should be the per-query context.
   */
  static void
  process_sorted_aggregate(AggState *aggstate,
-                                                AggStatePerAgg peraggstate)
+                                                AggStatePerAgg peraggstate,
+                                                AggStatePerGroup pergroupstate)
  {
         Datum           oldVal = (Datum) 0;
         bool            haveOldVal = false;
+       MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
         MemoryContext oldContext;
         Datum           newVal;
         bool            isNull;
@@ -337,8 +453,8 @@ process_sorted_aggregate(AggState *aggstate,
  
         /*
          * Note: if input type is pass-by-ref, the datums returned by the sort
-        * are freshly palloc'd in the per-query context, so we must be careful
-        * to pfree them when they are no longer needed.
+        * are freshly palloc'd in the per-query context, so we must be
+        * careful to pfree them when they are no longer needed.
          */
  
         while (tuplesort_getdatum(peraggstate->sortstate, true,
@@ -346,17 +462,17 @@ process_sorted_aggregate(AggState *aggstate,
         {
                 /*
                  * DISTINCT always suppresses nulls, per SQL spec, regardless of
-                * the aggregate's usenulls setting.
+                * the transition function's strictness.
                  */
                 if (isNull)
                         continue;
+
                 /*
-                * Clear and select the current working context for evaluation of
-                * the equality function and transition functions.
+                * Clear and select the working context for evaluation of the
+                * equality function and transition function.
                  */
-               MemoryContextReset(aggstate->agg_cxt[aggstate->which_cxt]);
-               oldContext =
-                       MemoryContextSwitchTo(aggstate->agg_cxt[aggstate->which_cxt]);
+               MemoryContextReset(workcontext);
+               oldContext = MemoryContextSwitchTo(workcontext);
  
                 if (haveOldVal &&
                         DatumGetBool(FunctionCall2(&peraggstate->equalfn,
@@ -365,19 +481,15 @@ process_sorted_aggregate(AggState *aggstate,
                         /* equal to prior, so forget this one */
                         if (!peraggstate->inputtypeByVal)
                                 pfree(DatumGetPointer(newVal));
-                       /* note we do NOT flip contexts in this case... */
                 }
                 else
                 {
-                       advance_transition_functions(peraggstate, newVal, false);
-                       /*
-                        * Make the other context current so that this transition
-                        * result is preserved.
-                        */
-                       aggstate->which_cxt = 1 - aggstate->which_cxt;
+                       advance_transition_function(aggstate, peraggstate, pergroupstate,
+                                                                               newVal, false);
                         /* forget the old value, if any */
                         if (haveOldVal && !peraggstate->inputtypeByVal)
                                 pfree(DatumGetPointer(oldVal));
+                       /* and remember the new one for subsequent equality checks */
                         oldVal = newVal;
                         haveOldVal = true;
                 }
@@ -395,55 +507,32 @@ process_sorted_aggregate(AggState *aggstate,
  /*
   * Compute the final value of one aggregate.
   *
- * When called, CurrentMemoryContext should be the context where we want
- * final values delivered (ie, the per-output-tuple expression context).
+ * The finalfunction will be run, and the result delivered, in the
+ * output-tuple context; caller's CurrentMemoryContext does not matter.
   */
  static void
-finalize_aggregate(AggStatePerAgg peraggstate,
+finalize_aggregate(AggState *aggstate,
+                                  AggStatePerAgg peraggstate,
+                                  AggStatePerGroup pergroupstate,
                                    Datum *resultVal, bool *resultIsNull)
  {
-       FunctionCallInfoData    fcinfo;
+       MemoryContext oldContext;
  
-       MemSet(&fcinfo, 0, sizeof(fcinfo));
+       oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory);
  
         /*
-        * Apply the agg's finalfn, or substitute the appropriate
-        * transition value if there is no finalfn.
-        *
-        * XXX For now, only apply finalfn if we got at least one non-null input
-        * value.  This prevents zero divide in AVG(). If we had cleaner
-        * handling of null inputs/results in functions, we could probably
-        * take out this hack and define the result for no inputs as whatever
-        * finalfn returns for null input.
+        * Apply the agg's finalfn if one is provided, else return transValue.
          */
-       if (OidIsValid(peraggstate->finalfn_oid) &&
-               !peraggstate->noInitValue)
+       if (OidIsValid(peraggstate->finalfn_oid))
         {
+               FunctionCallInfoData fcinfo;
+
+               MemSet(&fcinfo, 0, sizeof(fcinfo));
                 fcinfo.flinfo = &peraggstate->finalfn;
-               if (peraggstate->finalfn.fn_nargs > 1)
-               {
-                       fcinfo.nargs = 2;
-                       fcinfo.arg[0] = peraggstate->value1;
-                       fcinfo.argnull[0] = peraggstate->value1IsNull;
-                       fcinfo.arg[1] = peraggstate->value2;
-                       fcinfo.argnull[1] = peraggstate->value2IsNull;
-               }
-               else if (OidIsValid(peraggstate->xfn1_oid))
-               {
-                       fcinfo.nargs = 1;
-                       fcinfo.arg[0] = peraggstate->value1;
-                       fcinfo.argnull[0] = peraggstate->value1IsNull;
-               }
-               else if (OidIsValid(peraggstate->xfn2_oid))
-               {
-                       fcinfo.nargs = 1;
-                       fcinfo.arg[0] = peraggstate->value2;
-                       fcinfo.argnull[0] = peraggstate->value2IsNull;
-               }
-               else
-                       elog(ERROR, "ExecAgg: no valid transition functions??");
-               if (fcinfo.flinfo->fn_strict &&
-                       (fcinfo.argnull[0] || fcinfo.argnull[1]))
+               fcinfo.nargs = 1;
+               fcinfo.arg[0] = pergroupstate->transValue;
+               fcinfo.argnull[0] = pergroupstate->transValueIsNull;
+               if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull)
                 {
                         /* don't call a strict function with NULL inputs */
                         *resultVal = (Datum) 0;
@@ -455,291 +544,455 @@ finalize_aggregate(AggStatePerAgg peraggstate,
                         *resultIsNull = fcinfo.isnull;
                 }
         }
-       else if (OidIsValid(peraggstate->xfn1_oid))
-       {
-               /* Return value1 */
-               *resultVal = peraggstate->value1;
-               *resultIsNull = peraggstate->value1IsNull;
-       }
-       else if (OidIsValid(peraggstate->xfn2_oid))
+       else
         {
-               /* Return value2 */
-               *resultVal = peraggstate->value2;
-               *resultIsNull = peraggstate->value2IsNull;
+               *resultVal = pergroupstate->transValue;
+               *resultIsNull = pergroupstate->transValueIsNull;
         }
-       else
-               elog(ERROR, "ExecAgg: no valid transition functions??");
+
         /*
          * If result is pass-by-ref, make sure it is in the right context.
          */
-       if (!peraggstate->resulttypeByVal && ! *resultIsNull &&
-               ! MemoryContextContains(CurrentMemoryContext,
-                                                               DatumGetPointer(*resultVal)))
+       if (!peraggstate->resulttypeByVal && !*resultIsNull &&
+               !MemoryContextContains(CurrentMemoryContext,
+                                                          DatumGetPointer(*resultVal)))
                 *resultVal = datumCopy(*resultVal,
                                                            peraggstate->resulttypeByVal,
                                                            peraggstate->resulttypeLen);
+
+       MemoryContextSwitchTo(oldContext);
  }
  
+/*
+ * Initialize the hash table to empty.
+ *
+ * Builds the tuple hash table used for hashed aggregation and stores it in
+ * aggstate->hashtable.  Only valid for an Agg plan node whose strategy is
+ * AGG_HASHED and whose numGroups estimate is positive (both are asserted).
+ *
+ * The hash table always lives in the aggcontext memory context.
+ */
+static void
+build_hash_table(AggState *aggstate)
+{
+       Agg                *node = (Agg *) aggstate->ss.ps.plan;
+       /* per-input-tuple memory, handed to the hash table as its last argument */
+       MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
+       Size            entrysize;
+
+       Assert(node->aggstrategy == AGG_HASHED);
+       Assert(node->numGroups > 0);
+
+       /*
+        * Size of one hash entry: the entry struct plus (numaggs - 1) further
+        * per-group state structs.  NOTE(review): this presumably relies on
+        * AggHashEntryData already containing room for one
+        * AggStatePerGroupData --- confirm against the struct definition.
+        */
+       entrysize = sizeof(AggHashEntryData) +
+               (aggstate->numaggs - 1) *sizeof(AggStatePerGroupData);
+
+       aggstate->hashtable = BuildTupleHashTable(node->numCols,
+                                                                                         node->grpColIdx,
+                                                                                         aggstate->eqfunctions,
+                                                                                         aggstate->hashfunctions,
+                                                                                         node->numGroups,
+                                                                                         entrysize,
+                                                                                         aggstate->aggcontext,
+                                                                                         tmpmem);
+}
  
-/* ---------------------------------------
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple.
   *
+ * The tuple in "slot" is looked up in aggstate->hashtable.  If the lookup
+ * reports that the entry is new, the entry's per-group aggregate state is
+ * set up with initialize_aggregates() before the entry is returned.
+ *
+ * When called, CurrentMemoryContext should be the per-query context.
+ */
+static AggHashEntry
+lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
+{
+       AggHashEntry entry;
+       bool            isnew;
+
+       /* isnew is set by the lookup to tell us whether the entry was just made */
+       entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
+                                                                                               slot,
+                                                                                               &isnew);
+
+       if (isnew)
+       {
+               /* initialize aggregates for new tuple group */
+               initialize_aggregates(aggstate, aggstate->peragg, entry->pergroup);
+       }
+
+       return entry;
+}
+
+/*
   * ExecAgg -
   *
   *       ExecAgg receives tuples from its outer subplan and aggregates over
   *       the appropriate attribute for each aggregate function use (Aggref
   *       node) appearing in the targetlist or qual of the node.  The number
- *       of tuples to aggregate over depends on whether a GROUP BY clause is
- *       present.      We can produce an aggregate result row per group, or just
- *       one for the whole query.      The value of each aggregate is stored in
- *       the expression context to be used when ExecProject evaluates the
- *       result tuple.
- *
- *       If the outer subplan is a Group node, ExecAgg returns as many tuples
- *       as there are groups.
- *
- * ------------------------------------------
+ *       of tuples to aggregate over depends on whether grouped or plain
+ *       aggregation is selected.      In grouped aggregation, we produce a result
+ *       row for each group; in plain aggregation there's a single result row
+ *       for the whole query.  In either case, the value of each aggregate is
+ *       stored in the expression context to be used when ExecProject evaluates
+ *       the result tuple.
   */
  TupleTableSlot *
-ExecAgg(Agg *node)
+ExecAgg(AggState *node)
  {
-       AggState   *aggstate;
-       EState     *estate;
-       Plan       *outerPlan;
+       if (node->agg_done)
+               return NULL;
+
+       if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+       {
+               if (!node->table_filled)
+                       agg_fill_hash_table(node);
+               return agg_retrieve_hash_table(node);
+       }
+       else
+               return agg_retrieve_direct(node);
+}
+
+/*
+ * ExecAgg for non-hashed case
+ */
+static TupleTableSlot *
+agg_retrieve_direct(AggState *aggstate)
+{
+       Agg                *node = (Agg *) aggstate->ss.ps.plan;
+       PlanState  *outerPlan;
         ExprContext *econtext;
+       ExprContext *tmpcontext;
         ProjectionInfo *projInfo;
         Datum      *aggvalues;
         bool       *aggnulls;
         AggStatePerAgg peragg;
-       MemoryContext oldContext;
+       AggStatePerGroup pergroup;
+       TupleTableSlot *outerslot;
+       TupleTableSlot *firstSlot;
         TupleTableSlot *resultSlot;
-       HeapTuple       inputTuple;
         int                     aggno;
-       bool            isDone;
-       bool            isNull;
  
-       /* ---------------------
-        *      get state info from node
-        * ---------------------
+       /*
+        * get state info from node
          */
-       aggstate = node->aggstate;
-       estate = node->plan.state;
-       outerPlan = outerPlan(node);
-       econtext = aggstate->csstate.cstate.cs_ExprContext;
+       outerPlan = outerPlanState(aggstate);
+       /* econtext is the per-output-tuple expression context */
+       econtext = aggstate->ss.ps.ps_ExprContext;
         aggvalues = econtext->ecxt_aggvalues;
         aggnulls = econtext->ecxt_aggnulls;
-       projInfo = aggstate->csstate.cstate.cs_ProjInfo;
+       /* tmpcontext is the per-input-tuple expression context */
+       tmpcontext = aggstate->tmpcontext;
+       projInfo = aggstate->ss.ps.ps_ProjInfo;
         peragg = aggstate->peragg;
+       pergroup = aggstate->pergroup;
+       firstSlot = aggstate->ss.ss_ScanTupleSlot;
  
         /*
-        * We loop retrieving groups until we find one matching node->plan.qual
+        * We loop retrieving groups until we find one matching
+        * aggstate->ss.ps.qual
          */
         do
         {
                 if (aggstate->agg_done)
                         return NULL;
  
+               /*
+                * If we don't already have the first tuple of the new group,
+                * fetch it from the outer plan.
+                */
+               if (aggstate->grp_firstTuple == NULL)
+               {
+                       outerslot = ExecProcNode(outerPlan);
+                       if (!TupIsNull(outerslot))
+                       {
+                               /*
+                                * Make a copy of the first input tuple; we will use this
+                                * for comparisons (in group mode) and for projection.
+                                */
+                               aggstate->grp_firstTuple = heap_copytuple(outerslot->val);
+                       }
+                       else
+                       {
+                               /* outer plan produced no tuples at all */
+                               aggstate->agg_done = true;
+                               /* If we are grouping, we should produce no tuples too */
+                               if (node->aggstrategy != AGG_PLAIN)
+                                       return NULL;
+                       }
+               }
+
                 /*
                  * Clear the per-output-tuple context for each group
                  */
-               MemoryContextReset(aggstate->tup_cxt);
+               ResetExprContext(econtext);
  
                 /*
                  * Initialize working state for a new input tuple group
                  */
-               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
-               {
-                       AggStatePerAgg peraggstate = &peragg[aggno];
-
-                       initialize_aggregate(peraggstate);
-               }
+               initialize_aggregates(aggstate, peragg, pergroup);
  
-               inputTuple = NULL;              /* no saved input tuple yet */
-
-               /* ----------------
-                *       for each tuple from the outer plan, update all the aggregates
-                * ----------------
-                */
-               for (;;)
+               if (aggstate->grp_firstTuple != NULL)
                 {
-                       TupleTableSlot *outerslot;
+                       /*
+                        * Store the copied first input tuple in the tuple table slot
+                        * reserved for it.  The tuple will be deleted when it is
+                        * cleared from the slot.
+                        */
+                       ExecStoreTuple(aggstate->grp_firstTuple,
+                                                  firstSlot,
+                                                  InvalidBuffer,
+                                                  true);
+                       aggstate->grp_firstTuple = NULL;        /* don't keep two pointers */
  
-                       outerslot = ExecProcNode(outerPlan, (Plan *) node);
-                       if (TupIsNull(outerslot))
-                               break;
-                       econtext->ecxt_scantuple = outerslot;
+                       /* set up for first advance_aggregates call */
+                       tmpcontext->ecxt_scantuple = firstSlot;
  
                         /*
-                        * Clear and select the current working context for evaluation
-                        * of the input expressions and transition functions at this
-                        * input tuple.
+                        * Process each outer-plan tuple, and then fetch the next one,
+                        * until we exhaust the outer plan or cross a group boundary.
                          */
-                       econtext->ecxt_per_tuple_memory =
-                               aggstate->agg_cxt[aggstate->which_cxt];
-                       ResetExprContext(econtext);
-                       oldContext =
-                               MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
-
-                       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+                       for (;;)
                         {
-                               AggStatePerAgg peraggstate = &peragg[aggno];
-                               Aggref     *aggref = peraggstate->aggref;
-                               Datum           newVal;
+                               advance_aggregates(aggstate, pergroup);
  
-                               newVal = ExecEvalExpr(aggref->target, econtext,
-                                                                         &isNull, &isDone);
+                               /* Reset per-input-tuple context after each tuple */
+                               ResetExprContext(tmpcontext);
  
-                               if (isNull && !aggref->usenulls)
-                                       continue;       /* ignore this tuple for this agg */
-
-                               if (aggref->aggdistinct)
+                               outerslot = ExecProcNode(outerPlan);
+                               if (TupIsNull(outerslot))
+                               {
+                                       /* no more outer-plan tuples available */
+                                       aggstate->agg_done = true;
+                                       break;
+                               }
+                               /* set up for next advance_aggregates call */
+                               tmpcontext->ecxt_scantuple = outerslot;
+
+                               /*
+                                * If we are grouping, check whether we've crossed a group
+                                * boundary.
+                                */
+                               if (node->aggstrategy == AGG_SORTED)
                                 {
-                                       /* putdatum has to be called in per-query context */
-                                       MemoryContextSwitchTo(oldContext);
-                                       tuplesort_putdatum(peraggstate->sortstate,
-                                                                          newVal, isNull);
-                                       MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
+                                       if (!execTuplesMatch(firstSlot->val,
+                                                                                outerslot->val,
+                                                                                firstSlot->ttc_tupleDescriptor,
+                                                                                node->numCols, node->grpColIdx,
+                                                                                aggstate->eqfunctions,
+                                                                         tmpcontext->ecxt_per_tuple_memory))
+                                       {
+                                               /*
+                                                * Save the first input tuple of the next group.
+                                                */
+                                               aggstate->grp_firstTuple = heap_copytuple(outerslot->val);
+                                               break;
+                                       }
                                 }
-                               else
-                                       advance_transition_functions(peraggstate,
-                                                                                                newVal, isNull);
                         }
-
-                       /*
-                        * Make the other context current so that these transition
-                        * results are preserved.
-                        */
-                       aggstate->which_cxt = 1 - aggstate->which_cxt;
-
-                       MemoryContextSwitchTo(oldContext);
-
-                       /*
-                        * Keep a copy of the first input tuple for the projection.
-                        * (We only need one since only the GROUP BY columns in it can
-                        * be referenced, and these will be the same for all tuples
-                        * aggregated over.)
-                        */
-                       if (!inputTuple)
-                               inputTuple = heap_copytuple(outerslot->val);
                 }
  
                 /*
                  * Done scanning input tuple group. Finalize each aggregate
                  * calculation, and stash results in the per-output-tuple context.
-                *
-                * This is a bit tricky when there are both DISTINCT and plain
-                * aggregates: we must first finalize all the plain aggs and then all
-                * the DISTINCT ones.  This is needed because the last transition
-                * values for the plain aggs are stored in the not-current working
-                * context, and we have to evaluate those aggs (and stash the results
-                * in the output tup_cxt!) before we start flipping contexts again
-                * in process_sorted_aggregate.
                  */
-               oldContext = MemoryContextSwitchTo(aggstate->tup_cxt);
                 for (aggno = 0; aggno < aggstate->numaggs; aggno++)
                 {
                         AggStatePerAgg peraggstate = &peragg[aggno];
+                       AggStatePerGroup pergroupstate = &pergroup[aggno];
  
-                       if (! peraggstate->aggref->aggdistinct)
-                               finalize_aggregate(peraggstate,
-                                                                  &aggvalues[aggno], &aggnulls[aggno]);
+                       if (peraggstate->aggref->aggdistinct)
+                               process_sorted_aggregate(aggstate, peraggstate, pergroupstate);
+
+                       finalize_aggregate(aggstate, peraggstate, pergroupstate,
+                                                          &aggvalues[aggno], &aggnulls[aggno]);
                 }
-               MemoryContextSwitchTo(oldContext);
-               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+
+               /*
+                * If we have no first tuple (ie, the outerPlan didn't return
+                * anything), create a dummy all-nulls input tuple for use by
+                * ExecProject. 99.44% of the time this is a waste of cycles,
+                * because ordinarily the projected output tuple's targetlist
+                * cannot contain any direct (non-aggregated) references to input
+                * columns, so the dummy tuple will not be referenced. However
+                * there are special cases where this isn't so --- in particular
+                * an UPDATE involving an aggregate will have a targetlist
+                * reference to ctid.  We need to return a null for ctid in that
+                * situation, not coredump.
+                *
+                * The values returned for the aggregates will be the initial values
+                * of the transition functions.
+                */
+               if (TupIsNull(firstSlot))
                 {
-                       AggStatePerAgg peraggstate = &peragg[aggno];
+                       TupleDesc       tupType;
  
-                       if (peraggstate->aggref->aggdistinct)
+                       /* Should only happen in non-grouped mode */
+                       Assert(node->aggstrategy == AGG_PLAIN);
+                       Assert(aggstate->agg_done);
+
+                       tupType = firstSlot->ttc_tupleDescriptor;
+                       /* watch out for zero-column input tuples, though... */
+                       if (tupType && tupType->natts > 0)
                         {
-                               process_sorted_aggregate(aggstate, peraggstate);
-                               oldContext = MemoryContextSwitchTo(aggstate->tup_cxt);
-                               finalize_aggregate(peraggstate,
-                                                                  &aggvalues[aggno], &aggnulls[aggno]);
-                               MemoryContextSwitchTo(oldContext);
+                               HeapTuple       nullsTuple;
+                               Datum      *dvalues;
+                               char       *dnulls;
+
+                               dvalues = (Datum *) palloc0(sizeof(Datum) * tupType->natts);
+                               dnulls = (char *) palloc(sizeof(char) * tupType->natts);
+                               MemSet(dnulls, 'n', sizeof(char) * tupType->natts);
+                               nullsTuple = heap_formtuple(tupType, dvalues, dnulls);
+                               ExecStoreTuple(nullsTuple,
+                                                          firstSlot,
+                                                          InvalidBuffer,
+                                                          true);
+                               pfree(dvalues);
+                               pfree(dnulls);
                         }
                 }
  
                 /*
-                * If the outerPlan is a Group node, we will reach here after each
-                * group.  We are not done unless the Group node is done (a little
-                * ugliness here while we reach into the Group's state to find
-                * out). Furthermore, when grouping we return nothing at all
-                * unless we had some input tuple(s).  By the nature of Group,
-                * there are no empty groups, so if we get here with no input the
-                * whole scan is empty.
-                *
-                * If the outerPlan isn't a Group, we are done when we get here, and
-                * we will emit a (single) tuple even if there were no input
-                * tuples.
+                * Form a projection tuple using the aggregate results and the
+                * representative input tuple.  Store it in the result tuple slot.
+                * Note we do not support aggregates returning sets ...
+                */
+               econtext->ecxt_scantuple = firstSlot;
+               resultSlot = ExecProject(projInfo, NULL);
+
+               /*
+                * If the completed tuple does not match the qualifications, it is
+                * ignored and we loop back to try to process another group.
+                * Otherwise, return the tuple.
                  */
-               if (IsA(outerPlan, Group))
+       }
+       while (!ExecQual(aggstate->ss.ps.qual, econtext, false));
+
+       return resultSlot;
+}
+
+/*
+ * ExecAgg for hashed case: phase 1, read input and build hash table
+ *
+ * Pulls tuples from the outer plan until it returns no more.  Each tuple is
+ * assigned to its group's hash entry via lookup_hash_entry(), and that
+ * group's aggregates are advanced with it.  On return, table_filled is set
+ * and the hash iterator has been reset for the retrieval phase.
+ */
+static void
+agg_fill_hash_table(AggState *aggstate)
+{
+       PlanState  *outerPlan;
+       ExprContext *tmpcontext;
+       AggHashEntry entry;
+       TupleTableSlot *outerslot;
+
+       /*
+        * get state info from node
+        */
+       outerPlan = outerPlanState(aggstate);
+       /* tmpcontext is the per-input-tuple expression context */
+       tmpcontext = aggstate->tmpcontext;
+
+       /*
+        * Process each outer-plan tuple, and then fetch the next one, until
+        * we exhaust the outer plan.
+        */
+       for (;;)
+       {
+               outerslot = ExecProcNode(outerPlan);
+               if (TupIsNull(outerslot))
+                       break;
+               /* set up for advance_aggregates call */
+               tmpcontext->ecxt_scantuple = outerslot;
+
+               /* Find or build hashtable entry for this tuple's group */
+               entry = lookup_hash_entry(aggstate, outerslot);
+
+               /* Advance the aggregates */
+               advance_aggregates(aggstate, entry->pergroup);
+
+               /* Reset per-input-tuple context after each tuple */
+               ResetExprContext(tmpcontext);
+       }
+
+       /* Record that phase 1 is complete; ExecAgg tests this flag before filling */
+       aggstate->table_filled = true;
+       /* Initialize to walk the hash table */
+       ResetTupleHashIterator(aggstate->hashtable, &aggstate->hashiter);
+}
+
+/*
+ * ExecAgg for hashed case: phase 2, retrieving groups from hash table
+ */
+static TupleTableSlot *
+agg_retrieve_hash_table(AggState *aggstate)
+{
+       ExprContext *econtext;
+       ProjectionInfo *projInfo;
+       Datum      *aggvalues;
+       bool       *aggnulls;
+       AggStatePerAgg peragg;
+       AggStatePerGroup pergroup;
+       AggHashEntry entry;
+       TupleTableSlot *firstSlot;
+       TupleTableSlot *resultSlot;
+       int                     aggno;
+
+       /*
+        * get state info from node
+        */
+       /* econtext is the per-output-tuple expression context */
+       econtext = aggstate->ss.ps.ps_ExprContext;
+       aggvalues = econtext->ecxt_aggvalues;
+       aggnulls = econtext->ecxt_aggnulls;
+       projInfo = aggstate->ss.ps.ps_ProjInfo;
+       peragg = aggstate->peragg;
+       firstSlot = aggstate->ss.ss_ScanTupleSlot;
+
+       /*
+        * We loop retrieving groups until we find one satisfying
+        * aggstate->ss.ps.qual
+        */
+       do
+       {
+               if (aggstate->agg_done)
+                       return NULL;
+
+               /*
+                * Find the next entry in the hash table
+                */
+               entry = (AggHashEntry) ScanTupleHashTable(&aggstate->hashiter);
+               if (entry == NULL)
                 {
-                       /* aggregation over groups */
-                       aggstate->agg_done = ((Group *) outerPlan)->grpstate->grp_done;
-                       /* check for no groups */
-                       if (inputTuple == NULL)
-                               return NULL;
+                       /* No more entries in hashtable, so done */
+                       aggstate->agg_done = TRUE;
+                       return NULL;
                 }
-               else
-               {
-                       aggstate->agg_done = true;
  
-                       /*
-                        * If inputtuple==NULL (ie, the outerPlan didn't return
-                        * anything), create a dummy all-nulls input tuple for use by
-                        * ExecProject. 99.44% of the time this is a waste of cycles,
-                        * because ordinarily the projected output tuple's targetlist
-                        * cannot contain any direct (non-aggregated) references to
-                        * input columns, so the dummy tuple will not be referenced.
-                        * However there are special cases where this isn't so --- in
-                        * particular an UPDATE involving an aggregate will have a
-                        * targetlist reference to ctid.  We need to return a null for
-                        * ctid in that situation, not coredump.
-                        *
-                        * The values returned for the aggregates will be the initial
-                        * values of the transition functions.
-                        */
-                       if (inputTuple == NULL)
-                       {
-                               TupleDesc       tupType;
-                               Datum      *tupValue;
-                               char       *null_array;
-                               AttrNumber      attnum;
-
-                               tupType = aggstate->csstate.css_ScanTupleSlot->ttc_tupleDescriptor;
-                               tupValue = projInfo->pi_tupValue;
-                               /* watch out for null input tuples, though... */
-                               if (tupType && tupValue)
-                               {
-                                       null_array = (char *) palloc(sizeof(char) * tupType->natts);
-                                       for (attnum = 0; attnum < tupType->natts; attnum++)
-                                               null_array[attnum] = 'n';
-                                       inputTuple = heap_formtuple(tupType, tupValue, null_array);
-                                       pfree(null_array);
-                               }
-                       }
-               }
+               /*
+                * Clear the per-output-tuple context for each group
+                */
+               ResetExprContext(econtext);
  
                 /*
-                * Store the representative input tuple in the tuple table slot
-                * reserved for it.  The tuple will be deleted when it is cleared
-                * from the slot.
+                * Store the copied first input tuple in the tuple table slot
+                * reserved for it, so that it can be used in ExecProject.
                  */
-               ExecStoreTuple(inputTuple,
-                                          aggstate->csstate.css_ScanTupleSlot,
+               ExecStoreTuple(entry->shared.firstTuple,
+                                          firstSlot,
                                            InvalidBuffer,
-                                          true);
-               econtext->ecxt_scantuple = aggstate->csstate.css_ScanTupleSlot;
+                                          false);
+
+               pergroup = entry->pergroup;
  
                 /*
-                * Do projection and qual check in the per-output-tuple context.
+                * Finalize each aggregate calculation, and stash results in the
+                * per-output-tuple context.
                  */
-               econtext->ecxt_per_tuple_memory = aggstate->tup_cxt;
+               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
+               {
+                       AggStatePerAgg peraggstate = &peragg[aggno];
+                       AggStatePerGroup pergroupstate = &pergroup[aggno];
+
+                       Assert(!peraggstate->aggref->aggdistinct);
+                       finalize_aggregate(aggstate, peraggstate, pergroupstate,
+                                                          &aggvalues[aggno], &aggnulls[aggno]);
+               }
  
                 /*
                  * Form a projection tuple using the aggregate results and the
                  * representative input tuple.  Store it in the result tuple slot.
+                * Note we do not support aggregates returning sets ...
                  */
-               resultSlot = ExecProject(projInfo, &isDone);
+               econtext->ecxt_scantuple = firstSlot;
+               resultSlot = ExecProject(projInfo, NULL);
  
                 /*
                  * If the completed tuple does not match the qualifications, it is
@@ -747,7 +1000,7 @@ ExecAgg(Agg *node)
                  * Otherwise, return the tuple.
                  */
         }
-       while (!ExecQual(node->plan.qual, econtext, false));
+       while (!ExecQual(aggstate->ss.ps.qual, econtext, false));
  
         return resultSlot;
  }
@@ -759,8 +1012,8 @@ ExecAgg(Agg *node)
   *     planner and initializes its outer subtree
   * -----------------
   */
-bool
-ExecInitAgg(Agg *node, EState *estate, Plan *parent)
+AggState *
+ExecInitAgg(Agg *node, EState *estate)
  {
         AggState   *aggstate;
         AggStatePerAgg peragg;
@@ -770,271 +1023,478 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
                                 aggno;
         List       *alist;
  
-       /*
-        * assign the node's execution state
-        */
-       node->plan.state = estate;
-
         /*
          * create state structure
          */
         aggstate = makeNode(AggState);
-       node->aggstate = aggstate;
+       aggstate->ss.ps.plan = (Plan *) node;
+       aggstate->ss.ps.state = estate;
+
+       aggstate->aggs = NIL;
+       aggstate->numaggs = 0;
+       aggstate->eqfunctions = NULL;
+       aggstate->hashfunctions = NULL;
+       aggstate->peragg = NULL;
         aggstate->agg_done = false;
+       aggstate->pergroup = NULL;
+       aggstate->grp_firstTuple = NULL;
+       aggstate->hashtable = NULL;
  
         /*
-        * find aggregates in targetlist and quals
-        *
-        * Note: pull_agg_clauses also checks that no aggs contain other agg
-        * calls in their arguments.  This would make no sense under SQL
-        * semantics anyway (and it's forbidden by the spec).  Because that is
-        * true, we don't need to worry about evaluating the aggs in any
-        * particular order.
+        * Create expression contexts.  We need two, one for per-input-tuple
+        * processing and one for per-output-tuple processing.  We cheat a
+        * little by using ExecAssignExprContext() to build both.
          */
-       aggstate->aggs = nconc(pull_agg_clause((Node *) node->plan.targetlist),
-                                                  pull_agg_clause((Node *) node->plan.qual));
-       aggstate->numaggs = numaggs = length(aggstate->aggs);
-       if (numaggs <= 0)
-       {
-
-               /*
-                * This used to be treated as an error, but we can't do that
-                * anymore because constant-expression simplification could
-                * optimize away all of the Aggrefs in the targetlist and qual.
-                * So, just make a debug note, and force numaggs positive so that
-                * palloc()s below don't choke.
-                */
-               elog(DEBUG, "ExecInitAgg: could not find any aggregate functions");
-               numaggs = 1;
-       }
-
-       /*
-        * Create expression context
-        */
-       ExecAssignExprContext(estate, &aggstate->csstate.cstate);
+       ExecAssignExprContext(estate, &aggstate->ss.ps);
+       aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
+       ExecAssignExprContext(estate, &aggstate->ss.ps);
  
         /*
-        * We actually need three separate expression memory contexts: one
-        * for calculating per-output-tuple values (ie, the finished aggregate
-        * results), and two that we ping-pong between for per-input-tuple
-        * evaluation of input expressions and transition functions.  The
-        * context made by ExecAssignExprContext() is used as the output context.
+        * We also need a long-lived memory context for holding hashtable data
+        * structures and transition values.  NOTE: the details of what is
+        * stored in aggcontext and what is stored in the regular per-query
+        * memory context are driven by a simple decision: we want to reset
+        * the aggcontext in ExecReScanAgg to recover no-longer-wanted space.
          */
-       aggstate->tup_cxt =
-               aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory;
-       aggstate->agg_cxt[0] = 
+       aggstate->aggcontext =
                 AllocSetContextCreate(CurrentMemoryContext,
-                                                         "AggExprContext1",
+                                                         "AggContext",
                                                           ALLOCSET_DEFAULT_MINSIZE,
                                                           ALLOCSET_DEFAULT_INITSIZE,
                                                           ALLOCSET_DEFAULT_MAXSIZE);
-       aggstate->agg_cxt[1] = 
-               AllocSetContextCreate(CurrentMemoryContext,
-                                                         "AggExprContext2",
-                                                         ALLOCSET_DEFAULT_MINSIZE,
-                                                         ALLOCSET_DEFAULT_INITSIZE,
-                                                         ALLOCSET_DEFAULT_MAXSIZE);
-       aggstate->which_cxt = 0;
  
  #define AGG_NSLOTS 2
  
         /*
          * tuple table initialization
          */
-       ExecInitScanTupleSlot(estate, &aggstate->csstate);
-       ExecInitResultTupleSlot(estate, &aggstate->csstate.cstate);
+       ExecInitScanTupleSlot(estate, &aggstate->ss);
+       ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
  
         /*
-        * Set up aggregate-result storage in the expr context, and also
-        * allocate my private per-agg working storage
+        * initialize child expressions
+        *
+        * Note: ExecInitExpr finds Aggrefs for us, and also checks that no aggs
+        * contain other agg calls in their arguments.  This would make no
+        * sense under SQL semantics anyway (and it's forbidden by the spec).
+        * Because that is true, we don't need to worry about evaluating the
+        * aggs in any particular order.
          */
-       econtext = aggstate->csstate.cstate.cs_ExprContext;
-       econtext->ecxt_aggvalues = (Datum *) palloc(sizeof(Datum) * numaggs);
-       MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * numaggs);
-       econtext->ecxt_aggnulls = (bool *) palloc(sizeof(bool) * numaggs);
-       MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * numaggs);
-
-       peragg = (AggStatePerAgg) palloc(sizeof(AggStatePerAggData) * numaggs);
-       MemSet(peragg, 0, sizeof(AggStatePerAggData) * numaggs);
-       aggstate->peragg = peragg;
+       aggstate->ss.ps.targetlist = (List *)
+               ExecInitExpr((Expr *) node->plan.targetlist,
+                                        (PlanState *) aggstate);
+       aggstate->ss.ps.qual = (List *)
+               ExecInitExpr((Expr *) node->plan.qual,
+                                        (PlanState *) aggstate);
  
         /*
          * initialize child nodes
          */
         outerPlan = outerPlan(node);
-       ExecInitNode(outerPlan, estate, (Plan *) node);
+       outerPlanState(aggstate) = ExecInitNode(outerPlan, estate);
  
-       /* ----------------
-        *      initialize source tuple type.
-        * ----------------
+       /*
+        * initialize source tuple type.
          */
-       ExecAssignScanTypeFromOuterPlan((Plan *) node, &aggstate->csstate);
+       ExecAssignScanTypeFromOuterPlan(&aggstate->ss);
  
         /*
          * Initialize result tuple type and projection info.
          */
-       ExecAssignResultTypeFromTL((Plan *) node, &aggstate->csstate.cstate);
-       ExecAssignProjectionInfo((Plan *) node, &aggstate->csstate.cstate);
+       ExecAssignResultTypeFromTL(&aggstate->ss.ps);
+       ExecAssignProjectionInfo(&aggstate->ss.ps);
+
+       /*
+        * get the count of aggregates in targetlist and quals
+        */
+       numaggs = aggstate->numaggs;
+       Assert(numaggs == length(aggstate->aggs));
+       if (numaggs <= 0)
+       {
+               /*
+                * This is not an error condition: we might be using the Agg node
+                * just to do hash-based grouping.  Even in the regular case,
+                * constant-expression simplification could optimize away all of
+                * the Aggrefs in the targetlist and qual.  So keep going, but
+                * force the local copy of numaggs positive so that palloc()s
+                * below don't choke.
+                */
+               numaggs = 1;
+       }
+
+       /*
+        * If we are grouping, precompute fmgr lookup data for inner loop. We
+        * need both equality and hashing functions to do it by hashing, but
+        * only equality if not hashing.
+        */
+       if (node->numCols > 0)
+       {
+               if (node->aggstrategy == AGG_HASHED)
+                       execTuplesHashPrepare(ExecGetScanType(&aggstate->ss),
+                                                                 node->numCols,
+                                                                 node->grpColIdx,
+                                                                 &aggstate->eqfunctions,
+                                                                 &aggstate->hashfunctions);
+               else
+                       aggstate->eqfunctions =
+                               execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
+                                                                          node->numCols,
+                                                                          node->grpColIdx);
+       }
+
+       /*
+        * Set up aggregate-result storage in the output expr context, and
+        * also allocate my private per-agg working storage
+        */
+       econtext = aggstate->ss.ps.ps_ExprContext;
+       econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
+       econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
+
+       peragg = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
+       aggstate->peragg = peragg;
+
+       if (node->aggstrategy == AGG_HASHED)
+       {
+               build_hash_table(aggstate);
+               aggstate->table_filled = false;
+       }
+       else
+       {
+               AggStatePerGroup pergroup;
+
+               pergroup = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData) * numaggs);
+               aggstate->pergroup = pergroup;
+       }
  
         /*
          * Perform lookups of aggregate function info, and initialize the
-        * unchanging fields of the per-agg data
+        * unchanging fields of the per-agg data.  We also detect duplicate
+        * aggregates (for example, "SELECT sum(x) ... HAVING sum(x) > 0").
+        * When duplicates are detected, we only make an AggStatePerAgg struct
+        * for the first one.  The clones are simply pointed at the same
+        * result entry by giving them duplicate aggno values.
          */
         aggno = -1;
         foreach(alist, aggstate->aggs)
         {
-               Aggref     *aggref = (Aggref *) lfirst(alist);
-               AggStatePerAgg peraggstate = &peragg[++aggno];
-               char       *aggname = aggref->aggname;
+               AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(alist);
+               Aggref     *aggref = (Aggref *) aggrefstate->xprstate.expr;
+               AggStatePerAgg peraggstate;
+               Oid                     inputType;
                 HeapTuple       aggTuple;
                 Form_pg_aggregate aggform;
-               Type            typeInfo;
-               Oid                     xfn1_oid,
-                                       xfn2_oid,
+               Oid                     aggtranstype;
+               AclResult       aclresult;
+               Oid                     transfn_oid,
                                         finalfn_oid;
+               Expr       *transfnexpr,
+                                  *finalfnexpr;
+               Datum           textInitVal;
+               int                     i;
  
-               /* Mark Aggref node with its associated index in the result array */
-               aggref->aggno = aggno;
+               /* Planner should have assigned aggregate to correct level */
+               Assert(aggref->agglevelsup == 0);
+
+               /* Look for a previous duplicate aggregate */
+               for (i = 0; i <= aggno; i++)
+               {
+                       if (equal(aggref, peragg[i].aggref) &&
+                               !contain_volatile_functions((Node *) aggref))
+                               break;
+               }
+               if (i <= aggno)
+               {
+                       /* Found a match to an existing entry, so just mark it */
+                       aggrefstate->aggno = i;
+                       continue;
+               }
+
+               /* Nope, so assign a new PerAgg record */
+               peraggstate = &peragg[++aggno];
+
+               /* Mark Aggref state node with assigned index in the result array */
+               aggrefstate->aggno = aggno;
  
                 /* Fill in the peraggstate data */
+               peraggstate->aggrefstate = aggrefstate;
                 peraggstate->aggref = aggref;
  
-               aggTuple = SearchSysCacheTupleCopy(AGGNAME,
-                                                                                  PointerGetDatum(aggname),
-                                                                                  ObjectIdGetDatum(aggref->basetype),
-                                                                                  0, 0);
+               /*
+                * Get actual datatype of the input.  We need this because it may
+                * be different from the agg's declared input type, when the agg
+                * accepts ANY (e.g., COUNT(*)), ANYARRAY, or ANYELEMENT.
+                */
+               inputType = exprType((Node *) aggref->target);
+
+               aggTuple = SearchSysCache(AGGFNOID,
+                                                                 ObjectIdGetDatum(aggref->aggfnoid),
+                                                                 0, 0, 0);
                 if (!HeapTupleIsValid(aggTuple))
-                       elog(ERROR, "ExecAgg: cache lookup failed for aggregate %s(%s)",
-                                aggname,
-                                typeidTypeName(aggref->basetype));
+                       elog(ERROR, "cache lookup failed for aggregate %u",
+                                aggref->aggfnoid);
                 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
  
-               typeInfo = typeidType(aggform->aggfinaltype);
-               peraggstate->resulttypeLen = typeLen(typeInfo);
-               peraggstate->resulttypeByVal = typeByVal(typeInfo);
-
-               peraggstate->initValue1 =
-                       AggNameGetInitVal(aggname,
-                                                         aggform->aggbasetype,
-                                                         1,
-                                                         &peraggstate->initValue1IsNull);
-
-               peraggstate->initValue2 =
-                       AggNameGetInitVal(aggname,
-                                                         aggform->aggbasetype,
-                                                         2,
-                                                         &peraggstate->initValue2IsNull);
+               /* Check permission to call aggregate function */
+               aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
+                                                                        ACL_EXECUTE);
+               if (aclresult != ACLCHECK_OK)
+                       aclcheck_error(aclresult, ACL_KIND_PROC,
+                                                  get_func_name(aggref->aggfnoid));
  
-               peraggstate->xfn1_oid = xfn1_oid = aggform->aggtransfn1;
-               peraggstate->xfn2_oid = xfn2_oid = aggform->aggtransfn2;
+               peraggstate->transfn_oid = transfn_oid = aggform->aggtransfn;
                 peraggstate->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
  
-               if (OidIsValid(xfn1_oid))
+               /* resolve actual type of transition state, if polymorphic */
+               aggtranstype = aggform->aggtranstype;
+               if (aggtranstype == ANYARRAYOID || aggtranstype == ANYELEMENTOID)
                 {
-                       fmgr_info(xfn1_oid, &peraggstate->xfn1);
-                       /* If a transfn1 is specified, transtype1 had better be, too */
-                       typeInfo = typeidType(aggform->aggtranstype1);
-                       peraggstate->transtype1Len = typeLen(typeInfo);
-                       peraggstate->transtype1ByVal = typeByVal(typeInfo);
+                       /* have to fetch the agg's declared input type... */
+                       Oid                     agg_arg_types[FUNC_MAX_ARGS];
+                       int                     agg_nargs;
+
+                       (void) get_func_signature(aggref->aggfnoid,
+                                                                         agg_arg_types, &agg_nargs);
+                       Assert(agg_nargs == 1);
+                       aggtranstype = resolve_generic_type(aggtranstype,
+                                                                                               inputType,
+                                                                                               agg_arg_types[0]);
                 }
  
-               if (OidIsValid(xfn2_oid))
-               {
-                       fmgr_info(xfn2_oid, &peraggstate->xfn2);
-                       /* If a transfn2 is specified, transtype2 had better be, too */
-                       typeInfo = typeidType(aggform->aggtranstype2);
-                       peraggstate->transtype2Len = typeLen(typeInfo);
-                       peraggstate->transtype2ByVal = typeByVal(typeInfo);
-                       /* ------------------------------------------
-                        * If there is a second transition function, its initial
-                        * value must exist -- as it does not depend on data values,
-                        * we have no other way of determining an initial value.
-                        * ------------------------------------------
-                        */
-                       if (peraggstate->initValue2IsNull)
-                               elog(ERROR, "ExecInitAgg: agginitval2 is null");
-               }
+               /* build expression trees using actual argument & result types */
+               build_aggregate_fnexprs(inputType,
+                                                               aggtranstype,
+                                                               aggref->aggtype,
+                                                               transfn_oid,
+                                                               finalfn_oid,
+                                                               &transfnexpr,
+                                                               &finalfnexpr);
+
+               fmgr_info(transfn_oid, &peraggstate->transfn);
+               peraggstate->transfn.fn_expr = (Node *) transfnexpr;
  
                 if (OidIsValid(finalfn_oid))
+               {
                         fmgr_info(finalfn_oid, &peraggstate->finalfn);
+                       peraggstate->finalfn.fn_expr = (Node *) finalfnexpr;
+               }
+
+               get_typlenbyval(aggref->aggtype,
+                                               &peraggstate->resulttypeLen,
+                                               &peraggstate->resulttypeByVal);
+               get_typlenbyval(aggtranstype,
+                                               &peraggstate->transtypeLen,
+                                               &peraggstate->transtypeByVal);
+
+               /*
+                * initval is potentially null, so don't try to access it as a
+                * struct field. Must do it the hard way with SysCacheGetAttr.
+                */
+               textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
+                                                                         Anum_pg_aggregate_agginitval,
+                                                                         &peraggstate->initValueIsNull);
+
+               if (peraggstate->initValueIsNull)
+                       peraggstate->initValue = (Datum) 0;
+               else
+                       peraggstate->initValue = GetAggInitVal(textInitVal,
+                                                                                                  aggtranstype);
+
+               /*
+                * If the transfn is strict and the initval is NULL, make sure
+                * input type and transtype are the same (or at least binary-
+                * compatible), so that it's OK to use the first input value as
+                * the initial transValue.  This should have been checked at agg
+                * definition time, but just in case...
+                */
+               if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull)
+               {
+                       if (!IsBinaryCoercible(inputType, aggtranstype))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                                errmsg("aggregate %u needs to have compatible input type and transition type",
+                                                               aggref->aggfnoid)));
+               }
  
                 if (aggref->aggdistinct)
                 {
-                       Oid                     inputType = exprType(aggref->target);
-                       Operator        eq_operator;
-                       Form_pg_operator pgopform;
+                       Oid                     eq_function;
+
+                       /* We don't implement DISTINCT aggs in the HASHED case */
+                       Assert(node->aggstrategy != AGG_HASHED);
  
                         peraggstate->inputType = inputType;
-                       typeInfo = typeidType(inputType);
-                       peraggstate->inputtypeLen = typeLen(typeInfo);
-                       peraggstate->inputtypeByVal = typeByVal(typeInfo);
+                       get_typlenbyval(inputType,
+                                                       &peraggstate->inputtypeLen,
+                                                       &peraggstate->inputtypeByVal);
  
-                       eq_operator = oper("=", inputType, inputType, true);
-                       if (!HeapTupleIsValid(eq_operator))
-                       {
-                               elog(ERROR, "Unable to identify an equality operator for type '%s'",
-                                        typeidTypeName(inputType));
-                       }
-                       pgopform = (Form_pg_operator) GETSTRUCT(eq_operator);
-                       fmgr_info(pgopform->oprcode, &(peraggstate->equalfn));
-                       peraggstate->sortOperator = any_ordering_op(inputType);
+                       eq_function = equality_oper_funcid(inputType);
+                       fmgr_info(eq_function, &(peraggstate->equalfn));
+                       peraggstate->sortOperator = ordering_oper_opid(inputType);
                         peraggstate->sortstate = NULL;
                 }
  
-               heap_freetuple(aggTuple);
+               ReleaseSysCache(aggTuple);
         }
  
-       return TRUE;
+       /* Update numaggs to match number of unique aggregates found */
+       aggstate->numaggs = aggno + 1;
+
+       return aggstate;
+}
+
+static Datum
+GetAggInitVal(Datum textInitVal, Oid transtype)
+{
+       char       *strInitVal;
+       HeapTuple       tup;
+       Oid                     typinput,
+                               typelem;
+       Datum           initVal;
+
+       strInitVal = DatumGetCString(DirectFunctionCall1(textout, textInitVal));
+
+       tup = SearchSysCache(TYPEOID,
+                                                ObjectIdGetDatum(transtype),
+                                                0, 0, 0);
+       if (!HeapTupleIsValid(tup))
+               elog(ERROR, "cache lookup failed for type %u", transtype);
+
+       typinput = ((Form_pg_type) GETSTRUCT(tup))->typinput;
+       typelem = ((Form_pg_type) GETSTRUCT(tup))->typelem;
+       ReleaseSysCache(tup);
+
+       initVal = OidFunctionCall3(typinput,
+                                                          CStringGetDatum(strInitVal),
+                                                          ObjectIdGetDatum(typelem),
+                                                          Int32GetDatum(-1));
+
+       pfree(strInitVal);
+       return initVal;
  }
  
  int
  ExecCountSlotsAgg(Agg *node)
  {
         return ExecCountSlotsNode(outerPlan(node)) +
-       ExecCountSlotsNode(innerPlan(node)) +
-       AGG_NSLOTS;
+               ExecCountSlotsNode(innerPlan(node)) +
+               AGG_NSLOTS;
  }
  
  void
-ExecEndAgg(Agg *node)
+ExecEndAgg(AggState *node)
  {
-       AggState   *aggstate = node->aggstate;
-       Plan       *outerPlan;
+       PlanState  *outerPlan;
+       int                     aggno;
+
+       /* Make sure we have closed any open tuplesorts */
+       for (aggno = 0; aggno < node->numaggs; aggno++)
+       {
+               AggStatePerAgg peraggstate = &node->peragg[aggno];
+
+               if (peraggstate->sortstate)
+                       tuplesort_end(peraggstate->sortstate);
+       }
  
-       ExecFreeProjectionInfo(&aggstate->csstate.cstate);
-       /*
-        * Make sure ExecFreeExprContext() frees the right expr context...
-        */
-       aggstate->csstate.cstate.cs_ExprContext->ecxt_per_tuple_memory =
-               aggstate->tup_cxt;
-       ExecFreeExprContext(&aggstate->csstate.cstate);
         /*
-        * ... and I free the others.
+        * Free both the expr contexts.
          */
-       MemoryContextDelete(aggstate->agg_cxt[0]);
-       MemoryContextDelete(aggstate->agg_cxt[1]);
-
-       outerPlan = outerPlan(node);
-       ExecEndNode(outerPlan, (Plan *) node);
+       ExecFreeExprContext(&node->ss.ps);
+       node->ss.ps.ps_ExprContext = node->tmpcontext;
+       ExecFreeExprContext(&node->ss.ps);
  
         /* clean up tuple table */
-       ExecClearTuple(aggstate->csstate.css_ScanTupleSlot);
+       ExecClearTuple(node->ss.ss_ScanTupleSlot);
+
+       MemoryContextDelete(node->aggcontext);
+
+       outerPlan = outerPlanState(node);
+       ExecEndNode(outerPlan);
  }
  
  void
-ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent)
+ExecReScanAgg(AggState *node, ExprContext *exprCtxt)
  {
-       AggState   *aggstate = node->aggstate;
-       ExprContext *econtext = aggstate->csstate.cstate.cs_ExprContext;
+       ExprContext *econtext = node->ss.ps.ps_ExprContext;
+       int                     aggno;
  
-       aggstate->agg_done = false;
-       MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * aggstate->numaggs);
-       MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * aggstate->numaggs);
+       node->agg_done = false;
+
+       if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+       {
+               /*
+                * In the hashed case, if we haven't yet built the hash table we
+                * can just return: nothing has been done, so nothing to undo.  If
+                * the subnode's chgParam is not NULL it will be re-scanned by
+                * ExecProcNode; otherwise there is no reason to re-scan it.
+                */
+               if (!node->table_filled)
+                       return;
+
+               /*
+                * If we do have the hash table and the subplan does not have any
+                * parameter changes, then we can just rescan the existing hash
+                * table; no need to build it again.
+                */
+               if (((PlanState *) node)->lefttree->chgParam == NULL)
+               {
+                       ResetTupleHashIterator(node->hashtable, &node->hashiter);
+                       return;
+               }
+       }
+
+       /* Make sure we have closed any open tuplesorts */
+       for (aggno = 0; aggno < node->numaggs; aggno++)
+       {
+               AggStatePerAgg peraggstate = &node->peragg[aggno];
+
+               if (peraggstate->sortstate)
+                       tuplesort_end(peraggstate->sortstate);
+               peraggstate->sortstate = NULL;
+       }
+
+       /* Release first tuple of group, if we have made a copy */
+       if (node->grp_firstTuple != NULL)
+       {
+               heap_freetuple(node->grp_firstTuple);
+               node->grp_firstTuple = NULL;
+       }
+
+       /* Forget current agg values */
+       MemSet(econtext->ecxt_aggvalues, 0, sizeof(Datum) * node->numaggs);
+       MemSet(econtext->ecxt_aggnulls, 0, sizeof(bool) * node->numaggs);
+
+       /* Release all temp storage */
+       MemoryContextReset(node->aggcontext);
+
+       if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
+       {
+               /* Rebuild an empty hash table */
+               build_hash_table(node);
+               node->table_filled = false;
+       }
  
         /*
          * if chgParam of subnode is not null then plan will be re-scanned by
          * first ExecProcNode.
          */
-       if (((Plan *) node)->lefttree->chgParam == NULL)
-               ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node);
+       if (((PlanState *) node)->lefttree->chgParam == NULL)
+               ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
+}
+
+/*
+ * aggregate_dummy - dummy execution routine for aggregate functions
+ *
+ * This function is listed as the implementation (prosrc field) of pg_proc
+ * entries for aggregate functions.  Its only purpose is to throw an error
+ * if someone mistakenly executes such a function in the normal way.
+ *
+ * Perhaps someday we could assign real meaning to the prosrc field of
+ * an aggregate?
+ */
+Datum
+aggregate_dummy(PG_FUNCTION_ARGS)
+{
+       elog(ERROR, "aggregate function %u called as normal function",
+                fcinfo->flinfo->fn_oid);
+       return (Datum) 0;                       /* keep compiler quiet */
  }