aggregate(DISTINCT ...) works, per SQL spec.

author Tom Lane <tgl@sss.pgh.pa.us>
Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>
Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 0956af455f1347061cbf3faee2e5c92fe82bb501..0a95c92347fb858ffd470245ef1203807364f51c 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -3,15 +3,35 @@
   * nodeAgg.c
   *       Routines to handle aggregate nodes.
   *
- * Copyright (c) 1994, Regents of the University of California
+ *       ExecAgg evaluates each aggregate in the following steps: (initcond1,
+ *       initcond2 are the initial values and sfunc1, sfunc2, and finalfunc are
+ *       the transition functions.)
+ *
+ *              value1 = initcond1
+ *              value2 = initcond2
+ *              foreach input_value do
+ *                     value1 = sfunc1(value1, input_value)
+ *                     value2 = sfunc2(value2)
+ *              value1 = finalfunc(value1, value2)
+ *
+ *       If initcond1 is NULL then the first non-NULL input_value is
+ *       assigned directly to value1.  sfunc1 isn't applied until value1
+ *       is non-NULL.
+ *
+ *       sfunc1 is never applied when the current tuple's input_value is NULL.
+ *       sfunc2 is applied for each tuple if the aggref is marked 'usenulls',
+ *       otherwise it is only applied when input_value is not NULL.
+ *       (usenulls was formerly used for COUNT(*), but is no longer needed for
+ *       that purpose; as of 10/1999 the support for usenulls is dead code.
+ *       I have not removed it because it seems like a potentially useful
+ *       feature for user-defined aggregates.  We'd just need to add a
+ *       flag column to pg_aggregate and a parameter to CREATE AGGREGATE...)
   *
   *
- * NOTE
- *       The implementation of Agg node has been reworked to handle legal
- *       SQL aggregates. (Do not expect POSTQUEL semantics.)    -- ay 2/95
+ * Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.59 1999/10/30 02:35:14 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.60 1999/12/13 01:26:52 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,11 +40,15 @@
  
  #include "access/heapam.h"
  #include "catalog/pg_aggregate.h"
+#include "catalog/pg_operator.h"
  #include "executor/executor.h"
  #include "executor/nodeAgg.h"
  #include "optimizer/clauses.h"
+#include "parser/parse_expr.h"
+#include "parser/parse_oper.h"
  #include "parser/parse_type.h"
  #include "utils/syscache.h"
+#include "utils/tuplesort.h"
  
  /*
   * AggStatePerAggData - per-aggregate working state for the Agg scan
@@ -36,6 +60,9 @@ typedef struct AggStatePerAggData
          * thereafter:
          */
  
+       /* Link to Aggref node this working state is for */
+       Aggref     *aggref;
+
         /* Oids of transfer functions */
         Oid                     xfn1_oid;
         Oid                     xfn2_oid;
@@ -47,6 +74,18 @@ typedef struct AggStatePerAggData
         FmgrInfo        xfn1;
         FmgrInfo        xfn2;
         FmgrInfo        finalfn;
+       /*
+        * Type of input data and Oid of sort operator to use for it;
+        * only set/used when aggregate has DISTINCT flag.  (These are not
+        * used directly by nodeAgg, but must be passed to the Tuplesort object.)
+        */
+       Oid                     inputType;
+       Oid                     sortOperator;
+       /*
+        * fmgr lookup data for input type's equality operator --- only set/used
+        * when aggregate has DISTINCT flag.
+        */
+       FmgrInfo        equalfn;
         /*
          * initial values from pg_aggregate entry
          */
@@ -55,19 +94,29 @@ typedef struct AggStatePerAggData
         bool            initValue1IsNull,
                                 initValue2IsNull;
         /*
-        * We need the len and byval info for the agg's transition status types
-        * in order to know how to copy/delete values.
+        * We need the len and byval info for the agg's input and transition
+        * data types in order to know how to copy/delete values.
          */
-       int                     transtype1Len,
+       int                     inputtypeLen,
+                               transtype1Len,
                                 transtype2Len;
-       bool            transtype1ByVal,
+       bool            inputtypeByVal,
+                               transtype1ByVal,
                                 transtype2ByVal;
  
         /*
          * These values are working state that is initialized at the start
-        * of an input tuple group and updated for each input tuple:
+        * of an input tuple group and updated for each input tuple.
+        *
+        * For a simple (non-DISTINCT) aggregate, we just feed the input values
+        * straight to the transition functions.  If it's DISTINCT, we pass the
+        * input values into a Tuplesort object; then at completion of the input
+        * tuple group, we scan the sorted values, eliminate duplicates, and run
+        * the transition functions on the rest.
          */
  
+       Tuplesortstate *sortstate;      /* sort object, if a DISTINCT agg */
+
         Datum           value1,                 /* current transfer values 1 and 2 */
                                 value2;
         bool            value1IsNull,
@@ -82,28 +131,248 @@ typedef struct AggStatePerAggData
  } AggStatePerAggData;
  
  
+static void initialize_aggregate (AggStatePerAgg peraggstate);
+static void advance_transition_functions (AggStatePerAgg peraggstate,
+                                                                                 Datum newVal, bool isNull);
+static void finalize_aggregate (AggStatePerAgg peraggstate,
+                                                               Datum *resultVal, bool *resultIsNull);
+static Datum copyDatum(Datum val, int typLen, bool typByVal);
+
+
  /*
- * Helper routine to make a copy of a Datum.
- *
- * NB: input had better not be a NULL; might cause null-pointer dereference.
+ * Initialize one aggregate for a new set of input values.
   */
-static Datum
-copyDatum(Datum val, int typLen, bool typByVal)
+static void
+initialize_aggregate (AggStatePerAgg peraggstate)
  {
-       if (typByVal)
-               return val;
+       Aggref             *aggref = peraggstate->aggref;
+
+       /*
+        * Start a fresh sort operation for each DISTINCT aggregate.
+        */
+       if (aggref->aggdistinct)
+       {
+               /* In case of rescan, maybe there could be an uncompleted
+                * sort operation?  Clean it up if so.
+                */
+               if (peraggstate->sortstate)
+                       tuplesort_end(peraggstate->sortstate);
+
+               peraggstate->sortstate =
+                       tuplesort_begin_datum(peraggstate->inputType,
+                                                                 peraggstate->sortOperator,
+                                                                 false);
+       }
+
+       /*
+        * (Re)set value1 and value2 to their initial values.
+        */
+       if (OidIsValid(peraggstate->xfn1_oid) &&
+               ! peraggstate->initValue1IsNull)
+               peraggstate->value1 = copyDatum(peraggstate->initValue1, 
+                                                                               peraggstate->transtype1Len,
+                                                                               peraggstate->transtype1ByVal);
+       else
+               peraggstate->value1 = (Datum) NULL;
+       peraggstate->value1IsNull = peraggstate->initValue1IsNull;
+
+       if (OidIsValid(peraggstate->xfn2_oid) &&
+               ! peraggstate->initValue2IsNull)
+               peraggstate->value2 = copyDatum(peraggstate->initValue2, 
+                                                                               peraggstate->transtype2Len,
+                                                                               peraggstate->transtype2ByVal);
         else
+               peraggstate->value2 = (Datum) NULL;
+       peraggstate->value2IsNull = peraggstate->initValue2IsNull;
+
+       /* ------------------------------------------
+        * If the initial value for the first transition function
+        * doesn't exist in the pg_aggregate table then we will let
+        * the first value returned from the outer procNode become
+        * the initial value. (This is useful for aggregates like
+        * max{} and min{}.)  The noInitValue flag signals that we
+        * still need to do this.
+        * ------------------------------------------
+        */
+       peraggstate->noInitValue = peraggstate->initValue1IsNull;
+}
+
+/*
+ * Given a new input value, advance the transition functions of an aggregate.
+ *
+ * Note: if the agg does not have usenulls set, null inputs will be filtered
+ * out before reaching here.
+ */
+static void
+advance_transition_functions (AggStatePerAgg peraggstate,
+                                                         Datum newVal, bool isNull)
+{
+       Datum           args[2];
+
+       if (OidIsValid(peraggstate->xfn1_oid) && !isNull)
         {
-               char   *newVal;
+               if (peraggstate->noInitValue)
+               {
+                       /*
+                        * value1 has not been initialized. This is the first non-NULL
+                        * input value. We use it as the initial value for value1.
+                        *
+                        * XXX We assume, without having checked, that the agg's input
+                        * type is binary-compatible with its transtype1!
+                        *
+                        * We have to copy the datum since the tuple from which it came
+                        * will be freed on the next iteration of the scan.
+                        */
+                       peraggstate->value1 = copyDatum(newVal,
+                                                                                       peraggstate->transtype1Len,
+                                                                                       peraggstate->transtype1ByVal);
+                       peraggstate->value1IsNull = false;
+                       peraggstate->noInitValue = false;
+               }
+               else
+               {
+                       /* apply transition function 1 */
+                       args[0] = peraggstate->value1;
+                       args[1] = newVal;
+                       newVal = (Datum) fmgr_c(&peraggstate->xfn1,
+                                                                       (FmgrValues *) args,
+                                                                       &isNull);
+                       if (! peraggstate->transtype1ByVal)
+                               pfree(peraggstate->value1);
+                       peraggstate->value1 = newVal;
+               }
+       }
  
-               if (typLen == -1)               /* variable length type? */
-                       typLen = VARSIZE((struct varlena *) DatumGetPointer(val));
-               newVal = (char *) palloc(typLen);
-               memcpy(newVal, DatumGetPointer(val), typLen);
-               return PointerGetDatum(newVal);
+       if (OidIsValid(peraggstate->xfn2_oid))
+       {
+               /* apply transition function 2 */
+               args[0] = peraggstate->value2;
+               isNull = false;                 /* value2 cannot be null, currently */
+               newVal = (Datum) fmgr_c(&peraggstate->xfn2,
+                                                               (FmgrValues *) args,
+                                                               &isNull);
+               if (! peraggstate->transtype2ByVal)
+                       pfree(peraggstate->value2);
+               peraggstate->value2 = newVal;
         }
  }
  
+/*
+ * Compute the final value of one aggregate.
+ */
+static void
+finalize_aggregate (AggStatePerAgg peraggstate,
+                                       Datum *resultVal, bool *resultIsNull)
+{
+       Aggref     *aggref = peraggstate->aggref;
+       char       *args[2];
+
+       /*
+        * If it's a DISTINCT aggregate, all we've done so far is to stuff the
+        * input values into the sort object.  Complete the sort, then run
+        * the transition functions on the non-duplicate values.  Note that
+        * DISTINCT always suppresses nulls, per SQL spec, regardless of usenulls.
+        */
+       if (aggref->aggdistinct)
+       {
+               Datum           oldVal = (Datum) 0;
+               bool            haveOldVal = false;
+               Datum           newVal;
+               bool            isNull;
+
+               tuplesort_performsort(peraggstate->sortstate);
+               while (tuplesort_getdatum(peraggstate->sortstate, true,
+                                                                 &newVal, &isNull))
+               {
+                       if (isNull)
+                               continue;
+                       if (haveOldVal)
+                       {
+                               Datum   equal;
+
+                               equal = (Datum) (*fmgr_faddr(&peraggstate->equalfn)) (oldVal,
+                                                                                                                                         newVal);
+                               if (DatumGetInt32(equal) != 0)
+                               {
+                                       if (! peraggstate->inputtypeByVal)
+                                               pfree(DatumGetPointer(newVal));
+                                       continue;
+                               }
+                       }
+                       advance_transition_functions(peraggstate, newVal, false);
+                       if (haveOldVal && ! peraggstate->inputtypeByVal)
+                               pfree(DatumGetPointer(oldVal));
+                       oldVal = newVal;
+                       haveOldVal = true;
+               }
+               if (haveOldVal && ! peraggstate->inputtypeByVal)
+                       pfree(DatumGetPointer(oldVal));
+               tuplesort_end(peraggstate->sortstate);
+               peraggstate->sortstate = NULL;
+       }
+
+       /*
+        * Now apply the agg's finalfn, or substitute the appropriate transition
+        * value if there is no finalfn.
+        *
+        * XXX For now, only apply finalfn if we got at least one
+        * non-null input value.  This prevents zero divide in AVG().
+        * If we had cleaner handling of null inputs/results in functions,
+        * we could probably take out this hack and define the result
+        * for no inputs as whatever finalfn returns for null input.
+        */
+       if (OidIsValid(peraggstate->finalfn_oid) &&
+               ! peraggstate->noInitValue)
+       {
+               if (peraggstate->finalfn.fn_nargs > 1)
+               {
+                       args[0] = (char *) peraggstate->value1;
+                       args[1] = (char *) peraggstate->value2;
+               }
+               else if (OidIsValid(peraggstate->xfn1_oid))
+                       args[0] = (char *) peraggstate->value1;
+               else if (OidIsValid(peraggstate->xfn2_oid))
+                       args[0] = (char *) peraggstate->value2;
+               else
+                       elog(ERROR, "ExecAgg: no valid transition functions??");
+               *resultIsNull = false;
+               *resultVal = (Datum) fmgr_c(&peraggstate->finalfn,
+                                                                       (FmgrValues *) args,
+                                                                       resultIsNull);
+       }
+       else if (OidIsValid(peraggstate->xfn1_oid))
+       {
+               /* Return value1 */
+               *resultVal = peraggstate->value1;
+               *resultIsNull = peraggstate->value1IsNull;
+               /* prevent pfree below */
+               peraggstate->value1IsNull = true;
+       }
+       else if (OidIsValid(peraggstate->xfn2_oid))
+       {
+               /* Return value2 */
+               *resultVal = peraggstate->value2;
+               *resultIsNull = peraggstate->value2IsNull;
+               /* prevent pfree below */
+               peraggstate->value2IsNull = true;
+       }
+       else
+               elog(ERROR, "ExecAgg: no valid transition functions??");
+
+       /*
+        * Release any per-group working storage, unless we're passing
+        * it back as the result of the aggregate.
+        */
+       if (OidIsValid(peraggstate->xfn1_oid) &&
+               ! peraggstate->value1IsNull &&
+               ! peraggstate->transtype1ByVal)
+               pfree(peraggstate->value1);
+       
+       if (OidIsValid(peraggstate->xfn2_oid) &&
+               ! peraggstate->value2IsNull &&
+               ! peraggstate->transtype2ByVal)
+               pfree(peraggstate->value2);
+}
  
  /* ---------------------------------------
   *
@@ -118,30 +387,6 @@ copyDatum(Datum val, int typLen, bool typByVal)
   *       the expression context to be used when ExecProject evaluates the
   *       result tuple.
   *
- *       ExecAgg evaluates each aggregate in the following steps: (initcond1,
- *       initcond2 are the initial values and sfunc1, sfunc2, and finalfunc are
- *       the transition functions.)
- *
- *              value1 = initcond1
- *              value2 = initcond2
- *              foreach tuple do
- *                     value1 = sfunc1(value1, aggregated_value)
- *                     value2 = sfunc2(value2)
- *              value1 = finalfunc(value1, value2)
- *
- *       If initcond1 is NULL then the first non-NULL aggregated_value is
- *       assigned directly to value1.  sfunc1 isn't applied until value1
- *       is non-NULL.
- *
- *       sfunc1 is never applied when the current tuple's aggregated_value
- *       is NULL.  sfunc2 is applied for each tuple if the aggref is marked
- *       'usenulls', otherwise it is only applied when aggregated_value is
- *       not NULL.  (usenulls was formerly used for COUNT(*), but is no longer
- *       needed for that purpose; as of 10/1999 the support for usenulls is
- *       dead code.  I have not removed it because it seems like a potentially
- *       useful feature for user-defined aggregates.  We'd just need to add a
- *       flag column to pg_aggregate and a parameter to CREATE AGGREGATE...)
- *
   *       If the outer subplan is a Group node, ExecAgg returns as many tuples
   *       as there are groups.
   *
@@ -161,7 +406,6 @@ ExecAgg(Agg *node)
         TupleTableSlot *resultSlot;
         HeapTuple       inputTuple;
         int                     aggno;
-       List       *alist;
         bool            isDone;
         bool            isNull;
  
@@ -190,42 +434,11 @@ ExecAgg(Agg *node)
                 /*
                  * Initialize working state for a new input tuple group
                  */
-               aggno = -1;
-               foreach(alist, aggstate->aggs)
+               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
                 {
-                       AggStatePerAgg  peraggstate = &peragg[++aggno];
+                       AggStatePerAgg  peraggstate = &peragg[aggno];
  
-                       /*
-                        * (Re)set value1 and value2 to their initial values.
-                        */
-                       if (OidIsValid(peraggstate->xfn1_oid) &&
-                               ! peraggstate->initValue1IsNull)
-                               peraggstate->value1 = copyDatum(peraggstate->initValue1, 
-                                                                                               peraggstate->transtype1Len,
-                                                                                               peraggstate->transtype1ByVal);
-                       else
-                               peraggstate->value1 = (Datum) NULL;
-                       peraggstate->value1IsNull = peraggstate->initValue1IsNull;
-
-                       if (OidIsValid(peraggstate->xfn2_oid) &&
-                               ! peraggstate->initValue2IsNull)
-                               peraggstate->value2 = copyDatum(peraggstate->initValue2, 
-                                                                                               peraggstate->transtype2Len,
-                                                                                               peraggstate->transtype2ByVal);
-                       else
-                               peraggstate->value2 = (Datum) NULL;
-                       peraggstate->value2IsNull = peraggstate->initValue2IsNull;
-
-                       /* ------------------------------------------
-                        * If the initial value for the first transition function
-                        * doesn't exist in the pg_aggregate table then we will let
-                        * the first value returned from the outer procNode become
-                        * the initial value. (This is useful for aggregates like
-                        * max{} and min{}.)  The noInitValue flag signals that we
-                        * still need to do this.
-                        * ------------------------------------------
-                        */
-                       peraggstate->noInitValue = peraggstate->initValue1IsNull;
+                       initialize_aggregate(peraggstate);
                 }
  
                 inputTuple = NULL;              /* no saved input tuple yet */
@@ -243,13 +456,11 @@ ExecAgg(Agg *node)
                                 break;
                         econtext->ecxt_scantuple = outerslot;
  
-                       aggno = -1;
-                       foreach(alist, aggstate->aggs)
+                       for (aggno = 0; aggno < aggstate->numaggs; aggno++)
                         {
-                               Aggref             *aggref = (Aggref *) lfirst(alist);
-                               AggStatePerAgg  peraggstate = &peragg[++aggno];
+                               AggStatePerAgg  peraggstate = &peragg[aggno];
+                               Aggref             *aggref = peraggstate->aggref;
                                 Datum                   newVal;
-                               Datum                   args[2];
  
                                 newVal = ExecEvalExpr(aggref->target, econtext,
                                                                           &isNull, &isDone);
@@ -257,53 +468,12 @@ ExecAgg(Agg *node)
                                 if (isNull && !aggref->usenulls)
                                         continue;       /* ignore this tuple for this agg */
  
-                               if (OidIsValid(peraggstate->xfn1_oid) && !isNull)
-                               {
-                                       if (peraggstate->noInitValue)
-                                       {
-                                               /*
-                                                * value1 has not been initialized. This is the
-                                                * first non-NULL input value. We use it as the
-                                                * initial value for value1.  XXX We assume,
-                                                * without having checked, that the agg's input type
-                                                * is binary-compatible with its transtype1!
-                                                *
-                                                * We have to copy the datum since the tuple from
-                                                * which it came will be freed on the next iteration
-                                                * of the scan.  
-                                                */
-                                               peraggstate->value1 = copyDatum(newVal,
-                                                                                               peraggstate->transtype1Len,
-                                                                                               peraggstate->transtype1ByVal);
-                                               peraggstate->value1IsNull = false;
-                                               peraggstate->noInitValue = false;
-                                       }
-                                       else
-                                       {
-                                               /* apply transition function 1 */
-                                               args[0] = peraggstate->value1;
-                                               args[1] = newVal;
-                                               newVal = (Datum) fmgr_c(&peraggstate->xfn1,
-                                                                                               (FmgrValues *) args,
-                                                                                               &isNull);
-                                               if (! peraggstate->transtype1ByVal)
-                                                       pfree(peraggstate->value1);
-                                               peraggstate->value1 = newVal;
-                                       }
-                               }
-
-                               if (OidIsValid(peraggstate->xfn2_oid))
-                               {
-                                       /* apply transition function 2 */
-                                       args[0] = peraggstate->value2;
-                                       isNull = false; /* value2 cannot be null, currently */
-                                       newVal = (Datum) fmgr_c(&peraggstate->xfn2,
-                                                                                       (FmgrValues *) args,
-                                                                                       &isNull);
-                                       if (! peraggstate->transtype2ByVal)
-                                               pfree(peraggstate->value2);
-                                       peraggstate->value2 = newVal;
-                               }
+                               if (aggref->aggdistinct)
+                                       tuplesort_putdatum(peraggstate->sortstate,
+                                                                          newVal, isNull);
+                               else
+                                       advance_transition_functions(peraggstate,
+                                                                                                newVal, isNull);
                         }
  
                         /*
@@ -320,70 +490,12 @@ ExecAgg(Agg *node)
                  * Done scanning input tuple group.
                  * Finalize each aggregate calculation.
                  */
-               aggno = -1;
-               foreach(alist, aggstate->aggs)
+               for (aggno = 0; aggno < aggstate->numaggs; aggno++)
                 {
-                       AggStatePerAgg  peraggstate = &peragg[++aggno];
-                       char               *args[2];
-
-                       /*
-                        * XXX For now, only apply finalfn if we got at least one
-                        * non-null input value.  This prevents zero divide in AVG().
-                        * If we had cleaner handling of null inputs/results in functions,
-                        * we could probably take out this hack and define the result
-                        * for no inputs as whatever finalfn returns for null input.
-                        */
-                       if (OidIsValid(peraggstate->finalfn_oid) &&
-                               ! peraggstate->noInitValue)
-                       {
-                               if (peraggstate->finalfn.fn_nargs > 1)
-                               {
-                                       args[0] = (char *) peraggstate->value1;
-                                       args[1] = (char *) peraggstate->value2;
-                               }
-                               else if (OidIsValid(peraggstate->xfn1_oid))
-                                       args[0] = (char *) peraggstate->value1;
-                               else if (OidIsValid(peraggstate->xfn2_oid))
-                                       args[0] = (char *) peraggstate->value2;
-                               else
-                                       elog(ERROR, "ExecAgg: no valid transition functions??");
-                               aggnulls[aggno] = false;
-                               aggvalues[aggno] = (Datum) fmgr_c(&peraggstate->finalfn,
-                                                                                                 (FmgrValues *) args,
-                                                                                                 &(aggnulls[aggno]));
-                       }
-                       else if (OidIsValid(peraggstate->xfn1_oid))
-                       {
-                               /* Return value1 */
-                               aggvalues[aggno] = peraggstate->value1;
-                               aggnulls[aggno] = peraggstate->value1IsNull;
-                               /* prevent pfree below */
-                               peraggstate->value1IsNull = true;
-                       }
-                       else if (OidIsValid(peraggstate->xfn2_oid))
-                       {
-                               /* Return value2 */
-                               aggvalues[aggno] = peraggstate->value2;
-                               aggnulls[aggno] = peraggstate->value2IsNull;
-                               /* prevent pfree below */
-                               peraggstate->value2IsNull = true;
-                       }
-                       else
-                               elog(ERROR, "ExecAgg: no valid transition functions??");
-
-                       /*
-                        * Release any per-group working storage, unless we're passing
-                        * it back as the result of the aggregate.
-                        */
-                       if (OidIsValid(peraggstate->xfn1_oid) &&
-                               ! peraggstate->value1IsNull &&
-                               ! peraggstate->transtype1ByVal)
-                               pfree(peraggstate->value1);
+                       AggStatePerAgg  peraggstate = &peragg[aggno];
  
-                       if (OidIsValid(peraggstate->xfn2_oid) &&
-                               ! peraggstate->value2IsNull &&
-                               ! peraggstate->transtype2ByVal)
-                               pfree(peraggstate->value2);
+                       finalize_aggregate(peraggstate,
+                                                          & aggvalues[aggno], & aggnulls[aggno]);
                 }
  
                 /*
@@ -458,14 +570,14 @@ ExecAgg(Agg *node)
  
                 /*
                  * Form a projection tuple using the aggregate results and the
-                * representative input tuple.  Store it in the result tuple slot,
-                * and return it if it meets my qual condition.
+                * representative input tuple.  Store it in the result tuple slot.
                  */
                 resultSlot = ExecProject(projInfo, &isDone);
  
                 /*
                  * If the completed tuple does not match the qualifications,
                  * it is ignored and we loop back to try to process another group.
+                * Otherwise, return the tuple.
                  */
         }
         while (! ExecQual(node->plan.qual, econtext));
@@ -505,6 +617,11 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
  
         /*
          * find aggregates in targetlist and quals
+        *
+        * Note: pull_agg_clause also checks that no aggs contain other agg
+        * calls in their arguments.  This would make no sense under SQL semantics
+        * anyway (and it's forbidden by the spec).  Because that is true, we
+        * don't need to worry about evaluating the aggs in any particular order.
          */
         aggstate->aggs = nconc(pull_agg_clause((Node *) node->plan.targetlist),
                                                    pull_agg_clause((Node *) node->plan.qual));
@@ -588,6 +705,9 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
                 /* Mark Aggref node with its associated index in the result array */
                 aggref->aggno = aggno;
  
+               /* Fill in the peraggstate data */
+               peraggstate->aggref = aggref;
+
                 aggTuple = SearchSysCacheTuple(AGGNAME,
                                                                            PointerGetDatum(aggname),
                                                                            ObjectIdGetDatum(aggref->basetype),
@@ -644,6 +764,29 @@ ExecInitAgg(Agg *node, EState *estate, Plan *parent)
                 {
                         fmgr_info(finalfn_oid, &peraggstate->finalfn);
                 }
+
+               if (aggref->aggdistinct)
+               {
+                       Oid                     inputType = exprType(aggref->target);
+                       Operator        eq_operator;
+                       Form_pg_operator pgopform;
+
+                       peraggstate->inputType = inputType;
+                       typeInfo = typeidType(inputType);
+                       peraggstate->inputtypeLen = typeLen(typeInfo);
+                       peraggstate->inputtypeByVal = typeByVal(typeInfo);
+
+                       eq_operator = oper("=", inputType, inputType, true);
+                       if (!HeapTupleIsValid(eq_operator))
+                       {
+                               elog(ERROR, "Unable to identify an equality operator for type '%s'",
+                                        typeidTypeName(inputType));
+                       }
+                       pgopform = (Form_pg_operator) GETSTRUCT(eq_operator);
+                       fmgr_info(pgopform->oprcode, &(peraggstate->equalfn));
+                       peraggstate->sortOperator = any_ordering_op(inputType);
+                       peraggstate->sortstate = NULL;
+               }
         }
  
         return TRUE;
@@ -690,3 +833,26 @@ ExecReScanAgg(Agg *node, ExprContext *exprCtxt, Plan *parent)
                 ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node);
  
  }
+
+
+/*
+ * Helper routine to make a copy of a Datum.
+ *
+ * NB: for pass-by-reference types, val must not be a null pointer; it is dereferenced.
+ */
+static Datum
+copyDatum(Datum val, int typLen, bool typByVal)
+{
+       if (typByVal)
+               return val;
+       else
+       {
+               char   *newVal;
+
+               if (typLen == -1)               /* variable length type? */
+                       typLen = VARSIZE((struct varlena *) DatumGetPointer(val));
+               newVal = (char *) palloc(typLen);
+               memcpy(newVal, DatumGetPointer(val), typLen);
+               return PointerGetDatum(newVal);
+       }
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c

index 1b2726f822647b655497c40c1ac7018b47e48738..884926b9b628f28d2d7372f4bdc4ae9223ef70e1 100644 (file)
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.97 1999/11/23 20:06:52 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.98 1999/12/13 01:26:53 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -832,6 +832,8 @@ _copyAggref(Aggref *from)
         newnode->aggtype = from->aggtype;
         Node_Copy(from, newnode, target);
         newnode->usenulls = from->usenulls;
+       newnode->aggstar = from->aggstar;
+       newnode->aggdistinct = from->aggdistinct;
         newnode->aggno = from->aggno; /* probably not needed */
  
         return newnode;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c

index b35b271275404fab7d284eb823593009f7280211..f70fe508ae58dbb110e633b8b513d1179a795841 100644 (file)
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.52 1999/11/23 20:06:52 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.53 1999/12/13 01:26:53 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -219,6 +219,10 @@ _equalAggref(Aggref *a, Aggref *b)
                 return false;
         if (a->usenulls != b->usenulls)
                 return false;
+       if (a->aggstar != b->aggstar)
+               return false;
+       if (a->aggdistinct != b->aggdistinct)
+               return false;
         /* ignore aggno, which is only a private field for the executor */
         return true;
  }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c

index 78bda61b30fc72e4e2e8f674ab5860e8d274dc68..7907f1b62ef8de354cb08656298de17f9990d68c 100644 (file)
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -5,7 +5,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- *     $Id: outfuncs.c,v 1.99 1999/12/10 07:37:31 tgl Exp $
+ *     $Id: outfuncs.c,v 1.100 1999/12/13 01:26:53 tgl Exp $
   *
   * NOTES
   *       Every (plan) node in POSTGRES has an associated "out" routine which
@@ -680,14 +680,17 @@ static void
  _outAggref(StringInfo str, Aggref *node)
  {
         appendStringInfo(str,
-                                " AGGREG :aggname %s :basetype %u :aggtype %u :target ",
+                                        " AGGREG :aggname %s :basetype %u :aggtype %u :target ",
                                          stringStringInfo(node->aggname),
                                          node->basetype,
                                          node->aggtype);
         _outNode(str, node->target);
  
-       appendStringInfo(str, " :usenulls %s ",
-                                        node->usenulls ? "true" : "false");
+       appendStringInfo(str, " :usenulls %s :aggstar %s :aggdistinct %s ",
+                                        node->usenulls ? "true" : "false",
+                                        node->aggstar ? "true" : "false",
+                                        node->aggdistinct ? "true" : "false");
+       /* aggno is not dumped */
  }
  
  /*
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c

index 99be5199fa9b9fc386912cebda8d7a6eed1fb39a..83683ff3b1029d80afdf5d411669d88c9d859bb0 100644 (file)
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.75 1999/11/23 20:06:53 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.76 1999/12/13 01:26:54 tgl Exp $
   *
   * NOTES
   *       Most of the read functions for plan nodes are tested. (In fact, they
@@ -1190,6 +1190,14 @@ _readAggref()
         token = lsptok(NULL, &length);          /* get usenulls */
         local_node->usenulls = (token[0] == 't') ? true : false;
  
+       token = lsptok(NULL, &length);          /* eat :aggstar */
+       token = lsptok(NULL, &length);          /* get aggstar */
+       local_node->aggstar = (token[0] == 't') ? true : false;
+
+       token = lsptok(NULL, &length);          /* eat :aggdistinct */
+       token = lsptok(NULL, &length);          /* get aggdistinct */
+       local_node->aggdistinct = (token[0] == 't') ? true : false;
+
         return local_node;
  }
  
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c

index 63b3ff87d9e298ffde33d9821eea02a33dbcd033..63eebae06033e53281d3fc23a846b6e27dbc0182 100644 (file)
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.56 1999/12/09 05:58:53 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/clauses.c,v 1.57 1999/12/13 01:26:55 tgl Exp $
   *
   * HISTORY
   *       AUTHOR                        DATE                    MAJOR EVENT
@@ -45,6 +45,7 @@ typedef struct {
         List       *targetList;
  } check_subplans_for_ungrouped_vars_context;
  
+static bool contain_agg_clause_walker(Node *node, void *context);
  static bool pull_agg_clause_walker(Node *node, List **listptr);
  static bool check_subplans_for_ungrouped_vars_walker(Node *node,
                                         check_subplans_for_ungrouped_vars_context *context);
@@ -393,12 +394,36 @@ pull_constant_clauses(List *quals, List **constantQual)
         return restqual;
  }
  
+/*
+ * contain_agg_clause
+ *       Recursively search for Aggref nodes within a clause.
+ *
+ *       Returns true if any aggregate found.
+ */
+bool
+contain_agg_clause(Node *clause)
+{
+       return contain_agg_clause_walker(clause, NULL);
+}
+
+static bool
+contain_agg_clause_walker(Node *node, void *context)
+{
+       if (node == NULL)
+               return false;
+       if (IsA(node, Aggref))
+               return true;                    /* abort the tree traversal and return true */
+       return expression_tree_walker(node, contain_agg_clause_walker, context);
+}
+
  /*
   * pull_agg_clause
   *       Recursively pulls all Aggref nodes from an expression tree.
   *
   *       Returns list of Aggref nodes found.  Note the nodes themselves are not
   *       copied, only referenced.
+ *
+ *       Note: this also checks for nested aggregates, which are an error.
   */
  List *
  pull_agg_clause(Node *clause)
@@ -417,9 +442,16 @@ pull_agg_clause_walker(Node *node, List **listptr)
         if (IsA(node, Aggref))
         {
                 *listptr = lappend(*listptr, node);
-               /* continue, to iterate over agg's arg as well (do nested aggregates
-                * actually work?)
+               /*
+                * Complain if the aggregate's argument contains any aggregates;
+                * nested agg functions are semantically nonsensical.
+                */
+               if (contain_agg_clause(((Aggref *) node)->target))
+                       elog(ERROR, "Aggregate function calls may not be nested");
+               /*
+                * Having checked that, we need not recurse into the argument.
                  */
+               return false;
         }
         return expression_tree_walker(node, pull_agg_clause_walker,
                                                                   (void *) listptr);
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c

index 68280f7f4a0a9ea8d75ce25951921d4fdb0428c6..21f8efe7f67a4cfc4edc716a1a94c9de6d4356ad 100644 (file)
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.31 1999/12/10 07:37:35 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/parse_agg.c,v 1.32 1999/12/13 01:26:58 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -28,38 +28,11 @@ typedef struct {
         List       *groupClauses;
  } check_ungrouped_columns_context;
  
-static bool contain_agg_clause(Node *clause);
-static bool contain_agg_clause_walker(Node *node, void *context);
  static void check_ungrouped_columns(Node *node, ParseState *pstate,
                                                                         List *groupClauses);
  static bool check_ungrouped_columns_walker(Node *node,
                                                                                    check_ungrouped_columns_context *context);
  
-/*
- * contain_agg_clause
- *       Recursively find aggref nodes within a clause.
- *
- *       Returns true if any aggregate found.
- *
- * NOTE: we assume that the given clause has been transformed suitably for
- * parser output.  This means we can use the planner's expression_tree_walker.
- */
-static bool
-contain_agg_clause(Node *clause)
-{
-       return contain_agg_clause_walker(clause, NULL);
-}
-
-static bool
-contain_agg_clause_walker(Node *node, void *context)
-{
-       if (node == NULL)
-               return false;
-       if (IsA(node, Aggref))
-               return true;                    /* abort the tree traversal and return true */
-       return expression_tree_walker(node, contain_agg_clause_walker, context);
-}
-
  /*
   * check_ungrouped_columns -
   *       Scan the given expression tree for ungrouped variables (variables
@@ -232,7 +205,8 @@ ParseAgg(ParseState *pstate, char *aggname, Oid basetype,
          * Since "1" never evaluates as null, we currently have no need of
          * the "usenulls" flag, but it should be kept around; in fact, we should
          * extend the pg_aggregate table to let usenulls be specified as an
-        * attribute of user-defined aggregates.
+        * attribute of user-defined aggregates.  In the meantime, usenulls
+        * is just always set to "false".
          */
  
         aggform = (Form_pg_aggregate) GETSTRUCT(theAggTuple);
@@ -264,14 +238,8 @@ ParseAgg(ParseState *pstate, char *aggname, Oid basetype,
         aggref->aggtype = fintype;
         aggref->target = lfirst(args);
         aggref->usenulls = usenulls;
-
-       /*
-        * We should store agg_star and agg_distinct into the Aggref node,
-        * and let downstream processing deal with them.  Currently, agg_star
-        * is ignored and agg_distinct is not implemented...
-        */
-       if (agg_distinct)
-               elog(ERROR, "aggregate(DISTINCT ...) is not implemented yet");
+       aggref->aggstar = agg_star;
+       aggref->aggdistinct = agg_distinct;
  
         pstate->p_hasAggs = true;
  
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index b62559ccdde64ae49d4b09c553e4ac5c54953b76..47fd957c9948cbb2e19c718d57a32859607d0a28 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -3,7 +3,7 @@
   *                       out of it's tuple
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.34 1999/12/06 02:37:17 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.35 1999/12/13 01:27:01 tgl Exp $
   *
   *       This software is copyrighted by Jan Wieck - Hamburg.
   *
@@ -1352,9 +1352,13 @@ get_rule_expr(Node *node, deparse_context *context)
                         {
                                 Aggref     *aggref = (Aggref *) node;
  
-                               appendStringInfo(buf, "%s(",
-                                                                quote_identifier(aggref->aggname));
-                               get_rule_expr(aggref->target, context);
+                               appendStringInfo(buf, "%s(%s",
+                                                                quote_identifier(aggref->aggname),
+                                                                aggref->aggdistinct ? "DISTINCT " : "");
+                               if (aggref->aggstar)
+                                       appendStringInfo(buf, "*");
+                               else
+                                       get_rule_expr(aggref->target, context);
                                 appendStringInfo(buf, ")");
                         }
                         break;
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c

index 5297fde36dcff0b5da385d9fe4a974cebe1535a6..6e9a23f1cd12cb4b50ed48f8baaf07c0a70402aa 100644 (file)
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -3,8 +3,8 @@
   * tuplesort.c
   *       Generalized tuple sorting routines.
   *
- * This module handles sorting of either heap tuples or index tuples
- * (and could fairly easily support other kinds of sortable objects,
+ * This module handles sorting of heap tuples, index tuples, or single
+ * Datums (and could easily support other kinds of sortable objects,
   * if necessary).  It works efficiently for both small and large amounts
   * of data.  Small amounts are sorted in-memory using qsort().  Large
   * amounts are sorted using temporary files and a standard external sort
@@ -77,7 +77,7 @@
   * Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.2 1999/10/30 17:27:15 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.3 1999/12/13 01:27:04 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -87,7 +87,9 @@
  #include "access/heapam.h"
  #include "access/nbtree.h"
  #include "miscadmin.h"
+#include "parser/parse_type.h"
  #include "utils/logtape.h"
+#include "utils/lsyscache.h"
  #include "utils/tuplesort.h"
  
  /*
@@ -251,6 +253,17 @@ struct Tuplesortstate
          */
         Relation        indexRel;
         bool            enforceUnique;  /* complain if we find duplicate tuples */
+
+       /*
+        * These variables are specific to the Datum case; they are set
+        * by tuplesort_begin_datum and used only by the DatumTuple routines.
+        */
+       Oid                     datumType;
+       Oid                     sortOperator;
+       FmgrInfo        sortOpFn;               /* cached lookup data for sortOperator */
+       /* we need typelen and byval in order to know how to copy the Datums. */
+       int                     datumTypeLen;
+       bool            datumTypeByVal;
  };
  
  #define COMPARETUP(state,a,b)  ((*(state)->comparetup) (state, a, b))
@@ -321,7 +334,22 @@ struct Tuplesortstate
   *--------------------
   */
  
+/*
+ * For sorting single Datums, we build "pseudo tuples" that just carry
+ * the datum's value and null flag.  For pass-by-reference data types,
+ * the actual data value appears after the DatumTuple header (MAXALIGNed,
+ * of course), and the value field in the header is just a pointer to it.
+ */
+
+typedef struct
+{
+       Datum           val;
+       bool            isNull;
+} DatumTuple;
+
+
  static Tuplesortstate *tuplesort_begin_common(bool randomAccess);
+static void puttuple_common(Tuplesortstate *state, void *tuple);
  static void inittapes(Tuplesortstate *state);
  static void selectnewtape(Tuplesortstate *state);
  static void mergeruns(Tuplesortstate *state);
@@ -349,6 +377,13 @@ static void writetup_index(Tuplesortstate *state, int tapenum, void *tup);
  static void *readtup_index(Tuplesortstate *state, int tapenum,
                                                    unsigned int len);
  static unsigned int tuplesize_index(Tuplesortstate *state, void *tup);
+static int comparetup_datum(Tuplesortstate *state,
+                                                       const void *a, const void *b);
+static void *copytup_datum(Tuplesortstate *state, void *tup);
+static void writetup_datum(Tuplesortstate *state, int tapenum, void *tup);
+static void *readtup_datum(Tuplesortstate *state, int tapenum,
+                                                  unsigned int len);
+static unsigned int tuplesize_datum(Tuplesortstate *state, void *tup);
  
  /*
   * Since qsort(3) will not pass any context info to qsort_comparetup(),
@@ -369,6 +404,7 @@ static Tuplesortstate *qsort_tuplesortstate;
   * have been supplied.  After performsort, retrieve the tuples in sorted
   * order by calling tuplesort_gettuple until it returns NULL.  (If random
   * access was requested, rescan, markpos, and restorepos can also be called.)
+ * For Datum sorts, putdatum/getdatum are used instead of puttuple/gettuple.
   * Call tuplesort_end to terminate the operation and release memory/disk space.
   */
  
@@ -444,6 +480,32 @@ tuplesort_begin_index(Relation indexRel,
         return state;
  }
  
+Tuplesortstate *
+tuplesort_begin_datum(Oid datumType,
+                                         Oid sortOperator,
+                                         bool randomAccess)
+{
+       Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+       Type                    typeInfo;
+
+       state->comparetup = comparetup_datum;
+       state->copytup = copytup_datum;
+       state->writetup = writetup_datum;
+       state->readtup = readtup_datum;
+       state->tuplesize = tuplesize_datum;
+
+       state->datumType = datumType;
+       state->sortOperator = sortOperator;
+       /* lookup the function that implements the sort operator */
+       fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+       /* lookup necessary attributes of the datum type */
+       typeInfo = typeidType(datumType);
+       state->datumTypeLen = typeLen(typeInfo);
+       state->datumTypeByVal = typeByVal(typeInfo);
+
+       return state;
+}
+
  /*
   * tuplesort_end
   *
@@ -476,9 +538,60 @@ tuplesort_puttuple(Tuplesortstate *state, void *tuple)
  {
         /*
          * Copy the given tuple into memory we control, and decrease availMem.
+        * Then call the code shared with the Datum case.
          */
         tuple = COPYTUP(state, tuple);
  
+       puttuple_common(state, tuple);
+}
+
+/*
+ * Accept one Datum while collecting input data for sort.
+ *
+ * If the Datum is pass-by-ref type, the value will be copied.
+ */
+void
+tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
+{
+       DatumTuple         *tuple;
+
+       /*
+        * Build pseudo-tuple carrying the datum, and decrease availMem.
+        */
+       if (isNull || state->datumTypeByVal)
+       {
+               USEMEM(state, sizeof(DatumTuple));
+               tuple = (DatumTuple *) palloc(sizeof(DatumTuple));
+               tuple->val = val;
+               tuple->isNull = isNull;
+       }
+       else
+       {
+               int             datalen = state->datumTypeLen;
+               int             tuplelen;
+               char   *newVal;
+
+               if (datalen == -1)              /* variable length type? */
+                       datalen = VARSIZE((struct varlena *) DatumGetPointer(val));
+               tuplelen = datalen + MAXALIGN(sizeof(DatumTuple));
+               USEMEM(state, tuplelen);
+               newVal = (char *) palloc(tuplelen);
+               tuple = (DatumTuple *) newVal;
+               newVal += MAXALIGN(sizeof(DatumTuple));
+               memcpy(newVal, DatumGetPointer(val), datalen);
+               tuple->val = PointerGetDatum(newVal);
+               tuple->isNull = false;
+       }
+
+       puttuple_common(state, (void *) tuple);
+}
+
+/*
+ * Shared code for tuple and datum cases.
+ */
+static void
+puttuple_common(Tuplesortstate *state, void *tuple)
+{
         switch (state->status)
         {
                 case TSS_INITIAL:
@@ -753,6 +866,50 @@ tuplesort_gettuple(Tuplesortstate *state, bool forward,
         }
  }
  
+/*
+ * Fetch the next Datum in either forward or back direction.
+ * Returns FALSE if no more datums.
+ *
+ * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
+ * and is now owned by the caller.
+ */
+bool
+tuplesort_getdatum(Tuplesortstate *state, bool forward,
+                                  Datum *val, bool *isNull)
+{
+       DatumTuple         *tuple;
+       bool                    should_free;
+
+       tuple = (DatumTuple *) tuplesort_gettuple(state, forward, &should_free);
+
+       if (tuple == NULL)
+               return false;
+
+       if (tuple->isNull || state->datumTypeByVal)
+       {
+               *val = tuple->val;
+               *isNull = tuple->isNull;
+       }
+       else
+       {
+               int             datalen = state->datumTypeLen;
+               char   *newVal;
+
+               if (datalen == -1)              /* variable length type? */
+                       datalen = VARSIZE((struct varlena *) DatumGetPointer(tuple->val));
+               newVal = (char *) palloc(datalen);
+               memcpy(newVal, DatumGetPointer(tuple->val), datalen);
+               *val = PointerGetDatum(newVal);
+               *isNull = false;
+       }
+
+       if (should_free)
+               pfree(tuple);
+
+       return true;
+}
+
+
  /*
   * inittapes - initialize for tape sorting.
   *
@@ -1695,3 +1852,103 @@ tuplesize_index(Tuplesortstate *state, void *tup)
  
         return tuplen;
  }
+
+
+/*
+ * Routines specialized for DatumTuple case
+ */
+
+static int
+comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
+{
+       DatumTuple *ltup = (DatumTuple *) a;
+       DatumTuple *rtup = (DatumTuple *) b;
+
+       if (ltup->isNull)
+       {
+               if (!rtup->isNull)
+                       return 1;                       /* NULL sorts after non-NULL */
+               return 0;
+       }
+       else if (rtup->isNull)
+               return -1;
+       else
+       {
+               int             result;
+
+               if (!(result = - (int) (*fmgr_faddr(&state->sortOpFn)) (ltup->val,
+                                                                                                                               rtup->val)))
+                       result = (int) (*fmgr_faddr(&state->sortOpFn)) (rtup->val,
+                                                                                                                       ltup->val);
+               return result;
+       }
+}
+
+static void *
+copytup_datum(Tuplesortstate *state, void *tup)
+{
+       /* Not currently needed */
+       elog(ERROR, "copytup_datum() should not be called");
+       return NULL;
+}
+
+static void
+writetup_datum(Tuplesortstate *state, int tapenum, void *tup)
+{
+       DatumTuple         *tuple = (DatumTuple *) tup;
+       unsigned int    tuplen = tuplesize_datum(state, tup);
+       unsigned int    writtenlen = tuplen + sizeof(unsigned int);
+
+       LogicalTapeWrite(state->tapeset, tapenum,
+                                        (void*) &writtenlen, sizeof(writtenlen));
+       LogicalTapeWrite(state->tapeset, tapenum,
+                                        (void*) tuple, tuplen);
+       if (state->randomAccess)        /* need trailing length word? */
+               LogicalTapeWrite(state->tapeset, tapenum,
+                                                (void*) &writtenlen, sizeof(writtenlen));
+
+       FREEMEM(state, tuplen);
+       pfree(tuple);
+}
+
+static void *
+readtup_datum(Tuplesortstate *state, int tapenum, unsigned int len)
+{
+       unsigned int    tuplen = len - sizeof(unsigned int);
+       DatumTuple         *tuple = (DatumTuple *) palloc(tuplen);
+
+       USEMEM(state, tuplen);
+       if (LogicalTapeRead(state->tapeset, tapenum, (void *) tuple,
+                                               tuplen) != tuplen)
+               elog(ERROR, "tuplesort: unexpected end of data");
+       if (state->randomAccess)        /* need trailing length word? */
+               if (LogicalTapeRead(state->tapeset, tapenum, (void *) &tuplen,
+                                                       sizeof(tuplen)) != sizeof(tuplen))
+                       elog(ERROR, "tuplesort: unexpected end of data");
+
+       if (!tuple->isNull && !state->datumTypeByVal)
+               tuple->val = PointerGetDatum(((char *) tuple) +
+                                                                        MAXALIGN(sizeof(DatumTuple)));
+       return (void *) tuple;
+}
+
+static unsigned int
+tuplesize_datum(Tuplesortstate *state, void *tup)
+{
+       DatumTuple         *tuple = (DatumTuple *) tup;
+
+       if (tuple->isNull || state->datumTypeByVal)
+       {
+               return (unsigned int) sizeof(DatumTuple);
+       }
+       else
+       {
+               int             datalen = state->datumTypeLen;
+               int             tuplelen;
+
+               if (datalen == -1)              /* variable length type? */
+                       datalen = VARSIZE((struct varlena *) DatumGetPointer(tuple->val));
+               tuplelen = datalen + MAXALIGN(sizeof(DatumTuple));
+               return (unsigned int) tuplelen;
+       }
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 728c62b1200e11b7477e5e5ecf7a7461d599cab0..62244f88a475084af47e64fe30e72a0fa74b84b3 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -36,7 +36,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: catversion.h,v 1.4 1999/11/24 16:52:48 momjian Exp $
+ * $Id: catversion.h,v 1.5 1999/12/13 01:27:07 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -51,6 +51,6 @@
   * catalog changes on the same day...)
   */
  
-#define CATALOG_VERSION_NO     199911241
+#define CATALOG_VERSION_NO     199912121
  
  #endif
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h

index 2d585bdcc5aa09d75c2585750405dc67c6f6ec4c..d3fb8f732a508b7852227b69cc54a5baa634473d 100644 (file)
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -6,7 +6,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: primnodes.h,v 1.37 1999/11/15 02:00:15 tgl Exp $
+ * $Id: primnodes.h,v 1.38 1999/12/13 01:27:10 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -297,10 +297,12 @@ typedef struct Iter
  /* ----------------
   * Aggref
   *             aggname                 - name of the aggregate
- *             basetype                - base type Oid of the aggregate
+ *             basetype                - base type Oid of the aggregate (ie, input type)
   *             aggtype                 - type Oid of final result of the aggregate
   *             target                  - attribute or expression we are aggregating on
   *             usenulls                - TRUE to accept null values as inputs
+ *             aggstar                 - TRUE if argument was really '*'
+ *             aggdistinct             - TRUE if arguments were labeled DISTINCT
   *             aggno                   - workspace for nodeAgg.c executor
   * ----------------
   */
@@ -312,6 +314,8 @@ typedef struct Aggref
         Oid                     aggtype;
         Node       *target;
         bool            usenulls;
+       bool            aggstar;
+       bool            aggdistinct;
         int                     aggno;
  } Aggref;
  
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h

index 829bf434e787fcbb1f996d89f7750ba2ecd6e3b1..4cd2e486aa4e2cd0cd585ff4fb4cbaf6ec3d35ef 100644 (file)
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -6,7 +6,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: clauses.h,v 1.31 1999/12/09 05:58:55 tgl Exp $
+ * $Id: clauses.h,v 1.32 1999/12/13 01:27:13 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -38,6 +38,7 @@ extern Expr *make_ands_explicit(List *andclauses);
  extern List *make_ands_implicit(Expr *clause);
  
  extern List *pull_constant_clauses(List *quals, List **constantQual);
+extern bool contain_agg_clause(Node *clause);
  extern List *pull_agg_clause(Node *clause);
  extern void check_subplans_for_ungrouped_vars(Node *clause,
                                                                                           Query *query,
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h

index 7c5a32098972bb484fb12a2f5a00bce2e8bedc40..4f775f74a55b280dea058e3daa796f877255b7a2 100644 (file)
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -3,8 +3,8 @@
   * tuplesort.h
   *       Generalized tuple sorting routines.
   *
- * This module handles sorting of either heap tuples or index tuples
- * (and could fairly easily support other kinds of sortable objects,
+ * This module handles sorting of heap tuples, index tuples, or single
+ * Datums (and could easily support other kinds of sortable objects,
   * if necessary).  It works efficiently for both small and large amounts
   * of data.  Small amounts are sorted in-memory using qsort().  Large
   * amounts are sorted using temporary files and a standard external sort
@@ -12,7 +12,7 @@
   *
   * Copyright (c) 1994, Regents of the University of California
   *
- * $Id: tuplesort.h,v 1.1 1999/10/17 22:15:09 tgl Exp $
+ * $Id: tuplesort.h,v 1.2 1999/12/13 01:27:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -34,6 +34,7 @@ typedef struct Tuplesortstate Tuplesortstate;
   * code: one for sorting HeapTuples and one for sorting IndexTuples.
   * They differ primarily in the way that the sort key information is
   * supplied.
+ * Yet a third slightly different interface supports sorting bare Datums.
   */
  
  extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
@@ -42,9 +43,15 @@ extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
  extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                                                                                          bool enforceUnique,
                                                                                          bool randomAccess);
+extern Tuplesortstate *tuplesort_begin_datum(Oid datumType,
+                                                                                        Oid sortOperator,
+                                                                                        bool randomAccess);
  
  extern void tuplesort_puttuple(Tuplesortstate *state, void *tuple);
  
+extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
+                                                          bool isNull);
+
  extern void tuplesort_performsort(Tuplesortstate *state);
  
  extern void *tuplesort_gettuple(Tuplesortstate *state, bool forward,
@@ -54,11 +61,15 @@ extern void *tuplesort_gettuple(Tuplesortstate *state, bool forward,
  #define tuplesort_getindextuple(state, forward, should_free) \
         ((IndexTuple) tuplesort_gettuple(state, forward, should_free))
  
+extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
+                                                          Datum *val, bool *isNull);
+
  extern void tuplesort_end(Tuplesortstate *state);
  
  /*
   * These routines may only be called if randomAccess was specified 'true'.
- * Backwards scan in gettuple is likewise only allowed if randomAccess.
+ * Likewise, backwards scan in gettuple/getdatum is only allowed if
+ * randomAccess was specified.
   */
  
  extern void tuplesort_rescan(Tuplesortstate *state);
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 84958f66937431784793f9b0bebcf1d188ebb9e5..5dac6162b597afae848baf27211062cff35afba0 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -76,6 +76,42 @@ cnt_1000
      1000
  (1 row)
  
+QUERY: SELECT count(DISTINCT four) AS cnt_4 FROM onek;
+cnt_4
+-----
+    4
+(1 row)
+
+QUERY: select ten, count(*), sum(four) from onek group by ten;
+ten|count|sum
+---+-----+---
+  0|  100|100
+  1|  100|200
+  2|  100|100
+  3|  100|200
+  4|  100|100
+  5|  100|200
+  6|  100|100
+  7|  100|200
+  8|  100|100
+  9|  100|200
+(10 rows)
+
+QUERY: select ten, count(four), sum(DISTINCT four) from onek group by ten;
+ten|count|sum
+---+-----+---
+  0|  100|  2
+  1|  100|  4
+  2|  100|  2
+  3|  100|  4
+  4|  100|  2
+  5|  100|  4
+  6|  100|  2
+  7|  100|  4
+  8|  100|  2
+  9|  100|  4
+(10 rows)
+
  QUERY: SELECT newavg(four) AS avg_1 FROM onek;
  avg_1
  -----
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out

index 474ed0bb7f65893433b73bf4ee84c7d24fd92618..5938458a88e762a7acff9f858e5922f5bfc6e72d 100644 (file)
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1075,9 +1075,9 @@ pg_user           |SELECT pg_shadow.usename, pg_shadow.usesysid, pg_shadow.usecr
  pg_views          |SELECT c.relname AS viewname, pg_get_userbyid(c.relowner) AS viewowner, pg_get_viewdef(c.relname) AS definition FROM pg_class c WHERE (c.relhasrules AND (EXISTS (SELECT r.rulename FROM pg_rewrite r WHERE ((r.ev_class = c.oid) AND (r.ev_type = '1'::"char")))));                                                                                                                               
  rtest_v1          |SELECT rtest_t1.a, rtest_t1.b FROM rtest_t1;                                                                                                                                                                                                                                                                                                                                                       
  rtest_vcomp       |SELECT x.part, (x.size * y.factor) AS size_in_cm FROM rtest_comp x, rtest_unitfact y WHERE (x.unit = y.unit);                                                                                                                                                                                                                                                                                      
-rtest_vview1      |SELECT x.a, x.b FROM rtest_view1 x WHERE (0 < (SELECT count(1) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                    
+rtest_vview1      |SELECT x.a, x.b FROM rtest_view1 x WHERE (0 < (SELECT count(*) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                    
  rtest_vview2      |SELECT rtest_view1.a, rtest_view1.b FROM rtest_view1 WHERE rtest_view1.v;                                                                                                                                                                                                                                                                                                                          
-rtest_vview3      |SELECT x.a, x.b FROM rtest_vview2 x WHERE (0 < (SELECT count(1) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                   
+rtest_vview3      |SELECT x.a, x.b FROM rtest_vview2 x WHERE (0 < (SELECT count(*) AS count FROM rtest_view2 y WHERE (y.a = x.a)));                                                                                                                                                                                                                                                                                   
  rtest_vview4      |SELECT x.a, x.b, count(y.a) AS refcount FROM rtest_view1 x, rtest_view2 y WHERE (x.a = y.a) GROUP BY x.a, x.b;                                                                                                                                                                                                                                                                                     
  rtest_vview5      |SELECT rtest_view1.a, rtest_view1.b, rtest_viewfunc1(rtest_view1.a) AS refcount FROM rtest_view1;                                                                                                                                                                                                                                                                                                  
  shoe              |SELECT sh.shoename, sh.sh_avail, sh.slcolor, sh.slminlen, (sh.slminlen * un.un_fact) AS slminlen_cm, sh.slmaxlen, (sh.slmaxlen * un.un_fact) AS slmaxlen_cm, sh.slunit FROM shoe_data sh, unit un WHERE (sh.slunit = un.un_name);                                                                                                                                                                  
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 1fc0996807276fdb0709c2da3d94aa9b50d10d87..03ea7de2bbcbdc60d8aaeadb696101b51d5d1542 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -30,6 +30,12 @@ SELECT max(student.gpa) AS max_3_7 FROM student;
  
  SELECT count(four) AS cnt_1000 FROM onek;
  
+SELECT count(DISTINCT four) AS cnt_4 FROM onek;
+
+select ten, count(*), sum(four) from onek group by ten;
+
+select ten, count(four), sum(DISTINCT four) from onek group by ten;
+
  
  SELECT newavg(four) AS avg_1 FROM onek;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Mon, 13 Dec 1999 01:27:21 +0000 (01:27 +0000)
src/backend/executor/nodeAgg.c		patch | blob | history
src/backend/nodes/copyfuncs.c		patch | blob | history
src/backend/nodes/equalfuncs.c		patch | blob | history
src/backend/nodes/outfuncs.c		patch | blob | history
src/backend/nodes/readfuncs.c		patch | blob | history
src/backend/optimizer/util/clauses.c		patch | blob | history
src/backend/parser/parse_agg.c		patch | blob | history
src/backend/utils/adt/ruleutils.c		patch | blob | history
src/backend/utils/sort/tuplesort.c		patch | blob | history
src/include/catalog/catversion.h		patch | blob | history
src/include/nodes/primnodes.h		patch | blob | history
src/include/optimizer/clauses.h		patch | blob | history
src/include/utils/tuplesort.h		patch | blob | history
src/test/regress/expected/aggregates.out		patch | blob | history
src/test/regress/expected/rules.out		patch | blob | history
src/test/regress/sql/aggregates.sql		patch | blob | history