]> granicus.if.org Git - postgresql/commitdiff
Fix hash aggregation to suppress unneeded columns from being stored in
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 28 Jun 2006 19:40:52 +0000 (19:40 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 28 Jun 2006 19:40:52 +0000 (19:40 +0000)
tuple hash table entries.  This addresses the problem previously noted
that use of a 'physical tlist' in the input scan node could bloat the
hash table entries far beyond what the planner expects.  It's a better
answer than my previous thought of undoing the physical tlist optimization,
because we can also remove columns that are needed to compute the aggregate
functions but aren't part of the grouping column set.

src/backend/executor/nodeAgg.c
src/include/nodes/execnodes.h

index 84a651b0ad6d3b926fe3386ab6254f4e8883979e..25d763a4f7111f6c2744866e2ad706d0aa5c07d5 100644 (file)
@@ -61,7 +61,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.141 2006/06/28 17:05:49 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.142 2006/06/28 19:40:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -223,9 +223,11 @@ static void finalize_aggregate(AggState *aggstate,
                                   AggStatePerAgg peraggstate,
                                   AggStatePerGroup pergroupstate,
                                   Datum *resultVal, bool *resultIsNull);
+static Bitmapset *find_unaggregated_cols(AggState *aggstate);
+static bool find_unaggregated_cols_walker(Node *node, Bitmapset **colnos);
 static void build_hash_table(AggState *aggstate);
 static AggHashEntry lookup_hash_entry(AggState *aggstate,
-                                 TupleTableSlot *slot);
+                                 TupleTableSlot *inputslot);
 static TupleTableSlot *agg_retrieve_direct(AggState *aggstate);
 static void agg_fill_hash_table(AggState *aggstate);
 static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate);
@@ -579,6 +581,46 @@ finalize_aggregate(AggState *aggstate,
        MemoryContextSwitchTo(oldContext);
 }
 
+/*
+ * find_unaggregated_cols
+ *       Construct a bitmapset of the column numbers of un-aggregated Vars
+ *       appearing in our targetlist and qual (HAVING clause)
+ */
+static Bitmapset *
+find_unaggregated_cols(AggState *aggstate)
+{
+       Agg                *node = (Agg *) aggstate->ss.ps.plan;
+       Bitmapset *colnos;
+
+       colnos = NULL;
+       (void) find_unaggregated_cols_walker((Node *) node->plan.targetlist,
+                                                                                &colnos);
+       (void) find_unaggregated_cols_walker((Node *) node->plan.qual,
+                                                                                &colnos);
+       return colnos;
+}
+
+static bool
+find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
+{
+       if (node == NULL)
+               return false;
+       if (IsA(node, Var))
+       {
+               Var                *var = (Var *) node;
+
+               /* setrefs.c should have set the varno to 0 */
+               Assert(var->varno == 0);
+               Assert(var->varlevelsup == 0);
+               *colnos = bms_add_member(*colnos, var->varattno);
+               return false;
+       }
+       if (IsA(node, Aggref))          /* do not descend into aggregate exprs */
+               return false;
+       return expression_tree_walker(node, find_unaggregated_cols_walker,
+                                                                 (void *) colnos);
+}
+
 /*
  * Initialize the hash table to empty.
  *
@@ -590,6 +632,9 @@ build_hash_table(AggState *aggstate)
        Agg                *node = (Agg *) aggstate->ss.ps.plan;
        MemoryContext tmpmem = aggstate->tmpcontext->ecxt_per_tuple_memory;
        Size            entrysize;
+       Bitmapset  *colnos;
+       List       *collist;
+       int                     i;
 
        Assert(node->aggstrategy == AGG_HASHED);
        Assert(node->numGroups > 0);
@@ -605,13 +650,48 @@ build_hash_table(AggState *aggstate)
                                                                                          entrysize,
                                                                                          aggstate->aggcontext,
                                                                                          tmpmem);
+
+       /*
+        * Create a list of the tuple columns that actually need to be stored
+        * in hashtable entries.  The incoming tuples from the child plan node
+        * will contain grouping columns, other columns referenced in our
+        * targetlist and qual, columns used to compute the aggregate functions,
+        * and perhaps just junk columns we don't use at all.  Only columns of the
+        * first two types need to be stored in the hashtable, and getting rid of
+        * the others can make the table entries significantly smaller.  To avoid
+        * messing up Var numbering, we keep the same tuple descriptor for
+        * hashtable entries as the incoming tuples have, but set unwanted columns
+        * to NULL in the tuples that go into the table.
+        *
+        * To eliminate duplicates, we build a bitmapset of the needed columns,
+        * then convert it to an integer list (cheaper to scan at runtime).
+        * The list is in decreasing order so that the first entry is the largest;
+        * lookup_hash_entry depends on this to use slot_getsomeattrs correctly.
+        *
+        * Note: at present, searching the tlist/qual is not really necessary
+        * since the parser should disallow any unaggregated references to
+        * ungrouped columns.  However, the search will be needed when we add
+        * support for SQL99 semantics that allow use of "functionally dependent"
+        * columns that haven't been explicitly grouped by.
+        */
+
+       /* Find Vars that will be needed in tlist and qual */
+       colnos = find_unaggregated_cols(aggstate);
+       /* Add in all the grouping columns */
+       for (i = 0; i < node->numCols; i++)
+               colnos = bms_add_member(colnos, node->grpColIdx[i]);
+       /* Convert to list, using lcons so largest element ends up first */
+       collist = NIL;
+       while ((i = bms_first_member(colnos)) >= 0)
+               collist = lcons_int(i, collist);
+       aggstate->hash_needed = collist;
 }
 
 /*
  * Estimate per-hash-table-entry overhead for the planner.
  *
  * Note that the estimate does not include space for pass-by-reference
- * transition data values.
+ * transition data values, nor for the representative tuple of each group.
  */
 Size
 hash_agg_entry_size(int numAggs)
@@ -621,9 +701,9 @@ hash_agg_entry_size(int numAggs)
        /* This must match build_hash_table */
        entrysize = sizeof(AggHashEntryData) +
                (numAggs - 1) *sizeof(AggStatePerGroupData);
-       /* Account for hashtable overhead */
-       entrysize += 2 * sizeof(void *);
        entrysize = MAXALIGN(entrysize);
+       /* Account for hashtable overhead (assuming fill factor = 1) */
+       entrysize += 3 * sizeof(void *);
        return entrysize;
 }
 
@@ -634,13 +714,34 @@ hash_agg_entry_size(int numAggs)
  * When called, CurrentMemoryContext should be the per-query context.
  */
 static AggHashEntry
-lookup_hash_entry(AggState *aggstate, TupleTableSlot *slot)
+lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot)
 {
+       TupleTableSlot *hashslot = aggstate->hashslot;
+       ListCell   *l;
        AggHashEntry entry;
        bool            isnew;
 
+       /* if first time through, initialize hashslot by cloning input slot */
+       if (hashslot->tts_tupleDescriptor == NULL)
+       {
+               ExecSetSlotDescriptor(hashslot, inputslot->tts_tupleDescriptor);
+               /* Make sure all unused columns are NULLs */
+               ExecStoreAllNullTuple(hashslot);
+       }
+
+       /* transfer just the needed columns into hashslot */
+       slot_getsomeattrs(inputslot, linitial_int(aggstate->hash_needed));
+       foreach(l, aggstate->hash_needed)
+       {
+               int             varNumber = lfirst_int(l) - 1;
+
+               hashslot->tts_values[varNumber] = inputslot->tts_values[varNumber];
+               hashslot->tts_isnull[varNumber] = inputslot->tts_isnull[varNumber];
+       }
+
+       /* find or create the hashtable entry using the filtered tuple */
        entry = (AggHashEntry) LookupTupleHashEntry(aggstate->hashtable,
-                                                                                               slot,
+                                                                                               hashslot,
                                                                                                &isnew);
 
        if (isnew)
@@ -1063,13 +1164,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
                                                          ALLOCSET_DEFAULT_INITSIZE,
                                                          ALLOCSET_DEFAULT_MAXSIZE);
 
-#define AGG_NSLOTS 2
+#define AGG_NSLOTS 3
 
        /*
         * tuple table initialization
         */
        ExecInitScanTupleSlot(estate, &aggstate->ss);
        ExecInitResultTupleSlot(estate, &aggstate->ss.ps);
+       aggstate->hashslot = ExecInitExtraTupleSlot(estate);
 
        /*
         * initialize child expressions
index 97deb8b15c7bc599b57e8f1c91629facc0822b0e..1bedc6d26e05d149503a8731d6a92b9370bbda4f 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.151 2006/06/28 17:05:49 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.152 2006/06/28 19:40:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1245,6 +1245,8 @@ typedef struct AggState
        HeapTuple       grp_firstTuple; /* copy of first tuple of current group */
        /* these fields are used in AGG_HASHED mode: */
        TupleHashTable hashtable;       /* hash table with one entry per group */
+       TupleTableSlot *hashslot;       /* slot for loading hash table */
+       List       *hash_needed;        /* list of columns needed in hash table */
        bool            table_filled;   /* hash table filled yet? */
        TupleHashIterator hashiter; /* for iterating through hash table */
 } AggState;