--- /dev/null
+Integer aggregator/enumerator.
+
+Many database systems have the notion of a one to many table.
+
+A one to many table usually sits between two indexed tables,
+as:
+
+create table one_to_many(left int, right int) ;
+
+And it is used like this:
+
+SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right)
+ WHERE one_to_many.left = item;
+
+This will return all the items in the right hand table for an entry
+in the left hand table. This is a very common construct in SQL.
+
+Now, this methodology can be cumbersome with a very large number of
+entries in the one_to_many table. Depending on the order in which
+data was entered, a join like this could result in an index scan
+and a fetch for each right hand entry in the table for a particular
+left hand entry.
+
+If you have a very dynamic system, there is not much you can do.
+However, if you have some data which is fairly static, you can
+create a summary table with the aggregator.
+
+CREATE TABLE summary as SELECT left, int_array_aggregate(right)
+ AS right FROM one_to_many GROUP BY left;
+
+This will create a table with one row per left item, and an array
+of right items. Now this is pretty useless without some way of using
+the array; that's why there is an array enumerator.
+
+SELECT left, int_array_enum(right) FROM summary WHERE left = item;
+
+The above query using int_array_enum, produces the same results as:
+
+SELECT left, right FROM one_to_many WHERE left = item;
+
+The difference is that the query against the summary table has to get
+only one row from the table, whereas the query against "one_to_many"
+must index scan and fetch a row for each entry.
+
+On our system, an EXPLAIN shows a query with a cost of 8488 gets reduced
+to a cost of 329. The query is a join between the summary table (built
+from the one_to_many table) and the left hand table:
+
+select right, count(right) from
+(
+ select left, int_array_enum(right) as right from summary join
+ (select left from left_table where left = item) as lefts
+ ON (summary.left = lefts.left )
+) as list group by right order by count desc ;
+
+
--- /dev/null
+/*
+ * Integer array aggregator / enumerator
+ *
+ * Mark L. Woodward
+ * DMN Digital Music Network.
+ * www.dmn.com
+ *
+ * Copyright (C) Digital Music Network
+ * December 20, 2001
+ *
+ * This file is the property of the Digital Music Network (DMN).
+ * It is being made available to users of the PostgreSQL system
+ * under the BSD license.
+ *
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <string.h>
+#include "postgres.h"
+#include "access/heapam.h"
+#include "catalog/catname.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_proc.h"
+#include "executor/executor.h"
+#include "utils/fcache.h"
+#include "utils/sets.h"
+#include "utils/syscache.h"
+#include "access/tupmacs.h"
+#include "access/xact.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/lsyscache.h"
+
+
+/* This is actually a postgres version of a one dimensional array.
+ * The ArrayType header comes first and the int4 values are stored in
+ * array[], which is declared with a single slot and over-allocated
+ * using PGARRAY_SIZE().
+ * NOTE(review): presumably items/lower overlay the dimension and
+ * lower-bound words that follow the ArrayType header - confirm
+ * against utils/array.h. */
+
+typedef struct agg
+{
+ ArrayType a;
+ int items; /* number of values currently stored in array[] */
+ int lower; /* allocated capacity while aggregating; ShrinkPGArray sets it to 0 */
+ int4 array[1]; /* first value; the remaining values follow the struct */
+}PGARRAY;
+
+/* This is used to keep track of our position during enumeration */
+typedef struct callContext
+{
+ PGARRAY *p; /* array being enumerated; a detoasted copy when TOASTED is set */
+ int num; /* index into p->array of the next value to return */
+ int flags; /* 0, or TOASTED when p has to be pfree'd once enumeration ends */
+}CTX;
+
+/* CTX.flags bit: the array is a detoasted copy that int_enum must pfree */
+#define TOASTED 1
+/* Number of int4 slots allocated for a brand new aggregate array */
+#define START_NUM 8
+/* Bytes needed for a PGARRAY holding n values (the struct already has one slot).
+ * The argument is parenthesized so that expressions such as (a << 1) or
+ * (x ? y : z) expand correctly. */
+#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n) - 1) * sizeof(int4)))
+
+static PGARRAY * GetPGArray(int4 state, int fAdd); /* fetch, create or grow the working array */
+static PGARRAY *ShrinkPGArray(PGARRAY *p); /* copy to exact size and free the working array */
+
+Datum int_agg_state(PG_FUNCTION_ARGS); /* aggregate transition function */
+Datum int_agg_final_count(PG_FUNCTION_ARGS); /* NOTE(review): no definition is visible in this file - confirm it is still needed */
+Datum int_agg_final_array(PG_FUNCTION_ARGS); /* aggregate final function, returns the array */
+Datum int_enum(PG_FUNCTION_ARGS); /* returns the array's values one per call */
+
+PG_FUNCTION_INFO_V1(int_agg_state);
+PG_FUNCTION_INFO_V1(int_agg_final_count);
+PG_FUNCTION_INFO_V1(int_agg_final_array);
+PG_FUNCTION_INFO_V1(int_enum);
+
+/*
+ * Manage the aggregation state of the array.
+ *
+ * state  the aggregate's transition value: 0 when no array exists yet,
+ *        otherwise the address of the working PGARRAY carried in an int4.
+ * fAdd   non-zero when the caller is about to append a value; a full
+ *        array is then doubled in capacity before it is returned.
+ *
+ * Returns the (possibly newly allocated or relocated) working array.
+ *
+ * The working array is allocated in TopTransactionContext.  You need to
+ * specify the correct memory context, or it will vanish!  While
+ * aggregating, p->lower holds the allocated capacity in elements.
+ *
+ * MemoryContextAlloc() and repalloc() report out-of-memory through
+ * elog(ERROR) and do not return NULL, so their results are used directly.
+ *
+ * NOTE(review): carrying a pointer in an int4 only works where pointers
+ * fit in 32 bits - confirm the supported platforms.
+ */
+static PGARRAY * GetPGArray(int4 state, int fAdd)
+{
+    PGARRAY *p = (PGARRAY *) state;
+
+    if(!state)
+    {
+        /* New array */
+        int cb = PGARRAY_SIZE(START_NUM);
+
+        p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb);
+
+        p->a.size = cb;
+        p->a.ndim = 0;
+        p->a.flags = 0;
+        p->items = 0;
+        p->lower = START_NUM;
+    }
+    else if(fAdd && p->items >= p->lower)
+    {
+        /* Ensure array has space: double the capacity */
+        int n = p->lower + p->lower;
+        int cbNew = PGARRAY_SIZE(n);
+
+        p = (PGARRAY *) repalloc(p, cbNew);
+
+        p->a.size = cbNew;
+        p->lower = n;
+    }
+    return p;
+}
+
+/*
+ * Shrinks the array to its actual size and moves it into the standard
+ * memory allocation context, frees working memory.
+ *
+ * p  the working array built during aggregation, or NULL.
+ *
+ * Returns a palloc'd copy sized for exactly p->items values with the
+ * header fields rewritten (ndim = 1, flags = 0, lower = 0), or NULL when
+ * p is NULL.  The working array p is pfree'd.
+ *
+ * palloc() reports out-of-memory through elog(ERROR) and does not return
+ * NULL, so its result is used directly.
+ */
+static PGARRAY *ShrinkPGArray(PGARRAY *p)
+{
+    PGARRAY *pnew;
+    int cb;
+
+    if(!p)
+        return NULL;
+
+    /* get target size */
+    cb = PGARRAY_SIZE(p->items);
+
+    /* use current transaction context */
+    pnew = (PGARRAY *) palloc(cb);
+
+    /* Fix up the fields in the new structure, so Postgres understands */
+    memcpy(pnew, p, cb);
+    pnew->a.size = cb;
+    pnew->a.ndim = 1;
+    pnew->a.flags = 0;
+    pnew->lower = 0;
+
+    pfree(p);
+    return pnew;
+}
+
+/*
+ * Called for each iteration during an aggregate function.
+ *
+ * Argument 0 is the running state (0, or the working array's address as
+ * an int4); argument 1 is the next integer to collect.  Returns the new
+ * state, i.e. the address of the working array.
+ *
+ * A NULL input value is skipped: without this check the argument slot of
+ * a NULL would be read as if it were an integer and appended to the array.
+ */
+Datum int_agg_state(PG_FUNCTION_ARGS)
+{
+    int4 state = PG_GETARG_INT32(0);
+    int4 value;
+    PGARRAY *p;
+
+    /* NULL carries no integer; hand the state back untouched */
+    if(PG_ARGISNULL(1))
+        PG_RETURN_INT32(state);
+
+    value = PG_GETARG_INT32(1);
+
+    /* fAdd = 1: make sure there is room for one more value */
+    p = GetPGArray(state, 1);
+    if(!p)
+    {
+        elog(ERROR,"No aggregate storage\n");
+    }
+    else if(p->items >= p->lower)
+    {
+        /* sanity check: GetPGArray should have grown the array */
+        elog(ERROR,"aggregate storage too small\n");
+    }
+    else
+    {
+        p->array[p->items++]= value;
+    }
+    PG_RETURN_INT32(p);
+}
+
+/*
+ * Final function of the integer aggregator.
+ * Takes the aggregate state (argument 0), fetches the working array for it
+ * without growing it, shrinks that array to its exact size and returns it
+ * as a one dimensional integer array.  Returns SQL NULL when no array
+ * comes back from ShrinkPGArray.
+ */
+Datum int_agg_final_array(PG_FUNCTION_ARGS)
+{
+    int4 state = PG_GETARG_INT32(0);
+    PGARRAY *working = GetPGArray(state, 0);
+    PGARRAY *result = ShrinkPGArray(working);
+
+    if(!result)
+    {
+        PG_RETURN_NULL();
+    }
+
+    PG_RETURN_POINTER(result);
+}
+
+/*
+ * Enumerates an integer array: each call hands back the next value of the
+ * array passed as argument 0 and flags the result as ExprMultipleResult;
+ * once every value has been returned the call flags ExprEndResult and
+ * returns NULL.
+ *
+ * The position is remembered in a CTX hung off fcinfo->context.  It is
+ * created on the first call (fcinfo->context empty) and released on the
+ * final one, together with the detoasted copy of the array if one was made.
+ *
+ * NOTE(review): the "No data sent" path returns 0 without setting isnull
+ * or rsi->isDone - confirm this is the intended result for a NULL array.
+ */
+Datum int_enum(PG_FUNCTION_ARGS)
+{
+    PGARRAY *arr = (PGARRAY *) PG_GETARG_POINTER(0);
+    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+    CTX *ctx;
+
+    if(arr == NULL)
+    {
+        elog(NOTICE, "No data sent\n");
+        return 0;
+    }
+    if(rsinfo == NULL)
+    {
+        elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer");
+        PG_RETURN_NULL();
+    }
+
+    if(fcinfo->context != NULL)
+    {
+        /* Continue with the state left by the previous call */
+        ctx = (CTX *) fcinfo->context;
+    }
+    else
+    {
+        /* First call: set up the working state */
+        ctx = (CTX *) palloc(sizeof(CTX));
+        if(ctx == NULL)
+        {
+            elog(ERROR, "CTX Alocation failed\n");
+            PG_RETURN_NULL();
+        }
+
+        if(!VARATT_IS_EXTENDED(arr))
+        {
+            /* Plain value: use the argument as it is, no copy */
+            ctx->p = arr;
+            ctx->flags = 0;
+        }
+        else
+        {
+            /* Toasted value: work on a detoasted copy and remember to free it */
+            ctx->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(arr);
+            ctx->flags = TOASTED;
+            if(ctx->p == NULL)
+            {
+                elog(ERROR, "Error in toaster!!! no detoasting\n");
+                PG_RETURN_NULL();
+            }
+        }
+        ctx->num = 0;
+        fcinfo->context = (Node *) ctx;
+    }
+
+    if(ctx->num < ctx->p->items)
+    {
+        /* Still values left: return the next one */
+        int next = ctx->p->array[ctx->num];
+
+        ctx->num++;
+        rsinfo->isDone = ExprMultipleResult;
+        PG_RETURN_INT32(next);
+    }
+
+    /* Every value has been returned: release the state and end the set */
+    if(ctx->flags & TOASTED)
+        pfree(ctx->p);
+    pfree(fcinfo->context);
+    fcinfo->context = NULL;
+    rsinfo->isDone = ExprEndResult;
+    PG_RETURN_NULL();
+}
--- /dev/null
+-- Drop previously installed objects.
+-- The aggregate is dropped first because it is built on the two
+-- int_agg_* functions; dropping a function that an aggregate still uses
+-- is rejected by servers that track dependencies.
+drop aggregate int_array_aggregate(int4);
+drop function int_agg_state (int4, int4);
+drop function int_agg_final_array (int4);
+drop function int_array_enum (int4[]);
+
+
+-- State (transition) function of the aggregate.
+-- Invoked once for every item that is aggregated.
+CREATE FUNCTION int_agg_state (int4, int4)
+    RETURNS int4
+    AS 'MODULE_FILENAME','int_agg_state'
+    LANGUAGE 'c';
+
+-- Final function of the aggregate.
+-- Invoked when the aggregation ends; returns the collected values as an array.
+CREATE FUNCTION int_agg_final_array (int4)
+    RETURNS int4[]
+    AS 'MODULE_FILENAME','int_agg_final_array'
+    LANGUAGE 'c';
+
+-- The aggregation function.
+-- Combines int_agg_state and int_agg_final_array to build an array of
+-- integers from the aggregated int4 values.
+CREATE AGGREGATE int_array_aggregate
+(
+    BASETYPE  = int4,
+    SFUNC     = int_agg_state,
+    STYPE     = int4,
+    FINALFUNC = int_agg_final_array,
+    INITCOND  = 0
+);
+
+-- The enumeration function.
+-- Returns every element of a one dimensional integer array as a row.
+CREATE FUNCTION int_array_enum(int4[])
+    RETURNS setof integer
+    AS 'MODULE_FILENAME','int_enum'
+    LANGUAGE 'c';
+
+