]> granicus.if.org Git - postgresql/commitdiff
Add integer aggregator to /contrib.
authorBruce Momjian <bruce@momjian.us>
Mon, 25 Feb 2002 03:45:27 +0000 (03:45 +0000)
committerBruce Momjian <bruce@momjian.us>
Mon, 25 Feb 2002 03:45:27 +0000 (03:45 +0000)
mlw

contrib/README
contrib/intagg/Makefile [new file with mode: 0644]
contrib/intagg/README.int_aggrigate [new file with mode: 0644]
contrib/intagg/int_aggregate.c [new file with mode: 0644]
contrib/intagg/int_aggregate.sql.in [new file with mode: 0644]

index ec9c04d3c3dff799a99a89c2da5748bae75202f0..1ffef6df3254c252ab467242b21bafcec32a420d 100644 (file)
@@ -71,10 +71,14 @@ fuzzystrmatch -
        Levenshtein, metaphone, and soundex fuzzy string matching
        by Joe Conway <joseph.conway@home.com>, Joel Burton <jburton@scw.org>
 
+intagg -
+       Integer aggregator
+       by  mlw <markw@mohawksoft.com>
+
+
 intarray -
        Index support for arrays of int4, using GiST
-       by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov
-       <oleg@sai.msu.su>.
+       by Teodor Sigaev <teodor@stack.net> and Oleg Bartunov <oleg@sai.msu.su>
 
 ipc_check -
        Simple test script to help in configuring IPC.
diff --git a/contrib/intagg/Makefile b/contrib/intagg/Makefile
new file mode 100644 (file)
index 0000000..66901cf
--- /dev/null
@@ -0,0 +1,31 @@
+#############################################
+# Makefile for integer aggregator
+# Copyright (C) 2001 Digital Music Network.
+# by Mark L. Woodward
+#
+subdir = contrib/intagg
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+NAME=int_aggregate
+SONAME = $(NAME)$(DLSUFFIX)
+MODULES = int_aggregate
+DATA_built = int_aggregate.so
+DOCS = README.int_aggrigate
+SQLS=int_aggregate.sql
+
+include $(top_srcdir)/contrib/contrib-global.mk
+
+%.sql: %.sql.in
+       sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@
+
+all : $(SONAME) $(SQLS)
+
+
+install : all
+       $(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir)
+               
+
+clean :
+       rm -f $(SONAME)
+       rm -f $(SQLS)
diff --git a/contrib/intagg/README.int_aggrigate b/contrib/intagg/README.int_aggrigate
new file mode 100644 (file)
index 0000000..0c7317c
--- /dev/null
@@ -0,0 +1,55 @@
+Integer aggregator/enumerator.
+
+Many database systems have the notion of a one to many table.
+
+A one to many table usually sits between two indexed tables, 
+as: 
+
+create table one_to_many(left int, right int) ;
+
+And it is used like this:
+
+SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right) 
+       WHERE  one_to_many.left = item;
+
+This will return all the items in the right hand table for an entry 
+in the left hand table. This is a very common construct in SQL.
+
+Now, this methodology can be cumbersome with a very large number of
+entries in the one_to_many table. Depending on the order in which
+data was entered, a join like this could result in an index scan
+and a fetch for each right hand entry in the table for a particular
+left hand entry.
+
+If you have a very dynamic system, there is not much you can do. 
+However, if you have some data which is fairly static, you can
+create a summary table with the aggregator.
+
+CREATE TABLE summary as SELECT left, int_array_aggregate(right) 
+       AS right FROM one_to_many GROUP BY left;
+
+This will create a table with one row per left item, and an array
+of right items. Now this is pretty useless without some way of using
+the array; that's why there is an array enumerator.
+
+SELECT left, int_array_enum(right) FROM summary WHERE left = item;
+
+The above query using int_array_enum, produces the same results as:
+
+SELECT left, right FROM one_to_many WHERE left = item;
+
+The difference is that the query against the summary table has to get
+only one row from the table, whereas the query against "one_to_many"
+must index scan and fetch a row for each entry.
+
+On our system, EXPLAIN shows that a query with a cost of 8488 is reduced
+to a cost of 329. The query was a join against the one_to_many table;
+rewritten to use the summary table, it looks like this:
+
+select right, count(right) from 
+(
+       select left, int_array_enum(right) as right from summary join
+                (select left from left_table where left = item) as lefts
+                 ON (summary.left = lefts.left ) 
+) as list group by right order by count desc ;
+
+
diff --git a/contrib/intagg/int_aggregate.c b/contrib/intagg/int_aggregate.c
new file mode 100644 (file)
index 0000000..b2187fd
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ * Integer array aggregator / enumerator
+ *
+ * Mark L. Woodward 
+ * DMN Digital Music Network.
+ * www.dmn.com
+ *
+ * Copyright (C) Digital Music Network
+ * December 20, 2001
+ *
+ * This file is the property of the Digital Music Network (DMN).
+ * It is being made available to users of the PostgreSQL system
+ * under the BSD license.
+ *
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <string.h>
+#include "postgres.h"
+#include "access/heapam.h"
+#include "catalog/catname.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_proc.h"
+#include "executor/executor.h"
+#include "utils/fcache.h"
+#include "utils/sets.h"
+#include "utils/syscache.h"
+#include "access/tupmacs.h"
+#include "access/xact.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/lsyscache.h"
+
+
+/*
+ * PGARRAY: a postgres version of a one dimensional integer array, laid out
+ * as a C struct.
+ *
+ * a     - the ArrayType header; this file sets its size, ndim and flags fields.
+ * items - number of integers currently stored in array[].
+ * lower - while aggregating, the number of slots allocated (the capacity);
+ *         ShrinkPGArray() resets it to 0 before the array is handed back.
+ *         NOTE(review): items/lower presumably overlay the dimension and
+ *         lower-bound words that follow the ArrayType header -- confirm.
+ * array - first element of the integer storage; PGARRAY_SIZE(n) sizes the
+ *         allocation so that n elements fit.
+ */
+
+typedef struct agg
+{
+       ArrayType a;
+       int     items;
+       int     lower;
+       int4    array[1];
+}PGARRAY;
+
+/*
+ * CTX: used by int_enum to keep track of our position during enumeration.
+ *
+ * p     - the array being enumerated; either the argument itself or a
+ *         detoasted copy of it.
+ * num   - index of the next element of p->array to return.
+ * flags - has the TOASTED bit set when p is a detoasted copy that must be
+ *         freed once enumeration is finished.
+ */
+typedef struct callContext
+{
+       PGARRAY *p;
+       int num;
+       int flags;
+}CTX;
+
+#define TOASTED                1       /* CTX.flags bit: CTX.p is a detoasted copy that int_enum must pfree */
+#define START_NUM      8       /* number of slots in a newly allocated working array */
+#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + ((n-1)*sizeof(int4)))       /* bytes for a PGARRAY holding n values; the struct itself already has room for one */
+
+static PGARRAY * GetPGArray(int4 state, int fAdd);     /* aggregate state value -> working array */
+static PGARRAY *ShrinkPGArray(PGARRAY *p);     /* working array -> exactly sized final copy */
+
+Datum int_agg_state(PG_FUNCTION_ARGS); /* appends one value to the working array */
+Datum int_agg_final_count(PG_FUNCTION_ARGS);   /* NOTE(review): declared and registered below, but no definition is visible in this file */
+Datum int_agg_final_array(PG_FUNCTION_ARGS);   /* returns the collected values as an array */
+Datum int_enum(PG_FUNCTION_ARGS);      /* returns the entries of an array one per call */
+
+PG_FUNCTION_INFO_V1(int_agg_state);
+PG_FUNCTION_INFO_V1(int_agg_final_count);
+PG_FUNCTION_INFO_V1(int_agg_final_array);
+PG_FUNCTION_INFO_V1(int_enum);
+
+/*
+ * GetPGArray: manage the aggregation state of the array.
+ *
+ * state - the aggregate state value, which is really a PGARRAY pointer
+ *         carried in an int4; 0 means that no array exists yet.
+ * fAdd  - nonzero when the caller is about to append a value, so the array
+ *         must have room for at least one more element.
+ *
+ * When state is 0 a new array with START_NUM slots is allocated and its
+ * header fields are initialised.  Otherwise, if fAdd is set and the array is
+ * full (items >= lower), it is grown to twice its current capacity.  In all
+ * other cases the existing pointer is returned unchanged.
+ *
+ * Returns the array, which may be new or may have moved.  The return
+ * statements that yield NULL all follow an elog(ERROR) call.
+ *
+ * You need to specify the correct memory context, or it will vanish!
+ * The working array is allocated in TopTransactionContext.
+ * NOTE(review): casting the int4 state to a pointer assumes that a pointer
+ * fits in 32 bits -- confirm for the target platform.
+ */
+static PGARRAY * GetPGArray(int4 state, int fAdd)
+{
+       PGARRAY *p = (PGARRAY *) state;
+
+       if(!state)
+       {
+               /* New array: room for START_NUM values, none stored yet */
+               int cb = PGARRAY_SIZE(START_NUM);
+
+               p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb);
+
+               if(!p)
+               {
+                       elog(ERROR,"Integer aggregator, cant allocate TopTransactionContext memory");
+                       return 0;
+               }
+
+               p->a.size = cb;
+               p->a.ndim= 0;
+               p->a.flags = 0;
+               p->items = 0;
+               p->lower= START_NUM;
+       }
+       else if(fAdd)
+       {       /* Ensure array has space; "lower" holds the current capacity */
+               if(p->items >= p->lower)
+               {
+                       PGARRAY *pn;
+                       int n = p->lower + p->lower;
+                       int cbNew = PGARRAY_SIZE(n);
+
+                       pn = (PGARRAY *) repalloc(p, cbNew);
+
+                       if(!pn)
+                       {       /* Realloc failed! Allocate a new block and copy the old contents. */
+                               pn = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cbNew);
+                               if(!pn)
+                               {
+                                       elog(ERROR, "Integer aggregator, REALLY REALLY can't alloc memory");
+                                       return (PGARRAY *) NULL;
+                               }
+                               memcpy(pn, p, p->a.size);
+                               pfree(p);
+                       }
+                       pn->a.size = cbNew;
+                       pn->lower = n;
+                       return pn;
+               }
+       }
+       return p;
+}
+
+/*
+ * ShrinkPGArray: shrinks the array to its actual size and moves it into the
+ * standard memory allocation context, then frees the working memory.
+ *
+ * p - working array built during aggregation, or NULL.
+ *
+ * A block just large enough for the p->items collected values is obtained
+ * with palloc(), the header and values are copied into it, and the header is
+ * fixed up (size = new byte count, ndim = 1, flags = 0, lower = 0).  The
+ * working array p is freed afterwards.
+ *
+ * Returns the new array, or NULL when p is NULL.
+ */
+static PGARRAY *ShrinkPGArray(PGARRAY *p)
+{
+       PGARRAY *pnew=NULL;
+       if(p)
+       {
+               /* get target size: header plus exactly p->items values */
+               int cb = PGARRAY_SIZE(p->items);
+
+               /* use current transaction context */
+               pnew = palloc(cb);
+
+               if(pnew)
+               {
+                       /* Fix up the fields in the new structure, so Postgres understands */
+                       memcpy(pnew, p, cb);
+                       pnew->a.size = cb;
+                       pnew->a.ndim=1;
+                       pnew->a.flags = 0;
+                       pnew->lower = 0;
+               }
+               else
+               {
+                       elog(ERROR, "Integer aggregator, can't allocate memory");
+               }
+               pfree(p);
+       }       
+       return pnew;
+}
+
+/*
+ * int_agg_state: called for each iteration during an aggregate function.
+ *
+ * Argument 0 is the running state (0, or a PGARRAY pointer carried as an
+ * int4) and argument 1 is the integer to collect.  GetPGArray() is asked for
+ * an array with room for one more value, and the value is appended to it.
+ * An error is raised if no array was obtained or if the array is still full.
+ *
+ * Returns the array pointer, again as an int4, to be used as the next state.
+ */
+Datum int_agg_state(PG_FUNCTION_ARGS)
+{
+       int4 state = PG_GETARG_INT32(0);
+       int4 value = PG_GETARG_INT32(1);
+
+       PGARRAY *p = GetPGArray(state, 1);
+       if(!p)
+       {
+               elog(ERROR,"No aggregate storage\n");
+       }
+       else if(p->items >= p->lower)
+       {
+               elog(ERROR,"aggregate storage too small\n");
+       }
+       else
+       {
+               p->array[p->items++]= value;
+       }
+       PG_RETURN_INT32(p);
+}
+
+/*
+ * int_agg_final_array: the final function used for the integer aggregator.
+ *
+ * Argument 0 is the aggregate state (0, or a PGARRAY pointer carried as an
+ * int4).  The working array is fetched with GetPGArray() without asking for
+ * extra room (a state of 0 therefore yields a freshly allocated array with no
+ * items) and is then passed through ShrinkPGArray().
+ *
+ * Returns all the integers collected as a one dimensional integer array, or
+ * NULL if ShrinkPGArray() returned no array.
+ */
+Datum int_agg_final_array(PG_FUNCTION_ARGS)
+{
+       PGARRAY *pnew = ShrinkPGArray(GetPGArray(PG_GETARG_INT32(0),0));
+       if(pnew)
+       {
+               PG_RETURN_POINTER(pnew);
+       }
+       else
+       {
+               PG_RETURN_NULL();
+       }
+}
+
+/*
+ * int_enum: accepts an array, and returns one item for each entry in the
+ * array.
+ *
+ * Argument 0 is the integer array.  Each call returns one entry; the
+ * position is remembered between calls in a CTX stored in fcinfo->context:
+ *   - when fcinfo->context is not set, a CTX is allocated and, if the array
+ *     value is in extended (toasted) form, a detoasted copy of it is made;
+ *   - while entries remain, the next one is returned and rsi->isDone is set
+ *     to ExprMultipleResult;
+ *   - once every entry has been returned, the CTX and any detoasted copy are
+ *     freed, fcinfo->context is cleared, rsi->isDone is set to ExprEndResult
+ *     and NULL is returned.
+ *
+ * A notice is issued and 0 is returned when no array pointer was passed.  An
+ * error is raised when fcinfo->resultinfo is missing.
+ */
+Datum int_enum(PG_FUNCTION_ARGS)
+{
+       CTX *pc;
+       PGARRAY *p = (PGARRAY *) PG_GETARG_POINTER(0);
+       ReturnSetInfo *rsi = (ReturnSetInfo *)fcinfo->resultinfo;
+
+       if(!p)
+       {
+               elog(NOTICE, "No data sent\n");
+               return 0;
+       }
+       if(!rsi)
+       {
+               elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer");
+               PG_RETURN_NULL();
+               
+       }
+       if(!fcinfo->context)
+       {
+               /* Allocate a working context to hold the enumeration position */
+               pc = (CTX *) palloc(sizeof(CTX));
+
+               if(!pc)
+               {
+                       elog(ERROR, "CTX Alocation failed\n");
+                       PG_RETURN_NULL();
+               }
+
+               /* Don't copy the attribute if you don't need to */
+               if(VARATT_IS_EXTENDED(p) )
+               {
+                       /* Toasted!!! Work on a detoasted copy and remember to free it */
+                       pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p);
+                       pc->flags = TOASTED;
+                       if(!pc->p)
+                       {
+                               elog(ERROR, "Error in toaster!!! no detoasting\n");
+                               PG_RETURN_NULL();
+                       }
+               }
+               else
+               {
+                       /* Untoasted: use the argument in place */
+                       pc->p = p;
+                       pc->flags = 0;
+               }
+               fcinfo->context = (Node *) pc;
+               pc->num=0;
+       }
+       else /* use an existing one */
+       {
+               pc = (CTX *) fcinfo->context;
+       }
+       /* Are we done yet? */
+       if(pc->num >= pc->p->items)
+       {
+               /* We are done: release the copy (if any) and the context */
+               if(pc->flags & TOASTED)
+                       pfree(pc->p);
+               pfree(fcinfo->context);
+               fcinfo->context = NULL;
+               rsi->isDone = ExprEndResult ;
+       }
+       else    /* nope, return the next value */
+       {
+               int val = pc->p->array[pc->num++];
+               rsi->isDone = ExprMultipleResult;
+               PG_RETURN_INT32(val);
+       }
+       PG_RETURN_NULL();
+}
diff --git a/contrib/intagg/int_aggregate.sql.in b/contrib/intagg/int_aggregate.sql.in
new file mode 100644 (file)
index 0000000..e08324a
--- /dev/null
@@ -0,0 +1,40 @@
+-- Drop functions
+drop function int_agg_state (int4, int4);
+drop function int_agg_final_array (int4);
+drop aggregate int_array_aggregate(int4);
+drop function int_array_enum (int4[]);
+
+
+-- Internal function for the aggregate
+-- Is called for each item in an aggregation
+create function int_agg_state (int4, int4)
+       returns int4
+       as 'MODULE_FILENAME','int_agg_state'
+       language 'c';
+
+-- Internal function for the aggregate
+-- Is called at the end of the aggregation, and returns an array.
+create function int_agg_final_array (int4)
+       returns int4[]
+       as 'MODULE_FILENAME','int_agg_final_array'
+       language 'c';
+
+-- The aggregation function.
+-- uses the above functions to create an array of integers from an aggregation.
+create aggregate int_array_aggregate
+(
+       BASETYPE = int4,
+       SFUNC = int_agg_state,
+       STYPE = int4,
+       FINALFUNC = int_agg_final_array,
+       INITCOND = 0
+);
+
+-- The enumeration function
+-- returns each element in a one dimensional integer array
+-- as a row.
+create function int_array_enum(int4[])
+       returns setof integer
+       as 'MODULE_FILENAME','int_enum'
+       language 'c';
+