From: Bruce Momjian Date: Mon, 25 Feb 2002 03:45:27 +0000 (+0000) Subject: Add integer aggregator to /contrib. X-Git-Tag: REL7_3~2040 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2146d8c6a095617d405ec33333e9bed2a8bfd818;p=postgresql Add integer aggregator to /contrib. mlw --- diff --git a/contrib/README b/contrib/README index ec9c04d3c3..1ffef6df32 100644 --- a/contrib/README +++ b/contrib/README @@ -71,10 +71,14 @@ fuzzystrmatch - Levenshtein, metaphone, and soundex fuzzy string matching by Joe Conway , Joel Burton +intagg - + Integer aggregator + by mlw + + intarray - Index support for arrays of int4, using GiST - by Teodor Sigaev and Oleg Bartunov - . + by Teodor Sigaev and Oleg Bartunov ipc_check - Simple test script to help in configuring IPC. diff --git a/contrib/intagg/Makefile b/contrib/intagg/Makefile new file mode 100644 index 0000000000..66901cf539 --- /dev/null +++ b/contrib/intagg/Makefile @@ -0,0 +1,31 @@ +############################################# +# Makefile for integer aggregator +# Copyright (C) 2001 Digital Music Network. +# by Mark L. Woodward +# +subdir = contrib/intagg +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global + +NAME=int_aggregate +SONAME = $(NAME)$(DLSUFFIX) +MODULES = int_aggregate +DATA_built = int_aggregate.so +DOCS = README.int_aggrigate +SQLS=int_aggregate.sql + +include $(top_srcdir)/contrib/contrib-global.mk + +%.sql: %.sql.in + sed 's,MODULE_FILENAME,$$libdir/$(NAME),g' $< >$@ + +all : $(SONAME) $(SQLS) + + +install : all + $(INSTALL_SHLIB) $(SONAME) $(DESTDIR)$(pkglibdir) + + +clean : + rm -f $(SONAME) + rm -f $(SQLS) diff --git a/contrib/intagg/README.int_aggrigate b/contrib/intagg/README.int_aggrigate new file mode 100644 index 0000000000..0c7317ccc9 --- /dev/null +++ b/contrib/intagg/README.int_aggrigate @@ -0,0 +1,55 @@ +Integer aggregator/enumerator. + +Many database systems have the notion of a one to many table. 
+
+A one to many table usually sits between two indexed tables,
+as:
+
+create table one_to_many(left int, right int) ;
+
+And it is used like this:
+
+SELECT right.* from right JOIN one_to_many ON (right.id = one_to_many.right)
+    WHERE one_to_many.left = item;
+
+This will return all the items in the right hand table for an entry
+in the left hand table. This is a very common construct in SQL.
+
+Now, this methodology can be cumbersome with a very large number of
+entries in the one_to_many table. Depending on the order in which
+data was entered, a join like this could result in an index scan
+and a fetch for each right hand entry in the table for a particular
+left hand entry.
+
+If you have a very dynamic system, there is not much you can do.
+However, if you have some data which is fairly static, you can
+create a summary table with the aggregator.
+
+CREATE TABLE summary as SELECT left, int_array_aggregate(right)
+    AS right FROM one_to_many GROUP BY left;
+
+This will create a table with one row per left item, and an array
+of right items. Now this is pretty useless without some way of using
+the array; that's why there is an array enumerator.
+
+SELECT left, int_array_enum(right) FROM summary WHERE left = item;
+
+The above query, using int_array_enum, produces the same results as:
+
+SELECT left, right FROM one_to_many WHERE left = item;
+
+The difference is that the query against the summary table has to get
+only one row from the table, whereas the query against "one_to_many"
+must index scan and fetch a row for each entry.
+
+On our system, an EXPLAIN shows a query with a cost of 8488 gets reduced
+to a cost of 329.
The query is a join between the one_to_many table,
+
+select right, count(right) from
+(
+    select left, int_array_enum(right) as right from summary join
+        (select left from left_table where left = item) as lefts
+        ON (summary.left = lefts.left )
+) as list group by right order by count desc ;
+
+
diff --git a/contrib/intagg/int_aggregate.c b/contrib/intagg/int_aggregate.c
new file mode 100644
index 0000000000..b2187fdb0d
--- /dev/null
+++ b/contrib/intagg/int_aggregate.c
@@ -0,0 +1,271 @@
+/*
+ * Integer array aggregator / enumerator
+ *
+ * Mark L. Woodward
+ * DMN Digital Music Network.
+ * www.dmn.com
+ *
+ * Copyright (C) Digital Music Network
+ * December 20, 2001
+ *
+ * This file is the property of the Digital Music Network (DMN).
+ * It is being made available to users of the PostgreSQL system
+ * under the BSD license.
+ *
+ */
+#include "postgres.h"
+
+#include /* NOTE(review): header name missing here and in the next three directives */
+#include
+#include
+#include
+#include "postgres.h" /* NOTE(review): second inclusion of postgres.h */
+#include "access/heapam.h"
+#include "catalog/catname.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_proc.h"
+#include "executor/executor.h"
+#include "utils/fcache.h"
+#include "utils/sets.h"
+#include "utils/syscache.h"
+#include "access/tupmacs.h"
+#include "access/xact.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/lsyscache.h"
+
+
+/* Working and result layout: an ArrayType header followed by two ints and the int4 values, used as a one dimensional array */
+
+typedef struct agg
+{
+    ArrayType a;
+    int items; /* number of values currently stored in array[] */
+    int lower; /* capacity in elements while aggregating (see GetPGArray); set to 0 by ShrinkPGArray */
+    int4 array[1]; /* first value; the allocation is sized with PGARRAY_SIZE */
+}PGARRAY;
+
+/* Per-call-series state of int_enum: tracks the position during enumeration */
+typedef struct callContext
+{
+    PGARRAY *p; /* array being enumerated */
+    int num; /* index of the next element to return */
+    int flags; /* TOASTED when p is a detoasted copy that int_enum must pfree */
+}CTX;
+
+#define TOASTED 1 /* CTX.flags bit, see int_enum */
+#define START_NUM 8 /* capacity, in elements, of a newly created array */
+#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + ((n-1)*sizeof(int4))) /* bytes for n elements; NOTE(review): n is not parenthesized */
+
+static PGARRAY * GetPGArray(int4 state, int fAdd);
+static PGARRAY *ShrinkPGArray(PGARRAY *p);
+
+Datum int_agg_state(PG_FUNCTION_ARGS);
+Datum int_agg_final_count(PG_FUNCTION_ARGS); /* NOTE(review): declared and registered below, but no definition appears in this file */
+Datum int_agg_final_array(PG_FUNCTION_ARGS);
+Datum int_enum(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(int_agg_state);
+PG_FUNCTION_INFO_V1(int_agg_final_count);
+PG_FUNCTION_INFO_V1(int_agg_final_array);
+PG_FUNCTION_INFO_V1(int_enum);
+
+/* GetPGArray
+ * Turns the int4 aggregate state back into a PGARRAY pointer. A state of 0 allocates a START_NUM element array in TopTransactionContext; with fAdd set, a full array is doubled in capacity.
+ * Returns the array to use, which may have moved. NOTE(review): the state carries a pointer in an int4, which presumably only works where pointers fit in 32 bits - confirm.
+ */
+static PGARRAY * GetPGArray(int4 state, int fAdd)
+{
+    PGARRAY *p = (PGARRAY *) state;
+
+    if(!state)
+    {
+        /* New array */
+        int cb = PGARRAY_SIZE(START_NUM);
+
+        p = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cb);
+
+        if(!p) /* NOTE(review): presumably unreachable, MemoryContextAlloc is expected to raise an error rather than return NULL - confirm */
+        {
+            elog(ERROR,"Integer aggregator, cant allocate TopTransactionContext memory");
+            return 0;
+        }
+
+        p->a.size = cb;
+        p->a.ndim= 0;
+        p->a.flags = 0;
+        p->items = 0;
+        p->lower= START_NUM;
+    }
+    else if(fAdd)
+    { /* Ensure array has space for one more value */
+        if(p->items >= p->lower)
+        {
+            PGARRAY *pn;
+            int n = p->lower + p->lower; /* double the capacity */
+            int cbNew = PGARRAY_SIZE(n);
+
+            pn = (PGARRAY *) repalloc(p, cbNew);
+
+            if(!pn)
+            { /* repalloc returned NULL: fall back to a fresh block and copy the old contents into it */
+                pn = (PGARRAY *) MemoryContextAlloc(TopTransactionContext, cbNew);
+                if(!pn)
+                {
+                    elog(ERROR, "Integer aggregator, REALLY REALLY can't alloc memory");
+                    return (PGARRAY *) NULL;
+                }
+                memcpy(pn, p, p->a.size);
+                pfree(p);
+            }
+            pn->a.size = cbNew;
+            pn->lower = n;
+            return pn;
+        }
+    }
+    return p;
+}
+
+/* Copies the array into a palloc'd block sized for exactly p->items values, marks the copy as a one dimensional
+ * array (ndim = 1, lower = 0), frees the working block p and returns the copy; returns NULL when p is NULL */
+static PGARRAY *ShrinkPGArray(PGARRAY *p)
+{
+    PGARRAY *pnew=NULL;
+    if(p)
+    {
+        /* get target size */
+        int cb = PGARRAY_SIZE(p->items);
+
+        /* allocate the result with palloc rather than from TopTransactionContext */
+        pnew = palloc(cb);
+
+        if(pnew)
+        {
+            /* Copy header plus stored values, then fix up the fields in the new structure, so Postgres understands */
+            memcpy(pnew, p, cb);
+            pnew->a.size = cb;
+            pnew->a.ndim=1;
+            pnew->a.flags = 0;
+            pnew->lower = 0;
+        }
+        else
+        {
+            elog(ERROR, "Integer aggregator, can't allocate memory");
+        }
+        pfree(p);
+    }
+    return pnew;
+}
+
+/* State function of the aggregate: appends argument 1 to the array addressed by the int4 state in argument 0 and returns the array pointer as int4 */
+Datum int_agg_state(PG_FUNCTION_ARGS)
+{
+    int4 state = PG_GETARG_INT32(0);
+    int4 value = PG_GETARG_INT32(1);
+
+    PGARRAY *p = GetPGArray(state, 1);
+    if(!p)
+    {
+        elog(ERROR,"No aggregate storage\n");
+    }
+    else if(p->items >= p->lower)
+    {
+        elog(ERROR,"aggregate storage too small\n");
+    }
+    else
+    {
+        p->array[p->items++]= value;
+    }
+    PG_RETURN_INT32(p);
+}
+
+/* Final function of the integer aggregator: shrinks the working array and returns all the integers
+ * collected as a one dimensional integer array, or SQL NULL when ShrinkPGArray returns no array */
+Datum int_agg_final_array(PG_FUNCTION_ARGS)
+{
+    PGARRAY *pnew = ShrinkPGArray(GetPGArray(PG_GETARG_INT32(0),0));
+    if(pnew)
+    {
+        PG_RETURN_POINTER(pnew);
+    }
+    else
+    {
+        PG_RETURN_NULL();
+    }
+}
+
+/* Enumerator: each call returns the next value of the array in argument 0; the position lives in a CTX stored in fcinfo->context and the end is reported through rsi->isDone */
+Datum int_enum(PG_FUNCTION_ARGS)
+{
+    CTX *pc;
+    PGARRAY *p = (PGARRAY *) PG_GETARG_POINTER(0);
+    ReturnSetInfo *rsi = (ReturnSetInfo *)fcinfo->resultinfo;
+
+    if(!p)
+    {
+        elog(NOTICE, "No data sent\n");
+        return 0;
+    }
+    if(!rsi)
+    {
+        elog(ERROR, "No ReturnSetInfo sent! function must be declared returning a 'setof' integer");
+        PG_RETURN_NULL();
+
+    }
+    if(!fcinfo->context)
+    {
+        /* First call of a series: allocate a working context */
+        pc = (CTX *) palloc(sizeof(CTX));
+
+        if(!pc)
+        {
+            elog(ERROR, "CTX Alocation failed\n");
+            PG_RETURN_NULL();
+        }
+
+        /* Don't copy the attribute if you don't need to */
+        if(VARATT_IS_EXTENDED(p) )
+        {
+            /* Toasted: enumerate a detoasted copy, which is freed when the enumeration ends */
+            pc->p = (PGARRAY *) PG_DETOAST_DATUM_COPY(p);
+            pc->flags = TOASTED;
+            if(!pc->p)
+            {
+                elog(ERROR, "Error in toaster!!! no detoasting\n");
+                PG_RETURN_NULL();
+            }
+        }
+        else
+        {
+            /* Untoasted: enumerate the argument in place */
+            pc->p = p;
+            pc->flags = 0;
+        }
+        fcinfo->context = (Node *) pc;
+        pc->num=0;
+    }
+    else /* use an existing one */
+    {
+        pc = (CTX *) fcinfo->context;
+    }
+    /* Are we done yet? */
+    if(pc->num >= pc->p->items)
+    {
+        /* We are done: release the detoasted copy (if any) and the context */
+        if(pc->flags & TOASTED)
+            pfree(pc->p);
+        pfree(fcinfo->context);
+        fcinfo->context = NULL;
+        rsi->isDone = ExprEndResult ;
+    }
+    else /* nope, return the next value */
+    {
+        int val = pc->p->array[pc->num++];
+        rsi->isDone = ExprMultipleResult;
+        PG_RETURN_INT32(val);
+    }
+    PG_RETURN_NULL();
+}
diff --git a/contrib/intagg/int_aggregate.sql.in b/contrib/intagg/int_aggregate.sql.in
new file mode 100644
index 0000000000..e08324aead
--- /dev/null
+++ b/contrib/intagg/int_aggregate.sql.in
@@ -0,0 +1,40 @@
+-- Drop functions
+drop function int_agg_state (int4, int4);
+drop function int_agg_final_array (int4);
+drop aggregate int_array_aggregate(int4);
+drop function int_array_enum (int4[]);
+
+
+-- Internal function for the aggregate
+-- Is called for each item in an aggregation
+create function int_agg_state (int4, int4)
+    returns int4
+    as 'MODULE_FILENAME','int_agg_state'
+    language 'c';
+
+-- Internal function for the aggregate
+-- Is called at the end of the aggregation, and returns an array.
+create function int_agg_final_array (int4)
+    returns int4[]
+    as 'MODULE_FILENAME','int_agg_final_array'
+    language 'c';
+
+-- The aggration funcion.
+-- uses the above functions to create an array of integers from an aggregation.
+create aggregate int_array_aggregate
+(
+    BASETYPE = int4,
+    SFUNC = int_agg_state,
+    STYPE = int4,
+    FINALFUNC = int_agg_final_array,
+    INITCOND = 0
+);
+
+-- The enumeration function
+-- returns each element in a one dimentional integer array
+-- as a row.
+create function int_array_enum(int4[])
+    returns setof integer
+    as 'MODULE_FILENAME','int_enum'
+    language 'c';
+