]> granicus.if.org Git - postgresql/commitdiff
Bloom index contrib module
authorTeodor Sigaev <teodor@sigaev.ru>
Fri, 1 Apr 2016 13:42:24 +0000 (16:42 +0300)
committerTeodor Sigaev <teodor@sigaev.ru>
Fri, 1 Apr 2016 13:42:24 +0000 (16:42 +0300)
This module provides a new index access method. It is essentially a simple
Bloom filter implemented as a PostgreSQL index. It can be beneficial for
searches that involve a large number of columns.

This module is also currently the only way to test the generic WAL interface
committed earlier.

Author: Teodor Sigaev, Alexander Korotkov
Reviewers: Aleksander Alekseev, Michael Paquier, Jim Nasby

18 files changed:
contrib/Makefile
contrib/bloom/.gitignore [new file with mode: 0644]
contrib/bloom/Makefile [new file with mode: 0644]
contrib/bloom/blcost.c [new file with mode: 0644]
contrib/bloom/blinsert.c [new file with mode: 0644]
contrib/bloom/bloom--1.0.sql [new file with mode: 0644]
contrib/bloom/bloom.control [new file with mode: 0644]
contrib/bloom/bloom.h [new file with mode: 0644]
contrib/bloom/blscan.c [new file with mode: 0644]
contrib/bloom/blutils.c [new file with mode: 0644]
contrib/bloom/blvacuum.c [new file with mode: 0644]
contrib/bloom/blvalidate.c [new file with mode: 0644]
contrib/bloom/expected/bloom.out [new file with mode: 0644]
contrib/bloom/sql/bloom.sql [new file with mode: 0644]
contrib/bloom/t/001_wal.pl [new file with mode: 0644]
doc/src/sgml/bloom.sgml [new file with mode: 0644]
doc/src/sgml/contrib.sgml
doc/src/sgml/filelist.sgml

index d12dd6379b50c7384615a39d48916052369d2cba..25263c0be9494a5ee7943190088e184e4ebcb3cd 100644 (file)
@@ -8,6 +8,7 @@ SUBDIRS = \
                adminpack       \
                auth_delay      \
                auto_explain    \
+               bloom           \
                btree_gin       \
                btree_gist      \
                chkpass         \
diff --git a/contrib/bloom/.gitignore b/contrib/bloom/.gitignore
new file mode 100644 (file)
index 0000000..5dcb3ff
--- /dev/null
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/bloom/Makefile b/contrib/bloom/Makefile
new file mode 100644 (file)
index 0000000..13bd397
--- /dev/null
@@ -0,0 +1,24 @@
+# contrib/bloom/Makefile
+
+MODULE_big = bloom
+OBJS = blcost.o blinsert.o blscan.o blutils.o blvacuum.o blvalidate.o $(WIN32RES)
+
+EXTENSION = bloom
+DATA = bloom--1.0.sql
+PGFILEDESC = "bloom access method - signature file based index"
+
+REGRESS = bloom
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/bloom
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+wal-check: temp-install
+       $(prove_check)
diff --git a/contrib/bloom/blcost.c b/contrib/bloom/blcost.c
new file mode 100644 (file)
index 0000000..9897898
--- /dev/null
@@ -0,0 +1,48 @@
+/*-------------------------------------------------------------------------
+ *
+ * blcost.c
+ *             Cost estimate function for bloom indexes.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       contrib/bloom/blcost.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "optimizer/cost.h"
+#include "utils/selfuncs.h"
+
+#include "bloom.h"
+
+/*
+ * blcostestimate -- planner cost estimator for a bloom index scan.
+ *
+ * All index tuples have to be visited no matter what the quals are, so the
+ * index's total tuple count is preset as the number of index tuples to
+ * visit before delegating to genericcostestimate().  The generic results
+ * are then handed back through the four output arguments.
+ */
+void
+blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+                          Cost *indexStartupCost, Cost *indexTotalCost,
+                          Selectivity *indexSelectivity, double *indexCorrelation)
+{
+       GenericCosts generic;
+       List       *indexQuals = deconstruct_indexquals(path);
+
+       /* Start from a zeroed struct; only the tuple count is preset */
+       MemSet(&generic, 0, sizeof(generic));
+       generic.numIndexTuples = path->indexinfo->tuples;
+
+       genericcostestimate(root, path, loop_count, indexQuals, &generic);
+
+       /* Report the generic estimates to the caller */
+       *indexStartupCost = generic.indexStartupCost;
+       *indexTotalCost = generic.indexTotalCost;
+       *indexSelectivity = generic.indexSelectivity;
+       *indexCorrelation = generic.indexCorrelation;
+}
diff --git a/contrib/bloom/blinsert.c b/contrib/bloom/blinsert.c
new file mode 100644 (file)
index 0000000..9e66780
--- /dev/null
@@ -0,0 +1,313 @@
+/*-------------------------------------------------------------------------
+ *
+ * blinsert.c
+ *             Bloom index build and insert functions.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       contrib/bloom/blinsert.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/generic_xlog.h"
+#include "catalog/index.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+#include "bloom.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * Working state of a bloom index build.
+ *
+ * Tuples are accumulated into a single in-memory page image ("data").  When
+ * a tuple no longer fits, the image is copied into a new index buffer by
+ * flushCachedPage() and re-initialized by initCachedPage().  blbuild()
+ * flushes the last image only if "count" is greater than zero.
+ */
+typedef struct
+{
+       BloomState      blstate;                /* bloom index state */
+       MemoryContext tmpCtx;           /* temporary memory context reset after
+                                                                * each tuple */
+       char            data[BLCKSZ];   /* cached page image, one block in size */
+       int64           count;                  /* number of tuples in cached page */
+}      BloomBuildState;
+
+/*
+ * Write out the page image cached in BloomBuildState.
+ *
+ * A buffer is obtained from BloomNewBuffer(), registered with a generic
+ * WAL record, overwritten with the BLCKSZ bytes of the cached image, and
+ * then unlocked and released once the record is finished.
+ */
+static void
+flushCachedPage(Relation index, BloomBuildState *buildstate)
+{
+       Buffer          newBuf;
+       GenericXLogState *xlog;
+       Page            target;
+
+       newBuf = BloomNewBuffer(index);
+
+       xlog = GenericXLogStart(index);
+       target = GenericXLogRegister(xlog, newBuf, true);
+       memcpy(target, buildstate->data, BLCKSZ);
+       GenericXLogFinish(xlog);
+
+       UnlockReleaseBuffer(newBuf);
+}
+
+/*
+ * Reset the cached page in BloomBuildState to an empty bloom page.
+ *
+ * The tuple counter is cleared, the image is zero-filled and then set up
+ * as a page with no flags by BloomInitPage().
+ */
+static void
+initCachedPage(BloomBuildState *buildstate)
+{
+       buildstate->count = 0;
+
+       memset(buildstate->data, 0, BLCKSZ);
+       BloomInitPage(buildstate->data, 0);
+}
+
+/*
+ * Per-tuple callback from IndexBuildHeapScan.
+ *
+ * Forms a bloom tuple for the heap tuple and appends it to the page image
+ * cached in the build state ("state" is a BloomBuildState *).  When the
+ * cached page is full it is flushed, re-initialized, and the tuple is added
+ * to the fresh page instead.
+ *
+ * buildstate->count is incremented for every tuple that ends up on the
+ * cached page, including the first tuple placed on a fresh page after a
+ * flush.  blbuild() relies on count > 0 to decide whether the last cached
+ * page must be flushed; failing to count that tuple would lose it whenever
+ * it happens to be the final tuple of the build.
+ *
+ * All per-tuple allocations are made in buildstate->tmpCtx, which is reset
+ * before returning.
+ */
+static void
+bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
+                                  bool *isnull, bool tupleIsAlive, void *state)
+{
+       BloomBuildState *buildstate = (BloomBuildState *) state;
+       MemoryContext oldCtx;
+       BloomTuple *itup;
+
+       oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+       itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
+
+       /* Try to add next item to cached page */
+       if (!BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
+       {
+               /* Cached page is full, flush it out and make a new one */
+               flushCachedPage(index, buildstate);
+
+               CHECK_FOR_INTERRUPTS();
+
+               initCachedPage(buildstate);
+
+               if (!BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
+               {
+                       /* We shouldn't be here since we're inserting to the empty page */
+                       elog(ERROR, "can not add new tuple");
+               }
+       }
+
+       /* The item is now on the cached page, whichever path was taken */
+       buildstate->count++;
+
+       MemoryContextSwitchTo(oldCtx);
+       MemoryContextReset(buildstate->tmpCtx);
+}
+
+/*
+ * Build a new bloom index.
+ *
+ * The index must be empty on entry (an error is raised otherwise).  The
+ * metapage is initialized first, then the heap is scanned with
+ * bloomBuildCallback() collecting tuples into a cached page that is flushed
+ * whenever it fills up.  Returns a palloc'd IndexBuildResult in which both
+ * heap_tuples and index_tuples are set to the tuple count reported by
+ * IndexBuildHeapScan().
+ */
+IndexBuildResult *
+blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
+{
+       IndexBuildResult *result;
+       double          reltuples;
+       BloomBuildState buildstate;
+
+       if (RelationGetNumberOfBlocks(index) != 0)
+               elog(ERROR, "index \"%s\" already contains data",
+                        RelationGetRelationName(index));
+
+       /* Initialize the meta page */
+       BloomInitMetapage(index);
+
+       /* Initialize the bloom build state */
+       memset(&buildstate, 0, sizeof(buildstate));
+       initBloomState(&buildstate.blstate, index);
+       buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
+                                                                                         "Bloom build temporary context",
+                                                                                         ALLOCSET_DEFAULT_MINSIZE,
+                                                                                         ALLOCSET_DEFAULT_INITSIZE,
+                                                                                         ALLOCSET_DEFAULT_MAXSIZE);
+       initCachedPage(&buildstate);
+
+       /* Do the heap scan */
+       reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+                                                                  bloomBuildCallback, (void *) &buildstate);
+
+       /*
+        * There could still be some items in the cached page.  Flush this page
+        * if needed.
+        */
+       if (buildstate.count > 0)
+               flushCachedPage(index, &buildstate);
+
+       /* The per-tuple context is no longer needed once the scan is done */
+       MemoryContextDelete(buildstate.tmpCtx);
+
+       result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+       result->heap_tuples = result->index_tuples = reltuples;
+
+       return result;
+}
+
+/*
+ * Build an empty bloom index in the initialization fork.
+ *
+ * As written, this performs the same emptiness check and the same
+ * BloomInitMetapage() call that blbuild() starts with, and adds no data
+ * pages.
+ *
+ * NOTE(review): nothing in this function names the initialization fork
+ * explicitly; RelationGetNumberOfBlocks() and BloomInitMetapage() are
+ * called exactly as for a regular build.  Confirm that the metapage really
+ * ends up in the init fork, otherwise unlogged bloom indexes may not be
+ * reset correctly after a crash.
+ */
+void
+blbuildempty(Relation index)
+{
+       if (RelationGetNumberOfBlocks(index) != 0)
+               elog(ERROR, "index \"%s\" already contains data",
+                        RelationGetRelationName(index));
+
+       /* Initialize the meta page */
+       BloomInitMetapage(index);
+}
+
+/*
+ * Insert new tuple to the bloom index.
+ *
+ * The tuple is formed from values/isnull and ht_ctid, then placed using
+ * three successively more expensive attempts:
+ *
+ *     1. the first page listed in the metapage's notFullPage array, reading
+ *        the metapage under a share lock only;
+ *     2. the remaining notFullPage entries, holding the metapage exclusively
+ *        because nStart has to be advanced;
+ *     3. a brand-new page, which then becomes the only notFullPage entry.
+ *
+ * heapRel and checkUnique are not used.  The function always returns false.
+ *
+ * All working memory lives in a private memory context.  Every exit path
+ * must switch back to the caller's context and delete the private one;
+ * otherwise each insert would leak a context and leave
+ * CurrentMemoryContext pointing at it.
+ */
+bool
+blinsert(Relation index, Datum *values, bool *isnull,
+                ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique)
+{
+       BloomState      blstate;
+       BloomTuple *itup;
+       MemoryContext oldCtx;
+       MemoryContext insertCtx;
+       BloomMetaPageData *metaData;
+       Buffer          buffer,
+                               metaBuffer;
+       Page            page,
+                               metaPage;
+       BlockNumber blkno = InvalidBlockNumber;
+       OffsetNumber nStart;
+       GenericXLogState *state;
+
+       insertCtx = AllocSetContextCreate(CurrentMemoryContext,
+                                                                         "Bloom insert temporary context",
+                                                                         ALLOCSET_DEFAULT_MINSIZE,
+                                                                         ALLOCSET_DEFAULT_INITSIZE,
+                                                                         ALLOCSET_DEFAULT_MAXSIZE);
+
+       oldCtx = MemoryContextSwitchTo(insertCtx);
+
+       initBloomState(&blstate, index);
+       itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);
+
+       /*
+        * At first, try to insert new tuple to the first page in notFullPage
+        * array.  If success we don't need to modify the meta page.
+        */
+       metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+       LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
+       metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));
+
+       if (metaData->nEnd > metaData->nStart)
+       {
+               blkno = metaData->notFullPage[metaData->nStart];
+
+               Assert(blkno != InvalidBlockNumber);
+               LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
+
+               buffer = ReadBuffer(index, blkno);
+               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+               state = GenericXLogStart(index);
+               page = GenericXLogRegister(state, buffer, false);
+
+               if (BloomPageAddItem(&blstate, page, itup))
+               {
+                       GenericXLogFinish(state);
+                       UnlockReleaseBuffer(buffer);
+                       /* metapage lock was already dropped above; only unpin it */
+                       ReleaseBuffer(metaBuffer);
+                       MemoryContextSwitchTo(oldCtx);
+                       MemoryContextDelete(insertCtx);
+                       return false;
+               }
+               else
+               {
+                       GenericXLogAbort(state);
+                       UnlockReleaseBuffer(buffer);
+               }
+       }
+       else
+       {
+               /* First page in notFullPage isn't suitable */
+               LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
+       }
+
+       /*
+        * Try other pages in notFullPage array.  We will have to change nStart in
+        * metapage.  Thus, grab exclusive lock on metapage.
+        */
+       LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+       state = GenericXLogStart(index);
+       metaPage = GenericXLogRegister(state, metaBuffer, false);
+       metaData = BloomPageGetMeta(metaPage);
+
+       /*
+        * Iterate over notFullPage array.  Skip page we already tried first.
+        */
+       nStart = metaData->nStart;
+       if (metaData->nEnd > nStart &&
+               blkno == metaData->notFullPage[nStart])
+               nStart++;
+
+       while (metaData->nEnd > nStart)
+       {
+               blkno = metaData->notFullPage[nStart];
+               Assert(blkno != InvalidBlockNumber);
+
+               buffer = ReadBuffer(index, blkno);
+               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+               page = GenericXLogRegister(state, buffer, false);
+
+               if (BloomPageAddItem(&blstate, page, itup))
+               {
+                       /* Remember where free space was found for later inserts */
+                       metaData->nStart = nStart;
+                       GenericXLogFinish(state);
+                       UnlockReleaseBuffer(buffer);
+                       UnlockReleaseBuffer(metaBuffer);
+                       MemoryContextSwitchTo(oldCtx);
+                       MemoryContextDelete(insertCtx);
+                       return false;
+               }
+               else
+               {
+                       GenericXLogUnregister(state, buffer);
+                       UnlockReleaseBuffer(buffer);
+               }
+               nStart++;
+       }
+
+       GenericXLogAbort(state);
+
+       /*
+        * Didn't find place to insert in notFullPage array.  Allocate new page.
+        */
+       buffer = BloomNewBuffer(index);
+
+       state = GenericXLogStart(index);
+       metaPage = GenericXLogRegister(state, metaBuffer, false);
+       metaData = BloomPageGetMeta(metaPage);
+       page = GenericXLogRegister(state, buffer, true);
+       BloomInitPage(page, 0);
+
+       if (!BloomPageAddItem(&blstate, page, itup))
+       {
+               /* We shouldn't be here since we're inserting to the empty page */
+               elog(ERROR, "can not add new tuple");
+       }
+
+       /* The new page becomes the sole entry of the notFullPage array */
+       metaData->nStart = 0;
+       metaData->nEnd = 1;
+       metaData->notFullPage[0] = BufferGetBlockNumber(buffer);
+
+       GenericXLogFinish(state);
+
+       UnlockReleaseBuffer(buffer);
+       UnlockReleaseBuffer(metaBuffer);
+
+       /* Restore the caller's context and drop our private one */
+       MemoryContextSwitchTo(oldCtx);
+       MemoryContextDelete(insertCtx);
+
+       return false;
+}
diff --git a/contrib/bloom/bloom--1.0.sql b/contrib/bloom/bloom--1.0.sql
new file mode 100644 (file)
index 0000000..7fa7513
--- /dev/null
@@ -0,0 +1,19 @@
+/* contrib/bloom/bloom--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION bloom" to load this file. \quit
+
+-- Handler function.  Plain CREATE (not CREATE OR REPLACE) is used so that
+-- the script fails instead of silently taking over a pre-existing function.
+CREATE FUNCTION blhandler(internal)
+RETURNS index_am_handler
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+-- Access method
+CREATE ACCESS METHOD bloom TYPE INDEX HANDLER blhandler;
+
+-- Opclasses: operator 1 is equality, function 1 is the hash function
+
+CREATE OPERATOR CLASS int4_ops
+DEFAULT FOR TYPE int4 USING bloom AS
+       OPERATOR        1       =(int4, int4),
+       FUNCTION        1       hashint4(int4);
+
+CREATE OPERATOR CLASS text_ops
+DEFAULT FOR TYPE text USING bloom AS
+       OPERATOR        1       =(text, text),
+       FUNCTION        1       hashtext(text);
diff --git a/contrib/bloom/bloom.control b/contrib/bloom/bloom.control
new file mode 100644 (file)
index 0000000..4d4124b
--- /dev/null
@@ -0,0 +1,5 @@
+# bloom extension
+comment = 'bloom access method - signature file based index'
+default_version = '1.0'
+module_pathname = '$libdir/bloom'
+relocatable = true
diff --git a/contrib/bloom/bloom.h b/contrib/bloom/bloom.h
new file mode 100644 (file)
index 0000000..50bf99b
--- /dev/null
@@ -0,0 +1,178 @@
+/*-------------------------------------------------------------------------
+ *
+ * bloom.h
+ *       Header for bloom index.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       contrib/bloom/bloom.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _BLOOM_H_
+#define _BLOOM_H_
+
+#include "access/amapi.h"
+#include "access/generic_xlog.h"
+#include "access/itup.h"
+#include "access/xlog.h"
+#include "nodes/relation.h"
+#include "fmgr.h"
+
+/* Support procedures numbers */
+#define BLOOM_HASH_PROC                        1
+#define BLOOM_NPROC                            1
+
+/* Scan strategies */
+#define BLOOM_EQUAL_STRATEGY   1
+#define BLOOM_NSTRATEGIES              1
+
+/*
+ * Opaque data of a bloom page, reached through PageGetSpecialPointer()
+ * (see BloomPageGetOpaque).
+ */
+typedef struct BloomPageOpaqueData
+{
+       OffsetNumber maxoff;            /* value returned by BloomPageGetMaxOffset;
+                                                                * tuple offsets run from 1 to maxoff */
+       uint16          flags;                  /* bitmask of BLOOM_* page flags */
+}      BloomPageOpaqueData;
+
+typedef BloomPageOpaqueData *BloomPageOpaque;
+
+/* Bloom page flags: one bit each in BloomPageOpaqueData.flags */
+#define BLOOM_META             (1<<0)
+#define BLOOM_DELETED  (1<<1)
+
+/* Macros for accessing bloom page structures */
+#define BloomPageGetOpaque(page) ((BloomPageOpaque) PageGetSpecialPointer(page))
+#define BloomPageGetMaxOffset(page) (BloomPageGetOpaque(page)->maxoff)
+#define BloomPageIsMeta(page) (BloomPageGetOpaque(page)->flags & BLOOM_META)
+#define BloomPageIsDeleted(page) (BloomPageGetOpaque(page)->flags & BLOOM_DELETED)
+#define BloomPageSetDeleted(page) (BloomPageGetOpaque(page)->flags |= BLOOM_DELETED)
+#define BloomPageSetNonDeleted(page) (BloomPageGetOpaque(page)->flags &= ~BLOOM_DELETED)
+#define BloomPageGetData(page)         ((BloomTuple *)PageGetContents(page))
+#define BloomPageGetTuple(state, page, offset) \
+       ((BloomTuple *)(PageGetContents(page) \
+               + (state)->sizeOfBloomTuple * ((offset) - 1)))
+#define BloomPageGetNextTuple(state, tuple) \
+       ((BloomTuple *)((Pointer)(tuple) + (state)->sizeOfBloomTuple))
+
+/* Preserved page numbers */
+#define BLOOM_METAPAGE_BLKNO   (0)
+#define BLOOM_HEAD_BLKNO               (1)             /* first data page */
+
+/*
+ * Bloom index options.  A copy is embedded in the metapage (see
+ * BloomMetaPageData.opts).
+ */
+typedef struct BloomOptions
+{
+       int32           vl_len_;                /* varlena header (do not touch directly!) */
+       int                     bloomLength;    /* length of signature, counted in uint16
+                                                                * words */
+       int                     bitSize[INDEX_MAX_KEYS];                /* signature bits per index
+                                                                                                * key */
+}      BloomOptions;
+
+/*
+ * FreeBlockNumberArray - array of block numbers, sized so that the metadata
+ * fills all the space available in the metapage.
+ *
+ * The element count is the block size minus the page header, the opaque
+ * area and the fixed leading fields of BloomMetaPageData (two uint16, one
+ * uint32 and the BloomOptions), divided by sizeof(BlockNumber).
+ */
+typedef BlockNumber FreeBlockNumberArray[
+                                                                                MAXALIGN_DOWN(
+               BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(BloomPageOpaqueData))
+          - MAXALIGN(sizeof(uint16) * 2 + sizeof(uint32) + sizeof(BloomOptions))
+                                                                                                          ) / sizeof(BlockNumber)
+];
+
+/*
+ * Metadata of bloom index, stored in the contents area of the metapage
+ * (see BloomPageGetMeta).
+ */
+typedef struct BloomMetaPageData
+{
+       uint32          magickNumber;   /* NOTE(review): presumably set to
+                                                                * BLOOM_MAGICK_NUMBER -- confirm */
+       uint16          nStart;                 /* index of first notFullPage entry to try */
+       uint16          nEnd;                   /* entries in [nStart, nEnd) are in use */
+       BloomOptions opts;                      /* index options */
+       FreeBlockNumberArray notFullPage;       /* blocks tried by blinsert before
+                                                                                * allocating a new page */
+}      BloomMetaPageData;
+
+/* Magic number to distinguish bloom pages from other pages */
+#define BLOOM_MAGICK_NUMBER (0xDBAC0DED)
+
+/* Number of block numbers that fit in BloomMetaPageData */
+#define BloomMetaBlockN                (sizeof(FreeBlockNumberArray) / sizeof(BlockNumber))
+
+/* Access the metadata stored in the contents area of a metapage */
+#define BloomPageGetMeta(page) ((BloomMetaPageData *) PageGetContents(page))
+
+/*
+ * Per-index working state, filled in by initBloomState() and passed to the
+ * tuple-forming, signing and page-access routines.
+ */
+typedef struct BloomState
+{
+       FmgrInfo        hashFn[INDEX_MAX_KEYS]; /* one slot per possible index key */
+       BloomOptions *opts;                     /* stored in rd_amcache and defined at
+                                                                * creation time */
+       int32           nColumns;
+
+       /*
+        * sizeOfBloomTuple is index-specific and depends on reloptions, so
+        * precompute it.  It is the stride between consecutive tuples on a page
+        * (see BloomPageGetTuple and BloomPageGetNextTuple).
+        */
+       int32           sizeOfBloomTuple;
+}      BloomState;
+
+#define BloomPageGetFreeSpace(state, page) \
+       (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
+               - BloomPageGetMaxOffset(page) * (state)->sizeOfBloomTuple \
+               - MAXALIGN(sizeof(BloomPageOpaqueData)))
+
+/*
+ * Bloom tuples are very different from the tuples of all other relations:
+ * a heap pointer followed directly by the signature words.
+ */
+typedef uint16 SignType;
+
+typedef struct BloomTuple
+{
+       ItemPointerData heapPtr;        /* heap tuple this signature belongs to */
+       SignType        sign[1];                /* declared with one element, but scans
+                                                                * index it up to opts->bloomLength; the
+                                                                * on-page size is sizeOfBloomTuple */
+}      BloomTuple;
+
+/* Size of the fixed part of a bloom tuple, i.e. offset of the signature */
+#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
+
+/*
+ * Opaque data structure for bloom index scan, kept in scan->opaque.
+ */
+typedef struct BloomScanOpaqueData
+{
+       SignType   *sign;                       /* Scan signature; NULL until computed by
+                                                                * blgetbitmap for the current keys */
+       BloomState      state;                  /* index state set up by initBloomState */
+}      BloomScanOpaqueData;
+
+typedef BloomScanOpaqueData *BloomScanOpaque;
+
+/* blutils.c */
+extern void _PG_init(void);
+extern Datum blhandler(PG_FUNCTION_ARGS);
+extern void initBloomState(BloomState * state, Relation index);
+extern void BloomInitMetapage(Relation index);
+extern void BloomInitPage(Page page, uint16 flags);
+extern Buffer BloomNewBuffer(Relation index);
+extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
+extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
+extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
+
+/* blvalidate.c */
+extern bool blvalidate(Oid opclassoid);
+
+/* index access method interface functions */
+extern bool blinsert(Relation index, Datum *values, bool *isnull,
+                ItemPointer ht_ctid, Relation heapRel,
+                IndexUniqueCheck checkUnique);
+extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys);
+extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
+extern void blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+                ScanKey orderbys, int norderbys);
+extern void blendscan(IndexScanDesc scan);
+extern IndexBuildResult *blbuild(Relation heap, Relation index,
+               struct IndexInfo *indexInfo);
+extern void blbuildempty(Relation index);
+extern IndexBulkDeleteResult *blbulkdelete(IndexVacuumInfo *info,
+                        IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback,
+                        void *callback_state);
+extern IndexBulkDeleteResult *blvacuumcleanup(IndexVacuumInfo *info,
+                               IndexBulkDeleteResult *stats);
+extern bytea *bloptions(Datum reloptions, bool validate);
+extern void blcostestimate(PlannerInfo *root, IndexPath *path,
+                          double loop_count, Cost *indexStartupCost,
+                          Cost *indexTotalCost, Selectivity *indexSelectivity,
+                          double *indexCorrelation);
+
+#endif
diff --git a/contrib/bloom/blscan.c b/contrib/bloom/blscan.c
new file mode 100644 (file)
index 0000000..d156e88
--- /dev/null
@@ -0,0 +1,175 @@
+/*-------------------------------------------------------------------------
+ *
+ * blscan.c
+ *             Bloom index scan functions.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       contrib/bloom/blscan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "pgstat.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+#include "bloom.h"
+
+/*
+ * Begin scan of bloom index.
+ *
+ * Only the generic scan descriptor is created here; the bloom-specific
+ * opaque state is set up on the first blrescan() call.
+ */
+IndexScanDesc
+blbeginscan(Relation r, int nkeys, int norderbys)
+{
+       return RelationGetIndexScan(r, nkeys, norderbys);
+}
+
+/*
+ * Rescan a bloom index.
+ *
+ * On the first call the opaque scan state is allocated and its BloomState
+ * initialized; on later calls any previously computed scan signature is
+ * freed.  Either way the signature is left NULL, and the supplied scan keys
+ * (if any) are copied into scan->keyData.  orderbys/norderbys are not used.
+ */
+void
+blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+                ScanKey orderbys, int norderbys)
+{
+       BloomScanOpaque opaque = (BloomScanOpaque) scan->opaque;
+
+       if (opaque != NULL)
+       {
+               /* Repeated scan: throw away the old signature */
+               if (opaque->sign)
+                       pfree(opaque->sign);
+       }
+       else
+       {
+               /* First call for this scan: create the opaque state */
+               opaque = (BloomScanOpaque) palloc(sizeof(BloomScanOpaqueData));
+               initBloomState(&opaque->state, scan->indexRelation);
+               scan->opaque = opaque;
+       }
+       opaque->sign = NULL;
+
+       if (scankey && scan->numberOfKeys > 0)
+               memmove(scan->keyData, scankey,
+                               scan->numberOfKeys * sizeof(ScanKeyData));
+}
+
+/*
+ * End scan of bloom index.
+ *
+ * Frees the scan signature, if one was computed.  The opaque state is
+ * allocated lazily by blrescan(), so it can still be NULL here when the
+ * scan is ended without ever having been (re)scanned.
+ */
+void
+blendscan(IndexScanDesc scan)
+{
+       BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+
+       /* Nothing to clean up if blrescan() never ran */
+       if (so == NULL)
+               return;
+
+       if (so->sign)
+               pfree(so->sign);
+       so->sign = NULL;
+}
+
+/*
+ * Insert all matching tuples into a bitmap.
+ *
+ * The scan signature is computed from the scan keys on first use and cached
+ * in the opaque state.  Every page from BLOOM_HEAD_BLKNO up to the current
+ * end of the index is then read, and each tuple whose signature contains
+ * all bits of the scan signature is added to the bitmap with the recheck
+ * flag set.  Returns the number of TIDs added.
+ *
+ * A NULL scan key can match nothing, so 0 is returned immediately.  With no
+ * scan keys at all the signature stays all-zero and therefore matches every
+ * tuple; it must still be allocated, because the comparison loop below
+ * dereferences it unconditionally.
+ */
+int64
+blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
+{
+       int64           ntids = 0;
+       BlockNumber blkno = BLOOM_HEAD_BLKNO,
+                               npages;
+       int                     i;
+       BufferAccessStrategy bas;
+       BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+
+       if (so->sign == NULL)
+       {
+               /* New search: have to calculate search signature */
+               ScanKey         skey = scan->keyData;
+
+               so->sign = palloc0(sizeof(SignType) * so->state.opts->bloomLength);
+
+               for (i = 0; i < scan->numberOfKeys; i++)
+               {
+                       /*
+                        * Assume bloom-indexable operators to be strict, so nothing could
+                        * be found for NULL key.
+                        */
+                       if (skey->sk_flags & SK_ISNULL)
+                       {
+                               pfree(so->sign);
+                               so->sign = NULL;
+                               return 0;
+                       }
+
+                       /* Add next value to the signature */
+                       signValue(&so->state, so->sign, skey->sk_argument,
+                                         skey->sk_attno - 1);
+
+                       skey++;
+               }
+       }
+
+       /*
+        * We're going to read the whole index. This is why we use appropriate
+        * buffer access strategy.
+        */
+       bas = GetAccessStrategy(BAS_BULKREAD);
+       npages = RelationGetNumberOfBlocks(scan->indexRelation);
+
+       for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+       {
+               Buffer          buffer;
+               Page            page;
+
+               buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
+                                                                       blkno, RBM_NORMAL, bas);
+
+               LockBuffer(buffer, BUFFER_LOCK_SHARE);
+               page = BufferGetPage(buffer);
+
+               if (!BloomPageIsDeleted(page))
+               {
+                       OffsetNumber offset,
+                                               maxOffset = BloomPageGetMaxOffset(page);
+
+                       for (offset = 1; offset <= maxOffset; offset++)
+                       {
+                               BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset);
+                               bool            res = true;
+
+                               /* Check index signature with scan signature */
+                               for (i = 0; res && i < so->state.opts->bloomLength; i++)
+                               {
+                                       if ((itup->sign[i] & so->sign[i]) != so->sign[i])
+                                               res = false;
+                               }
+
+                               /* Add matching tuples to bitmap */
+                               if (res)
+                               {
+                                       tbm_add_tuples(tbm, &itup->heapPtr, 1, true);
+                                       ntids++;
+                               }
+                       }
+               }
+
+               UnlockReleaseBuffer(buffer);
+               CHECK_FOR_INTERRUPTS();
+       }
+       FreeAccessStrategy(bas);
+
+       return ntids;
+}
diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
new file mode 100644 (file)
index 0000000..b86f51f
--- /dev/null
@@ -0,0 +1,463 @@
+/*-------------------------------------------------------------------------
+ *
+ * blutils.c
+ *             Bloom index utilities.
+ *
+ * Portions Copyright (c) 2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1990-1993, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       contrib/bloom/blutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amapi.h"
+#include "access/generic_xlog.h"
+#include "catalog/index.h"
+#include "storage/lmgr.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "utils/memutils.h"
+#include "access/reloptions.h"
+#include "storage/freespace.h"
+#include "storage/indexfsm.h"
+
+#include "bloom.h"
+
+/* Signature dealing macros */
+#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
+#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
+#define CLRBIT(x,i)   GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
+#define SETBIT(x,i)   GETWORD(x,i) |=  ( 0x01 << ( (i) % BITSIGNTYPE ) )
+#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
+
+PG_FUNCTION_INFO_V1(blhandler);
+
+/* Kind of relation options for bloom index */
+static relopt_kind bl_relopt_kind;
+
+static int32 myRand();
+static void mySrand(uint32 seed);
+
+/*
+ * Module initialization function: registers the relation option kind used
+ * by bloom indexes together with its integer options, "length" and one
+ * "colN" option for each of the INDEX_MAX_KEYS possible columns.
+ */
+void
+_PG_init(void)
+{
+       char            optname[16];
+       int                     colno;
+
+       bl_relopt_kind = add_reloption_kind();
+
+       add_int_reloption(bl_relopt_kind, "length",
+                                         "Length of signature in uint16 type", 5, 1, 256);
+
+       /* Option names are 1-based: col1, col2, ... */
+       for (colno = 1; colno <= INDEX_MAX_KEYS; colno++)
+       {
+               snprintf(optname, sizeof(optname), "col%d", colno);
+               add_int_reloption(bl_relopt_kind, optname,
+                                         "Number of bits for corresponding column", 2, 1, 2048);
+       }
+}
+
+/*
+ * Bloom handler function: builds an IndexAmRoutine node, fills in the access
+ * method's parameters and callbacks, and returns it as a pointer Datum.
+ */
+Datum
+blhandler(PG_FUNCTION_ARGS)
+{
+       IndexAmRoutine *routine = makeNode(IndexAmRoutine);
+
+       /* Number of strategies and support procedures */
+       routine->amstrategies = 1;
+       routine->amsupport = 1;
+
+       /* Capabilities that are switched on */
+       routine->amcanmulticol = true;
+       routine->amoptionalkey = true;
+
+       /* Capabilities that are switched off */
+       routine->amcanorder = false;
+       routine->amcanorderbyop = false;
+       routine->amcanbackward = false;
+       routine->amcanunique = false;
+       routine->amsearcharray = false;
+       routine->amsearchnulls = false;
+       routine->amstorage = false;
+       routine->amclusterable = false;
+       routine->ampredlocks = false;
+       routine->amkeytype = 0;
+
+       /* Build and modification callbacks */
+       routine->ambuild = blbuild;
+       routine->ambuildempty = blbuildempty;
+       routine->aminsert = blinsert;
+       routine->ambulkdelete = blbulkdelete;
+       routine->amvacuumcleanup = blvacuumcleanup;
+
+       /* Scan callbacks; only bitmap scans are provided */
+       routine->ambeginscan = blbeginscan;
+       routine->amrescan = blrescan;
+       routine->amgettuple = NULL;
+       routine->amgetbitmap = blgetbitmap;
+       routine->amendscan = blendscan;
+       routine->ammarkpos = NULL;
+       routine->amrestrpos = NULL;
+       routine->amcanreturn = NULL;
+
+       /* Planner, options and validation callbacks */
+       routine->amcostestimate = blcostestimate;
+       routine->amoptions = bloptions;
+       routine->amvalidate = blvalidate;
+
+       PG_RETURN_POINTER(routine);
+}
+
+/*
+ * Fill BloomState structure for particular index.
+ *
+ * Copies the hash support function info of every index column into the
+ * state and attaches the index options.  The options are cached in
+ * index->rd_amcache; when the cache is empty they are read from the metapage
+ * first.  Raises an error if the metapage flag or magic number is wrong.
+ */
+void
+initBloomState(BloomState *state, Relation index)
+{
+       int                     attno;
+
+       state->nColumns = index->rd_att->natts;
+
+       /* Initialize hash function for each attribute */
+       for (attno = 0; attno < index->rd_att->natts; attno++)
+               fmgr_info_copy(&(state->hashFn[attno]),
+                                          index_getprocinfo(index, attno + 1, BLOOM_HASH_PROC),
+                                          CurrentMemoryContext);
+
+       /* Nothing cached yet: load the options from the metapage */
+       if (index->rd_amcache == NULL)
+       {
+               BloomOptions *cached;
+               BloomMetaPageData *metadata;
+               Buffer          metabuf;
+               Page            metapage;
+
+               /* The copy lives in the index's own memory context */
+               cached = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
+
+               metabuf = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+               LockBuffer(metabuf, BUFFER_LOCK_SHARE);
+               metapage = BufferGetPage(metabuf);
+
+               /* Both the page flag and the magic number must match */
+               if (!BloomPageIsMeta(metapage))
+                       elog(ERROR, "Relation is not a bloom index");
+
+               metadata = BloomPageGetMeta(metapage);
+               if (metadata->magickNumber != BLOOM_MAGICK_NUMBER)
+                       elog(ERROR, "Relation is not a bloom index");
+
+               *cached = metadata->opts;
+
+               UnlockReleaseBuffer(metabuf);
+
+               index->rd_amcache = (void *) cached;
+       }
+
+       state->opts = (BloomOptions *) index->rd_amcache;
+
+       /* A tuple is a fixed header followed by bloomLength signature words */
+       state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
+               sizeof(SignType) * state->opts->bloomLength;
+}
+
+/*
+ * Random generator copied from FreeBSD.  We use our own random generator
+ * here for two reasons:
+ *
+ * 1) The random numbers are used for on-disk storage.  Relying on the
+ *       PostgreSQL number generator would prevent it from ever being changed.
+ * 2) Changing the seed of the PostgreSQL random generator would be an
+ *       undesirable side effect.
+ */
+/* Current state of the private generator; set by mySrand(). */
+static int32 next;
+
+/*
+ * Advance the private generator by one step and return the new value
+ * minus one.
+ */
+static int32
+myRand()
+{
+       /*
+        * Compute x = (7^5 * x) mod (2^31 - 1)
+        * without overflowing 31 bits:
+        *      (2^31 - 1) = 127773 * (7^5) + 2836
+        * From "Random number generators: good ones are hard to find",
+        * Park and Miller, Communications of the ACM, vol. 31, no. 10,
+        * October 1988, p. 1195.
+        */
+       int32           quot,
+                               rem,
+                               val;
+
+       /* "next" must be in [1, 0x7ffffffe] range at this point. */
+       quot = next / 127773;
+       rem = next % 127773;
+       val = 16807 * rem - 2836 * quot;
+       if (val < 0)
+               val += 0x7fffffff;
+
+       /* Remember the state for the following call */
+       next = val;
+
+       /* Transform to [0, 0x7ffffffd] range. */
+       return val - 1;
+}
+
+/*
+ * Seed the private generator used by myRand().
+ *
+ * Declared static here to match the forward declaration at the top of the
+ * file.
+ *
+ * NOTE(review): "next" is a signed int32, so a seed of 2^31 or more may
+ * become negative on assignment and then leave "next" outside the range
+ * named below.  This is left as is, because changing it would change which
+ * bits get set in signatures; confirm before touching.
+ */
+static void
+mySrand(uint32 seed)
+{
+       next = seed;
+       /* Transform to [1, 0x7ffffffe] range. */
+       next = (next % 0x7ffffffe) + 1;
+}
+
+/*
+ * Add bits of given value to the signature.
+ *
+ * state: bloom state holding the hash functions and index options
+ * sign:  signature to update in place
+ * value: column value to hash
+ * attno: column number, used as index into hashFn[] and bitSize[]
+ */
+void
+signValue(BloomState *state, SignType *sign, Datum value, int attno)
+{
+       uint32          hash;
+       int                     bitno,
+                               k;
+
+       /*
+        * Seed the generator with the column number first, so that equal values
+        * stored in different columns do not end up in the same bits.
+        */
+       mySrand(attno);
+
+       /*
+        * Re-seed with the value's hash mixed with the first number of the
+        * per-column sequence; the numbers drawn below select the bits.
+        */
+       hash = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
+       mySrand(hash ^ myRand());
+
+       for (k = 0; k < state->opts->bitSize[attno]; k++)
+       {
+               /*
+                * Compute the bit number into a variable first: SETBIT expands its
+                * argument more than once, so myRand() must not be passed directly.
+                */
+               bitno = myRand() % (state->opts->bloomLength * BITSIGNTYPE);
+               SETBIT(sign, bitno);
+       }
+}
+
+/*
+ * Make bloom tuple from values.
+ *
+ * Returns a zero-initialized, palloc'd tuple of state->sizeOfBloomTuple
+ * bytes that carries the heap pointer and the signature bits of every
+ * non-null column value.
+ */
+BloomTuple *
+BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
+{
+       BloomTuple *tuple = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
+       int                     col;
+
+       tuple->heapPtr = *iptr;
+
+       /* Null columns add nothing to the signature */
+       for (col = 0; col < state->nColumns; col++)
+       {
+               if (!isnull[col])
+                       signValue(state, tuple->sign, values[col], col);
+       }
+
+       return tuple;
+}
+
+/*
+ * Add new bloom tuple to the page.  Returns true if new tuple was successfully
+ * added to the page.  Returns false if it doesn't fit the page.
+ */
+bool
+BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
+{
+       BloomPageOpaque special;
+       BloomTuple *slot;
+       Pointer         newLower;
+
+       /* Give up right away when there is no room for one more tuple */
+       if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
+               return false;
+
+       /* The new tuple goes into the slot following the current last one */
+       special = BloomPageGetOpaque(page);
+       slot = BloomPageGetTuple(state, page, special->maxoff + 1);
+       memcpy((Pointer) slot, (Pointer) tuple, state->sizeOfBloomTuple);
+
+       /* Count the tuple, then move pd_lower to the start of the next slot */
+       special->maxoff++;
+       newLower = (Pointer) BloomPageGetTuple(state, page, special->maxoff + 1);
+       ((PageHeader) page)->pd_lower = newLower - page;
+
+       return true;
+}
+
+/*
+ * Allocate a new page (either by recycling, or by extending the index file).
+ * The returned buffer is already pinned and exclusive-locked.
+ * Caller is responsible for initializing the page by calling BloomInitBuffer.
+ */
+Buffer
+BloomNewBuffer(Relation index)
+{
+       BlockNumber freeBlkno;
+       Buffer          newbuf;
+       bool            extensionLock;
+
+       /* Keep asking the FSM for pages until it has none left to offer */
+       while ((freeBlkno = GetFreeIndexPage(index)) != InvalidBlockNumber)
+       {
+               newbuf = ReadBuffer(index, freeBlkno);
+
+               /*
+                * Someone else may already have recycled this page and may be
+                * holding its lock, so only try to take the lock, never wait.
+                */
+               if (ConditionalLockBuffer(newbuf))
+               {
+                       Page            candidate = BufferGetPage(newbuf);
+
+                       /* Usable if never initialized or marked as deleted */
+                       if (PageIsNew(candidate) || BloomPageIsDeleted(candidate))
+                               return newbuf;
+
+                       LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
+               }
+
+               /* Not usable: drop the pin and look at the next candidate */
+               ReleaseBuffer(newbuf);
+       }
+
+       /* No recyclable page: extend the file, locking unless relation is local */
+       extensionLock = !RELATION_IS_LOCAL(index);
+       if (extensionLock)
+               LockRelationForExtension(index, ExclusiveLock);
+
+       newbuf = ReadBuffer(index, P_NEW);
+       LockBuffer(newbuf, BUFFER_LOCK_EXCLUSIVE);
+
+       if (extensionLock)
+               UnlockRelationForExtension(index, ExclusiveLock);
+
+       return newbuf;
+}
+
+/*
+ * Initialize bloom page: set up a BLCKSZ-sized page whose special space
+ * holds a BloomPageOpaqueData, zero that area and store the given flags.
+ */
+void
+BloomInitPage(Page page, uint16 flags)
+{
+       BloomPageOpaque special;
+
+       PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
+
+       /* Start from an all-zero opaque area, then record the page flags */
+       special = BloomPageGetOpaque(page);
+       memset(special, 0, sizeof(BloomPageOpaqueData));
+       special->flags = flags;
+}
+
+/*
+ * Adjust options of bloom index: replace unset or meaningless values with
+ * defaults.  The structure is modified in place.
+ */
+static void
+adjustBloomOptions(BloomOptions *opts)
+{
+       int                     i;
+
+       /* Default length of bloom filter is 5 of 16-bit integers */
+       if (opts->bloomLength <= 0)
+               opts->bloomLength = 5;
+
+       /* Check number of bits per column against the signature length */
+       for (i = 0; i < INDEX_MAX_KEYS; i++)
+       {
+               /*
+                * Zero and negative number of bits is meaningless.  Also setting
+                * more bits than the signature has seems useless.  Replace both
+                * cases with the 2 bits default.
+                *
+                * The signature holds bloomLength words of BITSIGNTYPE bits each,
+                * so the limit is expressed in bits, not in bytes.
+                */
+               if (opts->bitSize[i] <= 0
+                       || opts->bitSize[i] >= opts->bloomLength * BITSIGNTYPE)
+                       opts->bitSize[i] = 2;
+       }
+}
+
+/*
+ * Initialize metapage for bloom index.
+ *
+ * Obtains a buffer for the metapage, makes sure the index has an adjusted
+ * set of options, and writes the magic number and the options into the
+ * page through the generic WAL interface.
+ */
+void
+BloomInitMetapage(Relation index)
+{
+       Page            metaPage;
+       Buffer          metaBuffer;
+       BloomMetaPageData *metadata;
+       GenericXLogState *state;
+
+       /*
+        * Make a new buffer.  Since it is the first buffer of the index, it
+        * should be associated with block number 0 (BLOOM_METAPAGE_BLKNO).
+        */
+       metaBuffer = BloomNewBuffer(index);
+       Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
+
+       /*
+        * Initialize bloom index options: an all-zero structure is created when
+        * the index has none, and adjustBloomOptions() then fills in defaults.
+        *
+        * NOTE(review): the structure is palloc'd in whatever memory context is
+        * current here -- confirm its lifetime matches index->rd_options.
+        */
+       if (!index->rd_options)
+               index->rd_options = palloc0(sizeof(BloomOptions));
+       adjustBloomOptions((BloomOptions *) index->rd_options);
+
+       /* Initialize contents of meta page */
+       state = GenericXLogStart(index);
+       metaPage = GenericXLogRegister(state, metaBuffer, true);
+
+       BloomInitPage(metaPage, BLOOM_META);
+       metadata = BloomPageGetMeta(metaPage);
+       memset(metadata, 0, sizeof(BloomMetaPageData));
+       metadata->magickNumber = BLOOM_MAGICK_NUMBER;
+       metadata->opts = *((BloomOptions *) index->rd_options);
+       /* Advance pd_lower by the size of the metadata just written */
+       ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
+
+       GenericXLogFinish(state);
+       UnlockReleaseBuffer(metaBuffer);
+}
+
+/*
+ * Initialize options for bloom index.
+ *
+ * Parses the given reloptions against the bloom option kind, stores the
+ * results in a newly allocated BloomOptions, adjusts meaningless values and
+ * returns the structure as a bytea pointer.
+ */
+bytea *
+bloptions(Datum reloptions, bool validate)
+{
+       relopt_parse_elt parseTab[INDEX_MAX_KEYS + 1];
+       relopt_value *parsed;
+       BloomOptions *result;
+       int                     nparsed;
+       int                     col;
+       char            name[16];
+
+       /* Entry 0 describes the signature length option */
+       parseTab[0].optname = "length";
+       parseTab[0].opttype = RELOPT_TYPE_INT;
+       parseTab[0].offset = offsetof(BloomOptions, bloomLength);
+
+       /* Entries 1..INDEX_MAX_KEYS describe col1, col2, ... */
+       for (col = 1; col <= INDEX_MAX_KEYS; col++)
+       {
+               snprintf(name, sizeof(name), "col%d", col);
+               parseTab[col].optname = pstrdup(name);
+               parseTab[col].opttype = RELOPT_TYPE_INT;
+               parseTab[col].offset = offsetof(BloomOptions, bitSize[col - 1]);
+       }
+
+       parsed = parseRelOptions(reloptions, validate, bl_relopt_kind, &nparsed);
+       result = allocateReloptStruct(sizeof(BloomOptions), parsed, nparsed);
+       fillRelOptions((void *) result, sizeof(BloomOptions), parsed, nparsed,
+                                  validate, parseTab, INDEX_MAX_KEYS + 1);
+
+       adjustBloomOptions(result);
+
+       return (bytea *) result;
+}
diff --git a/contrib/bloom/blvacuum.c b/contrib/bloom/blvacuum.c
new file mode 100644 (file)
index 0000000..fb8d9b8
--- /dev/null
@@ -0,0 +1,212 @@
+/*-------------------------------------------------------------------------
+ *
+ * blvacuum.c
+ *             Bloom VACUUM functions.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       contrib/bloom/blvacuum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "catalog/storage.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "postmaster/autovacuum.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "storage/lmgr.h"
+
+#include "bloom.h"
+
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Every page from BLOOM_HEAD_BLKNO onwards is visited under an exclusive
+ * buffer lock.  Surviving tuples are moved towards the start of the page and
+ * a page left without tuples is marked as deleted.  Up to BloomMetaBlockN
+ * pages that still have room for a tuple are remembered and, if there is at
+ * least one, written to the metapage afterwards.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+IndexBulkDeleteResult *
+blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+                        IndexBulkDeleteCallback callback, void *callback_state)
+{
+       Relation        index = info->index;
+       BlockNumber blkno,
+                               npages;
+       FreeBlockNumberArray notFullPage;
+       int                     countPage = 0;
+       BloomState      state;
+       Buffer          buffer;
+       Page            page;
+       GenericXLogState *gxlogState;
+
+       if (stats == NULL)
+               stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+       initBloomState(&state, index);
+
+       /*
+        * Iterate over the pages. We don't care about concurrently added pages,
+        * they can't contain tuples to delete.
+        */
+       npages = RelationGetNumberOfBlocks(index);
+       for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+       {
+               BloomTuple *itup,
+                                  *itupPtr,
+                                  *itupEnd;
+
+               buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
+                                                                       RBM_NORMAL, info->strategy);
+
+               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+               gxlogState = GenericXLogStart(index);
+               page = GenericXLogRegister(gxlogState, buffer, false);
+
+               if (BloomPageIsDeleted(page))
+               {
+                       /*
+                        * Nothing to do on this page, but the generic WAL state started
+                        * above must still be ended.  Abort it before letting go of the
+                        * buffer, in the same order as the "nothing changed" path below.
+                        */
+                       GenericXLogAbort(gxlogState);
+                       UnlockReleaseBuffer(buffer);
+                       CHECK_FOR_INTERRUPTS();
+                       continue;
+               }
+
+               /*
+                * Iterate over the tuples: itup is the read position, itupPtr the
+                * write position for surviving tuples, itupEnd the end of the
+                * tuples present when we started.
+                */
+               itup = BloomPageGetTuple(&state, page, 1);
+               itupPtr = BloomPageGetTuple(&state, page, 1);
+               itupEnd = BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1);
+               while (itup < itupEnd)
+               {
+                       /* Do we have to delete this tuple? */
+                       if (callback(&itup->heapPtr, callback_state))
+                       {
+                               stats->tuples_removed += 1;
+                               BloomPageGetOpaque(page)->maxoff--;
+                       }
+                       else
+                       {
+                               if (itupPtr != itup)
+                               {
+                                       /*
+                                        * If we already deleted something before, we have to
+                                        * move this tuple backward.
+                                        */
+                                       memmove((Pointer) itupPtr, (Pointer) itup,
+                                                       state.sizeOfBloomTuple);
+                               }
+                               stats->num_index_tuples++;
+                               itupPtr = BloomPageGetNextTuple(&state, itupPtr);
+                       }
+
+                       itup = BloomPageGetNextTuple(&state, itup);
+               }
+
+               Assert(itupPtr == BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1));
+
+               /*
+                * Remember pages that still have room for a tuple.
+                *
+                * NOTE(review): this runs before an emptied page is marked as
+                * deleted below, so such a page can apparently end up in the list
+                * as well -- confirm whether that is intended.
+                */
+               if (!BloomPageIsDeleted(page) &&
+                       BloomPageGetFreeSpace(&state, page) > state.sizeOfBloomTuple &&
+                       countPage < BloomMetaBlockN)
+                       notFullPage[countPage++] = blkno;
+
+               /* Did we delete something? */
+               if (itupPtr != itup)
+               {
+                       /* Is it empty page now? */
+                       if (itupPtr == BloomPageGetData(page))
+                               BloomPageSetDeleted(page);
+                       /* Adjust pd_lower */
+                       ((PageHeader) page)->pd_lower = (Pointer) itupPtr - page;
+                       /* Finish WAL-logging */
+                       GenericXLogFinish(gxlogState);
+               }
+               else
+               {
+                       /* Didn't change anything: abort WAL-logging */
+                       GenericXLogAbort(gxlogState);
+               }
+               UnlockReleaseBuffer(buffer);
+               CHECK_FOR_INTERRUPTS();
+       }
+
+       /* Store the collected list of not-full pages in the metapage */
+       if (countPage > 0)
+       {
+               BloomMetaPageData *metaData;
+
+               buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+               LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+               gxlogState = GenericXLogStart(index);
+               page = GenericXLogRegister(gxlogState, buffer, false);
+
+               metaData = BloomPageGetMeta(page);
+               memcpy(metaData->notFullPage, notFullPage, sizeof(FreeBlockNumberArray));
+               metaData->nStart = 0;
+               metaData->nEnd = countPage;
+
+               GenericXLogFinish(gxlogState);
+               UnlockReleaseBuffer(buffer);
+       }
+
+       return stats;
+}
+
+/*
+ * Post-VACUUM cleanup.
+ *
+ * Does nothing for an ANALYZE-only call.  Otherwise every page from
+ * BLOOM_HEAD_BLKNO onwards is read under a share lock: deleted pages are
+ * reported to the free space map and the tuples on all other pages are
+ * counted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+IndexBulkDeleteResult *
+blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
+{
+       Relation        index = info->index;
+       BlockNumber npages,
+                               blkno;
+       BlockNumber totFreePages;
+
+       if (info->analyze_only)
+               return stats;
+
+       if (stats == NULL)
+               stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+       /*
+        * Iterate over the pages: insert deleted pages into FSM and collect
+        * statistics.
+        *
+        * The tuple count is recomputed from scratch here.  blbulkdelete() has
+        * already counted the surviving tuples into the same struct, so adding
+        * on top of that would count them twice.
+        */
+       npages = RelationGetNumberOfBlocks(index);
+       totFreePages = 0;
+       stats->num_index_tuples = 0;
+       for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+       {
+               Buffer          buffer;
+               Page            page;
+
+               vacuum_delay_point();
+
+               buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
+                                                                       RBM_NORMAL, info->strategy);
+               LockBuffer(buffer, BUFFER_LOCK_SHARE);
+               page = (Page) BufferGetPage(buffer);
+
+               if (BloomPageIsDeleted(page))
+               {
+                       RecordFreeIndexPage(index, blkno);
+                       totFreePages++;
+               }
+               else
+               {
+                       /*
+                        * estimated_count is a flag in IndexBulkDeleteResult, not a
+                        * counter, and this count is taken from every page, so it is
+                        * not touched here.
+                        */
+                       stats->num_index_tuples += BloomPageGetMaxOffset(page);
+               }
+
+               UnlockReleaseBuffer(buffer);
+       }
+
+       IndexFreeSpaceMapVacuum(info->index);
+       stats->pages_free = totFreePages;
+       stats->num_pages = RelationGetNumberOfBlocks(index);
+
+       return stats;
+}
diff --git a/contrib/bloom/blvalidate.c b/contrib/bloom/blvalidate.c
new file mode 100644 (file)
index 0000000..12e7c7d
--- /dev/null
@@ -0,0 +1,220 @@
+/*-------------------------------------------------------------------------
+ *
+ * blvalidate.c
+ *       Opclass validator for bloom.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       contrib/bloom/blvalidate.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amvalidate.h"
+#include "access/htup_details.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+#include "bloom.h"
+
+/*
+ * Validator for a bloom opclass.
+ *
+ * Checks the support functions and operators of the opclass's operator
+ * family and that the opclass itself has all BLOOM_NPROC support functions.
+ * Each problem is reported at INFO level; the function returns false if any
+ * problem was found and true otherwise.
+ */
+bool
+blvalidate(Oid opclassoid)
+{
+       bool            result = true;
+       HeapTuple       classtup;
+       Form_pg_opclass classform;
+       Oid                     opfamilyoid;
+       Oid                     opcintype;
+       Oid                     opckeytype;
+       char       *opclassname;
+       HeapTuple       familytup;
+       Form_pg_opfamily familyform;
+       char       *opfamilyname;
+       CatCList   *proclist,
+                          *oprlist;
+       List       *grouplist;
+       OpFamilyOpFuncGroup *opclassgroup;
+       int                     i;
+       ListCell   *lc;
+
+       /* Fetch opclass information */
+       classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
+       if (!HeapTupleIsValid(classtup))
+               elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
+       classform = (Form_pg_opclass) GETSTRUCT(classtup);
+
+       opfamilyoid = classform->opcfamily;
+       opcintype = classform->opcintype;
+       opckeytype = classform->opckeytype;
+       /* Without an explicit key type, the input type is used */
+       if (!OidIsValid(opckeytype))
+               opckeytype = opcintype;
+       opclassname = NameStr(classform->opcname);
+
+       /* Fetch opfamily information */
+       familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid));
+       if (!HeapTupleIsValid(familytup))
+               elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid);
+       familyform = (Form_pg_opfamily) GETSTRUCT(familytup);
+
+       opfamilyname = NameStr(familyform->opfname);
+
+       /* Fetch all operators and support functions of the opfamily */
+       oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
+       proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
+
+       /* Check individual support functions */
+       for (i = 0; i < proclist->n_members; i++)
+       {
+               HeapTuple       proctup = &proclist->members[i]->tuple;
+               Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
+               bool            ok;
+
+               /*
+                * All bloom support functions should be registered with matching
+                * left/right types
+                */
+               if (procform->amproclefttype != procform->amprocrighttype)
+               {
+                       ereport(INFO,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("bloom opfamily %s contains support procedure %s with cross-type registration",
+                                                       opfamilyname,
+                                                       format_procedure(procform->amproc))));
+                       result = false;
+               }
+
+               /*
+                * We can't check signatures except within the specific opclass, since
+                * we need to know the associated opckeytype in many cases.
+                */
+               if (procform->amproclefttype != opcintype)
+                       continue;
+
+               /* Check procedure numbers and function signatures */
+               switch (procform->amprocnum)
+               {
+                       case BLOOM_HASH_PROC:
+                               ok = check_amproc_signature(procform->amproc, INT4OID, false,
+                                                                                       1, 1, opckeytype);
+                               break;
+                       default:
+                               ereport(INFO,
+                                               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                                errmsg("bloom opfamily %s contains function %s with invalid support number %d",
+                                                               opfamilyname,
+                                                               format_procedure(procform->amproc),
+                                                               procform->amprocnum)));
+                               result = false;
+                               continue;               /* don't want additional message */
+               }
+
+               if (!ok)
+               {
+                       /* The access method named in the message must be bloom */
+                       ereport(INFO,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("bloom opfamily %s contains function %s with wrong signature for support number %d",
+                                                       opfamilyname,
+                                                       format_procedure(procform->amproc),
+                                                       procform->amprocnum)));
+                       result = false;
+               }
+       }
+
+       /* Check individual operators */
+       for (i = 0; i < oprlist->n_members; i++)
+       {
+               HeapTuple       oprtup = &oprlist->members[i]->tuple;
+               Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
+
+               /* Check it's allowed strategy for bloom */
+               if (oprform->amopstrategy < 1 ||
+                       oprform->amopstrategy > BLOOM_NSTRATEGIES)
+               {
+                       ereport(INFO,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("bloom opfamily %s contains operator %s with invalid strategy number %d",
+                                                       opfamilyname,
+                                                       format_operator(oprform->amopopr),
+                                                       oprform->amopstrategy)));
+                       result = false;
+               }
+
+               /* bloom doesn't support ORDER BY operators */
+               if (oprform->amoppurpose != AMOP_SEARCH ||
+                       OidIsValid(oprform->amopsortfamily))
+               {
+                       ereport(INFO,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("bloom opfamily %s contains invalid ORDER BY specification for operator %s",
+                                                       opfamilyname,
+                                                       format_operator(oprform->amopopr))));
+                       result = false;
+               }
+
+               /* Check operator signature --- same for all bloom strategies */
+               if (!check_amop_signature(oprform->amopopr, BOOLOID,
+                                                                 oprform->amoplefttype,
+                                                                 oprform->amoprighttype))
+               {
+                       ereport(INFO,
+                                       (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                        errmsg("bloom opfamily %s contains operator %s with wrong signature",
+                                                       opfamilyname,
+                                                       format_operator(oprform->amopopr))));
+                       result = false;
+               }
+       }
+
+       /* Now check for inconsistent groups of operators/functions */
+       grouplist = identify_opfamily_groups(oprlist, proclist);
+       opclassgroup = NULL;
+       foreach(lc, grouplist)
+       {
+               OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
+
+               /* Remember the group exactly matching the test opclass */
+               if (thisgroup->lefttype == opcintype &&
+                       thisgroup->righttype == opcintype)
+                       opclassgroup = thisgroup;
+
+               /*
+                * There is not a lot we can do to check the operator sets, since each
+                * bloom opclass is more or less a law unto itself, and some contain
+                * only operators that are binary-compatible with the opclass datatype
+                * (meaning that empty operator sets can be OK).  That case also means
+                * that we shouldn't insist on nonempty function sets except for the
+                * opclass's own group.
+                */
+       }
+
+       /* Check that the originally-named opclass is complete */
+       for (i = 1; i <= BLOOM_NPROC; i++)
+       {
+               if (opclassgroup &&
+                       (opclassgroup->functionset & (((uint64) 1) << i)) != 0)
+                       continue;                       /* got it */
+               ereport(INFO,
+                               (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                errmsg("bloom opclass %s is missing support function %d",
+                                               opclassname, i)));
+               result = false;
+       }
+
+       /* Release the catalog cache references taken above */
+       ReleaseCatCacheList(proclist);
+       ReleaseCatCacheList(oprlist);
+       ReleaseSysCache(familytup);
+       ReleaseSysCache(classtup);
+
+       return result;
+}
diff --git a/contrib/bloom/expected/bloom.out b/contrib/bloom/expected/bloom.out
new file mode 100644 (file)
index 0000000..5e8269f
--- /dev/null
@@ -0,0 +1,122 @@
+CREATE EXTENSION bloom;
+CREATE TABLE tst (
+       i       int4,
+       t       text
+);
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
+SET enable_seqscan=on;
+SET enable_bitmapscan=off;
+SET enable_indexscan=off;
+SELECT count(*) FROM tst WHERE i = 7;
+ count 
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count 
+-------
+  6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count 
+-------
+   588
+(1 row)
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
+                QUERY PLAN                 
+-------------------------------------------
+ Aggregate
+   ->  Bitmap Heap Scan on tst
+         Recheck Cond: (i = 7)
+         ->  Bitmap Index Scan on bloomidx
+               Index Cond: (i = 7)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
+                QUERY PLAN                 
+-------------------------------------------
+ Aggregate
+   ->  Bitmap Heap Scan on tst
+         Recheck Cond: (t = '5'::text)
+         ->  Bitmap Index Scan on bloomidx
+               Index Cond: (t = '5'::text)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Aggregate
+   ->  Bitmap Heap Scan on tst
+         Recheck Cond: ((i = 7) AND (t = '5'::text))
+         ->  Bitmap Index Scan on bloomidx
+               Index Cond: ((i = 7) AND (t = '5'::text))
+(5 rows)
+
+SELECT count(*) FROM tst WHERE i = 7;
+ count 
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count 
+-------
+  6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count 
+-------
+   588
+(1 row)
+
+DELETE FROM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+VACUUM ANALYZE tst;
+SELECT count(*) FROM tst WHERE i = 7;
+ count 
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count 
+-------
+  6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count 
+-------
+   588
+(1 row)
+
+VACUUM FULL tst;
+SELECT count(*) FROM tst WHERE i = 7;
+ count 
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count 
+-------
+  6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count 
+-------
+   588
+(1 row)
+
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+RESET enable_indexscan;
diff --git a/contrib/bloom/sql/bloom.sql b/contrib/bloom/sql/bloom.sql
new file mode 100644 (file)
index 0000000..f9d0ad4
--- /dev/null
@@ -0,0 +1,47 @@
+CREATE EXTENSION bloom;
+
+CREATE TABLE tst (
+       i       int4,
+       t       text
+);
+
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
+
+SET enable_seqscan=on;
+SET enable_bitmapscan=off;
+SET enable_indexscan=off;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+DELETE FROM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+VACUUM ANALYZE tst;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+VACUUM FULL tst;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+RESET enable_indexscan;
diff --git a/contrib/bloom/t/001_wal.pl b/contrib/bloom/t/001_wal.pl
new file mode 100644 (file)
index 0000000..dbb6a90
--- /dev/null
@@ -0,0 +1,75 @@
+# Test generic xlog record work for bloom index replication.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 31;
+
+my $node_master;
+my $node_standby;
+
+# Run a fixed set of queries on both master and standby and check that their
+# output matches.
+#
+# Takes one argument: a label used as the prefix of the reported test name.
+# Emits exactly one Test::More result per call.  Dies if the standby does not
+# catch up or if any query fails on either node.
+sub test_index_replay
+{
+       my ($test_name) = @_;
+
+       # Wait until the standby has *replayed* everything the master has
+       # generated.  Polling write_location is not enough: WAL that is merely
+       # written on the standby is not yet visible to queries run there.
+       my $applname = $node_standby->name;
+       my $caughtup_query =
+               "SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$applname';";
+       $node_master->poll_query_until('postgres', $caughtup_query)
+         or die "Timed out while waiting for standby 1 to catch up";
+
+       # Force index usage so that the replayed bloom index is what gets tested.
+       my $queries = qq(SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+SELECT * FROM tst WHERE i = 0;
+SELECT * FROM tst WHERE i = 3;
+SELECT * FROM tst WHERE t = 'b';
+SELECT * FROM tst WHERE t = 'f';
+SELECT * FROM tst WHERE i = 3 AND t = 'c';
+SELECT * FROM tst WHERE i = 7 AND t = 'e';
+);
+
+       # Run test queries and compare their output.  safe_psql returns the
+       # query output (and dies on error), so the comparison below is made on
+       # the actual result rows.
+       my $master_result = $node_master->safe_psql("postgres", $queries);
+       my $standby_result = $node_standby->safe_psql("postgres", $queries);
+
+       is($master_result, $standby_result, "$test_name: query result matches");
+}
+
+# Initialize master node
+$node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_master->backup($backup_name);
+
+# Create streaming standby linking to master
+$node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_master, $backup_name,
+       has_streaming => 1);
+$node_standby->start;
+
+# Create some bloom index on master
+$node_master->psql("postgres", "CREATE EXTENSION bloom;");
+$node_master->psql("postgres", "CREATE TABLE tst (i int4, t text);");
+$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;");
+$node_master->psql("postgres", "CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);");
+
+# Test that queries give same result
+test_index_replay('initial');
+
+# Run 10 cycles of table modification. Run test queries after each modification.
+# Each cycle performs DELETE, VACUUM and INSERT, with one test_index_replay
+# check after each step (three checks per cycle).
+# Note: column i only ever holds i%10, i.e. 0..9, so the DELETE in the tenth
+# cycle (i = 10) matches no rows.
+for my $i (1..10)
+{
+       $node_master->psql("postgres", "DELETE FROM tst WHERE i = $i;");
+       test_index_replay("delete $i");
+       $node_master->psql("postgres", "VACUUM tst;");
+       test_index_replay("vacuum $i");
+       # Each cycle inserts the next batch of 10000 generate_series values.
+       my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000);
+       $node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series($start,$end) i;");
+       test_index_replay("insert $i");
+}
diff --git a/doc/src/sgml/bloom.sgml b/doc/src/sgml/bloom.sgml
new file mode 100644 (file)
index 0000000..c207e6d
--- /dev/null
@@ -0,0 +1,218 @@
+<!-- doc/src/sgml/bloom.sgml -->
+
+<sect1 id="bloom" xreflabel="bloom">
+ <title>bloom</title>
+
+ <indexterm zone="bloom">
+  <primary>bloom</primary>
+ </indexterm>
+
+ <para>
+  <literal>bloom</> is a contrib module which implements an index access
+  method.  It serves as an example of a custom access method and of generic
+  WAL record usage, but it is also useful in its own right.
+ </para>
+
+ <sect2>
+  <title>Introduction</title>
+
+  <para>
+   An implementation of a
+   <ulink url="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</ulink>
+   allows fast exclusion of non-candidate tuples.
+   Since a signature is a lossy representation of all indexed attributes,
+   search results must be rechecked using heap information.
+   The user can specify the signature length (in uint16 units, default is 5)
+   and the number of bits that can be set per attribute
+   (1 &lt; colN &lt; 2048).
+  </para>
+
+  <para>
+   This index is useful if a table has many attributes and queries can include
+   arbitrary combinations of them.  A traditional <literal>btree</> index is
+   faster than a bloom index, but it would require too many indexes to support
+   all possible queries, while only one bloom index is needed.  A bloom index
+   supports only equality comparison.  Since it is a signature file, not a
+   tree, it must always be read in full, but sequentially, so index search
+   performance is constant and doesn't depend on the query.
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Parameters</title>
+
+  <para>
+   <literal>bloom</> indexes accept the following parameters in the
+   <literal>WITH</> clause.
+  </para>
+
+   <variablelist>
+   <varlistentry>
+    <term><literal>length</></term>
+    <listitem>
+     <para>
+      Length of signature in uint16 type values
+     </para>
+    </listitem>
+   </varlistentry>
+   </variablelist>
+   <variablelist>
+   <varlistentry>
+    <term><literal>col1 &mdash; col16</></term>
+    <listitem>
+     <para>
+      Number of bits for corresponding column
+     </para>
+    </listitem>
+   </varlistentry>
+   </variablelist>
+ </sect2>
+
+ <sect2>
+  <title>Examples</title>
+
+  <para>
+   Example of index definition is given below.
+  </para>
+
+<programlisting>
+CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
+       WITH (length=5, col1=2, col2=2, col3=4);
+</programlisting>
+
+  <para>
+   Here, we create a bloom index with a signature length of 80 bits, with
+   attributes i1 and i2 mapped to 2 bits each and attribute i3 mapped to 4 bits.
+  </para>
+
+  <para>
+   Example of index definition and usage is given below.
+  </para>
+
+<programlisting>
+CREATE TABLE tbloom AS
+SELECT
+    random()::int as i1,
+    random()::int as i2,
+    random()::int as i3,
+    random()::int as i4,
+    random()::int as i5,
+    random()::int as i6,
+    random()::int as i7,
+    random()::int as i8,
+    random()::int as i9,
+    random()::int as i10,
+    random()::int as i11,
+    random()::int as i12,
+    random()::int as i13
+FROM
+    generate_series(1,1000);
+CREATE INDEX bloomidx ON tbloom USING
+             bloom (i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12);
+SELECT pg_relation_size('bloomidx');
+CREATE index btree_idx ON tbloom(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12);
+SELECT pg_relation_size('btree_idx');
+</programlisting>
+
+<programlisting>
+=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
+                                                   QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------
+ Bitmap Heap Scan on tbloom  (cost=1.50..5.52 rows=1 width=52) (actual time=0.057..0.057 rows=0 loops=1)
+   Recheck Cond: ((i2 = 20) AND (i10 = 15))
+   ->  Bitmap Index Scan on bloomidx  (cost=0.00..1.50 rows=1 width=0) (actual time=0.041..0.041 rows=9 loops=1)
+         Index Cond: ((i2 = 20) AND (i10 = 15))
+ Total runtime: 0.081 ms
+(5 rows)
+</programlisting>
+
+  <para>
+   Seqscan is slow.
+  </para>
+
+<programlisting>
+=# SET enable_bitmapscan = off;
+=# SET enable_indexscan = off;
+=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
+                                            QUERY PLAN
+--------------------------------------------------------------------------------------------------
+ Seq Scan on tbloom  (cost=0.00..25.00 rows=1 width=52) (actual time=0.162..0.162 rows=0 loops=1)
+   Filter: ((i2 = 20) AND (i10 = 15))
+ Total runtime: 0.181 ms
+(3 rows)
+</programlisting>
+
+ <para>
+  A btree index will not be used for this query.
+ </para>
+
+<programlisting>
+=# DROP INDEX bloomidx;
+=# CREATE INDEX btree_idx ON tbloom(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12);
+=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
+                                            QUERY PLAN
+--------------------------------------------------------------------------------------------------
+ Seq Scan on tbloom (cost=0.00..25.00 rows=1 width=52) (actual time=0.210..0.210 rows=0 loops=1)
+   Filter: ((i2 = 20) AND (i10 = 15))
+ Total runtime: 0.250 ms
+(3 rows)
+</programlisting>
+ </sect2>
+
+ <sect2>
+  <title>Opclass interface</title>
+
+  <para>
+   The bloom opclass interface is simple.  It requires one support function:
+   a hash function for the indexed datatype.  It provides one search operator:
+   the equality operator.  The example below shows the <literal>opclass</>
+   definition for the <literal>text</> datatype.
+  </para>
+
+<programlisting>
+CREATE OPERATOR CLASS text_ops
+DEFAULT FOR TYPE text USING bloom AS
+    OPERATOR    1   =(text, text),
+    FUNCTION    1   hashtext(text);
+</programlisting>
+ </sect2>
+
+ <sect2>
+  <title>Limitation</title>
+  <para>
+
+   <itemizedlist>
+    <listitem>
+     <para>
+      For now, only opclasses for <literal>int4</> and <literal>text</> come
+      with the module.  However, users may define more of them.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      Only the <literal>=</literal> operator is currently supported for
+      searches.  However, it would be possible to add support for arrays with
+      contains and intersection operations in the future.
+     </para>
+    </listitem>
+   </itemizedlist>
+  </para>
+ </sect2>
+
+ <sect2>
+  <title>Authors</title>
+
+  <para>
+   Teodor Sigaev <email>teodor@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
+  </para>
+
+  <para>
+   Alexander Korotkov <email>a.korotkov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
+  </para>
+
+  <para>
+   Oleg Bartunov <email>obartunov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
+  </para>
+ </sect2>
+
+</sect1>
index 4e3f337125185f85d5c212db55ad13e683a2de7d..c8708ecf8bbe923339f2565e462f442c0363e62d 100644 (file)
@@ -105,6 +105,7 @@ CREATE EXTENSION <replaceable>module_name</> FROM unpackaged;
  &adminpack;
  &auth-delay;
  &auto-explain;
+ &bloom;
  &btree-gin;
  &btree-gist;
  &chkpass;
index 9046f506281f0a4ea8778290312649ca1ab7b6ee..6c0ad3ffaa60fa52c33aa4b6551e981c2c35453f 100644 (file)
 <!ENTITY adminpack       SYSTEM "adminpack.sgml">
 <!ENTITY auth-delay      SYSTEM "auth-delay.sgml">
 <!ENTITY auto-explain    SYSTEM "auto-explain.sgml">
+<!ENTITY bloom           SYSTEM "bloom.sgml">
 <!ENTITY btree-gin       SYSTEM "btree-gin.sgml">
 <!ENTITY btree-gist      SYSTEM "btree-gist.sgml">
 <!ENTITY chkpass         SYSTEM "chkpass.sgml">