adminpack \
auth_delay \
auto_explain \
+ bloom \
btree_gin \
btree_gist \
chkpass \
--- /dev/null
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
--- /dev/null
+# contrib/bloom/Makefile
+#
+# Build description for the bloom index access method module.
+
+# Module name and the object files listed for it
+MODULE_big = bloom
+OBJS = blcost.o blinsert.o blscan.o blutils.o blvacuum.o blvalidate.o $(WIN32RES)
+
+# Extension name, its SQL script and the file description string
+EXTENSION = bloom
+DATA = bloom--1.0.sql
+PGFILEDESC = "bloom access method - signature file based index"
+
+# Regression test name
+REGRESS = bloom
+
+# With USE_PGXS defined, the build infrastructure is located through
+# pg_config; otherwise the makefiles are included from the source tree
+# relative to this directory.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/bloom
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+wal-check: temp-install
+ $(prove_check)
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * blcost.c
+ * Cost estimate function for bloom indexes.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blcost.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "optimizer/cost.h"
+#include "utils/selfuncs.h"
+
+#include "bloom.h"
+
+/*
+ * Cost estimator for bloom index scans.
+ *
+ * The generic estimator does the work; the only bloom-specific input is
+ * that the number of index tuples visited is preset to the total number
+ * of tuples in the index.  The four results are handed back through the
+ * output pointers.
+ */
+void
+blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+               Cost *indexStartupCost, Cost *indexTotalCost,
+               Selectivity *indexSelectivity, double *indexCorrelation)
+{
+    GenericCosts generic;
+    List       *quals = deconstruct_indexquals(path);
+
+    /* Start from a zeroed structure, then preset the tuple count */
+    MemSet(&generic, 0, sizeof(generic));
+    generic.numIndexTuples = path->indexinfo->tuples;
+
+    genericcostestimate(root, path, loop_count, quals, &generic);
+
+    /* Copy the estimates out to the caller */
+    *indexStartupCost = generic.indexStartupCost;
+    *indexTotalCost = generic.indexTotalCost;
+    *indexSelectivity = generic.indexSelectivity;
+    *indexCorrelation = generic.indexCorrelation;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * blinsert.c
+ * Bloom index build and insert functions.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blinsert.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/generic_xlog.h"
+#include "catalog/index.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+#include "bloom.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * State of bloom index build. Tuples are accumulated in an in-memory page
+ * image ("data"); when the image is full it is copied into a new index
+ * buffer by flushCachedPage() and re-initialized by initCachedPage().
+ */
+typedef struct
+{
+ BloomState blstate; /* bloom index state */
+ MemoryContext tmpCtx; /* temporary memory context reset after
+ * each tuple */
+ char data[BLCKSZ]; /* cached page */
+ int64 count; /* number of tuples in cached page; blbuild
+ * flushes the last page only if this is > 0 */
+} BloomBuildState;
+
+/*
+ * Copy the page image held in the build state into a buffer obtained from
+ * BloomNewBuffer(), registering the buffer with the generic WAL interface
+ * as a full image, then release the buffer.
+ */
+static void
+flushCachedPage(Relation index, BloomBuildState *buildstate)
+{
+    Buffer      target = BloomNewBuffer(index);
+    GenericXLogState *xlog = GenericXLogStart(index);
+    Page        dest = GenericXLogRegister(xlog, target, true);
+
+    memcpy(dest, buildstate->data, BLCKSZ);
+
+    GenericXLogFinish(xlog);
+    UnlockReleaseBuffer(target);
+}
+
+/*
+ * Reset the cached page image to an empty bloom page with no flags set,
+ * and reset the count of tuples it holds.
+ */
+static void
+initCachedPage(BloomBuildState *buildstate)
+{
+    Page        cached = (Page) buildstate->data;
+
+    memset(cached, 0, BLCKSZ);
+    BloomInitPage(cached, 0);
+
+    buildstate->count = 0;
+}
+
+/*
+ * Per-tuple callback from IndexBuildHeapScan.
+ *
+ * Forms a bloom tuple for the heap tuple and appends it to the cached page
+ * in the build state.  If the cached page is full it is flushed, a fresh
+ * page image is started, and the tuple is placed there instead.
+ *
+ * "state" is the BloomBuildState passed to IndexBuildHeapScan.  All
+ * per-tuple allocations happen in buildstate->tmpCtx, which is reset before
+ * returning.
+ */
+static void
+bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
+                   bool *isnull, bool tupleIsAlive, void *state)
+{
+    BloomBuildState *buildstate = (BloomBuildState *) state;
+    MemoryContext oldCtx;
+    BloomTuple *itup;
+
+    oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
+
+    itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
+
+    /* Try to add next item to cached page */
+    if (!BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
+    {
+        /* Cached page is full, flush it out and make a new one */
+        flushCachedPage(index, buildstate);
+
+        CHECK_FOR_INTERRUPTS();
+
+        initCachedPage(buildstate);
+
+        if (!BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
+        {
+            /* We shouldn't be here since we're inserting to the empty page */
+            elog(ERROR, "can not add new tuple");
+        }
+    }
+
+    /*
+     * The tuple is now on the cached page, whichever path was taken.  It
+     * must be counted in both cases: initCachedPage() resets the counter to
+     * zero, and blbuild() flushes the final page only when the counter is
+     * positive, so an uncounted first tuple on a fresh page would be lost
+     * if it were also the last tuple of the build.
+     */
+    buildstate->count++;
+
+    MemoryContextSwitchTo(oldCtx);
+    MemoryContextReset(buildstate->tmpCtx);
+}
+
+/*
+ * Build a new bloom index.
+ *
+ * The index must be empty on entry.  The metapage is created first; the
+ * heap is then scanned with bloomBuildCallback collecting tuples into a
+ * cached page that is written out whenever it fills up.  Returns a
+ * palloc'd IndexBuildResult in which both tuple counts are set to the
+ * value returned by the heap scan.
+ */
+IndexBuildResult *
+blbuild(Relation heap, Relation index, IndexInfo *indexInfo)
+{
+    BloomBuildState buildstate;
+    IndexBuildResult *result;
+    double      reltuples;
+
+    if (RelationGetNumberOfBlocks(index) != 0)
+        elog(ERROR, "index \"%s\" already contains data",
+             RelationGetRelationName(index));
+
+    /* Block 0 becomes the metapage */
+    BloomInitMetapage(index);
+
+    /* Set up the build state: bloom state, scratch context, empty page */
+    memset(&buildstate, 0, sizeof(buildstate));
+    initBloomState(&buildstate.blstate, index);
+    buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
+                                              "Bloom build temporary context",
+                                              ALLOCSET_DEFAULT_MINSIZE,
+                                              ALLOCSET_DEFAULT_INITSIZE,
+                                              ALLOCSET_DEFAULT_MAXSIZE);
+    initCachedPage(&buildstate);
+
+    /* Feed every heap tuple through the build callback */
+    reltuples = IndexBuildHeapScan(heap, index, indexInfo, true,
+                                   bloomBuildCallback, (void *) &buildstate);
+
+    /* Write out the last, partially filled page if it holds anything */
+    if (buildstate.count > 0)
+        flushCachedPage(index, &buildstate);
+
+    MemoryContextDelete(buildstate.tmpCtx);
+
+    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+    result->index_tuples = reltuples;
+    result->heap_tuples = reltuples;
+
+    return result;
+}
+
+/*
+ * Build an empty bloom index in the initialization fork.
+ *
+ * Raises an error if the index already has blocks; otherwise creates the
+ * metapage and nothing else.
+ *
+ * NOTE(review): neither RelationGetNumberOfBlocks() nor BloomInitMetapage()
+ * is given a fork number here, so it is not evident from this function
+ * that the page really ends up in the initialization fork -- confirm.
+ */
+void
+blbuildempty(Relation index)
+{
+ if (RelationGetNumberOfBlocks(index) != 0)
+ elog(ERROR, "index \"%s\" already contains data",
+ RelationGetRelationName(index));
+
+ /* Initialize the meta page */
+ BloomInitMetapage(index);
+}
+
+/*
+ * Insert new tuple to the bloom index.
+ *
+ * The tuple is formed in a private memory context which is deleted again
+ * on every return path.  Placement is attempted in three steps:
+ *
+ * 1. the page at notFullPage[nStart], holding only a share lock on the
+ *    metapage while the block number is read;
+ * 2. the remaining entries of notFullPage, under an exclusive metapage
+ *    lock, advancing nStart to the page that accepted the tuple;
+ * 3. a newly allocated page, which then becomes the sole entry of
+ *    notFullPage.
+ *
+ * Always returns false.  heapRel and checkUnique are not used.
+ */
+bool
+blinsert(Relation index, Datum *values, bool *isnull,
+         ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique)
+{
+    BloomState  blstate;
+    BloomTuple *itup;
+    MemoryContext oldCtx;
+    MemoryContext insertCtx;
+    BloomMetaPageData *metaData;
+    Buffer      buffer,
+                metaBuffer;
+    Page        page,
+                metaPage;
+    BlockNumber blkno = InvalidBlockNumber;
+    OffsetNumber nStart;
+    GenericXLogState *state;
+
+    insertCtx = AllocSetContextCreate(CurrentMemoryContext,
+                                      "Bloom insert temporary context",
+                                      ALLOCSET_DEFAULT_MINSIZE,
+                                      ALLOCSET_DEFAULT_INITSIZE,
+                                      ALLOCSET_DEFAULT_MAXSIZE);
+
+    oldCtx = MemoryContextSwitchTo(insertCtx);
+
+    initBloomState(&blstate, index);
+    itup = BloomFormTuple(&blstate, ht_ctid, values, isnull);
+
+    /*
+     * At first, try to insert new tuple to the first page in notFullPage
+     * array. If success we don't need to modify the meta page.
+     */
+    metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+    LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
+    metaData = BloomPageGetMeta(BufferGetPage(metaBuffer));
+
+    if (metaData->nEnd > metaData->nStart)
+    {
+        blkno = metaData->notFullPage[metaData->nStart];
+
+        Assert(blkno != InvalidBlockNumber);
+        LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
+
+        buffer = ReadBuffer(index, blkno);
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+        state = GenericXLogStart(index);
+        page = GenericXLogRegister(state, buffer, false);
+
+        if (BloomPageAddItem(&blstate, page, itup))
+        {
+            GenericXLogFinish(state);
+            UnlockReleaseBuffer(buffer);
+            /* metapage lock was already dropped above; only unpin */
+            ReleaseBuffer(metaBuffer);
+            MemoryContextSwitchTo(oldCtx);
+            MemoryContextDelete(insertCtx);
+            return false;
+        }
+
+        /* Page turned out to be full: discard the WAL state, move on */
+        GenericXLogAbort(state);
+        UnlockReleaseBuffer(buffer);
+    }
+    else
+    {
+        /* First page in notFullPage isn't suitable */
+        LockBuffer(metaBuffer, BUFFER_LOCK_UNLOCK);
+    }
+
+    /*
+     * Try other pages in notFullPage array. We will have to change nStart in
+     * metapage. Thus, grab exclusive lock on metapage.
+     */
+    LockBuffer(metaBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+    state = GenericXLogStart(index);
+    metaPage = GenericXLogRegister(state, metaBuffer, false);
+    metaData = BloomPageGetMeta(metaPage);
+
+    /*
+     * Iterate over notFullPage array. Skip page we already tried first.
+     */
+    nStart = metaData->nStart;
+    if (metaData->nEnd > nStart &&
+        blkno == metaData->notFullPage[nStart])
+        nStart++;
+
+    while (metaData->nEnd > nStart)
+    {
+        blkno = metaData->notFullPage[nStart];
+        Assert(blkno != InvalidBlockNumber);
+
+        buffer = ReadBuffer(index, blkno);
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+        page = GenericXLogRegister(state, buffer, false);
+
+        if (BloomPageAddItem(&blstate, page, itup))
+        {
+            /* Remember where free space was found for later inserts */
+            metaData->nStart = nStart;
+            GenericXLogFinish(state);
+            UnlockReleaseBuffer(buffer);
+            UnlockReleaseBuffer(metaBuffer);
+            MemoryContextSwitchTo(oldCtx);
+            MemoryContextDelete(insertCtx);
+            return false;
+        }
+
+        /* No room on this page: drop it from the WAL state and go on */
+        GenericXLogUnregister(state, buffer);
+        UnlockReleaseBuffer(buffer);
+        nStart++;
+    }
+
+    GenericXLogAbort(state);
+
+    /*
+     * Didn't find place to insert in notFullPage array. Allocate new page.
+     */
+    buffer = BloomNewBuffer(index);
+
+    state = GenericXLogStart(index);
+    metaPage = GenericXLogRegister(state, metaBuffer, false);
+    metaData = BloomPageGetMeta(metaPage);
+    page = GenericXLogRegister(state, buffer, true);
+    BloomInitPage(page, 0);
+
+    /* A freshly initialized page must accept one tuple; don't lose it */
+    if (!BloomPageAddItem(&blstate, page, itup))
+        elog(ERROR, "could not add new bloom tuple to empty page");
+
+    metaData->nStart = 0;
+    metaData->nEnd = 1;
+    metaData->notFullPage[0] = BufferGetBlockNumber(buffer);
+
+    GenericXLogFinish(state);
+
+    UnlockReleaseBuffer(buffer);
+    UnlockReleaseBuffer(metaBuffer);
+
+    /*
+     * Restore the caller's memory context and free the scratch context, as
+     * the other return paths do; otherwise the caller would continue to run
+     * in insertCtx and the context would be leaked.
+     */
+    MemoryContextSwitchTo(oldCtx);
+    MemoryContextDelete(insertCtx);
+
+    return false;
+}
--- /dev/null
+/* contrib/bloom/bloom--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION bloom" to load this file. \quit
+
+-- Handler function of the access method.  Plain CREATE (not CREATE OR
+-- REPLACE) so that the script can never take over a pre-existing function
+-- of the same name.
+CREATE FUNCTION blhandler(internal)
+RETURNS index_am_handler
+AS 'MODULE_PATHNAME'
+LANGUAGE C;
+
+-- Access method
+CREATE ACCESS METHOD bloom TYPE INDEX HANDLER blhandler;
+
+-- Opclasses: each provides the equality operator as strategy 1 and a hash
+-- function as support procedure 1.
+
+CREATE OPERATOR CLASS int4_ops
+DEFAULT FOR TYPE int4 USING bloom AS
+ OPERATOR 1 =(int4, int4),
+ FUNCTION 1 hashint4(int4);
+
+CREATE OPERATOR CLASS text_ops
+DEFAULT FOR TYPE text USING bloom AS
+ OPERATOR 1 =(text, text),
+ FUNCTION 1 hashtext(text);
--- /dev/null
+# bloom extension
+comment = 'bloom access method - signature file based index'
+default_version = '1.0'
+module_pathname = '$libdir/bloom'
+relocatable = true
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * bloom.h
+ * Header for bloom index.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/bloom.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _BLOOM_H_
+#define _BLOOM_H_
+
+#include "access/amapi.h"
+#include "access/generic_xlog.h"
+#include "access/itup.h"
+#include "access/xlog.h"
+#include "nodes/relation.h"
+#include "fmgr.h"
+
+/* Support procedures numbers */
+#define BLOOM_HASH_PROC 1
+#define BLOOM_NPROC 1
+
+/* Scan strategies */
+#define BLOOM_EQUAL_STRATEGY 1
+#define BLOOM_NSTRATEGIES 1
+
+/*
+ * Opaque data kept in the special space of every bloom page (it is reached
+ * through PageGetSpecialPointer, see BloomPageGetOpaque).
+ */
+typedef struct BloomPageOpaqueData
+{
+ OffsetNumber maxoff; /* number of tuples currently on the page */
+ uint16 flags; /* BLOOM_META / BLOOM_DELETED bits */
+} BloomPageOpaqueData;
+
+typedef BloomPageOpaqueData *BloomPageOpaque;
+
+/* Bloom page flags (bits of BloomPageOpaqueData.flags) */
+#define BLOOM_META (1<<0)
+/* note: (2<<0) evaluates to 2, i.e. the second bit */
+#define BLOOM_DELETED (2<<0)
+
+/*
+ * Macros for accessing bloom page structures.
+ *
+ * BloomPageGetTuple() takes a 1-based offset: offset 1 is the tuple at the
+ * very start of the page contents, and every tuple occupies
+ * state->sizeOfBloomTuple bytes.
+ */
+#define BloomPageGetOpaque(page) ((BloomPageOpaque) PageGetSpecialPointer(page))
+#define BloomPageGetMaxOffset(page) (BloomPageGetOpaque(page)->maxoff)
+#define BloomPageIsMeta(page) (BloomPageGetOpaque(page)->flags & BLOOM_META)
+#define BloomPageIsDeleted(page) (BloomPageGetOpaque(page)->flags & BLOOM_DELETED)
+#define BloomPageSetDeleted(page) (BloomPageGetOpaque(page)->flags |= BLOOM_DELETED)
+#define BloomPageSetNonDeleted(page) (BloomPageGetOpaque(page)->flags &= ~BLOOM_DELETED)
+#define BloomPageGetData(page) ((BloomTuple *)PageGetContents(page))
+#define BloomPageGetTuple(state, page, offset) \
+ ((BloomTuple *)(PageGetContents(page) \
+ + (state)->sizeOfBloomTuple * ((offset) - 1)))
+#define BloomPageGetNextTuple(state, tuple) \
+ ((BloomTuple *)((Pointer)(tuple) + (state)->sizeOfBloomTuple))
+
+/* Preserved page numbers */
+#define BLOOM_METAPAGE_BLKNO (0)
+#define BLOOM_HEAD_BLKNO (1) /* first data page */
+
+/*
+ * Bloom index options. A copy of this struct is stored in the metapage
+ * (BloomMetaPageData.opts).
+ */
+typedef struct BloomOptions
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int bloomLength; /* length of signature in uint16 */
+ int bitSize[INDEX_MAX_KEYS]; /* signature bits per index
+ * key */
+} BloomOptions;
+
+/*
+ * FreeBlockNumberArray - array of block numbers sized so that metadata fill
+ * all space in metapage.
+ *
+ * The element count is what is left of a block after subtracting the page
+ * header, the aligned opaque area, and the aligned size of two uint16, one
+ * uint32 and a BloomOptions (the types of the BloomMetaPageData members
+ * declared ahead of the array), divided by the size of a block number.
+ */
+typedef BlockNumber FreeBlockNumberArray[
+ MAXALIGN_DOWN(
+ BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(BloomPageOpaqueData))
+ - MAXALIGN(sizeof(uint16) * 2 + sizeof(uint32) + sizeof(BloomOptions))
+ ) / sizeof(BlockNumber)
+];
+
+/*
+ * Metadata of bloom index, stored in the contents area of the metapage.
+ *
+ * notFullPage[nStart .. nEnd-1] are the block numbers the insert code tries
+ * when looking for a page with free space.
+ */
+typedef struct BloomMetaPageData
+{
+ uint32 magickNumber;
+ uint16 nStart;
+ uint16 nEnd;
+ BloomOptions opts;
+ FreeBlockNumberArray notFullPage;
+} BloomMetaPageData;
+
+/* Magic number to distinguish bloom metapages from other pages */
+#define BLOOM_MAGICK_NUMBER (0xDBAC0DED)
+
+/* Number of block numbers that fit in BloomMetaPageData */
+#define BloomMetaBlockN (sizeof(FreeBlockNumberArray) / sizeof(BlockNumber))
+
+/* Pointer to the metadata held in the contents area of a metapage */
+#define BloomPageGetMeta(page) ((BloomMetaPageData *) PageGetContents(page))
+
+/*
+ * Working state for operating on one bloom index; filled in by
+ * initBloomState().
+ */
+typedef struct BloomState
+{
+ FmgrInfo hashFn[INDEX_MAX_KEYS]; /* hash function per index column */
+ BloomOptions *opts; /* stored in rd_amcache and defined at
+ * creation time */
+ int32 nColumns; /* number of index columns */
+
+ /*
+ * sizeOfBloomTuple is index's specific, and it depends on reloptions, so
+ * precompute it
+ */
+ int32 sizeOfBloomTuple;
+} BloomState;
+
+/*
+ * Free space on a bloom page: the block size minus the aligned page header,
+ * the space taken by the maxoff tuples already stored, and the aligned
+ * opaque area.
+ */
+#define BloomPageGetFreeSpace(state, page) \
+ (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
+ - BloomPageGetMaxOffset(page) * (state)->sizeOfBloomTuple \
+ - MAXALIGN(sizeof(BloomPageOpaqueData)))
+
+/*
+ * Bloom index tuple: a heap pointer followed by the signature.
+ *
+ * "sign" is declared with one element, but a tuple is allocated and stored
+ * with bloomLength signature words; its real size is
+ * BLOOMTUPLEHDRSZ + sizeof(SignType) * bloomLength, which initBloomState()
+ * precomputes as sizeOfBloomTuple.
+ */
+typedef uint16 SignType;
+
+typedef struct BloomTuple
+{
+ ItemPointerData heapPtr; /* heap tuple this entry points to */
+ SignType sign[1]; /* signature words, variable length */
+} BloomTuple;
+
+/* Size of the fixed part of a bloom tuple, i.e. the offset of "sign" */
+#define BLOOMTUPLEHDRSZ offsetof(BloomTuple, sign)
+
+/*
+ * Opaque data structure for bloom index scan, kept in scan->opaque.
+ * "sign" is NULL until the scan signature has been computed by
+ * blgetbitmap(); blrescan() resets it to NULL.
+ */
+typedef struct BloomScanOpaqueData
+{
+ SignType *sign; /* Scan signature */
+ BloomState state;
+} BloomScanOpaqueData;
+
+typedef BloomScanOpaqueData *BloomScanOpaque;
+
+/* blutils.c */
+extern void _PG_init(void);
+extern Datum blhandler(PG_FUNCTION_ARGS);
+extern void initBloomState(BloomState * state, Relation index);
+extern void BloomInitMetapage(Relation index);
+extern void BloomInitPage(Page page, uint16 flags);
+extern Buffer BloomNewBuffer(Relation index);
+extern void signValue(BloomState * state, SignType * sign, Datum value, int attno);
+extern BloomTuple *BloomFormTuple(BloomState * state, ItemPointer iptr, Datum *values, bool *isnull);
+extern bool BloomPageAddItem(BloomState * state, Page page, BloomTuple * tuple);
+
+/* blvalidate.c */
+extern bool blvalidate(Oid opclassoid);
+
+/* index access method interface functions */
+extern bool blinsert(Relation index, Datum *values, bool *isnull,
+ ItemPointer ht_ctid, Relation heapRel,
+ IndexUniqueCheck checkUnique);
+extern IndexScanDesc blbeginscan(Relation r, int nkeys, int norderbys);
+extern int64 blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
+extern void blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+ ScanKey orderbys, int norderbys);
+extern void blendscan(IndexScanDesc scan);
+extern IndexBuildResult *blbuild(Relation heap, Relation index,
+ struct IndexInfo *indexInfo);
+extern void blbuildempty(Relation index);
+extern IndexBulkDeleteResult *blbulkdelete(IndexVacuumInfo *info,
+ IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback,
+ void *callback_state);
+extern IndexBulkDeleteResult *blvacuumcleanup(IndexVacuumInfo *info,
+ IndexBulkDeleteResult *stats);
+extern bytea *bloptions(Datum reloptions, bool validate);
+extern void blcostestimate(PlannerInfo *root, IndexPath *path,
+ double loop_count, Cost *indexStartupCost,
+ Cost *indexTotalCost, Selectivity *indexSelectivity,
+ double *indexCorrelation);
+
+#endif
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * blscan.c
+ * Bloom index scan functions.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blscan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "pgstat.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/lmgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+#include "bloom.h"
+
+/*
+ * Begin scan of bloom index: returns the scan descriptor produced by
+ * RelationGetIndexScan().  The bloom-specific opaque state is not created
+ * here; blrescan() allocates it on its first call.
+ */
+IndexScanDesc
+blbeginscan(Relation r, int nkeys, int norderbys)
+{
+    return RelationGetIndexScan(r, nkeys, norderbys);
+}
+
+/*
+ * Rescan a bloom index.
+ *
+ * On the first call for a scan the opaque state is allocated and its bloom
+ * state initialized; on later calls any previously computed scan signature
+ * is freed.  Either way the signature is left NULL afterwards.  The new
+ * scan keys, if given, are copied into the scan descriptor.
+ */
+void
+blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
+         ScanKey orderbys, int norderbys)
+{
+    BloomScanOpaque opaque = (BloomScanOpaque) scan->opaque;
+
+    if (opaque != NULL)
+    {
+        /* Repeated rescan: drop the stale signature */
+        if (opaque->sign)
+            pfree(opaque->sign);
+    }
+    else
+    {
+        /* First rescan of this scan: set up the opaque state */
+        opaque = (BloomScanOpaque) palloc(sizeof(BloomScanOpaqueData));
+        initBloomState(&opaque->state, scan->indexRelation);
+        scan->opaque = opaque;
+    }
+    opaque->sign = NULL;
+
+    if (scankey != NULL && scan->numberOfKeys > 0)
+        memmove(scan->keyData, scankey,
+                scan->numberOfKeys * sizeof(ScanKeyData));
+}
+
+/*
+ * End scan of bloom index: release the scan signature, if one was built.
+ *
+ * The opaque state is created lazily by blrescan(), so a scan that is
+ * closed without ever having been rescanned has scan->opaque == NULL;
+ * there is nothing to release in that case.
+ */
+void
+blendscan(IndexScanDesc scan)
+{
+    BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+
+    if (so == NULL)
+        return;
+
+    if (so->sign)
+        pfree(so->sign);
+    so->sign = NULL;
+}
+
+/*
+ * Insert all matching tuples into a bitmap.
+ *
+ * On the first call after a rescan the scan signature is computed from the
+ * scan keys.  Every data page (blocks BLOOM_HEAD_BLKNO and up) is then
+ * read, and each tuple whose signature contains all bits of the scan
+ * signature is added to the bitmap as a lossy match to be rechecked.
+ *
+ * Returns the number of tuples added.  If any scan key is NULL nothing can
+ * match and 0 is returned without reading the index.
+ */
+int64
+blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
+{
+    int64       ntids = 0;
+    BlockNumber blkno,
+                npages;
+    int         i;
+    BufferAccessStrategy bas;
+    BloomScanOpaque so = (BloomScanOpaque) scan->opaque;
+
+    if (so->sign == NULL)
+    {
+        /* New search: have to calculate search signature */
+        ScanKey     skey = scan->keyData;
+
+        /*
+         * Allocate the signature even when there are no scan keys.  It then
+         * stays all-zero and matches every tuple, whereas leaving it NULL
+         * would make the comparison loop below dereference a NULL pointer.
+         */
+        so->sign = palloc0(sizeof(SignType) * so->state.opts->bloomLength);
+
+        for (i = 0; i < scan->numberOfKeys; i++)
+        {
+            /*
+             * Assume bloom-indexable operators to be strict, so nothing could
+             * be found for NULL key.
+             */
+            if (skey->sk_flags & SK_ISNULL)
+            {
+                pfree(so->sign);
+                so->sign = NULL;
+                return 0;
+            }
+
+            /* Add next value to the signature */
+            signValue(&so->state, so->sign, skey->sk_argument,
+                      skey->sk_attno - 1);
+
+            skey++;
+        }
+    }
+
+    /*
+     * We're going to read the whole index. This is why we use appropriate
+     * buffer access strategy.
+     */
+    bas = GetAccessStrategy(BAS_BULKREAD);
+    npages = RelationGetNumberOfBlocks(scan->indexRelation);
+
+    for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+    {
+        Buffer      buffer;
+        Page        page;
+
+        buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
+                                    blkno, RBM_NORMAL, bas);
+
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+        page = BufferGetPage(buffer);
+
+        if (!BloomPageIsDeleted(page))
+        {
+            OffsetNumber offset,
+                        maxOffset = BloomPageGetMaxOffset(page);
+
+            for (offset = 1; offset <= maxOffset; offset++)
+            {
+                BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset);
+                bool        res = true;
+
+                /* Check index signature with scan signature */
+                for (i = 0; res && i < so->state.opts->bloomLength; i++)
+                {
+                    if ((itup->sign[i] & so->sign[i]) != so->sign[i])
+                        res = false;
+                }
+
+                /* Add matching tuples to bitmap */
+                if (res)
+                {
+                    tbm_add_tuples(tbm, &itup->heapPtr, 1, true);
+                    ntids++;
+                }
+            }
+        }
+
+        UnlockReleaseBuffer(buffer);
+        CHECK_FOR_INTERRUPTS();
+    }
+    FreeAccessStrategy(bas);
+
+    return ntids;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * blutils.c
+ * Bloom index utilities.
+ *
+ * Portions Copyright (c) 2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1990-1993, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amapi.h"
+#include "access/generic_xlog.h"
+#include "catalog/index.h"
+#include "storage/lmgr.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "utils/memutils.h"
+#include "access/reloptions.h"
+#include "storage/freespace.h"
+#include "storage/indexfsm.h"
+
+#include "bloom.h"
+
+/*
+ * Signature dealing macros.
+ *
+ * A signature is an array of SignType words; bit number i lives in word
+ * i / BITSIGNTYPE at position i % BITSIGNTYPE.  CLRBIT and SETBIT expand to
+ * unparenthesized assignment expressions, so use them as statements only.
+ */
+#define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
+#define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
+#define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
+#define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
+#define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
+
+PG_FUNCTION_INFO_V1(blhandler);
+
+/* Kind of relation options for bloom index */
+static relopt_kind bl_relopt_kind;
+
+static int32 myRand();
+static void mySrand(uint32 seed);
+
+/*
+ * Module initialization function: registers the bloom relation options.
+ *
+ * One new option kind is created, with an integer option "length"
+ * (default 5, range 1..256) and integer options "col1" .. "colN" for
+ * N = INDEX_MAX_KEYS (default 2, range 1..2048).
+ */
+void
+_PG_init(void)
+{
+    char        optname[16];
+    int         col;
+
+    bl_relopt_kind = add_reloption_kind();
+
+    add_int_reloption(bl_relopt_kind, "length",
+                      "Length of signature in uint16 type", 5, 1, 256);
+
+    /* Option names are 1-based: col1, col2, ... */
+    for (col = 1; col <= INDEX_MAX_KEYS; col++)
+    {
+        snprintf(optname, sizeof(optname), "col%d", col);
+        add_int_reloption(bl_relopt_kind, optname,
+                          "Number of bits for corresponding column", 2, 1, 2048);
+    }
+}
+
+/*
+ * Bloom handler function: builds and returns the IndexAmRoutine node that
+ * describes the access method's properties and callbacks.
+ */
+Datum
+blhandler(PG_FUNCTION_ARGS)
+{
+    IndexAmRoutine *routine = makeNode(IndexAmRoutine);
+
+    /* One strategy (equality) and one support procedure (hash) */
+    routine->amstrategies = BLOOM_NSTRATEGIES;
+    routine->amsupport = BLOOM_NPROC;
+
+    /* Capabilities offered */
+    routine->amcanmulticol = true;
+    routine->amoptionalkey = true;
+
+    /* Capabilities not offered */
+    routine->amcanorder = false;
+    routine->amcanorderbyop = false;
+    routine->amcanbackward = false;
+    routine->amcanunique = false;
+    routine->amsearcharray = false;
+    routine->amsearchnulls = false;
+    routine->amstorage = false;
+    routine->amclusterable = false;
+    routine->ampredlocks = false;
+    routine->amkeytype = InvalidOid;
+
+    /* Build, insert and maintenance callbacks */
+    routine->ambuild = blbuild;
+    routine->ambuildempty = blbuildempty;
+    routine->aminsert = blinsert;
+    routine->ambulkdelete = blbulkdelete;
+    routine->amvacuumcleanup = blvacuumcleanup;
+
+    /* Planner and options callbacks */
+    routine->amcostestimate = blcostestimate;
+    routine->amoptions = bloptions;
+    routine->amvalidate = blvalidate;
+    routine->amcanreturn = NULL;
+
+    /* Scan callbacks: bitmap scans only, no mark/restore */
+    routine->ambeginscan = blbeginscan;
+    routine->amrescan = blrescan;
+    routine->amgettuple = NULL;
+    routine->amgetbitmap = blgetbitmap;
+    routine->amendscan = blendscan;
+    routine->ammarkpos = NULL;
+    routine->amrestrpos = NULL;
+
+    PG_RETURN_POINTER(routine);
+}
+
+/*
+ * Fill BloomState structure for particular index.
+ *
+ * Copies the hash support function of every index column into the state
+ * (allocated in the current memory context).  The index options are taken
+ * from index->rd_amcache; if that cache is still empty, the options are
+ * read from the metapage, after checking its flag and magic number, and
+ * stored in the cache, allocated in index->rd_indexcxt.
+ */
+void
+initBloomState(BloomState *state, Relation index)
+{
+    int         natts = index->rd_att->natts;
+    int         attno;
+
+    state->nColumns = natts;
+
+    /* One hash function per index column */
+    for (attno = 0; attno < natts; attno++)
+        fmgr_info_copy(&(state->hashFn[attno]),
+                       index_getprocinfo(index, attno + 1, BLOOM_HASH_PROC),
+                       CurrentMemoryContext);
+
+    /* Populate the options cache from the metapage on first use */
+    if (index->rd_amcache == NULL)
+    {
+        BloomOptions *cached;
+        BloomMetaPageData *meta;
+        Buffer      metaBuffer;
+        Page        metaPage;
+
+        cached = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
+
+        metaBuffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+        LockBuffer(metaBuffer, BUFFER_LOCK_SHARE);
+        metaPage = BufferGetPage(metaBuffer);
+
+        if (!BloomPageIsMeta(metaPage))
+            elog(ERROR, "Relation is not a bloom index");
+
+        meta = BloomPageGetMeta(metaPage);
+        if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
+            elog(ERROR, "Relation is not a bloom index");
+
+        *cached = meta->opts;
+
+        UnlockReleaseBuffer(metaBuffer);
+
+        index->rd_amcache = (void *) cached;
+    }
+
+    state->opts = (BloomOptions *) index->rd_amcache;
+
+    /* Fixed header plus bloomLength signature words */
+    state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
+        sizeof(SignType) * state->opts->bloomLength;
+}
+
+/*
+ * Random generator copied from FreeBSD. Using own random generator here for
+ * two reasons:
+ *
+ * 1) In this case random numbers are used for on-disk storage. Usage of
+ * PostgreSQL number generator would obstruct it from all possible changes.
+ * 2) Changing seed of PostgreSQL random generator would be undesirable side
+ * effect.
+ *
+ * The sequence produced here decides which signature bits signValue() sets,
+ * so any change to the arithmetic changes the contents of newly built
+ * signatures relative to ones already stored.
+ */
+/* Generator state: set by mySrand(), advanced by every myRand() call */
+static int32 next;
+
+/* Advance the generator one step and return the new value minus one. */
+static int32
+myRand()
+{
+ /*
+ * Compute x = (7^5 * x) mod (2^31 - 1)
+ * without overflowing 31 bits:
+ * (2^31 - 1) = 127773 * (7^5) + 2836
+ * From "Random number generators: good ones are hard to find",
+ * Park and Miller, Communications of the ACM, vol. 31, no. 10,
+ * October 1988, p. 1195.
+ */
+ int32 hi, lo, x;
+
+ /* Must be in [1, 0x7ffffffe] range at this point. */
+ hi = next / 127773;
+ lo = next % 127773;
+ x = 16807 * lo - 2836 * hi;
+ if (x < 0)
+ x += 0x7fffffff;
+ next = x;
+ /* Transform to [0, 0x7ffffffd] range. */
+ return (x - 1);
+}
+
+/*
+ * Seed the generator used by myRand().
+ *
+ * NOTE(review): "seed" is uint32 while "next" is int32, so seeds above
+ * 0x7fffffff presumably become negative when stored and the range stated
+ * below would then not hold.  Do not "fix" this without checking the effect
+ * on signatures already stored in existing indexes.
+ */
+void
+mySrand(uint32 seed)
+{
+ next = seed;
+ /* Transform to [1, 0x7ffffffe] range. */
+ next = (next % 0x7ffffffe) + 1;
+}
+
+/*
+ * Add bits of given value to the signature.
+ *
+ * "attno" is the zero-based column number.  The private generator is first
+ * seeded with the column number and then re-seeded with the value's hash
+ * XORed with the first number drawn, so that equal values in different
+ * columns select different bits.  bitSize[attno] bit positions are then
+ * drawn and set in "sign".
+ */
+void
+signValue(BloomState *state, SignType *sign, Datum value, int attno)
+{
+    uint32      hash;
+    int         remaining;
+
+    /* Per-column seed */
+    mySrand(attno);
+
+    /* Mix in the hash of the value itself */
+    hash = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
+    mySrand(hash ^ myRand());
+
+    for (remaining = state->opts->bitSize[attno]; remaining > 0; remaining--)
+    {
+        /* compute the position once; SETBIT evaluates its argument twice */
+        int         bit = myRand() % (state->opts->bloomLength * BITSIGNTYPE);
+
+        SETBIT(sign, bit);
+    }
+}
+
+/*
+ * Make bloom tuple from values.
+ *
+ * Returns a zero-initialized, palloc'd tuple of state->sizeOfBloomTuple
+ * bytes carrying the given heap pointer, with the signature bits of every
+ * non-null column value set.  Null columns contribute no bits.
+ */
+BloomTuple *
+BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
+{
+    BloomTuple *tuple = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
+    int         col;
+
+    tuple->heapPtr = *iptr;
+
+    for (col = 0; col < state->nColumns; col++)
+    {
+        if (!isnull[col])
+            signValue(state, tuple->sign, values[col], col);
+    }
+
+    return tuple;
+}
+
+/*
+ * Add new bloom tuple to the page. Returns true if the tuple was stored,
+ * false if it does not fit on the page (the page is left untouched then).
+ *
+ * On success the tuple is copied behind the last tuple on the page, maxoff
+ * is incremented and pd_lower is moved to the end of the stored tuples.
+ */
+bool
+BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
+{
+    BloomPageOpaque opaque;
+    BloomTuple *slot;
+    Pointer     newLower;
+
+    /* Refuse if there is not room for one more tuple */
+    if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
+        return false;
+
+    /* Copy the tuple into the first unused slot */
+    opaque = BloomPageGetOpaque(page);
+    slot = BloomPageGetTuple(state, page, opaque->maxoff + 1);
+    memcpy((Pointer) slot, (Pointer) tuple, state->sizeOfBloomTuple);
+
+    /* Account for it and move pd_lower past it */
+    opaque->maxoff++;
+    newLower = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
+    ((PageHeader) page)->pd_lower = newLower - page;
+
+    return true;
+}
+
+/*
+ * Allocate a new page (either by recycling, or by extending the index file)
+ * The returned buffer is already pinned and exclusive-locked
+ * Caller is responsible for initializing the page, e.g. with BloomInitPage
+ */
+Buffer
+BloomNewBuffer(Relation index)
+{
+    Buffer      buffer;
+    BlockNumber candidate;
+    bool        extensionLock;
+
+    /* Work through the pages the free space map offers, if any */
+    while ((candidate = GetFreeIndexPage(index)) != InvalidBlockNumber)
+    {
+        buffer = ReadBuffer(index, candidate);
+
+        /*
+         * Somebody else may already have recycled this page, in which case
+         * the buffer may be locked; don't wait for it.
+         */
+        if (ConditionalLockBuffer(buffer))
+        {
+            Page        page = BufferGetPage(buffer);
+
+            /* Usable if never initialized, or if marked deleted */
+            if (PageIsNew(page) || BloomPageIsDeleted(page))
+                return buffer;
+
+            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+        }
+
+        /* Not usable: drop the pin and ask for another page */
+        ReleaseBuffer(buffer);
+    }
+
+    /* Nothing to recycle, so extend the file */
+    extensionLock = !RELATION_IS_LOCAL(index);
+    if (extensionLock)
+        LockRelationForExtension(index, ExclusiveLock);
+
+    buffer = ReadBuffer(index, P_NEW);
+    LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+    if (extensionLock)
+        UnlockRelationForExtension(index, ExclusiveLock);
+
+    return buffer;
+}
+
+/*
+ * Initialize bloom page: a standard page init with room for the bloom
+ * opaque area, whose contents are zeroed before "flags" is stored in it.
+ */
+void
+BloomInitPage(Page page, uint16 flags)
+{
+    BloomPageOpaque special;
+
+    PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
+
+    special = BloomPageGetOpaque(page);
+    memset(special, 0, sizeof(BloomPageOpaqueData));
+    special->flags = flags;
+}
+
+/*
+ * Adjust options of bloom index.
+ *
+ * Replaces a non-positive signature length by the default of 5 words, and
+ * replaces every per-column bit count that is non-positive or not smaller
+ * than the number of bits in the signature by the default of 2.
+ */
+static void
+adjustBloomOptions(BloomOptions *opts)
+{
+    int         i;
+    int         signatureBits;
+
+    /* Default length of bloom filter is 5 of 16-bit integers */
+    if (opts->bloomLength <= 0)
+        opts->bloomLength = 5;
+
+    /*
+     * bloomLength counts SignType words, so the signature holds
+     * bloomLength * sizeof(SignType) * BITS_PER_BYTE bits.  The limit must
+     * be expressed in bits: comparing against the size in bytes would
+     * silently discard valid settings.
+     */
+    signatureBits = (int) (opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE);
+
+    /* Check signature bits per column */
+    for (i = 0; i < INDEX_MAX_KEYS; i++)
+    {
+        /*
+         * Zero and negative number of bits is meaningless. Also setting
+         * more bits than signature have seems useless. Replace both cases
+         * with 2 bits default.
+         */
+        if (opts->bitSize[i] <= 0 || opts->bitSize[i] >= signatureBits)
+            opts->bitSize[i] = 2;
+    }
+}
+
+/*
+ * Initialize metapage for bloom index.
+ *
+ * Allocates the first block of the index, which must be block
+ * BLOOM_METAPAGE_BLKNO, and writes the magic number and the index options
+ * into it.  If the index has no reloptions, default options are used.
+ */
+void
+BloomInitMetapage(Relation index)
+{
+    Page        metaPage;
+    Buffer      metaBuffer;
+    BloomMetaPageData *metadata;
+    GenericXLogState *state;
+    BloomOptions defaultOpts;
+    BloomOptions *opts;
+
+    /*
+     * Make a new buffer; since it is the first buffer, it should be
+     * associated with block number 0 (BLOOM_METAPAGE_BLKNO).
+     */
+    metaBuffer = BloomNewBuffer(index);
+    Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
+
+    /*
+     * Choose the options to store.  Without reloptions, build defaults in a
+     * local variable: storing memory allocated from the current context into
+     * index->rd_options would leave the relation cache entry pointing at
+     * memory that does not live as long as the entry itself.
+     */
+    opts = (BloomOptions *) index->rd_options;
+    if (opts == NULL)
+    {
+        memset(&defaultOpts, 0, sizeof(defaultOpts));
+        opts = &defaultOpts;
+    }
+    adjustBloomOptions(opts);
+
+    /* Initialize contents of meta page */
+    state = GenericXLogStart(index);
+    metaPage = GenericXLogRegister(state, metaBuffer, true);
+
+    BloomInitPage(metaPage, BLOOM_META);
+    metadata = BloomPageGetMeta(metaPage);
+    memset(metadata, 0, sizeof(BloomMetaPageData));
+    metadata->magickNumber = BLOOM_MAGICK_NUMBER;
+    metadata->opts = *opts;
+    /* Mark the metadata area as used space on the page */
+    ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
+
+    GenericXLogFinish(state);
+    UnlockReleaseBuffer(metaBuffer);
+}
+
+/*
+ * Parse reloptions for a bloom index.
+ *
+ * Returns a palloc'd BloomOptions, cast to bytea, filled from "reloptions"
+ * and passed through adjustBloomOptions().  "validate" is handed on to the
+ * generic reloptions parser.
+ */
+bytea *
+bloptions(Datum reloptions, bool validate)
+{
+    relopt_value *options;
+    int         numoptions;
+    BloomOptions *rdopts;
+    relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
+    char        colnames[INDEX_MAX_KEYS][16];
+    int         i;
+
+    /* Option for length of signature */
+    tab[0].optname = "length";
+    tab[0].opttype = RELOPT_TYPE_INT;
+    tab[0].offset = offsetof(BloomOptions, bloomLength);
+
+    /* Number of bits for each of possible columns: col1, col2, ... */
+    for (i = 0; i < INDEX_MAX_KEYS; i++)
+    {
+        /*
+         * The names are needed only while the table is in use below, so
+         * local buffers do; no allocation per option per call.
+         */
+        snprintf(colnames[i], sizeof(colnames[i]), "col%d", i + 1);
+        tab[i + 1].optname = colnames[i];
+        tab[i + 1].opttype = RELOPT_TYPE_INT;
+
+        /*
+         * offsetof() with a run-time array index is not portable; compute
+         * the element offset from the start of the array instead.
+         */
+        tab[i + 1].offset = offsetof(BloomOptions, bitSize[0]) + sizeof(int) * i;
+    }
+
+    options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
+    rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
+    fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
+                   validate, tab, INDEX_MAX_KEYS + 1);
+
+    adjustBloomOptions(rdopts);
+
+    return (bytea *) rdopts;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * blvacuum.c
+ * Bloom VACUUM functions.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blvacuum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "catalog/storage.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "postmaster/autovacuum.h"
+#include "storage/bufmgr.h"
+#include "storage/indexfsm.h"
+#include "storage/lmgr.h"
+
+#include "bloom.h"
+
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Every page from BLOOM_HEAD_BLKNO onwards is scanned under an exclusive
+ * buffer lock.  Surviving tuples are compacted towards the start of the
+ * page, a page that loses its last tuple is marked deleted, and the page
+ * change is logged through the generic WAL interface.  Pages that still
+ * have room for another tuple are collected (up to BloomMetaBlockN of them)
+ * and, if any were found, written into the metapage's notFullPage array.
+ *
+ * info: vacuum context; supplies the index relation and buffer strategy.
+ * stats: result struct from an earlier call, or NULL to allocate a new one.
+ * callback/callback_state: predicate deciding whether a heap TID is dead.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+IndexBulkDeleteResult *
+blbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
+             IndexBulkDeleteCallback callback, void *callback_state)
+{
+    Relation    index = info->index;
+    BlockNumber blkno,
+                npages;
+    FreeBlockNumberArray notFullPage;
+    int         countPage = 0;
+    BloomState  state;
+    Buffer      buffer;
+    Page        page;
+    GenericXLogState *gxlogState;
+
+    if (stats == NULL)
+        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+    initBloomState(&state, index);
+
+    /*
+     * Iterate over the pages. We don't care about concurrently added pages,
+     * they can't contain tuples to delete.
+     */
+    npages = RelationGetNumberOfBlocks(index);
+    for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+    {
+        BloomTuple *itup,
+                   *itupPtr,
+                   *itupEnd;
+        bool        pageBecomesEmpty;
+
+        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
+                                    RBM_NORMAL, info->strategy);
+
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+        gxlogState = GenericXLogStart(index);
+        page = GenericXLogRegister(gxlogState, buffer, false);
+
+        if (BloomPageIsDeleted(page))
+        {
+            /*
+             * Nothing to do for an already-deleted page.  Abort the generic
+             * WAL state started above so that it is not left dangling.
+             */
+            GenericXLogAbort(gxlogState);
+            UnlockReleaseBuffer(buffer);
+            CHECK_FOR_INTERRUPTS();
+            continue;
+        }
+
+        /*
+         * Iterate over the tuples.  itup is the read position, itupPtr the
+         * write position; they diverge once the first tuple is removed.
+         */
+        itup = BloomPageGetTuple(&state, page, 1);
+        itupPtr = BloomPageGetTuple(&state, page, 1);
+        itupEnd = BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1);
+        while (itup < itupEnd)
+        {
+            /* Do we have to delete this tuple? */
+            if (callback(&itup->heapPtr, callback_state))
+            {
+                stats->tuples_removed += 1;
+                BloomPageGetOpaque(page)->maxoff--;
+            }
+            else
+            {
+                if (itupPtr != itup)
+                {
+                    /*
+                     * If we already deleted something before, we have to
+                     * move this tuple backward.
+                     */
+                    memmove((Pointer) itupPtr, (Pointer) itup,
+                            state.sizeOfBloomTuple);
+                }
+                stats->num_index_tuples++;
+                itupPtr = BloomPageGetNextTuple(&state, itupPtr);
+            }
+
+            itup = BloomPageGetNextTuple(&state, itup);
+        }
+
+        Assert(itupPtr == BloomPageGetTuple(&state, page, BloomPageGetMaxOffset(page) + 1));
+
+        /* Did this pass remove the page's last remaining tuple? */
+        pageBecomesEmpty = (itupPtr != itup &&
+                            itupPtr == BloomPageGetData(page));
+
+        /*
+         * Remember the page as an insertion target if it has free space,
+         * but not if it is about to be marked deleted below.  (Pages that
+         * were already deleted never get here.)
+         */
+        if (!pageBecomesEmpty &&
+            BloomPageGetFreeSpace(&state, page) > state.sizeOfBloomTuple &&
+            countPage < BloomMetaBlockN)
+            notFullPage[countPage++] = blkno;
+
+        /* Did we delete something? */
+        if (itupPtr != itup)
+        {
+            /* Is it empty page now? */
+            if (pageBecomesEmpty)
+                BloomPageSetDeleted(page);
+            /* Adjust pd_lower */
+            ((PageHeader) page)->pd_lower = (Pointer) itupPtr - page;
+            /* Finish WAL-logging */
+            GenericXLogFinish(gxlogState);
+        }
+        else
+        {
+            /* Didn't change anything: abort WAL-logging */
+            GenericXLogAbort(gxlogState);
+        }
+        UnlockReleaseBuffer(buffer);
+        CHECK_FOR_INTERRUPTS();
+    }
+
+    /* Publish the collected not-full pages in the metapage, if any */
+    if (countPage > 0)
+    {
+        BloomMetaPageData *metaData;
+
+        buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
+        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+        gxlogState = GenericXLogStart(index);
+        page = GenericXLogRegister(gxlogState, buffer, false);
+
+        metaData = BloomPageGetMeta(page);
+        memcpy(metaData->notFullPage, notFullPage, sizeof(FreeBlockNumberArray));
+        metaData->nStart = 0;
+        metaData->nEnd = countPage;
+
+        GenericXLogFinish(gxlogState);
+        UnlockReleaseBuffer(buffer);
+    }
+
+    return stats;
+}
+
+/*
+ * Post-VACUUM cleanup.
+ *
+ * Scans every page from BLOOM_HEAD_BLKNO onwards under a share lock.
+ * Deleted pages are recorded in the index free space map; live pages
+ * contribute their tuple count to stats->num_index_tuples.  Does nothing
+ * and returns stats unchanged when info->analyze_only is set.
+ *
+ * info: vacuum context; supplies the index relation and buffer strategy.
+ * stats: result struct from blbulkdelete, or NULL to allocate a new one.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+IndexBulkDeleteResult *
+blvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
+{
+    Relation    index = info->index;
+    BlockNumber npages,
+                blkno;
+    BlockNumber totFreePages;
+
+    if (info->analyze_only)
+        return stats;
+
+    if (stats == NULL)
+        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+
+    /*
+     * The loop below counts every live tuple itself, so start from zero:
+     * stats may already carry the count accumulated by blbulkdelete, and
+     * adding to it would double the total.
+     */
+    stats->num_index_tuples = 0;
+
+    /*
+     * Iterate over the pages: insert deleted pages into FSM and collect
+     * statistics.
+     */
+    npages = RelationGetNumberOfBlocks(index);
+    totFreePages = 0;
+    for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
+    {
+        Buffer      buffer;
+        Page        page;
+
+        vacuum_delay_point();
+
+        buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
+                                    RBM_NORMAL, info->strategy);
+        LockBuffer(buffer, BUFFER_LOCK_SHARE);
+        page = (Page) BufferGetPage(buffer);
+
+        if (BloomPageIsDeleted(page))
+        {
+            RecordFreeIndexPage(index, blkno);
+            totFreePages++;
+        }
+        else
+        {
+            /*
+             * Only num_index_tuples is a counter.  estimated_count is a
+             * boolean flag, so tuple counts must not be added to it.
+             */
+            stats->num_index_tuples += BloomPageGetMaxOffset(page);
+        }
+
+        UnlockReleaseBuffer(buffer);
+    }
+
+    IndexFreeSpaceMapVacuum(info->index);
+    stats->pages_free = totFreePages;
+    stats->num_pages = RelationGetNumberOfBlocks(index);
+
+    return stats;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * blvalidate.c
+ * Opclass validator for bloom.
+ *
+ * Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/bloom/blvalidate.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amvalidate.h"
+#include "access/htup_details.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+#include "bloom.h"
+
+/*
+ * Validator for a bloom opclass.
+ *
+ * Looks up the opclass and its opfamily, then checks every support function
+ * and operator registered in the opfamily, and finally verifies that the
+ * opclass itself provides all support functions 1 .. BLOOM_NPROC.
+ *
+ * Each problem found is reported at INFO level and makes the result false;
+ * checking continues so that all problems are reported in one pass.  A
+ * failed catalog cache lookup for the opclass or opfamily raises an ERROR.
+ *
+ * opclassoid: OID of the operator class to validate.
+ *
+ * Result: true if no problems were found, false otherwise.
+ */
+bool
+blvalidate(Oid opclassoid)
+{
+    bool        result = true;
+    HeapTuple   classtup;
+    Form_pg_opclass classform;
+    Oid         opfamilyoid;
+    Oid         opcintype;
+    Oid         opckeytype;
+    char       *opclassname;
+    HeapTuple   familytup;
+    Form_pg_opfamily familyform;
+    char       *opfamilyname;
+    CatCList   *proclist,
+               *oprlist;
+    List       *grouplist;
+    OpFamilyOpFuncGroup *opclassgroup;
+    int         i;
+    ListCell   *lc;
+
+    /* Fetch opclass information */
+    classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
+    if (!HeapTupleIsValid(classtup))
+        elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
+    classform = (Form_pg_opclass) GETSTRUCT(classtup);
+
+    opfamilyoid = classform->opcfamily;
+    opcintype = classform->opcintype;
+    opckeytype = classform->opckeytype;
+    /* No separate key type recorded: fall back to the input type */
+    if (!OidIsValid(opckeytype))
+        opckeytype = opcintype;
+    opclassname = NameStr(classform->opcname);
+
+    /* Fetch opfamily information */
+    familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid));
+    if (!HeapTupleIsValid(familytup))
+        elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid);
+    familyform = (Form_pg_opfamily) GETSTRUCT(familytup);
+
+    opfamilyname = NameStr(familyform->opfname);
+
+    /* Fetch all operators and support functions of the opfamily */
+    oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
+    proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
+
+    /* Check individual support functions */
+    for (i = 0; i < proclist->n_members; i++)
+    {
+        HeapTuple   proctup = &proclist->members[i]->tuple;
+        Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
+        bool        ok;
+
+        /*
+         * All bloom support functions should be registered with matching
+         * left/right types
+         */
+        if (procform->amproclefttype != procform->amprocrighttype)
+        {
+            ereport(INFO,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("bloom opfamily %s contains support procedure %s with cross-type registration",
+                            opfamilyname,
+                            format_procedure(procform->amproc))));
+            result = false;
+        }
+
+        /*
+         * We can't check signatures except within the specific opclass, since
+         * we need to know the associated opckeytype in many cases.
+         */
+        if (procform->amproclefttype != opcintype)
+            continue;
+
+        /* Check procedure numbers and function signatures */
+        switch (procform->amprocnum)
+        {
+            case BLOOM_HASH_PROC:
+                ok = check_amproc_signature(procform->amproc, INT4OID, false,
+                                            1, 1, opckeytype);
+                break;
+            default:
+                ereport(INFO,
+                        (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                         errmsg("bloom opfamily %s contains function %s with invalid support number %d",
+                                opfamilyname,
+                                format_procedure(procform->amproc),
+                                procform->amprocnum)));
+                result = false;
+                continue;       /* don't want additional message */
+        }
+
+        if (!ok)
+        {
+            /* This is the bloom validator, so name bloom in the message */
+            ereport(INFO,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("bloom opfamily %s contains function %s with wrong signature for support number %d",
+                            opfamilyname,
+                            format_procedure(procform->amproc),
+                            procform->amprocnum)));
+            result = false;
+        }
+    }
+
+    /* Check individual operators */
+    for (i = 0; i < oprlist->n_members; i++)
+    {
+        HeapTuple   oprtup = &oprlist->members[i]->tuple;
+        Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
+
+        /* Check it's allowed strategy for bloom */
+        if (oprform->amopstrategy < 1 ||
+            oprform->amopstrategy > BLOOM_NSTRATEGIES)
+        {
+            ereport(INFO,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("bloom opfamily %s contains operator %s with invalid strategy number %d",
+                            opfamilyname,
+                            format_operator(oprform->amopopr),
+                            oprform->amopstrategy)));
+            result = false;
+        }
+
+        /* bloom doesn't support ORDER BY operators */
+        if (oprform->amoppurpose != AMOP_SEARCH ||
+            OidIsValid(oprform->amopsortfamily))
+        {
+            ereport(INFO,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("bloom opfamily %s contains invalid ORDER BY specification for operator %s",
+                            opfamilyname,
+                            format_operator(oprform->amopopr))));
+            result = false;
+        }
+
+        /* Check operator signature --- same for all bloom strategies */
+        if (!check_amop_signature(oprform->amopopr, BOOLOID,
+                                  oprform->amoplefttype,
+                                  oprform->amoprighttype))
+        {
+            ereport(INFO,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("bloom opfamily %s contains operator %s with wrong signature",
+                            opfamilyname,
+                            format_operator(oprform->amopopr))));
+            result = false;
+        }
+    }
+
+    /* Now check for inconsistent groups of operators/functions */
+    grouplist = identify_opfamily_groups(oprlist, proclist);
+    opclassgroup = NULL;
+    foreach(lc, grouplist)
+    {
+        OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
+
+        /* Remember the group exactly matching the test opclass */
+        if (thisgroup->lefttype == opcintype &&
+            thisgroup->righttype == opcintype)
+            opclassgroup = thisgroup;
+
+        /*
+         * There is not a lot we can do to check the operator sets, since each
+         * bloom opclass is more or less a law unto itself, and some contain
+         * only operators that are binary-compatible with the opclass datatype
+         * (meaning that empty operator sets can be OK).  That case also means
+         * that we shouldn't insist on nonempty function sets except for the
+         * opclass's own group.
+         */
+    }
+
+    /*
+     * Check that the originally-named opclass is complete.  If no group
+     * matched the opclass at all, every support function is reported as
+     * missing.
+     */
+    for (i = 1; i <= BLOOM_NPROC; i++)
+    {
+        if (opclassgroup &&
+            (opclassgroup->functionset & (((uint64) 1) << i)) != 0)
+            continue;           /* got it */
+        ereport(INFO,
+                (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                 errmsg("bloom opclass %s is missing support function %d",
+                        opclassname, i)));
+        result = false;
+    }
+
+    ReleaseCatCacheList(proclist);
+    ReleaseCatCacheList(oprlist);
+    ReleaseSysCache(familytup);
+    ReleaseSysCache(classtup);
+
+    return result;
+}
--- /dev/null
+CREATE EXTENSION bloom;
+CREATE TABLE tst (
+ i int4,
+ t text
+);
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
+SET enable_seqscan=on;
+SET enable_bitmapscan=off;
+SET enable_indexscan=off;
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 588
+(1 row)
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
+ QUERY PLAN
+-------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tst
+ Recheck Cond: (i = 7)
+ -> Bitmap Index Scan on bloomidx
+ Index Cond: (i = 7)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
+ QUERY PLAN
+-------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tst
+ Recheck Cond: (t = '5'::text)
+ -> Bitmap Index Scan on bloomidx
+ Index Cond: (t = '5'::text)
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ QUERY PLAN
+---------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tst
+ Recheck Cond: ((i = 7) AND (t = '5'::text))
+ -> Bitmap Index Scan on bloomidx
+ Index Cond: ((i = 7) AND (t = '5'::text))
+(5 rows)
+
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 588
+(1 row)
+
+DELETE FROM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+VACUUM ANALYZE tst;
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 588
+(1 row)
+
+VACUUM FULL tst;
+SELECT count(*) FROM tst WHERE i = 7;
+ count
+-------
+ 10000
+(1 row)
+
+SELECT count(*) FROM tst WHERE t = '5';
+ count
+-------
+ 6264
+(1 row)
+
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+ count
+-------
+ 588
+(1 row)
+
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+RESET enable_indexscan;
--- /dev/null
+CREATE EXTENSION bloom;
+
+CREATE TABLE tst (
+ i int4,
+ t text
+);
+
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
+
+SET enable_seqscan=on;
+SET enable_bitmapscan=off;
+SET enable_indexscan=off;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
+EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+DELETE FROM tst;
+INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;
+VACUUM ANALYZE tst;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+VACUUM FULL tst;
+
+SELECT count(*) FROM tst WHERE i = 7;
+SELECT count(*) FROM tst WHERE t = '5';
+SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
+
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+RESET enable_indexscan;
--- /dev/null
+# Test generic xlog record work for bloom index replication.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 31;
+
+my $node_master;
+my $node_standby;
+
+# Run a few queries on both master and standby and check that their results
+# match.
+#
+# Takes one argument, $test_name, which is used as the prefix of the TAP test
+# description.  Emits exactly one test result per call.
+sub test_index_replay
+{
+ my ($test_name) = @_;
+
+ # Wait for standby to catch up: poll the master until its
+ # pg_stat_replication row for the standby reports a write_location at or
+ # past the master's current xlog location.
+ my $applname = $node_standby->name;
+ my $caughtup_query =
+ "SELECT pg_current_xlog_location() <= write_location FROM pg_stat_replication WHERE application_name = '$applname';";
+ $node_master->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby 1 to catch up";
+
+ # Sequential scans are disabled and bitmap/index scans enabled before the
+ # SELECTs, presumably so that the bloom index is what answers them.
+ my $queries = qq(SET enable_seqscan=off;
+SET enable_bitmapscan=on;
+SET enable_indexscan=on;
+SELECT * FROM tst WHERE i = 0;
+SELECT * FROM tst WHERE i = 3;
+SELECT * FROM tst WHERE t = 'b';
+SELECT * FROM tst WHERE t = 'f';
+SELECT * FROM tst WHERE i = 3 AND t = 'c';
+SELECT * FROM tst WHERE i = 7 AND t = 'e';
+);
+
+ # Run the same test queries on both nodes and compare their results
+ my $master_result = $node_master->psql("postgres", $queries);
+ my $standby_result = $node_standby->psql("postgres", $queries);
+
+ is($master_result, $standby_result, "$test_name: query result matches");
+}
+
+# Initialize master node with streaming replication allowed
+$node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_master->backup($backup_name);
+
+# Create streaming standby linking to master, initialized from that backup
+$node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+$node_standby->start;
+
+# Create a table with 100000 rows and a bloom index on it, on the master only;
+# the standby has to receive all of this through replication.
+$node_master->psql("postgres", "CREATE EXTENSION bloom;");
+$node_master->psql("postgres", "CREATE TABLE tst (i int4, t text);");
+$node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,100000) i;");
+$node_master->psql("postgres", "CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);");
+
+# Test that queries give same result
+test_index_replay('initial');
+
+# Run 10 cycles of table modification. Run test queries after each modification.
+# Each cycle performs three checks (delete, vacuum, insert), so together with
+# the initial check this gives 1 + 10 * 3 = 31 tests.
+# Note: column i only holds values 0..9 (i%10), so the DELETE in the last
+# cycle (i = 10) matches no rows.
+for my $i (1..10)
+{
+ $node_master->psql("postgres", "DELETE FROM tst WHERE i = $i;");
+ test_index_replay("delete $i");
+ $node_master->psql("postgres", "VACUUM tst;");
+ test_index_replay("vacuum $i");
+ # Insert the next batch of 10000 previously unused series values
+ my ($start, $end) = (100001 + ($i - 1) * 10000, 100000 + $i * 10000);
+ $node_master->psql("postgres", "INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series($start,$end) i;");
+ test_index_replay("insert $i");
+}
--- /dev/null
+<!-- doc/src/sgml/bloom.sgml -->
+
+<sect1 id="bloom" xreflabel="bloom">
+ <title>bloom</title>
+
+ <indexterm zone="bloom">
+ <primary>bloom</primary>
+ </indexterm>
+
+ <para>
+ <literal>bloom</> is a contrib module which implements an index access
+ method. It serves as an example of a custom access method and of generic
+ WAL record usage, but it is also useful in its own right.
+ </para>
+
+ <sect2>
+ <title>Introduction</title>
+
+ <para>
+ The implementation of a
+ <ulink url="http://en.wikipedia.org/wiki/Bloom_filter">Bloom filter</ulink>
+ allows fast exclusion of non-candidate tuples.
+ Since a signature is a lossy representation of all indexed attributes,
+ search results must be rechecked using heap information.
+ The user can specify the signature length (in uint16 units, default is 5)
+ and the number of bits that can be set per attribute
+ (1 &lt; colN &lt; 2048).
+ </para>
+
+ <para>
+ This index is useful if a table has many attributes and queries can include
+ arbitrary combinations of them. A traditional <literal>btree</> index is
+ faster than a bloom index, but it would require too many indexes to support
+ all possible queries, while only one bloom index is needed. A bloom index
+ supports only equality comparison. Since it's a signature file, not a tree,
+ it must always be read fully, but sequentially, so index search performance
+ is constant and doesn't depend on the query.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Parameters</title>
+
+ <para>
+ <literal>bloom</> indexes accept the following parameters in the
+ <literal>WITH</> clause.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><literal>length</></term>
+ <listitem>
+ <para>
+ Length of signature in uint16 type values
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term><literal>col1 — col16</></term>
+ <listitem>
+ <para>
+ Number of bits for corresponding column
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </sect2>
+
+ <sect2>
+ <title>Examples</title>
+
+ <para>
+ Example of index definition is given below.
+ </para>
+
+<programlisting>
+CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
+ WITH (length=5, col1=2, col2=2, col3=4);
+</programlisting>
+
+ <para>
+ Here, we create a bloom index with a signature length of 80 bits, with
+ attributes i1 and i2 mapped to 2 bits each and attribute i3 mapped to 4 bits.
+ </para>
+
+ <para>
+ Example of index definition and usage is given below.
+ </para>
+
+<programlisting>
+CREATE TABLE tbloom AS
+SELECT
+ random()::int as i1,
+ random()::int as i2,
+ random()::int as i3,
+ random()::int as i4,
+ random()::int as i5,
+ random()::int as i6,
+ random()::int as i7,
+ random()::int as i8,
+ random()::int as i9,
+ random()::int as i10,
+ random()::int as i11,
+ random()::int as i12,
+ random()::int as i13
+FROM
+ generate_series(1,1000);
+CREATE INDEX bloomidx ON tbloom USING
+ bloom (i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12);
+SELECT pg_relation_size('bloomidx');
+CREATE index btree_idx ON tbloom(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12);
+SELECT pg_relation_size('btree_idx');
+</programlisting>
+
+<programlisting>
+=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------
+ Bitmap Heap Scan on tbloom (cost=1.50..5.52 rows=1 width=52) (actual time=0.057..0.057 rows=0 loops=1)
+ Recheck Cond: ((i2 = 20) AND (i10 = 15))
+ -> Bitmap Index Scan on bloomidx (cost=0.00..1.50 rows=1 width=0) (actual time=0.041..0.041 rows=9 loops=1)
+ Index Cond: ((i2 = 20) AND (i10 = 15))
+ Total runtime: 0.081 ms
+(5 rows)
+</programlisting>
+
+ <para>
+ Seqscan is slow.
+ </para>
+
+<programlisting>
+=# SET enable_bitmapscan = off;
+=# SET enable_indexscan = off;
+=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------
+ Seq Scan on tbloom (cost=0.00..25.00 rows=1 width=52) (actual time=0.162..0.162 rows=0 loops=1)
+ Filter: ((i2 = 20) AND (i10 = 15))
+ Total runtime: 0.181 ms
+(3 rows)
+</programlisting>
+
+ <para>
+ The btree index will not be used for this query.
+ </para>
+
+<programlisting>
+=# DROP INDEX bloomidx;
+=# CREATE INDEX btree_idx ON tbloom(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12);
+=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 20 AND i10 = 15;
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------
+ Seq Scan on tbloom (cost=0.00..25.00 rows=1 width=52) (actual time=0.210..0.210 rows=0 loops=1)
+ Filter: ((i2 = 20) AND (i10 = 15))
+ Total runtime: 0.250 ms
+(3 rows)
+</programlisting>
+ </sect2>
+
+ <sect2>
+ <title>Opclass interface</title>
+
+ <para>
+ The bloom opclass interface is simple. It requires one support function:
+ a hash function for the indexed datatype. It provides one search operator:
+ the equality operator. The example below shows the <literal>opclass</>
+ definition for the <literal>text</> datatype.
+ </para>
+
+<programlisting>
+CREATE OPERATOR CLASS text_ops
+DEFAULT FOR TYPE text USING bloom AS
+ OPERATOR 1 =(text, text),
+ FUNCTION 1 hashtext(text);
+</programlisting>
+ </sect2>
+
+ <sect2>
+ <title>Limitation</title>
+ <para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ For now, only opclasses for <literal>int4</> and <literal>text</> come
+ with the contrib module. However, users may define more of them.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Only the <literal>=</literal> operator is supported for search now. But
+ it's possible to add support for arrays with contains and intersection
+ operations in the future.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Authors</title>
+
+ <para>
+ Teodor Sigaev <email>teodor@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
+ </para>
+
+ <para>
+ Alexander Korotkov <email>a.korotkov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
+ </para>
+
+ <para>
+ Oleg Bartunov <email>obartunov@postgrespro.ru</email>, Postgres Professional, Moscow, Russia
+ </para>
+ </sect2>
+
+</sect1>
&adminpack;
&auth-delay;
&auto-explain;
+ &bloom;
&btree-gin;
&btree-gist;
&chkpass;
<!ENTITY adminpack SYSTEM "adminpack.sgml">
<!ENTITY auth-delay SYSTEM "auth-delay.sgml">
<!ENTITY auto-explain SYSTEM "auto-explain.sgml">
+<!ENTITY bloom SYSTEM "bloom.sgml">
<!ENTITY btree-gin SYSTEM "btree-gin.sgml">
<!ENTITY btree-gist SYSTEM "btree-gist.sgml">
<!ENTITY chkpass SYSTEM "chkpass.sgml">