1 /*-------------------------------------------------------------------------
4 * Bloom index utilities.
6 * Portions Copyright (c) 2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1990-1993, Regents of the University of California
10 * contrib/bloom/blutils.c
12 *-------------------------------------------------------------------------
16 #include "access/amapi.h"
17 #include "access/generic_xlog.h"
18 #include "catalog/index.h"
19 #include "storage/lmgr.h"
20 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "storage/indexfsm.h"
23 #include "utils/memutils.h"
24 #include "access/reloptions.h"
25 #include "storage/freespace.h"
26 #include "storage/indexfsm.h"
30 /* Signature dealing macros */
31 #define BITSIGNTYPE (BITS_PER_BYTE * sizeof(SignType))
32 #define GETWORD(x,i) ( *( (SignType*)(x) + (int)( (i) / BITSIGNTYPE ) ) )
33 #define CLRBIT(x,i) GETWORD(x,i) &= ~( 0x01 << ( (i) % BITSIGNTYPE ) )
34 #define SETBIT(x,i) GETWORD(x,i) |= ( 0x01 << ( (i) % BITSIGNTYPE ) )
35 #define GETBIT(x,i) ( (GETWORD(x,i) >> ( (i) % BITSIGNTYPE )) & 0x01 )
37 PG_FUNCTION_INFO_V1(blhandler);
39 /* Kind of relation optioms for bloom index */
40 static relopt_kind bl_relopt_kind;
42 static int32 myRand(void);
43 static void mySrand(uint32 seed);
46 * Module initialize function: initilized relation options.
54 bl_relopt_kind = add_reloption_kind();
56 add_int_reloption(bl_relopt_kind, "length",
57 "Length of signature in uint16 type", 5, 1, 256);
59 for (i = 0; i < INDEX_MAX_KEYS; i++)
61 snprintf(buf, 16, "col%d", i + 1);
62 add_int_reloption(bl_relopt_kind, buf,
63 "Number of bits for corresponding column", 2, 1, 2048);
68 * Bloom handler function: return IndexAmRoutine with access method parameters
72 blhandler(PG_FUNCTION_ARGS)
74 IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
76 amroutine->amstrategies = 1;
77 amroutine->amsupport = 1;
78 amroutine->amcanorder = false;
79 amroutine->amcanorderbyop = false;
80 amroutine->amcanbackward = false;
81 amroutine->amcanunique = false;
82 amroutine->amcanmulticol = true;
83 amroutine->amoptionalkey = true;
84 amroutine->amsearcharray = false;
85 amroutine->amsearchnulls = false;
86 amroutine->amstorage = false;
87 amroutine->amclusterable = false;
88 amroutine->ampredlocks = false;
89 amroutine->amkeytype = 0;
91 amroutine->aminsert = blinsert;
92 amroutine->ambeginscan = blbeginscan;
93 amroutine->amgettuple = NULL;
94 amroutine->amgetbitmap = blgetbitmap;
95 amroutine->amrescan = blrescan;
96 amroutine->amendscan = blendscan;
97 amroutine->ammarkpos = NULL;
98 amroutine->amrestrpos = NULL;
99 amroutine->ambuild = blbuild;
100 amroutine->ambuildempty = blbuildempty;
101 amroutine->ambulkdelete = blbulkdelete;
102 amroutine->amvacuumcleanup = blvacuumcleanup;
103 amroutine->amcanreturn = NULL;
104 amroutine->amcostestimate = blcostestimate;
105 amroutine->amoptions = bloptions;
106 amroutine->amvalidate = blvalidate;
108 PG_RETURN_POINTER(amroutine);
112 * Fill BloomState structure for particular index.
115 initBloomState(BloomState *state, Relation index)
119 state->nColumns = index->rd_att->natts;
121 /* Initialize hash function for each attribute */
122 for (i = 0; i < index->rd_att->natts; i++)
124 fmgr_info_copy(&(state->hashFn[i]),
125 index_getprocinfo(index, i + 1, BLOOM_HASH_PROC),
126 CurrentMemoryContext);
129 /* Initialize amcache if needed with options from metapage */
130 if (!index->rd_amcache)
134 BloomMetaPageData *meta;
137 opts = MemoryContextAlloc(index->rd_indexcxt, sizeof(BloomOptions));
139 buffer = ReadBuffer(index, BLOOM_METAPAGE_BLKNO);
140 LockBuffer(buffer, BUFFER_LOCK_SHARE);
142 page = BufferGetPage(buffer);
144 if (!BloomPageIsMeta(page))
145 elog(ERROR, "Relation is not a bloom index");
146 meta = BloomPageGetMeta(BufferGetPage(buffer));
148 if (meta->magickNumber != BLOOM_MAGICK_NUMBER)
149 elog(ERROR, "Relation is not a bloom index");
153 UnlockReleaseBuffer(buffer);
155 index->rd_amcache = (void *) opts;
158 memcpy(&state->opts, index->rd_amcache, sizeof(state->opts));
159 state->sizeOfBloomTuple = BLOOMTUPLEHDRSZ +
160 sizeof(SignType) * state->opts.bloomLength;
164 * Random generator copied from FreeBSD. Using own random generator here for
167 * 1) In this case random numbers are used for on-disk storage. Usage of
168 * PostgreSQL number generator would obstruct it from all possible changes.
169 * 2) Changing seed of PostgreSQL random generator would be undesirable side
178 * Compute x = (7^5 * x) mod (2^31 - 1)
179 * without overflowing 31 bits:
180 * (2^31 - 1) = 127773 * (7^5) + 2836
181 * From "Random number generators: good ones are hard to find",
182 * Park and Miller, Communications of the ACM, vol. 31, no. 10,
183 * October 1988, p. 1195.
188 /* Must be in [1, 0x7ffffffe] range at this point. */
191 x = 16807 * lo - 2836 * hi;
195 /* Transform to [0, 0x7ffffffd] range. */
203 /* Transform to [1, 0x7ffffffe] range. */
204 next = (next % 0x7ffffffe) + 1;
208 * Add bits of given value to the signature.
211 signValue(BloomState *state, SignType *sign, Datum value, int attno)
218 * init generator with "column's" number to get "hashed" seed for new
219 * value. We don't want to map the same numbers from different columns
220 * into the same bits!
225 * Init hash sequence to map our value into bits. the same values in
226 * different columns will be mapped into different bits because of step
229 hashVal = DatumGetInt32(FunctionCall1(&state->hashFn[attno], value));
230 mySrand(hashVal ^ myRand());
232 for (j = 0; j < state->opts.bitSize[attno]; j++)
234 /* prevent mutiple evaluation */
235 nBit = myRand() % (state->opts.bloomLength * BITSIGNTYPE);
241 * Make bloom tuple from values.
244 BloomFormTuple(BloomState *state, ItemPointer iptr, Datum *values, bool *isnull)
247 BloomTuple *res = (BloomTuple *) palloc0(state->sizeOfBloomTuple);
249 res->heapPtr = *iptr;
251 /* Blooming each column */
252 for (i = 0; i < state->nColumns; i++)
258 signValue(state, res->sign, values[i], i);
265 * Add new bloom tuple to the page. Returns true if new tuple was successfully
266 * added to the page. Returns false if it doesn't fit the page.
269 BloomPageAddItem(BloomState *state, Page page, BloomTuple *tuple)
272 BloomPageOpaque opaque;
275 /* Does new tuple fit the page */
276 if (BloomPageGetFreeSpace(state, page) < state->sizeOfBloomTuple)
279 /* Copy new tuple to the end of page */
280 opaque = BloomPageGetOpaque(page);
281 itup = BloomPageGetTuple(state, page, opaque->maxoff + 1);
282 memcpy((Pointer) itup, (Pointer) tuple, state->sizeOfBloomTuple);
284 /* Adjust maxoff and pd_lower */
286 ptr = (Pointer) BloomPageGetTuple(state, page, opaque->maxoff + 1);
287 ((PageHeader) page)->pd_lower = ptr - page;
293 * Allocate a new page (either by recycling, or by extending the index file)
294 * The returned buffer is already pinned and exclusive-locked
295 * Caller is responsible for initializing the page by calling BloomInitBuffer
298 BloomNewBuffer(Relation index)
303 /* First, try to get a page from FSM */
306 BlockNumber blkno = GetFreeIndexPage(index);
308 if (blkno == InvalidBlockNumber)
311 buffer = ReadBuffer(index, blkno);
314 * We have to guard against the possibility that someone else already
315 * recycled this page; the buffer may be locked if so.
317 if (ConditionalLockBuffer(buffer))
319 Page page = BufferGetPage(buffer);
322 return buffer; /* OK to use, if never initialized */
324 if (BloomPageIsDeleted(page))
325 return buffer; /* OK to use */
327 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
330 /* Can't use it, so release buffer and try again */
331 ReleaseBuffer(buffer);
334 /* Must extend the file */
335 needLock = !RELATION_IS_LOCAL(index);
337 LockRelationForExtension(index, ExclusiveLock);
339 buffer = ReadBuffer(index, P_NEW);
340 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
343 UnlockRelationForExtension(index, ExclusiveLock);
349 * Initialize bloom page.
352 BloomInitPage(Page page, uint16 flags)
354 BloomPageOpaque opaque;
356 PageInit(page, BLCKSZ, sizeof(BloomPageOpaqueData));
358 opaque = BloomPageGetOpaque(page);
359 memset(opaque, 0, sizeof(BloomPageOpaqueData));
360 opaque->flags = flags;
361 opaque->bloom_page_id = BLOOM_PAGE_ID;
365 * Adjust options of bloom index.
368 adjustBloomOptions(BloomOptions *opts)
372 /* Default length of bloom filter is 5 of 16-bit integers */
373 if (opts->bloomLength <= 0)
374 opts->bloomLength = 5;
375 else if (opts->bloomLength > MAX_BLOOM_LENGTH)
377 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
378 errmsg("length of bloom signature (%d) is greater than maximum %d",
379 opts->bloomLength, MAX_BLOOM_LENGTH)));
381 /* Check singnature length */
382 for (i = 0; i < INDEX_MAX_KEYS; i++)
385 * Zero and negative number of bits is meaningless. Also setting
386 * more bits than signature have seems useless. Replace both cases
387 * with 2 bits default.
389 if (opts->bitSize[i] <= 0
390 || opts->bitSize[i] >= opts->bloomLength * sizeof(SignType) * BITS_PER_BYTE)
391 opts->bitSize[i] = 2;
396 * Initialize metapage for bloom index.
399 BloomInitMetapage(Relation index)
403 BloomMetaPageData *metadata;
404 GenericXLogState *state;
407 * Make a new buffer, since it first buffer it should be associated with
408 * block number 0 (BLOOM_METAPAGE_BLKNO).
410 metaBuffer = BloomNewBuffer(index);
411 Assert(BufferGetBlockNumber(metaBuffer) == BLOOM_METAPAGE_BLKNO);
413 /* Initialize bloom index options */
414 if (!index->rd_options)
415 index->rd_options = palloc0(sizeof(BloomOptions));
416 adjustBloomOptions((BloomOptions *) index->rd_options);
418 /* Initialize contents of meta page */
419 state = GenericXLogStart(index);
420 metaPage = GenericXLogRegisterBuffer(state, metaBuffer, GENERIC_XLOG_FULL_IMAGE);
422 BloomInitPage(metaPage, BLOOM_META);
423 metadata = BloomPageGetMeta(metaPage);
424 memset(metadata, 0, sizeof(BloomMetaPageData));
425 metadata->magickNumber = BLOOM_MAGICK_NUMBER;
426 metadata->opts = *((BloomOptions *) index->rd_options);
427 ((PageHeader) metaPage)->pd_lower += sizeof(BloomMetaPageData);
429 GenericXLogFinish(state);
430 UnlockReleaseBuffer(metaBuffer);
434 * Initialize options for bloom index.
437 bloptions(Datum reloptions, bool validate)
439 relopt_value *options;
441 BloomOptions *rdopts;
442 relopt_parse_elt tab[INDEX_MAX_KEYS + 1];
446 /* Option for length of signature */
447 tab[0].optname = "length";
448 tab[0].opttype = RELOPT_TYPE_INT;
449 tab[0].offset = offsetof(BloomOptions, bloomLength);
451 /* Number of bits for each of possible columns: col1, col2, ... */
452 for (i = 0; i < INDEX_MAX_KEYS; i++)
454 snprintf(buf, sizeof(buf), "col%d", i + 1);
455 tab[i + 1].optname = pstrdup(buf);
456 tab[i + 1].opttype = RELOPT_TYPE_INT;
457 tab[i + 1].offset = offsetof(BloomOptions, bitSize[i]);
460 options = parseRelOptions(reloptions, validate, bl_relopt_kind, &numoptions);
461 rdopts = allocateReloptStruct(sizeof(BloomOptions), options, numoptions);
462 fillRelOptions((void *) rdopts, sizeof(BloomOptions), options, numoptions,
463 validate, tab, INDEX_MAX_KEYS + 1);
465 adjustBloomOptions(rdopts);
467 return (bytea *) rdopts;