1 /*-------------------------------------------------------------------------
4 * utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/ginutil.c
12 *-------------------------------------------------------------------------
17 #include "access/gin_private.h"
18 #include "access/reloptions.h"
19 #include "access/xloginsert.h"
20 #include "catalog/pg_collation.h"
21 #include "catalog/pg_type.h"
22 #include "miscadmin.h"
23 #include "storage/indexfsm.h"
24 #include "storage/lmgr.h"
28 * initGinState: fill in an empty GinState struct to describe the index
30 * Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
33 initGinState(GinState *state, Relation index)
35 TupleDesc origTupdesc = RelationGetDescr(index);
38 MemSet(state, 0, sizeof(GinState));
41 state->oneCol = (origTupdesc->natts == 1) ? true : false;
42 state->origTupdesc = origTupdesc;
44 for (i = 0; i < origTupdesc->natts; i++)
47 state->tupdesc[i] = state->origTupdesc;
50 state->tupdesc[i] = CreateTemplateTupleDesc(2, false);
52 TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
54 TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
55 origTupdesc->attrs[i]->atttypid,
56 origTupdesc->attrs[i]->atttypmod,
57 origTupdesc->attrs[i]->attndims);
58 TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
59 origTupdesc->attrs[i]->attcollation);
62 fmgr_info_copy(&(state->compareFn[i]),
63 index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
64 CurrentMemoryContext);
65 fmgr_info_copy(&(state->extractValueFn[i]),
66 index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
67 CurrentMemoryContext);
68 fmgr_info_copy(&(state->extractQueryFn[i]),
69 index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
70 CurrentMemoryContext);
73 * Check opclass capability to do tri-state or binary logic consistent
76 if (index_getprocid(index, i + 1, GIN_TRICONSISTENT_PROC) != InvalidOid)
78 fmgr_info_copy(&(state->triConsistentFn[i]),
79 index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC),
80 CurrentMemoryContext);
83 if (index_getprocid(index, i + 1, GIN_CONSISTENT_PROC) != InvalidOid)
85 fmgr_info_copy(&(state->consistentFn[i]),
86 index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
87 CurrentMemoryContext);
90 if (state->consistentFn[i].fn_oid == InvalidOid &&
91 state->triConsistentFn[i].fn_oid == InvalidOid)
93 elog(ERROR, "missing GIN support function (%d or %d) for attribute %d of index \"%s\"",
94 GIN_CONSISTENT_PROC, GIN_TRICONSISTENT_PROC,
95 i + 1, RelationGetRelationName(index));
99 * Check opclass capability to do partial match.
101 if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
103 fmgr_info_copy(&(state->comparePartialFn[i]),
104 index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
105 CurrentMemoryContext);
106 state->canPartialMatch[i] = true;
110 state->canPartialMatch[i] = false;
114 * If the index column has a specified collation, we should honor that
115 * while doing comparisons. However, we may have a collatable storage
116 * type for a noncollatable indexed data type (for instance, hstore
117 * uses text index entries). If there's no index collation then
118 * specify default collation in case the support functions need
119 * collation. This is harmless if the support functions don't care
120 * about collation, so we just do it unconditionally. (We could
121 * alternatively call get_typcollation, but that seems like expensive
122 * overkill --- there aren't going to be any cases where a GIN storage
123 * type has a nondefault collation.)
125 if (OidIsValid(index->rd_indcollation[i]))
126 state->supportCollation[i] = index->rd_indcollation[i];
128 state->supportCollation[i] = DEFAULT_COLLATION_OID;
133 * Extract attribute (column) number of stored entry from GIN tuple
136 gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
140 if (ginstate->oneCol)
142 /* column number is not stored explicitly */
143 colN = FirstOffsetNumber;
151 * First attribute is always int16, so we can safely use any tuple
152 * descriptor to obtain first attribute of tuple
154 res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
158 colN = DatumGetUInt16(res);
159 Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts);
166 * Extract stored datum (and possible null category) from GIN tuple
169 gintuple_get_key(GinState *ginstate, IndexTuple tuple,
170 GinNullCategory *category)
175 if (ginstate->oneCol)
178 * Single column index doesn't store attribute numbers in tuples
180 res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc,
186 * Since the datum type depends on which index column it's from, we
187 * must be careful to use the right tuple descriptor here.
189 OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple);
191 res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber),
192 ginstate->tupdesc[colN - 1],
197 *category = GinGetNullCategory(tuple, ginstate);
199 *category = GIN_CAT_NORM_KEY;
205 * Allocate a new page (either by recycling, or by extending the index file)
206 * The returned buffer is already pinned and exclusive-locked
207 * Caller is responsible for initializing the page by calling GinInitBuffer
210 GinNewBuffer(Relation index)
215 /* First, try to get a page from FSM */
218 BlockNumber blkno = GetFreeIndexPage(index);
220 if (blkno == InvalidBlockNumber)
223 buffer = ReadBuffer(index, blkno);
226 * We have to guard against the possibility that someone else already
227 * recycled this page; the buffer may be locked if so.
229 if (ConditionalLockBuffer(buffer))
231 Page page = BufferGetPage(buffer);
234 return buffer; /* OK to use, if never initialized */
236 if (GinPageIsDeleted(page))
237 return buffer; /* OK to use */
239 LockBuffer(buffer, GIN_UNLOCK);
242 /* Can't use it, so release buffer and try again */
243 ReleaseBuffer(buffer);
246 /* Must extend the file */
247 needLock = !RELATION_IS_LOCAL(index);
249 LockRelationForExtension(index, ExclusiveLock);
251 buffer = ReadBuffer(index, P_NEW);
252 LockBuffer(buffer, GIN_EXCLUSIVE);
255 UnlockRelationForExtension(index, ExclusiveLock);
261 GinInitPage(Page page, uint32 f, Size pageSize)
263 GinPageOpaque opaque;
265 PageInit(page, pageSize, sizeof(GinPageOpaqueData));
267 opaque = GinPageGetOpaque(page);
268 memset(opaque, 0, sizeof(GinPageOpaqueData));
270 opaque->rightlink = InvalidBlockNumber;
274 GinInitBuffer(Buffer b, uint32 f)
276 GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
280 GinInitMetabuffer(Buffer b)
282 GinMetaPageData *metadata;
283 Page page = BufferGetPage(b);
285 GinInitPage(page, GIN_META, BufferGetPageSize(b));
287 metadata = GinPageGetMeta(page);
289 metadata->head = metadata->tail = InvalidBlockNumber;
290 metadata->tailFreeSize = 0;
291 metadata->nPendingPages = 0;
292 metadata->nPendingHeapTuples = 0;
293 metadata->nTotalPages = 0;
294 metadata->nEntryPages = 0;
295 metadata->nDataPages = 0;
296 metadata->nEntries = 0;
297 metadata->ginVersion = GIN_CURRENT_VERSION;
301 * Compare two keys of the same index column
304 ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
305 Datum a, GinNullCategory categorya,
306 Datum b, GinNullCategory categoryb)
308 /* if not of same null category, sort by that first */
309 if (categorya != categoryb)
310 return (categorya < categoryb) ? -1 : 1;
312 /* all null items in same category are equal */
313 if (categorya != GIN_CAT_NORM_KEY)
316 /* both not null, so safe to call the compareFn */
317 return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1],
318 ginstate->supportCollation[attnum - 1],
323 * Compare two keys of possibly different index columns
326 ginCompareAttEntries(GinState *ginstate,
327 OffsetNumber attnuma, Datum a, GinNullCategory categorya,
328 OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
330 /* attribute number is the first sort key */
331 if (attnuma != attnumb)
332 return (attnuma < attnumb) ? -1 : 1;
334 return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb);
339 * Support for sorting key datums in ginExtractEntries
341 * Note: we only have to worry about null and not-null keys here;
342 * ginExtractEntries never generates more than one placeholder null,
343 * so it doesn't have to sort those.
353 FmgrInfo *cmpDatumFunc;
359 cmpEntries(const void *a, const void *b, void *arg)
361 const keyEntryData *aa = (const keyEntryData *) a;
362 const keyEntryData *bb = (const keyEntryData *) b;
363 cmpEntriesArg *data = (cmpEntriesArg *) arg;
369 res = 0; /* NULL "=" NULL */
371 res = 1; /* NULL ">" not-NULL */
374 res = -1; /* not-NULL "<" NULL */
376 res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
378 aa->datum, bb->datum));
381 * Detect if we have any duplicates. If there are equal keys, qsort must
382 * compare them at some point, else it wouldn't know whether one should go
383 * before or after the other.
386 data->haveDups = true;
393 * Extract the index key values from an indexable item
395 * The resulting key values are sorted, and any duplicates are removed.
396 * This avoids generating redundant index entries.
399 ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
400 Datum value, bool isNull,
401 int32 *nentries, GinNullCategory **categories)
408 * We don't call the extractValueFn on a null item. Instead generate a
414 entries = (Datum *) palloc(sizeof(Datum));
415 entries[0] = (Datum) 0;
416 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
417 (*categories)[0] = GIN_CAT_NULL_ITEM;
421 /* OK, call the opclass's extractValueFn */
422 nullFlags = NULL; /* in case extractValue doesn't set it */
424 DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
425 ginstate->supportCollation[attnum - 1],
427 PointerGetDatum(nentries),
428 PointerGetDatum(&nullFlags)));
431 * Generate a placeholder if the item contained no keys.
433 if (entries == NULL || *nentries <= 0)
436 entries = (Datum *) palloc(sizeof(Datum));
437 entries[0] = (Datum) 0;
438 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
439 (*categories)[0] = GIN_CAT_EMPTY_ITEM;
444 * If the extractValueFn didn't create a nullFlags array, create one,
445 * assuming that everything's non-null. Otherwise, run through the array
446 * and make sure each value is exactly 0 or 1; this ensures binary
447 * compatibility with the GinNullCategory representation.
449 if (nullFlags == NULL)
450 nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
453 for (i = 0; i < *nentries; i++)
454 nullFlags[i] = (nullFlags[i] ? true : false);
456 /* now we can use the nullFlags as category codes */
457 *categories = (GinNullCategory *) nullFlags;
460 * If there's more than one key, sort and unique-ify.
462 * XXX Using qsort here is notationally painful, and the overhead is
463 * pretty bad too. For small numbers of keys it'd likely be better to use
464 * a simple insertion sort.
468 keyEntryData *keydata;
471 keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData));
472 for (i = 0; i < *nentries; i++)
474 keydata[i].datum = entries[i];
475 keydata[i].isnull = nullFlags[i];
478 arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
479 arg.collation = ginstate->supportCollation[attnum - 1];
480 arg.haveDups = false;
481 qsort_arg(keydata, *nentries, sizeof(keyEntryData),
482 cmpEntries, (void *) &arg);
486 /* there are duplicates, must get rid of 'em */
489 entries[0] = keydata[0].datum;
490 nullFlags[0] = keydata[0].isnull;
492 for (i = 1; i < *nentries; i++)
494 if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
496 entries[j] = keydata[i].datum;
497 nullFlags[j] = keydata[i].isnull;
505 /* easy, no duplicates */
506 for (i = 0; i < *nentries; i++)
508 entries[i] = keydata[i].datum;
509 nullFlags[i] = keydata[i].isnull;
520 ginoptions(PG_FUNCTION_ARGS)
522 Datum reloptions = PG_GETARG_DATUM(0);
523 bool validate = PG_GETARG_BOOL(1);
524 relopt_value *options;
527 static const relopt_parse_elt tab[] = {
528 {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)},
529 {"gin_pending_list_limit", RELOPT_TYPE_INT, offsetof(GinOptions,
530 pendingListCleanupSize)}
533 options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
536 /* if none set, we're done */
540 rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
542 fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
543 validate, tab, lengthof(tab));
547 PG_RETURN_BYTEA_P(rdopts);
551 * Fetch index's statistical data into *stats
553 * Note: in the result, nPendingPages can be trusted to be up-to-date,
554 * as can ginVersion; but the other fields are as of the last VACUUM.
557 ginGetStats(Relation index, GinStatsData *stats)
561 GinMetaPageData *metadata;
563 metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
564 LockBuffer(metabuffer, GIN_SHARE);
565 metapage = BufferGetPage(metabuffer);
566 metadata = GinPageGetMeta(metapage);
568 stats->nPendingPages = metadata->nPendingPages;
569 stats->nTotalPages = metadata->nTotalPages;
570 stats->nEntryPages = metadata->nEntryPages;
571 stats->nDataPages = metadata->nDataPages;
572 stats->nEntries = metadata->nEntries;
573 stats->ginVersion = metadata->ginVersion;
575 UnlockReleaseBuffer(metabuffer);
579 * Write the given statistics to the index's metapage
581 * Note: nPendingPages and ginVersion are *not* copied over
584 ginUpdateStats(Relation index, const GinStatsData *stats)
588 GinMetaPageData *metadata;
590 metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
591 LockBuffer(metabuffer, GIN_EXCLUSIVE);
592 metapage = BufferGetPage(metabuffer);
593 metadata = GinPageGetMeta(metapage);
595 START_CRIT_SECTION();
597 metadata->nTotalPages = stats->nTotalPages;
598 metadata->nEntryPages = stats->nEntryPages;
599 metadata->nDataPages = stats->nDataPages;
600 metadata->nEntries = stats->nEntries;
602 MarkBufferDirty(metabuffer);
604 if (RelationNeedsWAL(index))
607 ginxlogUpdateMeta data;
609 data.node = index->rd_node;
611 data.newRightlink = data.prevTail = InvalidBlockNumber;
612 memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
615 XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
616 XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
618 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
619 PageSetLSN(metapage, recptr);
622 UnlockReleaseBuffer(metabuffer);