]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/ginutil.c
Rewrite the way GIN posting lists are packed on a page, to reduce WAL volume.
[postgresql] / src / backend / access / gin / ginutil.c
1 /*-------------------------------------------------------------------------
2  *
3  * ginutil.c
4  *        utilities routines for the postgres inverted index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                      src/backend/access/gin/ginutil.c
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "access/gin_private.h"
18 #include "access/reloptions.h"
19 #include "catalog/pg_collation.h"
20 #include "catalog/pg_type.h"
21 #include "miscadmin.h"
22 #include "storage/indexfsm.h"
23 #include "storage/lmgr.h"
24
25
26 /*
27  * initGinState: fill in an empty GinState struct to describe the index
28  *
29  * Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
30  */
31 void
32 initGinState(GinState *state, Relation index)
33 {
34         TupleDesc       origTupdesc = RelationGetDescr(index);
35         int                     i;
36
37         MemSet(state, 0, sizeof(GinState));
38
39         state->index = index;
40         state->oneCol = (origTupdesc->natts == 1) ? true : false;
41         state->origTupdesc = origTupdesc;
42
43         for (i = 0; i < origTupdesc->natts; i++)
44         {
45                 if (state->oneCol)
46                         state->tupdesc[i] = state->origTupdesc;
47                 else
48                 {
49                         state->tupdesc[i] = CreateTemplateTupleDesc(2, false);
50
51                         TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
52                                                            INT2OID, -1, 0);
53                         TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
54                                                            origTupdesc->attrs[i]->atttypid,
55                                                            origTupdesc->attrs[i]->atttypmod,
56                                                            origTupdesc->attrs[i]->attndims);
57                         TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
58                                                                                 origTupdesc->attrs[i]->attcollation);
59                 }
60
61                 fmgr_info_copy(&(state->compareFn[i]),
62                                            index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
63                                            CurrentMemoryContext);
64                 fmgr_info_copy(&(state->extractValueFn[i]),
65                                            index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
66                                            CurrentMemoryContext);
67                 fmgr_info_copy(&(state->extractQueryFn[i]),
68                                            index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
69                                            CurrentMemoryContext);
70                 /*
71                  * Check opclass capability to do tri-state or binary logic consistent
72                  * check.
73                  */
74                 if (index_getprocid(index, i + 1, GIN_TRICONSISTENT_PROC) != InvalidOid)
75                 {
76                         fmgr_info_copy(&(state->triConsistentFn[i]),
77                            index_getprocinfo(index, i + 1, GIN_TRICONSISTENT_PROC),
78                                                    CurrentMemoryContext);
79                 }
80
81                 if (index_getprocid(index, i + 1, GIN_CONSISTENT_PROC) != InvalidOid)
82                 {
83                         fmgr_info_copy(&(state->consistentFn[i]),
84                                                    index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
85                                                    CurrentMemoryContext);
86                 }
87
88                 if (state->consistentFn[i].fn_oid == InvalidOid &&
89                         state->triConsistentFn[i].fn_oid == InvalidOid)
90                 {
91                         elog(ERROR, "missing GIN support function (%d or %d) for attribute %d of index \"%s\"",
92                                  GIN_CONSISTENT_PROC, GIN_TRICONSISTENT_PROC,
93                                  i + 1, RelationGetRelationName(index));
94                 }
95
96                 /*
97                  * Check opclass capability to do partial match.
98                  */
99                 if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
100                 {
101                         fmgr_info_copy(&(state->comparePartialFn[i]),
102                                    index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
103                                                    CurrentMemoryContext);
104                         state->canPartialMatch[i] = true;
105                 }
106                 else
107                 {
108                         state->canPartialMatch[i] = false;
109                 }
110
111                 /*
112                  * If the index column has a specified collation, we should honor that
113                  * while doing comparisons.  However, we may have a collatable storage
114                  * type for a noncollatable indexed data type (for instance, hstore
115                  * uses text index entries).  If there's no index collation then
116                  * specify default collation in case the support functions need
117                  * collation.  This is harmless if the support functions don't care
118                  * about collation, so we just do it unconditionally.  (We could
119                  * alternatively call get_typcollation, but that seems like expensive
120                  * overkill --- there aren't going to be any cases where a GIN storage
121                  * type has a nondefault collation.)
122                  */
123                 if (OidIsValid(index->rd_indcollation[i]))
124                         state->supportCollation[i] = index->rd_indcollation[i];
125                 else
126                         state->supportCollation[i] = DEFAULT_COLLATION_OID;
127         }
128 }
129
130 /*
131  * Extract attribute (column) number of stored entry from GIN tuple
132  */
133 OffsetNumber
134 gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
135 {
136         OffsetNumber colN;
137
138         if (ginstate->oneCol)
139         {
140                 /* column number is not stored explicitly */
141                 colN = FirstOffsetNumber;
142         }
143         else
144         {
145                 Datum           res;
146                 bool            isnull;
147
148                 /*
149                  * First attribute is always int16, so we can safely use any tuple
150                  * descriptor to obtain first attribute of tuple
151                  */
152                 res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
153                                                         &isnull);
154                 Assert(!isnull);
155
156                 colN = DatumGetUInt16(res);
157                 Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts);
158         }
159
160         return colN;
161 }
162
163 /*
164  * Extract stored datum (and possible null category) from GIN tuple
165  */
166 Datum
167 gintuple_get_key(GinState *ginstate, IndexTuple tuple,
168                                  GinNullCategory *category)
169 {
170         Datum           res;
171         bool            isnull;
172
173         if (ginstate->oneCol)
174         {
175                 /*
176                  * Single column index doesn't store attribute numbers in tuples
177                  */
178                 res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc,
179                                                         &isnull);
180         }
181         else
182         {
183                 /*
184                  * Since the datum type depends on which index column it's from, we
185                  * must be careful to use the right tuple descriptor here.
186                  */
187                 OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple);
188
189                 res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber),
190                                                         ginstate->tupdesc[colN - 1],
191                                                         &isnull);
192         }
193
194         if (isnull)
195                 *category = GinGetNullCategory(tuple, ginstate);
196         else
197                 *category = GIN_CAT_NORM_KEY;
198
199         return res;
200 }
201
202 /*
203  * Allocate a new page (either by recycling, or by extending the index file)
204  * The returned buffer is already pinned and exclusive-locked
205  * Caller is responsible for initializing the page by calling GinInitBuffer
206  */
207 Buffer
208 GinNewBuffer(Relation index)
209 {
210         Buffer          buffer;
211         bool            needLock;
212
213         /* First, try to get a page from FSM */
214         for (;;)
215         {
216                 BlockNumber blkno = GetFreeIndexPage(index);
217
218                 if (blkno == InvalidBlockNumber)
219                         break;
220
221                 buffer = ReadBuffer(index, blkno);
222
223                 /*
224                  * We have to guard against the possibility that someone else already
225                  * recycled this page; the buffer may be locked if so.
226                  */
227                 if (ConditionalLockBuffer(buffer))
228                 {
229                         Page            page = BufferGetPage(buffer);
230
231                         if (PageIsNew(page))
232                                 return buffer;  /* OK to use, if never initialized */
233
234                         if (GinPageIsDeleted(page))
235                                 return buffer;  /* OK to use */
236
237                         LockBuffer(buffer, GIN_UNLOCK);
238                 }
239
240                 /* Can't use it, so release buffer and try again */
241                 ReleaseBuffer(buffer);
242         }
243
244         /* Must extend the file */
245         needLock = !RELATION_IS_LOCAL(index);
246         if (needLock)
247                 LockRelationForExtension(index, ExclusiveLock);
248
249         buffer = ReadBuffer(index, P_NEW);
250         LockBuffer(buffer, GIN_EXCLUSIVE);
251
252         if (needLock)
253                 UnlockRelationForExtension(index, ExclusiveLock);
254
255         return buffer;
256 }
257
258 void
259 GinInitPage(Page page, uint32 f, Size pageSize)
260 {
261         GinPageOpaque opaque;
262
263         PageInit(page, pageSize, sizeof(GinPageOpaqueData));
264
265         opaque = GinPageGetOpaque(page);
266         memset(opaque, 0, sizeof(GinPageOpaqueData));
267         opaque->flags = f;
268         opaque->rightlink = InvalidBlockNumber;
269 }
270
271 void
272 GinInitBuffer(Buffer b, uint32 f)
273 {
274         GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
275 }
276
277 void
278 GinInitMetabuffer(Buffer b)
279 {
280         GinMetaPageData *metadata;
281         Page            page = BufferGetPage(b);
282
283         GinInitPage(page, GIN_META, BufferGetPageSize(b));
284
285         metadata = GinPageGetMeta(page);
286
287         metadata->head = metadata->tail = InvalidBlockNumber;
288         metadata->tailFreeSize = 0;
289         metadata->nPendingPages = 0;
290         metadata->nPendingHeapTuples = 0;
291         metadata->nTotalPages = 0;
292         metadata->nEntryPages = 0;
293         metadata->nDataPages = 0;
294         metadata->nEntries = 0;
295         metadata->ginVersion = GIN_CURRENT_VERSION;
296 }
297
298 /*
299  * Compare two keys of the same index column
300  */
301 int
302 ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
303                                   Datum a, GinNullCategory categorya,
304                                   Datum b, GinNullCategory categoryb)
305 {
306         /* if not of same null category, sort by that first */
307         if (categorya != categoryb)
308                 return (categorya < categoryb) ? -1 : 1;
309
310         /* all null items in same category are equal */
311         if (categorya != GIN_CAT_NORM_KEY)
312                 return 0;
313
314         /* both not null, so safe to call the compareFn */
315         return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1],
316                                                                           ginstate->supportCollation[attnum - 1],
317                                                                                    a, b));
318 }
319
320 /*
321  * Compare two keys of possibly different index columns
322  */
323 int
324 ginCompareAttEntries(GinState *ginstate,
325                                          OffsetNumber attnuma, Datum a, GinNullCategory categorya,
326                                          OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
327 {
328         /* attribute number is the first sort key */
329         if (attnuma != attnumb)
330                 return (attnuma < attnumb) ? -1 : 1;
331
332         return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb);
333 }
334
335
336 /*
337  * Support for sorting key datums in ginExtractEntries
338  *
339  * Note: we only have to worry about null and not-null keys here;
340  * ginExtractEntries never generates more than one placeholder null,
341  * so it doesn't have to sort those.
342  */
343 typedef struct
344 {
345         Datum           datum;
346         bool            isnull;
347 } keyEntryData;
348
349 typedef struct
350 {
351         FmgrInfo   *cmpDatumFunc;
352         Oid                     collation;
353         bool            haveDups;
354 } cmpEntriesArg;
355
356 static int
357 cmpEntries(const void *a, const void *b, void *arg)
358 {
359         const keyEntryData *aa = (const keyEntryData *) a;
360         const keyEntryData *bb = (const keyEntryData *) b;
361         cmpEntriesArg *data = (cmpEntriesArg *) arg;
362         int                     res;
363
364         if (aa->isnull)
365         {
366                 if (bb->isnull)
367                         res = 0;                        /* NULL "=" NULL */
368                 else
369                         res = 1;                        /* NULL ">" not-NULL */
370         }
371         else if (bb->isnull)
372                 res = -1;                               /* not-NULL "<" NULL */
373         else
374                 res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
375                                                                                           data->collation,
376                                                                                           aa->datum, bb->datum));
377
378         /*
379          * Detect if we have any duplicates.  If there are equal keys, qsort must
380          * compare them at some point, else it wouldn't know whether one should go
381          * before or after the other.
382          */
383         if (res == 0)
384                 data->haveDups = true;
385
386         return res;
387 }
388
389
390 /*
391  * Extract the index key values from an indexable item
392  *
393  * The resulting key values are sorted, and any duplicates are removed.
394  * This avoids generating redundant index entries.
395  */
396 Datum *
397 ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
398                                   Datum value, bool isNull,
399                                   int32 *nentries, GinNullCategory **categories)
400 {
401         Datum      *entries;
402         bool       *nullFlags;
403         int32           i;
404
405         /*
406          * We don't call the extractValueFn on a null item.  Instead generate a
407          * placeholder.
408          */
409         if (isNull)
410         {
411                 *nentries = 1;
412                 entries = (Datum *) palloc(sizeof(Datum));
413                 entries[0] = (Datum) 0;
414                 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
415                 (*categories)[0] = GIN_CAT_NULL_ITEM;
416                 return entries;
417         }
418
419         /* OK, call the opclass's extractValueFn */
420         nullFlags = NULL;                       /* in case extractValue doesn't set it */
421         entries = (Datum *)
422                 DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
423                                                                           ginstate->supportCollation[attnum - 1],
424                                                                                   value,
425                                                                                   PointerGetDatum(nentries),
426                                                                                   PointerGetDatum(&nullFlags)));
427
428         /*
429          * Generate a placeholder if the item contained no keys.
430          */
431         if (entries == NULL || *nentries <= 0)
432         {
433                 *nentries = 1;
434                 entries = (Datum *) palloc(sizeof(Datum));
435                 entries[0] = (Datum) 0;
436                 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
437                 (*categories)[0] = GIN_CAT_EMPTY_ITEM;
438                 return entries;
439         }
440
441         /*
442          * If the extractValueFn didn't create a nullFlags array, create one,
443          * assuming that everything's non-null.  Otherwise, run through the array
444          * and make sure each value is exactly 0 or 1; this ensures binary
445          * compatibility with the GinNullCategory representation.
446          */
447         if (nullFlags == NULL)
448                 nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
449         else
450         {
451                 for (i = 0; i < *nentries; i++)
452                         nullFlags[i] = (nullFlags[i] ? true : false);
453         }
454         /* now we can use the nullFlags as category codes */
455         *categories = (GinNullCategory *) nullFlags;
456
457         /*
458          * If there's more than one key, sort and unique-ify.
459          *
460          * XXX Using qsort here is notationally painful, and the overhead is
461          * pretty bad too.      For small numbers of keys it'd likely be better to use
462          * a simple insertion sort.
463          */
464         if (*nentries > 1)
465         {
466                 keyEntryData *keydata;
467                 cmpEntriesArg arg;
468
469                 keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData));
470                 for (i = 0; i < *nentries; i++)
471                 {
472                         keydata[i].datum = entries[i];
473                         keydata[i].isnull = nullFlags[i];
474                 }
475
476                 arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
477                 arg.collation = ginstate->supportCollation[attnum - 1];
478                 arg.haveDups = false;
479                 qsort_arg(keydata, *nentries, sizeof(keyEntryData),
480                                   cmpEntries, (void *) &arg);
481
482                 if (arg.haveDups)
483                 {
484                         /* there are duplicates, must get rid of 'em */
485                         int32           j;
486
487                         entries[0] = keydata[0].datum;
488                         nullFlags[0] = keydata[0].isnull;
489                         j = 1;
490                         for (i = 1; i < *nentries; i++)
491                         {
492                                 if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
493                                 {
494                                         entries[j] = keydata[i].datum;
495                                         nullFlags[j] = keydata[i].isnull;
496                                         j++;
497                                 }
498                         }
499                         *nentries = j;
500                 }
501                 else
502                 {
503                         /* easy, no duplicates */
504                         for (i = 0; i < *nentries; i++)
505                         {
506                                 entries[i] = keydata[i].datum;
507                                 nullFlags[i] = keydata[i].isnull;
508                         }
509                 }
510
511                 pfree(keydata);
512         }
513
514         return entries;
515 }
516
517 Datum
518 ginoptions(PG_FUNCTION_ARGS)
519 {
520         Datum           reloptions = PG_GETARG_DATUM(0);
521         bool            validate = PG_GETARG_BOOL(1);
522         relopt_value *options;
523         GinOptions *rdopts;
524         int                     numoptions;
525         static const relopt_parse_elt tab[] = {
526                 {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
527         };
528
529         options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
530                                                           &numoptions);
531
532         /* if none set, we're done */
533         if (numoptions == 0)
534                 PG_RETURN_NULL();
535
536         rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
537
538         fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
539                                    validate, tab, lengthof(tab));
540
541         pfree(options);
542
543         PG_RETURN_BYTEA_P(rdopts);
544 }
545
546 /*
547  * Fetch index's statistical data into *stats
548  *
549  * Note: in the result, nPendingPages can be trusted to be up-to-date,
550  * as can ginVersion; but the other fields are as of the last VACUUM.
551  */
552 void
553 ginGetStats(Relation index, GinStatsData *stats)
554 {
555         Buffer          metabuffer;
556         Page            metapage;
557         GinMetaPageData *metadata;
558
559         metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
560         LockBuffer(metabuffer, GIN_SHARE);
561         metapage = BufferGetPage(metabuffer);
562         metadata = GinPageGetMeta(metapage);
563
564         stats->nPendingPages = metadata->nPendingPages;
565         stats->nTotalPages = metadata->nTotalPages;
566         stats->nEntryPages = metadata->nEntryPages;
567         stats->nDataPages = metadata->nDataPages;
568         stats->nEntries = metadata->nEntries;
569         stats->ginVersion = metadata->ginVersion;
570
571         UnlockReleaseBuffer(metabuffer);
572 }
573
574 /*
575  * Write the given statistics to the index's metapage
576  *
577  * Note: nPendingPages and ginVersion are *not* copied over
578  */
579 void
580 ginUpdateStats(Relation index, const GinStatsData *stats)
581 {
582         Buffer          metabuffer;
583         Page            metapage;
584         GinMetaPageData *metadata;
585
586         metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
587         LockBuffer(metabuffer, GIN_EXCLUSIVE);
588         metapage = BufferGetPage(metabuffer);
589         metadata = GinPageGetMeta(metapage);
590
591         START_CRIT_SECTION();
592
593         metadata->nTotalPages = stats->nTotalPages;
594         metadata->nEntryPages = stats->nEntryPages;
595         metadata->nDataPages = stats->nDataPages;
596         metadata->nEntries = stats->nEntries;
597
598         MarkBufferDirty(metabuffer);
599
600         if (RelationNeedsWAL(index))
601         {
602                 XLogRecPtr      recptr;
603                 ginxlogUpdateMeta data;
604                 XLogRecData rdata;
605
606                 data.node = index->rd_node;
607                 data.ntuples = 0;
608                 data.newRightlink = data.prevTail = InvalidBlockNumber;
609                 memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
610
611                 rdata.buffer = InvalidBuffer;
612                 rdata.data = (char *) &data;
613                 rdata.len = sizeof(ginxlogUpdateMeta);
614                 rdata.next = NULL;
615
616                 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
617                 PageSetLSN(metapage, recptr);
618         }
619
620         UnlockReleaseBuffer(metabuffer);
621
622         END_CRIT_SECTION();
623 }