]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/ginutil.c
Update copyright notices for year 2012.
[postgresql] / src / backend / access / gin / ginutil.c
1 /*-------------------------------------------------------------------------
2  *
3  * ginutil.c
4  *        utilities routines for the postgres inverted index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                      src/backend/access/gin/ginutil.c
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "access/gin_private.h"
18 #include "access/reloptions.h"
19 #include "catalog/pg_collation.h"
20 #include "catalog/pg_type.h"
21 #include "miscadmin.h"
22 #include "storage/indexfsm.h"
23 #include "storage/lmgr.h"
24
25
26 /*
27  * initGinState: fill in an empty GinState struct to describe the index
28  *
29  * Note: assorted subsidiary data is allocated in the CurrentMemoryContext.
30  */
31 void
32 initGinState(GinState *state, Relation index)
33 {
34         TupleDesc       origTupdesc = RelationGetDescr(index);
35         int                     i;
36
37         MemSet(state, 0, sizeof(GinState));
38
39         state->index = index;
40         state->oneCol = (origTupdesc->natts == 1) ? true : false;
41         state->origTupdesc = origTupdesc;
42
43         for (i = 0; i < origTupdesc->natts; i++)
44         {
45                 if (state->oneCol)
46                         state->tupdesc[i] = state->origTupdesc;
47                 else
48                 {
49                         state->tupdesc[i] = CreateTemplateTupleDesc(2, false);
50
51                         TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL,
52                                                            INT2OID, -1, 0);
53                         TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL,
54                                                            origTupdesc->attrs[i]->atttypid,
55                                                            origTupdesc->attrs[i]->atttypmod,
56                                                            origTupdesc->attrs[i]->attndims);
57                         TupleDescInitEntryCollation(state->tupdesc[i], (AttrNumber) 2,
58                                                                                 origTupdesc->attrs[i]->attcollation);
59                 }
60
61                 fmgr_info_copy(&(state->compareFn[i]),
62                                            index_getprocinfo(index, i + 1, GIN_COMPARE_PROC),
63                                            CurrentMemoryContext);
64                 fmgr_info_copy(&(state->extractValueFn[i]),
65                                            index_getprocinfo(index, i + 1, GIN_EXTRACTVALUE_PROC),
66                                            CurrentMemoryContext);
67                 fmgr_info_copy(&(state->extractQueryFn[i]),
68                                            index_getprocinfo(index, i + 1, GIN_EXTRACTQUERY_PROC),
69                                            CurrentMemoryContext);
70                 fmgr_info_copy(&(state->consistentFn[i]),
71                                            index_getprocinfo(index, i + 1, GIN_CONSISTENT_PROC),
72                                            CurrentMemoryContext);
73
74                 /*
75                  * Check opclass capability to do partial match.
76                  */
77                 if (index_getprocid(index, i + 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid)
78                 {
79                         fmgr_info_copy(&(state->comparePartialFn[i]),
80                                    index_getprocinfo(index, i + 1, GIN_COMPARE_PARTIAL_PROC),
81                                                    CurrentMemoryContext);
82                         state->canPartialMatch[i] = true;
83                 }
84                 else
85                 {
86                         state->canPartialMatch[i] = false;
87                 }
88
89                 /*
90                  * If the index column has a specified collation, we should honor that
91                  * while doing comparisons.  However, we may have a collatable storage
92                  * type for a noncollatable indexed data type (for instance, hstore
93                  * uses text index entries).  If there's no index collation then
94                  * specify default collation in case the support functions need
95                  * collation.  This is harmless if the support functions don't care
96                  * about collation, so we just do it unconditionally.  (We could
97                  * alternatively call get_typcollation, but that seems like expensive
98                  * overkill --- there aren't going to be any cases where a GIN storage
99                  * type has a nondefault collation.)
100                  */
101                 if (OidIsValid(index->rd_indcollation[i]))
102                         state->supportCollation[i] = index->rd_indcollation[i];
103                 else
104                         state->supportCollation[i] = DEFAULT_COLLATION_OID;
105         }
106 }
107
108 /*
109  * Extract attribute (column) number of stored entry from GIN tuple
110  */
111 OffsetNumber
112 gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
113 {
114         OffsetNumber colN;
115
116         if (ginstate->oneCol)
117         {
118                 /* column number is not stored explicitly */
119                 colN = FirstOffsetNumber;
120         }
121         else
122         {
123                 Datum           res;
124                 bool            isnull;
125
126                 /*
127                  * First attribute is always int16, so we can safely use any tuple
128                  * descriptor to obtain first attribute of tuple
129                  */
130                 res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
131                                                         &isnull);
132                 Assert(!isnull);
133
134                 colN = DatumGetUInt16(res);
135                 Assert(colN >= FirstOffsetNumber && colN <= ginstate->origTupdesc->natts);
136         }
137
138         return colN;
139 }
140
141 /*
142  * Extract stored datum (and possible null category) from GIN tuple
143  */
144 Datum
145 gintuple_get_key(GinState *ginstate, IndexTuple tuple,
146                                  GinNullCategory *category)
147 {
148         Datum           res;
149         bool            isnull;
150
151         if (ginstate->oneCol)
152         {
153                 /*
154                  * Single column index doesn't store attribute numbers in tuples
155                  */
156                 res = index_getattr(tuple, FirstOffsetNumber, ginstate->origTupdesc,
157                                                         &isnull);
158         }
159         else
160         {
161                 /*
162                  * Since the datum type depends on which index column it's from, we
163                  * must be careful to use the right tuple descriptor here.
164                  */
165                 OffsetNumber colN = gintuple_get_attrnum(ginstate, tuple);
166
167                 res = index_getattr(tuple, OffsetNumberNext(FirstOffsetNumber),
168                                                         ginstate->tupdesc[colN - 1],
169                                                         &isnull);
170         }
171
172         if (isnull)
173                 *category = GinGetNullCategory(tuple, ginstate);
174         else
175                 *category = GIN_CAT_NORM_KEY;
176
177         return res;
178 }
179
180 /*
181  * Allocate a new page (either by recycling, or by extending the index file)
182  * The returned buffer is already pinned and exclusive-locked
183  * Caller is responsible for initializing the page by calling GinInitBuffer
184  */
185 Buffer
186 GinNewBuffer(Relation index)
187 {
188         Buffer          buffer;
189         bool            needLock;
190
191         /* First, try to get a page from FSM */
192         for (;;)
193         {
194                 BlockNumber blkno = GetFreeIndexPage(index);
195
196                 if (blkno == InvalidBlockNumber)
197                         break;
198
199                 buffer = ReadBuffer(index, blkno);
200
201                 /*
202                  * We have to guard against the possibility that someone else already
203                  * recycled this page; the buffer may be locked if so.
204                  */
205                 if (ConditionalLockBuffer(buffer))
206                 {
207                         Page            page = BufferGetPage(buffer);
208
209                         if (PageIsNew(page))
210                                 return buffer;  /* OK to use, if never initialized */
211
212                         if (GinPageIsDeleted(page))
213                                 return buffer;  /* OK to use */
214
215                         LockBuffer(buffer, GIN_UNLOCK);
216                 }
217
218                 /* Can't use it, so release buffer and try again */
219                 ReleaseBuffer(buffer);
220         }
221
222         /* Must extend the file */
223         needLock = !RELATION_IS_LOCAL(index);
224         if (needLock)
225                 LockRelationForExtension(index, ExclusiveLock);
226
227         buffer = ReadBuffer(index, P_NEW);
228         LockBuffer(buffer, GIN_EXCLUSIVE);
229
230         if (needLock)
231                 UnlockRelationForExtension(index, ExclusiveLock);
232
233         return buffer;
234 }
235
236 void
237 GinInitPage(Page page, uint32 f, Size pageSize)
238 {
239         GinPageOpaque opaque;
240
241         PageInit(page, pageSize, sizeof(GinPageOpaqueData));
242
243         opaque = GinPageGetOpaque(page);
244         memset(opaque, 0, sizeof(GinPageOpaqueData));
245         opaque->flags = f;
246         opaque->rightlink = InvalidBlockNumber;
247 }
248
249 void
250 GinInitBuffer(Buffer b, uint32 f)
251 {
252         GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
253 }
254
255 void
256 GinInitMetabuffer(Buffer b)
257 {
258         GinMetaPageData *metadata;
259         Page            page = BufferGetPage(b);
260
261         GinInitPage(page, GIN_META, BufferGetPageSize(b));
262
263         metadata = GinPageGetMeta(page);
264
265         metadata->head = metadata->tail = InvalidBlockNumber;
266         metadata->tailFreeSize = 0;
267         metadata->nPendingPages = 0;
268         metadata->nPendingHeapTuples = 0;
269         metadata->nTotalPages = 0;
270         metadata->nEntryPages = 0;
271         metadata->nDataPages = 0;
272         metadata->nEntries = 0;
273         metadata->ginVersion = GIN_CURRENT_VERSION;
274 }
275
276 /*
277  * Compare two keys of the same index column
278  */
279 int
280 ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
281                                   Datum a, GinNullCategory categorya,
282                                   Datum b, GinNullCategory categoryb)
283 {
284         /* if not of same null category, sort by that first */
285         if (categorya != categoryb)
286                 return (categorya < categoryb) ? -1 : 1;
287
288         /* all null items in same category are equal */
289         if (categorya != GIN_CAT_NORM_KEY)
290                 return 0;
291
292         /* both not null, so safe to call the compareFn */
293         return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1],
294                                                                           ginstate->supportCollation[attnum - 1],
295                                                                                    a, b));
296 }
297
298 /*
299  * Compare two keys of possibly different index columns
300  */
301 int
302 ginCompareAttEntries(GinState *ginstate,
303                                          OffsetNumber attnuma, Datum a, GinNullCategory categorya,
304                                          OffsetNumber attnumb, Datum b, GinNullCategory categoryb)
305 {
306         /* attribute number is the first sort key */
307         if (attnuma != attnumb)
308                 return (attnuma < attnumb) ? -1 : 1;
309
310         return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb);
311 }
312
313
314 /*
315  * Support for sorting key datums in ginExtractEntries
316  *
317  * Note: we only have to worry about null and not-null keys here;
318  * ginExtractEntries never generates more than one placeholder null,
319  * so it doesn't have to sort those.
320  */
321 typedef struct
322 {
323         Datum           datum;
324         bool            isnull;
325 } keyEntryData;
326
327 typedef struct
328 {
329         FmgrInfo   *cmpDatumFunc;
330         Oid                     collation;
331         bool            haveDups;
332 } cmpEntriesArg;
333
334 static int
335 cmpEntries(const void *a, const void *b, void *arg)
336 {
337         const keyEntryData *aa = (const keyEntryData *) a;
338         const keyEntryData *bb = (const keyEntryData *) b;
339         cmpEntriesArg *data = (cmpEntriesArg *) arg;
340         int                     res;
341
342         if (aa->isnull)
343         {
344                 if (bb->isnull)
345                         res = 0;                        /* NULL "=" NULL */
346                 else
347                         res = 1;                        /* NULL ">" not-NULL */
348         }
349         else if (bb->isnull)
350                 res = -1;                               /* not-NULL "<" NULL */
351         else
352                 res = DatumGetInt32(FunctionCall2Coll(data->cmpDatumFunc,
353                                                                                           data->collation,
354                                                                                           aa->datum, bb->datum));
355
356         /*
357          * Detect if we have any duplicates.  If there are equal keys, qsort must
358          * compare them at some point, else it wouldn't know whether one should go
359          * before or after the other.
360          */
361         if (res == 0)
362                 data->haveDups = true;
363
364         return res;
365 }
366
367
368 /*
369  * Extract the index key values from an indexable item
370  *
371  * The resulting key values are sorted, and any duplicates are removed.
372  * This avoids generating redundant index entries.
373  */
374 Datum *
375 ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
376                                   Datum value, bool isNull,
377                                   int32 *nentries, GinNullCategory **categories)
378 {
379         Datum      *entries;
380         bool       *nullFlags;
381         int32           i;
382
383         /*
384          * We don't call the extractValueFn on a null item.  Instead generate a
385          * placeholder.
386          */
387         if (isNull)
388         {
389                 *nentries = 1;
390                 entries = (Datum *) palloc(sizeof(Datum));
391                 entries[0] = (Datum) 0;
392                 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
393                 (*categories)[0] = GIN_CAT_NULL_ITEM;
394                 return entries;
395         }
396
397         /* OK, call the opclass's extractValueFn */
398         nullFlags = NULL;                       /* in case extractValue doesn't set it */
399         entries = (Datum *)
400                 DatumGetPointer(FunctionCall3Coll(&ginstate->extractValueFn[attnum - 1],
401                                                                           ginstate->supportCollation[attnum - 1],
402                                                                                   value,
403                                                                                   PointerGetDatum(nentries),
404                                                                                   PointerGetDatum(&nullFlags)));
405
406         /*
407          * Generate a placeholder if the item contained no keys.
408          */
409         if (entries == NULL || *nentries <= 0)
410         {
411                 *nentries = 1;
412                 entries = (Datum *) palloc(sizeof(Datum));
413                 entries[0] = (Datum) 0;
414                 *categories = (GinNullCategory *) palloc(sizeof(GinNullCategory));
415                 (*categories)[0] = GIN_CAT_EMPTY_ITEM;
416                 return entries;
417         }
418
419         /*
420          * If the extractValueFn didn't create a nullFlags array, create one,
421          * assuming that everything's non-null.  Otherwise, run through the array
422          * and make sure each value is exactly 0 or 1; this ensures binary
423          * compatibility with the GinNullCategory representation.
424          */
425         if (nullFlags == NULL)
426                 nullFlags = (bool *) palloc0(*nentries * sizeof(bool));
427         else
428         {
429                 for (i = 0; i < *nentries; i++)
430                         nullFlags[i] = (nullFlags[i] ? true : false);
431         }
432         /* now we can use the nullFlags as category codes */
433         *categories = (GinNullCategory *) nullFlags;
434
435         /*
436          * If there's more than one key, sort and unique-ify.
437          *
438          * XXX Using qsort here is notationally painful, and the overhead is
439          * pretty bad too.      For small numbers of keys it'd likely be better to use
440          * a simple insertion sort.
441          */
442         if (*nentries > 1)
443         {
444                 keyEntryData *keydata;
445                 cmpEntriesArg arg;
446
447                 keydata = (keyEntryData *) palloc(*nentries * sizeof(keyEntryData));
448                 for (i = 0; i < *nentries; i++)
449                 {
450                         keydata[i].datum = entries[i];
451                         keydata[i].isnull = nullFlags[i];
452                 }
453
454                 arg.cmpDatumFunc = &ginstate->compareFn[attnum - 1];
455                 arg.collation = ginstate->supportCollation[attnum - 1];
456                 arg.haveDups = false;
457                 qsort_arg(keydata, *nentries, sizeof(keyEntryData),
458                                   cmpEntries, (void *) &arg);
459
460                 if (arg.haveDups)
461                 {
462                         /* there are duplicates, must get rid of 'em */
463                         int32           j;
464
465                         entries[0] = keydata[0].datum;
466                         nullFlags[0] = keydata[0].isnull;
467                         j = 1;
468                         for (i = 1; i < *nentries; i++)
469                         {
470                                 if (cmpEntries(&keydata[i - 1], &keydata[i], &arg) != 0)
471                                 {
472                                         entries[j] = keydata[i].datum;
473                                         nullFlags[j] = keydata[i].isnull;
474                                         j++;
475                                 }
476                         }
477                         *nentries = j;
478                 }
479                 else
480                 {
481                         /* easy, no duplicates */
482                         for (i = 0; i < *nentries; i++)
483                         {
484                                 entries[i] = keydata[i].datum;
485                                 nullFlags[i] = keydata[i].isnull;
486                         }
487                 }
488
489                 pfree(keydata);
490         }
491
492         return entries;
493 }
494
495 Datum
496 ginoptions(PG_FUNCTION_ARGS)
497 {
498         Datum           reloptions = PG_GETARG_DATUM(0);
499         bool            validate = PG_GETARG_BOOL(1);
500         relopt_value *options;
501         GinOptions *rdopts;
502         int                     numoptions;
503         static const relopt_parse_elt tab[] = {
504                 {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
505         };
506
507         options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
508                                                           &numoptions);
509
510         /* if none set, we're done */
511         if (numoptions == 0)
512                 PG_RETURN_NULL();
513
514         rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
515
516         fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
517                                    validate, tab, lengthof(tab));
518
519         pfree(options);
520
521         PG_RETURN_BYTEA_P(rdopts);
522 }
523
524 /*
525  * Fetch index's statistical data into *stats
526  *
527  * Note: in the result, nPendingPages can be trusted to be up-to-date,
528  * as can ginVersion; but the other fields are as of the last VACUUM.
529  */
530 void
531 ginGetStats(Relation index, GinStatsData *stats)
532 {
533         Buffer          metabuffer;
534         Page            metapage;
535         GinMetaPageData *metadata;
536
537         metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
538         LockBuffer(metabuffer, GIN_SHARE);
539         metapage = BufferGetPage(metabuffer);
540         metadata = GinPageGetMeta(metapage);
541
542         stats->nPendingPages = metadata->nPendingPages;
543         stats->nTotalPages = metadata->nTotalPages;
544         stats->nEntryPages = metadata->nEntryPages;
545         stats->nDataPages = metadata->nDataPages;
546         stats->nEntries = metadata->nEntries;
547         stats->ginVersion = metadata->ginVersion;
548
549         UnlockReleaseBuffer(metabuffer);
550 }
551
552 /*
553  * Write the given statistics to the index's metapage
554  *
555  * Note: nPendingPages and ginVersion are *not* copied over
556  */
557 void
558 ginUpdateStats(Relation index, const GinStatsData *stats)
559 {
560         Buffer          metabuffer;
561         Page            metapage;
562         GinMetaPageData *metadata;
563
564         metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
565         LockBuffer(metabuffer, GIN_EXCLUSIVE);
566         metapage = BufferGetPage(metabuffer);
567         metadata = GinPageGetMeta(metapage);
568
569         START_CRIT_SECTION();
570
571         metadata->nTotalPages = stats->nTotalPages;
572         metadata->nEntryPages = stats->nEntryPages;
573         metadata->nDataPages = stats->nDataPages;
574         metadata->nEntries = stats->nEntries;
575
576         MarkBufferDirty(metabuffer);
577
578         if (RelationNeedsWAL(index))
579         {
580                 XLogRecPtr      recptr;
581                 ginxlogUpdateMeta data;
582                 XLogRecData rdata;
583
584                 data.node = index->rd_node;
585                 data.ntuples = 0;
586                 data.newRightlink = data.prevTail = InvalidBlockNumber;
587                 memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
588
589                 rdata.buffer = InvalidBuffer;
590                 rdata.data = (char *) &data;
591                 rdata.len = sizeof(ginxlogUpdateMeta);
592                 rdata.next = NULL;
593
594                 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
595                 PageSetLSN(metapage, recptr);
596                 PageSetTLI(metapage, ThisTimeLineID);
597         }
598
599         UnlockReleaseBuffer(metabuffer);
600
601         END_CRIT_SECTION();
602 }