]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/gininsert.c
Rewrite the way GIN posting lists are packed on a page, to reduce WAL volume.
[postgresql] / src / backend / access / gin / gininsert.c
1 /*-------------------------------------------------------------------------
2  *
3  * gininsert.c
4  *        insert routines for the postgres inverted index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                      src/backend/access/gin/gininsert.c
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "access/gin_private.h"
18 #include "access/heapam_xlog.h"
19 #include "catalog/index.h"
20 #include "miscadmin.h"
21 #include "storage/bufmgr.h"
22 #include "storage/smgr.h"
23 #include "storage/indexfsm.h"
24 #include "utils/memutils.h"
25 #include "utils/rel.h"
26
27
28 typedef struct
29 {
30         GinState        ginstate;
31         double          indtuples;
32         GinStatsData buildStats;
33         MemoryContext tmpCtx;
34         MemoryContext funcCtx;
35         BuildAccumulator accum;
36 } GinBuildState;
37
38
39 /*
40  * Adds array of item pointers to tuple's posting list, or
41  * creates posting tree and tuple pointing to tree in case
42  * of not enough space.  Max size of tuple is defined in
43  * GinFormTuple().      Returns a new, modified index tuple.
44  * items[] must be in sorted order with no duplicates.
45  */
46 static IndexTuple
47 addItemPointersToLeafTuple(GinState *ginstate,
48                                                    IndexTuple old,
49                                                    ItemPointerData *items, uint32 nitem,
50                                                    GinStatsData *buildStats)
51 {
52         OffsetNumber attnum;
53         Datum           key;
54         GinNullCategory category;
55         IndexTuple      res;
56         ItemPointerData *newItems,
57                            *oldItems;
58         int                     oldNPosting,
59                                 newNPosting;
60         GinPostingList *compressedList;
61
62         Assert(!GinIsPostingTree(old));
63
64         attnum = gintuple_get_attrnum(ginstate, old);
65         key = gintuple_get_key(ginstate, old, &category);
66
67         /* merge the old and new posting lists */
68         oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting);
69
70         newItems = ginMergeItemPointers(items, nitem,
71                                                                         oldItems, oldNPosting,
72                                                                         &newNPosting);
73
74         /* Compress the posting list, and try to a build tuple with room for it */
75         res = NULL;
76         compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize,
77                                                                                         NULL);
78         pfree(newItems);
79         if (compressedList)
80         {
81                 res = GinFormTuple(ginstate, attnum, key, category,
82                                                    (char *) compressedList,
83                                                    SizeOfGinPostingList(compressedList),
84                                                    newNPosting,
85                                                    false);
86                 pfree(compressedList);
87         }
88         if (!res)
89         {
90                 /* posting list would be too big, convert to posting tree */
91                 BlockNumber postingRoot;
92
93                 /*
94                  * Initialize posting tree with the old tuple's posting list.  It's
95                  * surely small enough to fit on one posting-tree page, and should
96                  * already be in order with no duplicates.
97                  */
98                 postingRoot = createPostingTree(ginstate->index,
99                                                                                 oldItems,
100                                                                                 oldNPosting,
101                                                                                 buildStats);
102
103                 /* Now insert the TIDs-to-be-added into the posting tree */
104                 ginInsertItemPointers(ginstate->index, postingRoot,
105                                                           items, nitem,
106                                                           buildStats);
107
108                 /* And build a new posting-tree-only result tuple */
109                 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
110                 GinSetPostingTree(res, postingRoot);
111         }
112         pfree(oldItems);
113
114         return res;
115 }
116
117 /*
118  * Build a fresh leaf tuple, either posting-list or posting-tree format
119  * depending on whether the given items list will fit.
120  * items[] must be in sorted order with no duplicates.
121  *
122  * This is basically the same logic as in addItemPointersToLeafTuple,
123  * but working from slightly different input.
124  */
125 static IndexTuple
126 buildFreshLeafTuple(GinState *ginstate,
127                                         OffsetNumber attnum, Datum key, GinNullCategory category,
128                                         ItemPointerData *items, uint32 nitem,
129                                         GinStatsData *buildStats)
130 {
131         IndexTuple      res = NULL;
132         GinPostingList *compressedList;
133
134         /* try to build a posting list tuple with all the items */
135         compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL);
136         if (compressedList)
137         {
138                 res = GinFormTuple(ginstate, attnum, key, category,
139                                                    (char *) compressedList,
140                                                    SizeOfGinPostingList(compressedList),
141                                                    nitem, false);
142                 pfree(compressedList);
143         }
144         if (!res)
145         {
146                 /* posting list would be too big, build posting tree */
147                 BlockNumber postingRoot;
148
149                 /*
150                  * Build posting-tree-only result tuple.  We do this first so as to
151                  * fail quickly if the key is too big.
152                  */
153                 res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true);
154
155                 /*
156                  * Initialize a new posting tree with the TIDs.
157                  */
158                 postingRoot = createPostingTree(ginstate->index, items, nitem,
159                                                                                 buildStats);
160
161                 /* And save the root link in the result tuple */
162                 GinSetPostingTree(res, postingRoot);
163         }
164
165         return res;
166 }
167
168 /*
169  * Insert one or more heap TIDs associated with the given key value.
170  * This will either add a single key entry, or enlarge a pre-existing entry.
171  *
172  * During an index build, buildStats is non-null and the counters
173  * it contains should be incremented as needed.
174  */
175 void
176 ginEntryInsert(GinState *ginstate,
177                            OffsetNumber attnum, Datum key, GinNullCategory category,
178                            ItemPointerData *items, uint32 nitem,
179                            GinStatsData *buildStats)
180 {
181         GinBtreeData btree;
182         GinBtreeEntryInsertData insertdata;
183         GinBtreeStack *stack;
184         IndexTuple      itup;
185         Page            page;
186
187         insertdata.isDelete = FALSE;
188
189         /* During index build, count the to-be-inserted entry */
190         if (buildStats)
191                 buildStats->nEntries++;
192
193         ginPrepareEntryScan(&btree, attnum, key, category, ginstate);
194
195         stack = ginFindLeafPage(&btree, false);
196         page = BufferGetPage(stack->buffer);
197
198         if (btree.findItem(&btree, stack))
199         {
200                 /* found pre-existing entry */
201                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
202
203                 if (GinIsPostingTree(itup))
204                 {
205                         /* add entries to existing posting tree */
206                         BlockNumber rootPostingTree = GinGetPostingTree(itup);
207
208                         /* release all stack */
209                         LockBuffer(stack->buffer, GIN_UNLOCK);
210                         freeGinBtreeStack(stack);
211
212                         /* insert into posting tree */
213                         ginInsertItemPointers(ginstate->index, rootPostingTree,
214                                                                   items, nitem,
215                                                                   buildStats);
216                         return;
217                 }
218
219                 /* modify an existing leaf entry */
220                 itup = addItemPointersToLeafTuple(ginstate, itup,
221                                                                                   items, nitem, buildStats);
222
223                 insertdata.isDelete = TRUE;
224         }
225         else
226         {
227                 /* no match, so construct a new leaf entry */
228                 itup = buildFreshLeafTuple(ginstate, attnum, key, category,
229                                                                    items, nitem, buildStats);
230         }
231
232         /* Insert the new or modified leaf tuple */
233         insertdata.entry = itup;
234         ginInsertValue(&btree, stack, &insertdata, buildStats);
235         pfree(itup);
236 }
237
238 /*
239  * Extract index entries for a single indexable item, and add them to the
240  * BuildAccumulator's state.
241  *
242  * This function is used only during initial index creation.
243  */
244 static void
245 ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum,
246                                            Datum value, bool isNull,
247                                            ItemPointer heapptr)
248 {
249         Datum      *entries;
250         GinNullCategory *categories;
251         int32           nentries;
252         MemoryContext oldCtx;
253
254         oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
255         entries = ginExtractEntries(buildstate->accum.ginstate, attnum,
256                                                                 value, isNull,
257                                                                 &nentries, &categories);
258         MemoryContextSwitchTo(oldCtx);
259
260         ginInsertBAEntries(&buildstate->accum, heapptr, attnum,
261                                            entries, categories, nentries);
262
263         buildstate->indtuples += nentries;
264
265         MemoryContextReset(buildstate->funcCtx);
266 }
267
268 static void
269 ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
270                                  bool *isnull, bool tupleIsAlive, void *state)
271 {
272         GinBuildState *buildstate = (GinBuildState *) state;
273         MemoryContext oldCtx;
274         int                     i;
275
276         oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
277
278         for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++)
279                 ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1),
280                                                            values[i], isnull[i],
281                                                            &htup->t_self);
282
283         /* If we've maxed out our available memory, dump everything to the index */
284         if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L)
285         {
286                 ItemPointerData *list;
287                 Datum           key;
288                 GinNullCategory category;
289                 uint32          nlist;
290                 OffsetNumber attnum;
291
292                 ginBeginBAScan(&buildstate->accum);
293                 while ((list = ginGetBAEntry(&buildstate->accum,
294                                                                   &attnum, &key, &category, &nlist)) != NULL)
295                 {
296                         /* there could be many entries, so be willing to abort here */
297                         CHECK_FOR_INTERRUPTS();
298                         ginEntryInsert(&buildstate->ginstate, attnum, key, category,
299                                                    list, nlist, &buildstate->buildStats);
300                 }
301
302                 MemoryContextReset(buildstate->tmpCtx);
303                 ginInitBA(&buildstate->accum);
304         }
305
306         MemoryContextSwitchTo(oldCtx);
307 }
308
309 Datum
310 ginbuild(PG_FUNCTION_ARGS)
311 {
312         Relation        heap = (Relation) PG_GETARG_POINTER(0);
313         Relation        index = (Relation) PG_GETARG_POINTER(1);
314         IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
315         IndexBuildResult *result;
316         double          reltuples;
317         GinBuildState buildstate;
318         Buffer          RootBuffer,
319                                 MetaBuffer;
320         ItemPointerData *list;
321         Datum           key;
322         GinNullCategory category;
323         uint32          nlist;
324         MemoryContext oldCtx;
325         OffsetNumber attnum;
326
327         if (RelationGetNumberOfBlocks(index) != 0)
328                 elog(ERROR, "index \"%s\" already contains data",
329                          RelationGetRelationName(index));
330
331         initGinState(&buildstate.ginstate, index);
332         buildstate.indtuples = 0;
333         memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
334
335         /* initialize the meta page */
336         MetaBuffer = GinNewBuffer(index);
337
338         /* initialize the root page */
339         RootBuffer = GinNewBuffer(index);
340
341         START_CRIT_SECTION();
342         GinInitMetabuffer(MetaBuffer);
343         MarkBufferDirty(MetaBuffer);
344         GinInitBuffer(RootBuffer, GIN_LEAF);
345         MarkBufferDirty(RootBuffer);
346
347         if (RelationNeedsWAL(index))
348         {
349                 XLogRecPtr      recptr;
350                 XLogRecData rdata;
351                 Page            page;
352
353                 rdata.buffer = InvalidBuffer;
354                 rdata.data = (char *) &(index->rd_node);
355                 rdata.len = sizeof(RelFileNode);
356                 rdata.next = NULL;
357
358                 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
359
360                 page = BufferGetPage(RootBuffer);
361                 PageSetLSN(page, recptr);
362
363                 page = BufferGetPage(MetaBuffer);
364                 PageSetLSN(page, recptr);
365         }
366
367         UnlockReleaseBuffer(MetaBuffer);
368         UnlockReleaseBuffer(RootBuffer);
369         END_CRIT_SECTION();
370
371         /* count the root as first entry page */
372         buildstate.buildStats.nEntryPages++;
373
374         /*
375          * create a temporary memory context that is reset once for each tuple
376          * inserted into the index
377          */
378         buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
379                                                                                           "Gin build temporary context",
380                                                                                           ALLOCSET_DEFAULT_MINSIZE,
381                                                                                           ALLOCSET_DEFAULT_INITSIZE,
382                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
383
384         buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
385                                          "Gin build temporary context for user-defined function",
386                                                                                            ALLOCSET_DEFAULT_MINSIZE,
387                                                                                            ALLOCSET_DEFAULT_INITSIZE,
388                                                                                            ALLOCSET_DEFAULT_MAXSIZE);
389
390         buildstate.accum.ginstate = &buildstate.ginstate;
391         ginInitBA(&buildstate.accum);
392
393         /*
394          * Do the heap scan.  We disallow sync scan here because dataPlaceToPage
395          * prefers to receive tuples in TID order.
396          */
397         reltuples = IndexBuildHeapScan(heap, index, indexInfo, false,
398                                                                    ginBuildCallback, (void *) &buildstate);
399
400         /* dump remaining entries to the index */
401         oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
402         ginBeginBAScan(&buildstate.accum);
403         while ((list = ginGetBAEntry(&buildstate.accum,
404                                                                  &attnum, &key, &category, &nlist)) != NULL)
405         {
406                 /* there could be many entries, so be willing to abort here */
407                 CHECK_FOR_INTERRUPTS();
408                 ginEntryInsert(&buildstate.ginstate, attnum, key, category,
409                                            list, nlist, &buildstate.buildStats);
410         }
411         MemoryContextSwitchTo(oldCtx);
412
413         MemoryContextDelete(buildstate.tmpCtx);
414
415         /*
416          * Update metapage stats
417          */
418         buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
419         ginUpdateStats(index, &buildstate.buildStats);
420
421         /*
422          * Return statistics
423          */
424         result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
425
426         result->heap_tuples = reltuples;
427         result->index_tuples = buildstate.indtuples;
428
429         PG_RETURN_POINTER(result);
430 }
431
432 /*
433  *      ginbuildempty() -- build an empty gin index in the initialization fork
434  */
435 Datum
436 ginbuildempty(PG_FUNCTION_ARGS)
437 {
438         Relation        index = (Relation) PG_GETARG_POINTER(0);
439         Buffer          RootBuffer,
440                                 MetaBuffer;
441
442         /* An empty GIN index has two pages. */
443         MetaBuffer =
444                 ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
445         LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE);
446         RootBuffer =
447                 ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
448         LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE);
449
450         /* Initialize and xlog metabuffer and root buffer. */
451         START_CRIT_SECTION();
452         GinInitMetabuffer(MetaBuffer);
453         MarkBufferDirty(MetaBuffer);
454         log_newpage_buffer(MetaBuffer, false);
455         GinInitBuffer(RootBuffer, GIN_LEAF);
456         MarkBufferDirty(RootBuffer);
457         log_newpage_buffer(RootBuffer, false);
458         END_CRIT_SECTION();
459
460         /* Unlock and release the buffers. */
461         UnlockReleaseBuffer(MetaBuffer);
462         UnlockReleaseBuffer(RootBuffer);
463
464         PG_RETURN_VOID();
465 }
466
467 /*
468  * Insert index entries for a single indexable item during "normal"
469  * (non-fast-update) insertion
470  */
471 static void
472 ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum,
473                                    Datum value, bool isNull,
474                                    ItemPointer item)
475 {
476         Datum      *entries;
477         GinNullCategory *categories;
478         int32           i,
479                                 nentries;
480
481         entries = ginExtractEntries(ginstate, attnum, value, isNull,
482                                                                 &nentries, &categories);
483
484         for (i = 0; i < nentries; i++)
485                 ginEntryInsert(ginstate, attnum, entries[i], categories[i],
486                                            item, 1, NULL);
487 }
488
489 Datum
490 gininsert(PG_FUNCTION_ARGS)
491 {
492         Relation        index = (Relation) PG_GETARG_POINTER(0);
493         Datum      *values = (Datum *) PG_GETARG_POINTER(1);
494         bool       *isnull = (bool *) PG_GETARG_POINTER(2);
495         ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
496
497 #ifdef NOT_USED
498         Relation        heapRel = (Relation) PG_GETARG_POINTER(4);
499         IndexUniqueCheck checkUnique = (IndexUniqueCheck) PG_GETARG_INT32(5);
500 #endif
501         GinState        ginstate;
502         MemoryContext oldCtx;
503         MemoryContext insertCtx;
504         int                     i;
505
506         insertCtx = AllocSetContextCreate(CurrentMemoryContext,
507                                                                           "Gin insert temporary context",
508                                                                           ALLOCSET_DEFAULT_MINSIZE,
509                                                                           ALLOCSET_DEFAULT_INITSIZE,
510                                                                           ALLOCSET_DEFAULT_MAXSIZE);
511
512         oldCtx = MemoryContextSwitchTo(insertCtx);
513
514         initGinState(&ginstate, index);
515
516         if (GinGetUseFastUpdate(index))
517         {
518                 GinTupleCollector collector;
519
520                 memset(&collector, 0, sizeof(GinTupleCollector));
521
522                 for (i = 0; i < ginstate.origTupdesc->natts; i++)
523                         ginHeapTupleFastCollect(&ginstate, &collector,
524                                                                         (OffsetNumber) (i + 1),
525                                                                         values[i], isnull[i],
526                                                                         ht_ctid);
527
528                 ginHeapTupleFastInsert(&ginstate, &collector);
529         }
530         else
531         {
532                 for (i = 0; i < ginstate.origTupdesc->natts; i++)
533                         ginHeapTupleInsert(&ginstate, (OffsetNumber) (i + 1),
534                                                            values[i], isnull[i],
535                                                            ht_ctid);
536         }
537
538         MemoryContextSwitchTo(oldCtx);
539         MemoryContextDelete(insertCtx);
540
541         PG_RETURN_BOOL(false);
542 }