1 /*-------------------------------------------------------------------------
4 * routines for fast build of inverted index
7 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/ginbulk.c
12 *-------------------------------------------------------------------------
17 #include "access/gin_private.h"
18 #include "utils/datum.h"
19 #include "utils/memutils.h"
22 #define DEF_NENTRY 2048 /* GinEntryAccumulator allocation quantum */
23 #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */
26 /* Combiner function for rbtree.c */
28 ginCombineData(RBNode *existing, const RBNode *newdata, void *arg)
30 GinEntryAccumulator *eo = (GinEntryAccumulator *) existing;
31 const GinEntryAccumulator *en = (const GinEntryAccumulator *) newdata;
32 BuildAccumulator *accum = (BuildAccumulator *) arg;
35 * Note this code assumes that newdata contains only one itempointer.
37 if (eo->count >= eo->maxcount)
39 accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
41 eo->list = (ItemPointerData *)
42 repalloc(eo->list, sizeof(ItemPointerData) * eo->maxcount);
43 accum->allocatedMemory += GetMemoryChunkSpace(eo->list);
46 /* If item pointers are not ordered, they will need to be sorted later */
47 if (eo->shouldSort == FALSE)
51 res = ginCompareItemPointers(eo->list + eo->count - 1, en->list);
55 eo->shouldSort = TRUE;
58 eo->list[eo->count] = en->list[0];
62 /* Comparator function for rbtree.c */
64 cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg)
66 const GinEntryAccumulator *ea = (const GinEntryAccumulator *) a;
67 const GinEntryAccumulator *eb = (const GinEntryAccumulator *) b;
68 BuildAccumulator *accum = (BuildAccumulator *) arg;
70 return ginCompareAttEntries(accum->ginstate,
71 ea->attnum, ea->key, ea->category,
72 eb->attnum, eb->key, eb->category);
75 /* Allocator function for rbtree.c */
77 ginAllocEntryAccumulator(void *arg)
79 BuildAccumulator *accum = (BuildAccumulator *) arg;
80 GinEntryAccumulator *ea;
83 * Allocate memory by rather big chunks to decrease overhead. We have no
84 * need to reclaim RBNodes individually, so this costs nothing.
86 if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY)
88 accum->entryallocator = palloc(sizeof(GinEntryAccumulator) * DEF_NENTRY);
89 accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
93 /* Allocate new RBNode from current chunk */
94 ea = accum->entryallocator + accum->eas_used;
101 ginInitBA(BuildAccumulator *accum)
103 /* accum->ginstate is intentionally not set here */
104 accum->allocatedMemory = 0;
105 accum->entryallocator = NULL;
107 accum->tree = rb_create(sizeof(GinEntryAccumulator),
110 ginAllocEntryAccumulator,
111 NULL, /* no freefunc needed */
116 * This is basically the same as datumCopy(), but extended to count
117 * palloc'd space in accum->allocatedMemory.
120 getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
122 Form_pg_attribute att = accum->ginstate->origTupdesc->attrs[attnum - 1];
129 res = datumCopy(value, false, att->attlen);
130 accum->allocatedMemory += GetMemoryChunkSpace(DatumGetPointer(res));
136 * Find/store one entry from indexed value.
139 ginInsertBAEntry(BuildAccumulator *accum,
140 ItemPointer heapptr, OffsetNumber attnum,
141 Datum key, GinNullCategory category)
143 GinEntryAccumulator eatmp;
144 GinEntryAccumulator *ea;
148 * For the moment, fill only the fields of eatmp that will be looked at by
149 * cmpEntryAccumulator or ginCombineData.
151 eatmp.attnum = attnum;
153 eatmp.category = category;
154 /* temporarily set up single-entry itempointer list */
155 eatmp.list = heapptr;
157 ea = (GinEntryAccumulator *) rb_insert(accum->tree, (RBNode *) &eatmp,
163 * Finish initializing new tree entry, including making permanent
164 * copies of the datum (if it's not null) and itempointer.
166 if (category == GIN_CAT_NORM_KEY)
167 ea->key = getDatumCopy(accum, attnum, key);
168 ea->maxcount = DEF_NPTR;
170 ea->shouldSort = FALSE;
172 (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
173 ea->list[0] = *heapptr;
174 accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
179 * ginCombineData did everything needed.
185 * Insert the entries for one heap pointer.
187 * Since the entries are being inserted into a balanced binary tree, you
188 * might think that the order of insertion wouldn't be critical, but it turns
189 * out that inserting the entries in sorted order results in a lot of
190 * rebalancing operations and is slow. To prevent this, we attempt to insert
191 * the nodes in an order that will produce a nearly-balanced tree if the input
194 * We do this as follows. First, we imagine that we have an array whose size
195 * is the smallest power of two greater than or equal to the actual array
196 * size. Second, we insert the middle entry of our virtual array into the
197 * tree; then, we insert the middles of each half of our virtual array, then
198 * middles of quarters, etc.
201 ginInsertBAEntries(BuildAccumulator *accum,
202 ItemPointer heapptr, OffsetNumber attnum,
203 Datum *entries, GinNullCategory *categories,
206 uint32 step = nentries;
211 Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
214 * step will contain largest power of 2 and <= nentries
220 step |= (step >> 16);
228 for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ )
229 ginInsertBAEntry(accum, heapptr, attnum,
230 entries[i], categories[i]);
237 qsortCompareItemPointers(const void *a, const void *b)
239 int res = ginCompareItemPointers((ItemPointer) a, (ItemPointer) b);
241 /* Assert that there are no equal item pointers being sorted */
246 /* Prepare to read out the rbtree contents using ginGetBAEntry */
248 ginBeginBAScan(BuildAccumulator *accum)
250 rb_begin_iterate(accum->tree, LeftRightWalk);
254 * Get the next entry in sequence from the BuildAccumulator's rbtree.
255 * This consists of a single key datum and a list (array) of one or more
256 * heap TIDs in which that key is found. The list is guaranteed sorted.
259 ginGetBAEntry(BuildAccumulator *accum,
260 OffsetNumber *attnum, Datum *key, GinNullCategory *category,
263 GinEntryAccumulator *entry;
264 ItemPointerData *list;
266 entry = (GinEntryAccumulator *) rb_iterate(accum->tree);
269 return NULL; /* no more entries */
271 *attnum = entry->attnum;
273 *category = entry->category;
277 Assert(list != NULL && entry->count > 0);
279 if (entry->shouldSort && entry->count > 1)
280 qsort(list, entry->count, sizeof(ItemPointerData),
281 qsortCompareItemPointers);