]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/ginentrypage.c
Update CVS HEAD for 2007 copyright. Back branches are typically not
[postgresql] / src / backend / access / gin / ginentrypage.c
1 /*-------------------------------------------------------------------------
2  *
3  * ginentrypage.c
4  *        page utilities routines for the postgres inverted index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                      $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.6 2007/01/05 22:19:21 momjian Exp $
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16 #include "access/gin.h"
17 #include "access/tuptoaster.h"
18
19 /*
20  * forms tuple for entry tree. On leaf page, Index tuple has
21  * non-traditional layout. Tuple may contain posting list or
22  * root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno)
23  * 1) Posting list
24  *              - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
25  *              - ItemPointerGetBlockNumber(&itup->t_tid) contains original
26  *                size of tuple (without posting list).
27  *                Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
28  *              - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
29  *                of elements in posting list (number of heap itempointer)
30  *                Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
31  *              - After usual part of tuple there is a posting list
32  *                Macros: GinGetPosting(itup)
33  * 2) Posting tree
34  *              - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
35  *              - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
36  *                root of posting tree
37  *              - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING
38  */
39 IndexTuple
40 GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd)
41 {
42         bool            isnull = FALSE;
43         IndexTuple      itup;
44
45         itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
46
47         GinSetOrigSizePosting(itup, IndexTupleSize(itup));
48
49         if (nipd > 0)
50         {
51                 uint32          newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);
52
53                 if (newsize >= INDEX_SIZE_MASK)
54                         return NULL;
55
56                 if (newsize > TOAST_INDEX_TARGET && nipd > 1)
57                         return NULL;
58
59                 itup = repalloc(itup, newsize);
60
61                 /* set new size */
62                 itup->t_info &= ~INDEX_SIZE_MASK;
63                 itup->t_info |= newsize;
64
65                 if (ipd)
66                         memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
67                 GinSetNPosting(itup, nipd);
68         }
69         else
70         {
71                 GinSetNPosting(itup, 0);
72         }
73         return itup;
74 }
75
76 /*
77  * Entry tree is a "static", ie tuple never deletes from it,
78  * so we don't use right bound, we use rightest key instead.
79  */
80 static IndexTuple
81 getRightMostTuple(Page page)
82 {
83         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
84
85         return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
86 }
87
88 Datum
89 ginGetHighKey(GinState *ginstate, Page page)
90 {
91         IndexTuple      itup;
92         bool            isnull;
93
94         itup = getRightMostTuple(page);
95
96         return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
97 }
98
99 static bool
100 entryIsMoveRight(GinBtree btree, Page page)
101 {
102         Datum           highkey;
103
104         if (GinPageRightMost(page))
105                 return FALSE;
106
107         highkey = ginGetHighKey(btree->ginstate, page);
108
109         if (compareEntries(btree->ginstate, btree->entryValue, highkey) > 0)
110                 return TRUE;
111
112         return FALSE;
113 }
114
115 /*
116  * Find correct tuple in non-leaf page. It supposed that
117  * page correctly choosen and searching value SHOULD be on page
118  */
119 static BlockNumber
120 entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
121 {
122         OffsetNumber low,
123                                 high,
124                                 maxoff;
125         IndexTuple      itup = NULL;
126         int                     result;
127         Page            page = BufferGetPage(stack->buffer);
128
129         Assert(!GinPageIsLeaf(page));
130         Assert(!GinPageIsData(page));
131
132         if (btree->fullScan)
133         {
134                 stack->off = FirstOffsetNumber;
135                 stack->predictNumber *= PageGetMaxOffsetNumber(page);
136                 return btree->getLeftMostPage(btree, page);
137         }
138
139         low = FirstOffsetNumber;
140         maxoff = high = PageGetMaxOffsetNumber(page);
141         Assert(high >= low);
142
143         high++;
144
145         while (high > low)
146         {
147                 OffsetNumber mid = low + ((high - low) / 2);
148
149                 if (mid == maxoff && GinPageRightMost(page))
150                         /* Right infinity */
151                         result = -1;
152                 else
153                 {
154                         bool            isnull;
155
156                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
157                         result = compareEntries(btree->ginstate, btree->entryValue,
158                                                                         index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
159                 }
160
161                 if (result == 0)
162                 {
163                         stack->off = mid;
164                         Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
165                         return GinItemPointerGetBlockNumber(&(itup)->t_tid);
166                 }
167                 else if (result > 0)
168                         low = mid + 1;
169                 else
170                         high = mid;
171         }
172
173         Assert(high >= FirstOffsetNumber && high <= maxoff);
174
175         stack->off = high;
176         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
177         Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
178         return GinItemPointerGetBlockNumber(&(itup)->t_tid);
179 }
180
181 /*
182  * Searches correct position for value on leaf page.
183  * Page should be corrrectly choosen.
184  * Returns true if value found on page.
185  */
186 static bool
187 entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
188 {
189         Page            page = BufferGetPage(stack->buffer);
190         OffsetNumber low,
191                                 high;
192         IndexTuple      itup;
193
194         Assert(GinPageIsLeaf(page));
195         Assert(!GinPageIsData(page));
196
197         if (btree->fullScan)
198         {
199                 stack->off = FirstOffsetNumber;
200                 return TRUE;
201         }
202
203         low = FirstOffsetNumber;
204         high = PageGetMaxOffsetNumber(page);
205
206         if (high < low)
207         {
208                 stack->off = FirstOffsetNumber;
209                 return false;
210         }
211
212         high++;
213
214         while (high > low)
215         {
216                 OffsetNumber mid = low + ((high - low) / 2);
217                 bool            isnull;
218                 int                     result;
219
220                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
221                 result = compareEntries(btree->ginstate, btree->entryValue,
222                                                                 index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
223
224                 if (result == 0)
225                 {
226                         stack->off = mid;
227                         return true;
228                 }
229                 else if (result > 0)
230                         low = mid + 1;
231                 else
232                         high = mid;
233         }
234
235         stack->off = high;
236         return false;
237 }
238
239 static OffsetNumber
240 entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
241 {
242         OffsetNumber i,
243                                 maxoff = PageGetMaxOffsetNumber(page);
244         IndexTuple      itup;
245
246         Assert(!GinPageIsLeaf(page));
247         Assert(!GinPageIsData(page));
248
249         /* if page isn't changed, we returns storedOff */
250         if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
251         {
252                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
253                 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
254                         return storedOff;
255
256                 /*
257                  * we hope, that needed pointer goes to right. It's true if there
258                  * wasn't a deletion
259                  */
260                 for (i = storedOff + 1; i <= maxoff; i++)
261                 {
262                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
263                         if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
264                                 return i;
265                 }
266                 maxoff = storedOff - 1;
267         }
268
269         /* last chance */
270         for (i = FirstOffsetNumber; i <= maxoff; i++)
271         {
272                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
273                 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
274                         return i;
275         }
276
277         return InvalidOffsetNumber;
278 }
279
280 static BlockNumber
281 entryGetLeftMostPage(GinBtree btree, Page page)
282 {
283         IndexTuple      itup;
284
285         Assert(!GinPageIsLeaf(page));
286         Assert(!GinPageIsData(page));
287         Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
288
289         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
290         return GinItemPointerGetBlockNumber(&(itup)->t_tid);
291 }
292
293 static bool
294 entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
295 {
296         Size            itupsz = 0;
297         Page            page = BufferGetPage(buf);
298
299         Assert(btree->entry);
300         Assert(!GinPageIsData(page));
301
302         if (btree->isDelete)
303         {
304                 IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
305
306                 itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
307         }
308
309         if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
310                 return true;
311
312         return false;
313 }
314
315 /*
316  * Delete tuple on leaf page if tuples was existed and we
317  * should update it, update old child blkno to new right page
318  * if child split is occured
319  */
320 static BlockNumber
321 entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
322 {
323         BlockNumber ret = InvalidBlockNumber;
324
325         Assert(btree->entry);
326         Assert(!GinPageIsData(page));
327
328         if (btree->isDelete)
329         {
330                 Assert(GinPageIsLeaf(page));
331                 PageIndexTupleDelete(page, off);
332         }
333
334         if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
335         {
336                 IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
337
338                 ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
339                 ret = btree->rightblkno;
340         }
341
342         btree->rightblkno = InvalidBlockNumber;
343
344         return ret;
345 }
346
347 /*
348  * Place tuple on page and fills WAL record
349  */
350 static void
351 entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
352 {
353         Page            page = BufferGetPage(buf);
354         static XLogRecData rdata[3];
355         OffsetNumber placed;
356         static ginxlogInsert data;
357
358         *prdata = rdata;
359         data.updateBlkno = entryPreparePage(btree, page, off);
360
361         placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, LP_USED);
362         if (placed != off)
363                 elog(ERROR, "failed to add item to index page in \"%s\"",
364                          RelationGetRelationName(btree->index));
365
366         data.node = btree->index->rd_node;
367         data.blkno = BufferGetBlockNumber(buf);
368         data.offset = off;
369         data.nitem = 1;
370         data.isDelete = btree->isDelete;
371         data.isData = false;
372         data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
373
374         rdata[0].buffer = buf;
375         rdata[0].buffer_std = TRUE;
376         rdata[0].data = NULL;
377         rdata[0].len = 0;
378         rdata[0].next = &rdata[1];
379
380         rdata[1].buffer = InvalidBuffer;
381         rdata[1].data = (char *) &data;
382         rdata[1].len = sizeof(ginxlogInsert);
383         rdata[1].next = &rdata[2];
384
385         rdata[2].buffer = InvalidBuffer;
386         rdata[2].data = (char *) btree->entry;
387         rdata[2].len = IndexTupleSize(btree->entry);
388         rdata[2].next = NULL;
389
390         btree->entry = NULL;
391 }
392
393 /*
394  * Place tuple and split page, original buffer(lbuf) leaves untouched,
395  * returns shadow page of lbuf filled new data.
396  * Tuples are distributed between pages by equal size on its, not
397  * an equal number!
398  */
399 static Page
400 entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
401 {
402         static XLogRecData rdata[2];
403         OffsetNumber i,
404                                 maxoff,
405                                 separator = InvalidOffsetNumber;
406         Size            totalsize = 0;
407         Size            lsize = 0,
408                                 size;
409         static char tupstore[2 * BLCKSZ];
410         char       *ptr;
411         IndexTuple      itup,
412                                 leftrightmost = NULL;
413         static ginxlogSplit data;
414         Datum           value;
415         bool            isnull;
416         Page            page;
417         Page            lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
418         Page            rpage = BufferGetPage(rbuf);
419         Size            pageSize = PageGetPageSize(lpage);
420
421         *prdata = rdata;
422         data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
423                 InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid));
424         data.updateBlkno = entryPreparePage(btree, lpage, off);
425
426         maxoff = PageGetMaxOffsetNumber(lpage);
427         ptr = tupstore;
428
429         for (i = FirstOffsetNumber; i <= maxoff; i++)
430         {
431                 if (i == off)
432                 {
433                         size = MAXALIGN(IndexTupleSize(btree->entry));
434                         memcpy(ptr, btree->entry, size);
435                         ptr += size;
436                         totalsize += size + sizeof(ItemIdData);
437                 }
438
439                 itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
440                 size = MAXALIGN(IndexTupleSize(itup));
441                 memcpy(ptr, itup, size);
442                 ptr += size;
443                 totalsize += size + sizeof(ItemIdData);
444         }
445
446         if (off == maxoff + 1)
447         {
448                 size = MAXALIGN(IndexTupleSize(btree->entry));
449                 memcpy(ptr, btree->entry, size);
450                 ptr += size;
451                 totalsize += size + sizeof(ItemIdData);
452         }
453
454         GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
455         GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
456
457         ptr = tupstore;
458         maxoff++;
459         lsize = 0;
460
461         page = lpage;
462         for (i = FirstOffsetNumber; i <= maxoff; i++)
463         {
464                 itup = (IndexTuple) ptr;
465
466                 if (lsize > totalsize / 2)
467                 {
468                         if (separator == InvalidOffsetNumber)
469                                 separator = i - 1;
470                         page = rpage;
471                 }
472                 else
473                 {
474                         leftrightmost = itup;
475                         lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
476                 }
477
478                 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
479                         elog(ERROR, "failed to add item to index page in \"%s\"",
480                                  RelationGetRelationName(btree->index));
481                 ptr += MAXALIGN(IndexTupleSize(itup));
482         }
483
484         value = index_getattr(leftrightmost, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
485         btree->entry = GinFormTuple(btree->ginstate, value, NULL, 0);
486         ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber);
487         btree->rightblkno = BufferGetBlockNumber(rbuf);
488
489         data.node = btree->index->rd_node;
490         data.rootBlkno = InvalidBlockNumber;
491         data.lblkno = BufferGetBlockNumber(lbuf);
492         data.rblkno = BufferGetBlockNumber(rbuf);
493         data.separator = separator;
494         data.nitem = maxoff;
495         data.isData = FALSE;
496         data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
497         data.isRootSplit = FALSE;
498
499         rdata[0].buffer = InvalidBuffer;
500         rdata[0].data = (char *) &data;
501         rdata[0].len = sizeof(ginxlogSplit);
502         rdata[0].next = &rdata[1];
503
504         rdata[1].buffer = InvalidBuffer;
505         rdata[1].data = tupstore;
506         rdata[1].len = MAXALIGN(totalsize);
507         rdata[1].next = NULL;
508
509         return lpage;
510 }
511
512 /*
513  * return newly allocate rightmost tuple
514  */
515 IndexTuple
516 ginPageGetLinkItup(Buffer buf)
517 {
518         IndexTuple      itup,
519                                 nitup;
520         Page            page = BufferGetPage(buf);
521
522         itup = getRightMostTuple(page);
523         if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
524         {
525                 nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup)));
526                 memcpy(nitup, itup, GinGetOrigSizePosting(itup));
527                 nitup->t_info &= ~INDEX_SIZE_MASK;
528                 nitup->t_info |= GinGetOrigSizePosting(itup);
529         }
530         else
531         {
532                 nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup)));
533                 memcpy(nitup, itup, IndexTupleSize(itup));
534         }
535
536         ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
537         return nitup;
538 }
539
540 /*
541  * Fills new root by rightest values from child.
542  * Also called from ginxlog, should not use btree
543  */
544 void
545 entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
546 {
547         Page            page;
548         IndexTuple      itup;
549
550         page = BufferGetPage(root);
551
552         itup = ginPageGetLinkItup(lbuf);
553         if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
554                 elog(ERROR, "failed to add item to index root page");
555
556         itup = ginPageGetLinkItup(rbuf);
557         if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
558                 elog(ERROR, "failed to add item to index root page");
559 }
560
561 void
562 prepareEntryScan(GinBtree btree, Relation index, Datum value, GinState *ginstate)
563 {
564         memset(btree, 0, sizeof(GinBtreeData));
565
566         btree->isMoveRight = entryIsMoveRight;
567         btree->findChildPage = entryLocateEntry;
568         btree->findItem = entryLocateLeafEntry;
569         btree->findChildPtr = entryFindChildPtr;
570         btree->getLeftMostPage = entryGetLeftMostPage;
571         btree->isEnoughSpace = entryIsEnoughSpace;
572         btree->placeToPage = entryPlaceToPage;
573         btree->splitPage = entrySplitPage;
574         btree->fillRoot = entryFillRoot;
575
576         btree->index = index;
577         btree->ginstate = ginstate;
578         btree->entryValue = value;
579
580         btree->isDelete = FALSE;
581         btree->searchMode = FALSE;
582         btree->fullScan = FALSE;
583         btree->isBuild = FALSE;
584 }