1 /*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.9 2007/09/20 17:56:30 tgl Exp $
12 *-------------------------------------------------------------------------
16 #include "access/gin.h"
17 #include "access/tuptoaster.h"
20 * forms tuple for entry tree. On leaf page, Index tuple has
21 * non-traditional layout. Tuple may contain posting list or
22 * root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno)
24 * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
25 * - ItemPointerGetBlockNumber(&itup->t_tid) contains original
26 * size of tuple (without posting list).
27 * Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
28 * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
29 * of elements in posting list (number of heap itempointer)
30 * Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
31 * - After usual part of tuple there is a posting list
32 * Macros: GinGetPosting(itup)
34 * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
35 * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
36 * root of posting tree
37 * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING
40 GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd)
45 itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
47 GinSetOrigSizePosting(itup, IndexTupleSize(itup));
51 uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);
53 if (newsize >= INDEX_SIZE_MASK)
56 if (newsize > TOAST_INDEX_TARGET && nipd > 1)
59 itup = repalloc(itup, newsize);
62 itup->t_info &= ~INDEX_SIZE_MASK;
63 itup->t_info |= newsize;
66 memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
67 GinSetNPosting(itup, nipd);
71 GinSetNPosting(itup, 0);
77 * Entry tree is a "static", ie tuple never deletes from it,
78 * so we don't use right bound, we use rightest key instead.
81 getRightMostTuple(Page page)
83 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
85 return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
89 ginGetHighKey(GinState *ginstate, Page page)
94 itup = getRightMostTuple(page);
96 return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
100 entryIsMoveRight(GinBtree btree, Page page)
104 if (GinPageRightMost(page))
107 highkey = ginGetHighKey(btree->ginstate, page);
109 if (compareEntries(btree->ginstate, btree->entryValue, highkey) > 0)
116 * Find correct tuple in non-leaf page. It supposed that
117 * page correctly choosen and searching value SHOULD be on page
120 entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
125 IndexTuple itup = NULL;
127 Page page = BufferGetPage(stack->buffer);
129 Assert(!GinPageIsLeaf(page));
130 Assert(!GinPageIsData(page));
134 stack->off = FirstOffsetNumber;
135 stack->predictNumber *= PageGetMaxOffsetNumber(page);
136 return btree->getLeftMostPage(btree, page);
139 low = FirstOffsetNumber;
140 maxoff = high = PageGetMaxOffsetNumber(page);
147 OffsetNumber mid = low + ((high - low) / 2);
149 if (mid == maxoff && GinPageRightMost(page))
156 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
157 result = compareEntries(btree->ginstate, btree->entryValue,
158 index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
164 Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
165 return GinItemPointerGetBlockNumber(&(itup)->t_tid);
173 Assert(high >= FirstOffsetNumber && high <= maxoff);
176 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
177 Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
178 return GinItemPointerGetBlockNumber(&(itup)->t_tid);
182 * Searches correct position for value on leaf page.
183 * Page should be corrrectly choosen.
184 * Returns true if value found on page.
187 entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
189 Page page = BufferGetPage(stack->buffer);
194 Assert(GinPageIsLeaf(page));
195 Assert(!GinPageIsData(page));
199 stack->off = FirstOffsetNumber;
203 low = FirstOffsetNumber;
204 high = PageGetMaxOffsetNumber(page);
208 stack->off = FirstOffsetNumber;
216 OffsetNumber mid = low + ((high - low) / 2);
220 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
221 result = compareEntries(btree->ginstate, btree->entryValue,
222 index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
240 entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
243 maxoff = PageGetMaxOffsetNumber(page);
246 Assert(!GinPageIsLeaf(page));
247 Assert(!GinPageIsData(page));
249 /* if page isn't changed, we returns storedOff */
250 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
252 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
253 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
257 * we hope, that needed pointer goes to right. It's true if there
260 for (i = storedOff + 1; i <= maxoff; i++)
262 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
263 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
266 maxoff = storedOff - 1;
270 for (i = FirstOffsetNumber; i <= maxoff; i++)
272 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
273 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
277 return InvalidOffsetNumber;
281 entryGetLeftMostPage(GinBtree btree, Page page)
285 Assert(!GinPageIsLeaf(page));
286 Assert(!GinPageIsData(page));
287 Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
289 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
290 return GinItemPointerGetBlockNumber(&(itup)->t_tid);
294 entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
297 Page page = BufferGetPage(buf);
299 Assert(btree->entry);
300 Assert(!GinPageIsData(page));
304 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
306 itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
309 if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
316 * Delete tuple on leaf page if tuples was existed and we
317 * should update it, update old child blkno to new right page
318 * if child split is occured
321 entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
323 BlockNumber ret = InvalidBlockNumber;
325 Assert(btree->entry);
326 Assert(!GinPageIsData(page));
330 Assert(GinPageIsLeaf(page));
331 PageIndexTupleDelete(page, off);
334 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
336 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
338 ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
339 ret = btree->rightblkno;
342 btree->rightblkno = InvalidBlockNumber;
348 * Place tuple on page and fills WAL record
351 entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
353 Page page = BufferGetPage(buf);
354 static XLogRecData rdata[3];
356 static ginxlogInsert data;
360 data.updateBlkno = entryPreparePage(btree, page, off);
362 placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, false, false);
364 elog(ERROR, "failed to add item to index page in \"%s\"",
365 RelationGetRelationName(btree->index));
367 data.node = btree->index->rd_node;
368 data.blkno = BufferGetBlockNumber(buf);
371 data.isDelete = btree->isDelete;
373 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
376 * Prevent full page write if child's split occurs. That is needed
377 * to remove incomplete splits while replaying WAL
379 * data.updateBlkno contains new block number (of newly created right page)
380 * for recently splited page.
382 if ( data.updateBlkno == InvalidBlockNumber )
384 rdata[0].buffer = buf;
385 rdata[0].buffer_std = TRUE;
386 rdata[0].data = NULL;
388 rdata[0].next = &rdata[1];
392 rdata[cnt].buffer = InvalidBuffer;
393 rdata[cnt].data = (char *) &data;
394 rdata[cnt].len = sizeof(ginxlogInsert);
395 rdata[cnt].next = &rdata[cnt+1];
398 rdata[cnt].buffer = InvalidBuffer;
399 rdata[cnt].data = (char *) btree->entry;
400 rdata[cnt].len = IndexTupleSize(btree->entry);
401 rdata[cnt].next = NULL;
407 * Place tuple and split page, original buffer(lbuf) leaves untouched,
408 * returns shadow page of lbuf filled new data.
409 * Tuples are distributed between pages by equal size on its, not
413 entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
415 static XLogRecData rdata[2];
418 separator = InvalidOffsetNumber;
422 static char tupstore[2 * BLCKSZ];
425 leftrightmost = NULL;
426 static ginxlogSplit data;
430 Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
431 Page rpage = BufferGetPage(rbuf);
432 Size pageSize = PageGetPageSize(lpage);
435 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
436 InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid));
437 data.updateBlkno = entryPreparePage(btree, lpage, off);
439 maxoff = PageGetMaxOffsetNumber(lpage);
442 for (i = FirstOffsetNumber; i <= maxoff; i++)
446 size = MAXALIGN(IndexTupleSize(btree->entry));
447 memcpy(ptr, btree->entry, size);
449 totalsize += size + sizeof(ItemIdData);
452 itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
453 size = MAXALIGN(IndexTupleSize(itup));
454 memcpy(ptr, itup, size);
456 totalsize += size + sizeof(ItemIdData);
459 if (off == maxoff + 1)
461 size = MAXALIGN(IndexTupleSize(btree->entry));
462 memcpy(ptr, btree->entry, size);
464 totalsize += size + sizeof(ItemIdData);
467 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
468 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
475 for (i = FirstOffsetNumber; i <= maxoff; i++)
477 itup = (IndexTuple) ptr;
479 if (lsize > totalsize / 2)
481 if (separator == InvalidOffsetNumber)
487 leftrightmost = itup;
488 lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
491 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
492 elog(ERROR, "failed to add item to index page in \"%s\"",
493 RelationGetRelationName(btree->index));
494 ptr += MAXALIGN(IndexTupleSize(itup));
497 value = index_getattr(leftrightmost, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
498 btree->entry = GinFormTuple(btree->ginstate, value, NULL, 0);
499 ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber);
500 btree->rightblkno = BufferGetBlockNumber(rbuf);
502 data.node = btree->index->rd_node;
503 data.rootBlkno = InvalidBlockNumber;
504 data.lblkno = BufferGetBlockNumber(lbuf);
505 data.rblkno = BufferGetBlockNumber(rbuf);
506 data.separator = separator;
509 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
510 data.isRootSplit = FALSE;
512 rdata[0].buffer = InvalidBuffer;
513 rdata[0].data = (char *) &data;
514 rdata[0].len = sizeof(ginxlogSplit);
515 rdata[0].next = &rdata[1];
517 rdata[1].buffer = InvalidBuffer;
518 rdata[1].data = tupstore;
519 rdata[1].len = MAXALIGN(totalsize);
520 rdata[1].next = NULL;
526 * return newly allocate rightmost tuple
529 ginPageGetLinkItup(Buffer buf)
533 Page page = BufferGetPage(buf);
535 itup = getRightMostTuple(page);
536 if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
538 nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup)));
539 memcpy(nitup, itup, GinGetOrigSizePosting(itup));
540 nitup->t_info &= ~INDEX_SIZE_MASK;
541 nitup->t_info |= GinGetOrigSizePosting(itup);
545 nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup)));
546 memcpy(nitup, itup, IndexTupleSize(itup));
549 ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
554 * Fills new root by rightest values from child.
555 * Also called from ginxlog, should not use btree
558 entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
563 page = BufferGetPage(root);
565 itup = ginPageGetLinkItup(lbuf);
566 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
567 elog(ERROR, "failed to add item to index root page");
569 itup = ginPageGetLinkItup(rbuf);
570 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
571 elog(ERROR, "failed to add item to index root page");
575 prepareEntryScan(GinBtree btree, Relation index, Datum value, GinState *ginstate)
577 memset(btree, 0, sizeof(GinBtreeData));
579 btree->isMoveRight = entryIsMoveRight;
580 btree->findChildPage = entryLocateEntry;
581 btree->findItem = entryLocateLeafEntry;
582 btree->findChildPtr = entryFindChildPtr;
583 btree->getLeftMostPage = entryGetLeftMostPage;
584 btree->isEnoughSpace = entryIsEnoughSpace;
585 btree->placeToPage = entryPlaceToPage;
586 btree->splitPage = entrySplitPage;
587 btree->fillRoot = entryFillRoot;
589 btree->index = index;
590 btree->ginstate = ginstate;
591 btree->entryValue = value;
593 btree->isDelete = FALSE;
594 btree->searchMode = FALSE;
595 btree->fullScan = FALSE;
596 btree->isBuild = FALSE;