1 /*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.6 2007/01/05 22:19:21 momjian Exp $
12 *-------------------------------------------------------------------------
16 #include "access/gin.h"
17 #include "access/tuptoaster.h"
20 * forms tuple for entry tree. On leaf page, Index tuple has
21 * non-traditional layout. Tuple may contain posting list or
22 * root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno)
24 * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
25 * - ItemPointerGetBlockNumber(&itup->t_tid) contains original
26 * size of tuple (without posting list).
27 * Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
28 * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
29 * of elements in posting list (number of heap itempointer)
30 * Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
31 * - After usual part of tuple there is a posting list
32 * Macros: GinGetPosting(itup)
34 * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual
35 * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of
36 * root of posting tree
37 * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING
40 GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd)
45 itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);
47 GinSetOrigSizePosting(itup, IndexTupleSize(itup));
51 uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData) * nipd);
53 if (newsize >= INDEX_SIZE_MASK)
56 if (newsize > TOAST_INDEX_TARGET && nipd > 1)
59 itup = repalloc(itup, newsize);
62 itup->t_info &= ~INDEX_SIZE_MASK;
63 itup->t_info |= newsize;
66 memcpy(GinGetPosting(itup), ipd, sizeof(ItemPointerData) * nipd);
67 GinSetNPosting(itup, nipd);
71 GinSetNPosting(itup, 0);
77 * Entry tree is a "static", ie tuple never deletes from it,
78 * so we don't use right bound, we use rightest key instead.
81 getRightMostTuple(Page page)
83 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
85 return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
89 ginGetHighKey(GinState *ginstate, Page page)
94 itup = getRightMostTuple(page);
96 return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull);
100 entryIsMoveRight(GinBtree btree, Page page)
104 if (GinPageRightMost(page))
107 highkey = ginGetHighKey(btree->ginstate, page);
109 if (compareEntries(btree->ginstate, btree->entryValue, highkey) > 0)
116 * Find correct tuple in non-leaf page. It supposed that
117 * page correctly choosen and searching value SHOULD be on page
120 entryLocateEntry(GinBtree btree, GinBtreeStack *stack)
125 IndexTuple itup = NULL;
127 Page page = BufferGetPage(stack->buffer);
129 Assert(!GinPageIsLeaf(page));
130 Assert(!GinPageIsData(page));
134 stack->off = FirstOffsetNumber;
135 stack->predictNumber *= PageGetMaxOffsetNumber(page);
136 return btree->getLeftMostPage(btree, page);
139 low = FirstOffsetNumber;
140 maxoff = high = PageGetMaxOffsetNumber(page);
147 OffsetNumber mid = low + ((high - low) / 2);
149 if (mid == maxoff && GinPageRightMost(page))
156 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
157 result = compareEntries(btree->ginstate, btree->entryValue,
158 index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
164 Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
165 return GinItemPointerGetBlockNumber(&(itup)->t_tid);
173 Assert(high >= FirstOffsetNumber && high <= maxoff);
176 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
177 Assert(GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO);
178 return GinItemPointerGetBlockNumber(&(itup)->t_tid);
182 * Searches correct position for value on leaf page.
183 * Page should be corrrectly choosen.
184 * Returns true if value found on page.
187 entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack)
189 Page page = BufferGetPage(stack->buffer);
194 Assert(GinPageIsLeaf(page));
195 Assert(!GinPageIsData(page));
199 stack->off = FirstOffsetNumber;
203 low = FirstOffsetNumber;
204 high = PageGetMaxOffsetNumber(page);
208 stack->off = FirstOffsetNumber;
216 OffsetNumber mid = low + ((high - low) / 2);
220 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
221 result = compareEntries(btree->ginstate, btree->entryValue,
222 index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull));
240 entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
243 maxoff = PageGetMaxOffsetNumber(page);
246 Assert(!GinPageIsLeaf(page));
247 Assert(!GinPageIsData(page));
249 /* if page isn't changed, we returns storedOff */
250 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
252 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
253 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
257 * we hope, that needed pointer goes to right. It's true if there
260 for (i = storedOff + 1; i <= maxoff; i++)
262 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
263 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
266 maxoff = storedOff - 1;
270 for (i = FirstOffsetNumber; i <= maxoff; i++)
272 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
273 if (GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno)
277 return InvalidOffsetNumber;
281 entryGetLeftMostPage(GinBtree btree, Page page)
285 Assert(!GinPageIsLeaf(page));
286 Assert(!GinPageIsData(page));
287 Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber);
289 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
290 return GinItemPointerGetBlockNumber(&(itup)->t_tid);
294 entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
297 Page page = BufferGetPage(buf);
299 Assert(btree->entry);
300 Assert(!GinPageIsData(page));
304 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
306 itupsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
309 if (PageGetFreeSpace(page) + itupsz >= MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData))
316 * Delete tuple on leaf page if tuples was existed and we
317 * should update it, update old child blkno to new right page
318 * if child split is occured
321 entryPreparePage(GinBtree btree, Page page, OffsetNumber off)
323 BlockNumber ret = InvalidBlockNumber;
325 Assert(btree->entry);
326 Assert(!GinPageIsData(page));
330 Assert(GinPageIsLeaf(page));
331 PageIndexTupleDelete(page, off);
334 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
336 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
338 ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
339 ret = btree->rightblkno;
342 btree->rightblkno = InvalidBlockNumber;
348 * Place tuple on page and fills WAL record
351 entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
353 Page page = BufferGetPage(buf);
354 static XLogRecData rdata[3];
356 static ginxlogInsert data;
359 data.updateBlkno = entryPreparePage(btree, page, off);
361 placed = PageAddItem(page, (Item) btree->entry, IndexTupleSize(btree->entry), off, LP_USED);
363 elog(ERROR, "failed to add item to index page in \"%s\"",
364 RelationGetRelationName(btree->index));
366 data.node = btree->index->rd_node;
367 data.blkno = BufferGetBlockNumber(buf);
370 data.isDelete = btree->isDelete;
372 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
374 rdata[0].buffer = buf;
375 rdata[0].buffer_std = TRUE;
376 rdata[0].data = NULL;
378 rdata[0].next = &rdata[1];
380 rdata[1].buffer = InvalidBuffer;
381 rdata[1].data = (char *) &data;
382 rdata[1].len = sizeof(ginxlogInsert);
383 rdata[1].next = &rdata[2];
385 rdata[2].buffer = InvalidBuffer;
386 rdata[2].data = (char *) btree->entry;
387 rdata[2].len = IndexTupleSize(btree->entry);
388 rdata[2].next = NULL;
394 * Place tuple and split page, original buffer(lbuf) leaves untouched,
395 * returns shadow page of lbuf filled new data.
396 * Tuples are distributed between pages by equal size on its, not
400 entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
402 static XLogRecData rdata[2];
405 separator = InvalidOffsetNumber;
409 static char tupstore[2 * BLCKSZ];
412 leftrightmost = NULL;
413 static ginxlogSplit data;
417 Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
418 Page rpage = BufferGetPage(rbuf);
419 Size pageSize = PageGetPageSize(lpage);
422 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
423 InvalidOffsetNumber : GinItemPointerGetBlockNumber(&(btree->entry->t_tid));
424 data.updateBlkno = entryPreparePage(btree, lpage, off);
426 maxoff = PageGetMaxOffsetNumber(lpage);
429 for (i = FirstOffsetNumber; i <= maxoff; i++)
433 size = MAXALIGN(IndexTupleSize(btree->entry));
434 memcpy(ptr, btree->entry, size);
436 totalsize += size + sizeof(ItemIdData);
439 itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i));
440 size = MAXALIGN(IndexTupleSize(itup));
441 memcpy(ptr, itup, size);
443 totalsize += size + sizeof(ItemIdData);
446 if (off == maxoff + 1)
448 size = MAXALIGN(IndexTupleSize(btree->entry));
449 memcpy(ptr, btree->entry, size);
451 totalsize += size + sizeof(ItemIdData);
454 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
455 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
462 for (i = FirstOffsetNumber; i <= maxoff; i++)
464 itup = (IndexTuple) ptr;
466 if (lsize > totalsize / 2)
468 if (separator == InvalidOffsetNumber)
474 leftrightmost = itup;
475 lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData);
478 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
479 elog(ERROR, "failed to add item to index page in \"%s\"",
480 RelationGetRelationName(btree->index));
481 ptr += MAXALIGN(IndexTupleSize(itup));
484 value = index_getattr(leftrightmost, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
485 btree->entry = GinFormTuple(btree->ginstate, value, NULL, 0);
486 ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber(lbuf), InvalidOffsetNumber);
487 btree->rightblkno = BufferGetBlockNumber(rbuf);
489 data.node = btree->index->rd_node;
490 data.rootBlkno = InvalidBlockNumber;
491 data.lblkno = BufferGetBlockNumber(lbuf);
492 data.rblkno = BufferGetBlockNumber(rbuf);
493 data.separator = separator;
496 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
497 data.isRootSplit = FALSE;
499 rdata[0].buffer = InvalidBuffer;
500 rdata[0].data = (char *) &data;
501 rdata[0].len = sizeof(ginxlogSplit);
502 rdata[0].next = &rdata[1];
504 rdata[1].buffer = InvalidBuffer;
505 rdata[1].data = tupstore;
506 rdata[1].len = MAXALIGN(totalsize);
507 rdata[1].next = NULL;
513 * return newly allocate rightmost tuple
516 ginPageGetLinkItup(Buffer buf)
520 Page page = BufferGetPage(buf);
522 itup = getRightMostTuple(page);
523 if (GinPageIsLeaf(page) && !GinIsPostingTree(itup))
525 nitup = (IndexTuple) palloc(MAXALIGN(GinGetOrigSizePosting(itup)));
526 memcpy(nitup, itup, GinGetOrigSizePosting(itup));
527 nitup->t_info &= ~INDEX_SIZE_MASK;
528 nitup->t_info |= GinGetOrigSizePosting(itup);
532 nitup = (IndexTuple) palloc(MAXALIGN(IndexTupleSize(itup)));
533 memcpy(nitup, itup, IndexTupleSize(itup));
536 ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
541 * Fills new root by rightest values from child.
542 * Also called from ginxlog, should not use btree
545 entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
550 page = BufferGetPage(root);
552 itup = ginPageGetLinkItup(lbuf);
553 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
554 elog(ERROR, "failed to add item to index root page");
556 itup = ginPageGetLinkItup(rbuf);
557 if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber)
558 elog(ERROR, "failed to add item to index root page");
562 prepareEntryScan(GinBtree btree, Relation index, Datum value, GinState *ginstate)
564 memset(btree, 0, sizeof(GinBtreeData));
566 btree->isMoveRight = entryIsMoveRight;
567 btree->findChildPage = entryLocateEntry;
568 btree->findItem = entryLocateLeafEntry;
569 btree->findChildPtr = entryFindChildPtr;
570 btree->getLeftMostPage = entryGetLeftMostPage;
571 btree->isEnoughSpace = entryIsEnoughSpace;
572 btree->placeToPage = entryPlaceToPage;
573 btree->splitPage = entrySplitPage;
574 btree->fillRoot = entryFillRoot;
576 btree->index = index;
577 btree->ginstate = ginstate;
578 btree->entryValue = value;
580 btree->isDelete = FALSE;
581 btree->searchMode = FALSE;
582 btree->fullScan = FALSE;
583 btree->isBuild = FALSE;