1 /*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.5 2006/11/12 06:55:53 neilc Exp $
12 *-------------------------------------------------------------------------
16 #include "access/gin.h"
19 compareItemPointers(ItemPointer a, ItemPointer b)
21 if (GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b))
23 if (GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b))
25 return (GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b)) ? 1 : -1;
28 return (GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b)) ? 1 : -1;
32 * Merge two ordered array of itempointer
35 MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb)
37 ItemPointerData *dptr = dst;
38 ItemPointerData *aptr = a,
41 while (aptr - a < na && bptr - b < nb)
43 if (compareItemPointers(aptr, bptr) > 0)
57 * Checks, should we move to right link...
58 * Compares inserting itemp pointer with right bound of current page
61 dataIsMoveRight(GinBtree btree, Page page)
63 ItemPointer iptr = GinDataPageGetRightBound(page);
65 if (GinPageRightMost(page))
68 return (compareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
72 * Find correct PostingItem in non-leaf page. It supposed that page
73 * correctly chosen and searching value SHOULD be on page
76 dataLocateItem(GinBtree btree, GinBtreeStack *stack)
81 PostingItem *pitem = NULL;
83 Page page = BufferGetPage(stack->buffer);
85 Assert(!GinPageIsLeaf(page));
86 Assert(GinPageIsData(page));
90 stack->off = FirstOffsetNumber;
91 stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
92 return btree->getLeftMostPage(btree, page);
95 low = FirstOffsetNumber;
96 maxoff = high = GinPageGetOpaque(page)->maxoff;
103 OffsetNumber mid = low + ((high - low) / 2);
105 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
110 * Right infinity, page already correctly chosen with a help of
116 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
117 result = compareItemPointers(btree->items + btree->curitem, &(pitem->key));
123 return PostingItemGetBlockNumber(pitem);
131 Assert(high >= FirstOffsetNumber && high <= maxoff);
134 pitem = (PostingItem *) GinDataPageGetItem(page, high);
135 return PostingItemGetBlockNumber(pitem);
139 * Searches correct position for value on leaf page.
140 * Page should be correctly chosen.
141 * Returns true if value found on page.
144 dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
146 Page page = BufferGetPage(stack->buffer);
151 Assert(GinPageIsLeaf(page));
152 Assert(GinPageIsData(page));
156 stack->off = FirstOffsetNumber;
160 low = FirstOffsetNumber;
161 high = GinPageGetOpaque(page)->maxoff;
165 stack->off = FirstOffsetNumber;
173 OffsetNumber mid = low + ((high - low) / 2);
175 result = compareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
193 * Finds links to blkno on non-leaf page, returns
194 * offset of PostingItem
197 dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
200 maxoff = GinPageGetOpaque(page)->maxoff;
203 Assert(!GinPageIsLeaf(page));
204 Assert(GinPageIsData(page));
206 /* if page isn't changed, we returns storedOff */
207 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
209 pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
210 if (PostingItemGetBlockNumber(pitem) == blkno)
214 * we hope, that needed pointer goes to right. It's true if there
217 for (i = storedOff + 1; i <= maxoff; i++)
219 pitem = (PostingItem *) GinDataPageGetItem(page, i);
220 if (PostingItemGetBlockNumber(pitem) == blkno)
224 maxoff = storedOff - 1;
228 for (i = FirstOffsetNumber; i <= maxoff; i++)
230 pitem = (PostingItem *) GinDataPageGetItem(page, i);
231 if (PostingItemGetBlockNumber(pitem) == blkno)
235 return InvalidOffsetNumber;
239 * returns blkno of leftmost child
242 dataGetLeftMostPage(GinBtree btree, Page page)
246 Assert(!GinPageIsLeaf(page));
247 Assert(GinPageIsData(page));
248 Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
250 pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
251 return PostingItemGetBlockNumber(pitem);
255 * add ItemPointer or PostingItem to page. data should point to
256 * correct value! depending on leaf or non-leaf page
259 GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
261 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
264 if (offset == InvalidOffsetNumber)
266 ptr = GinDataPageGetItem(page, maxoff + 1);
270 ptr = GinDataPageGetItem(page, offset);
271 if (maxoff + 1 - offset != 0)
272 memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page));
274 memcpy(ptr, data, GinSizeOfItem(page));
276 GinPageGetOpaque(page)->maxoff++;
280 * Deletes posting item from non-leaf page
283 PageDeletePostingItem(Page page, OffsetNumber offset)
285 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
287 Assert(!GinPageIsLeaf(page));
288 Assert(offset >= FirstOffsetNumber && offset <= maxoff);
290 if (offset != maxoff)
291 memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
292 sizeof(PostingItem) * (maxoff - offset));
294 GinPageGetOpaque(page)->maxoff--;
298 * checks space to install new value,
299 * item pointer never deletes!
302 dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
304 Page page = BufferGetPage(buf);
306 Assert(GinPageIsData(page));
307 Assert(!btree->isDelete);
309 if (GinPageIsLeaf(page))
311 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
313 if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
316 else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
319 else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
326 * In case of previous split update old child blkno to
328 * item pointer never deletes!
331 dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
333 BlockNumber ret = InvalidBlockNumber;
335 Assert(GinPageIsData(page));
337 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
339 PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
341 PostingItemSetBlockNumber(pitem, btree->rightblkno);
342 ret = btree->rightblkno;
345 btree->rightblkno = InvalidBlockNumber;
351 * Places keys to page and fills WAL record. In case leaf page and
352 * build mode puts all ItemPointers to page.
355 dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
357 Page page = BufferGetPage(buf);
358 static XLogRecData rdata[3];
359 int sizeofitem = GinSizeOfItem(page);
360 static ginxlogInsert data;
363 Assert(GinPageIsData(page));
365 data.updateBlkno = dataPrepareData(btree, page, off);
367 data.node = btree->index->rd_node;
368 data.blkno = BufferGetBlockNumber(buf);
371 data.isDelete = FALSE;
373 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
375 rdata[0].buffer = buf;
376 rdata[0].buffer_std = FALSE;
377 rdata[0].data = NULL;
379 rdata[0].next = &rdata[1];
381 rdata[1].buffer = InvalidBuffer;
382 rdata[1].data = (char *) &data;
383 rdata[1].len = sizeof(ginxlogInsert);
384 rdata[1].next = &rdata[2];
386 rdata[2].buffer = InvalidBuffer;
387 rdata[2].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
388 rdata[2].len = sizeofitem;
389 rdata[2].next = NULL;
391 if (GinPageIsLeaf(page))
393 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
395 /* usually, create index... */
396 uint32 savedPos = btree->curitem;
398 while (btree->curitem < btree->nitem)
400 GinDataPageAddItem(page, btree->items + btree->curitem, off);
404 data.nitem = btree->curitem - savedPos;
405 rdata[2].len = sizeofitem * data.nitem;
409 GinDataPageAddItem(page, btree->items + btree->curitem, off);
414 GinDataPageAddItem(page, &(btree->pitem), off);
418 * split page and fills WAL record. original buffer(lbuf) leaves untouched,
419 * returns shadow page of lbuf filled new data. In leaf page and build mode puts all
420 * ItemPointers to pages. Also, in build mode splits data by way to full fulled
424 dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
426 static ginxlogSplit data;
427 static XLogRecData rdata[4];
428 static char vector[2 * BLCKSZ];
430 OffsetNumber separator;
432 Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
433 ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
434 int sizeofitem = GinSizeOfItem(lpage);
435 OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
436 Page rpage = BufferGetPage(rbuf);
437 Size pageSize = PageGetPageSize(lpage);
441 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
442 freeSpace = GinDataPageGetFreeSpace(rpage);
445 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
446 InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
447 data.updateBlkno = dataPrepareData(btree, lpage, off);
449 memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
450 maxoff * sizeofitem);
452 if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
455 while (btree->curitem < btree->nitem && maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
457 memcpy(vector + maxoff * sizeof(ItemPointerData), btree->items + btree->curitem,
458 sizeof(ItemPointerData));
466 ptr = vector + (off - 1) * sizeofitem;
467 if (maxoff + 1 - off != 0)
468 memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
469 if (GinPageIsLeaf(lpage))
471 memcpy(ptr, btree->items + btree->curitem, sizeofitem);
475 memcpy(ptr, &(btree->pitem), sizeofitem);
481 * we suppose that during index creation table scaned from begin to end,
482 * so ItemPointers are monotonically increased..
484 if (btree->isBuild && GinPageRightMost(lpage))
485 separator = freeSpace / sizeofitem;
487 separator = maxoff / 2;
489 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
490 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
492 memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
493 GinPageGetOpaque(lpage)->maxoff = separator;
494 memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
495 vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
496 GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
498 PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
499 if (GinPageIsLeaf(lpage))
500 btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
501 GinPageGetOpaque(lpage)->maxoff);
503 btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
504 GinPageGetOpaque(lpage)->maxoff))->key;
505 btree->rightblkno = BufferGetBlockNumber(rbuf);
507 /* set up right bound for left page */
508 bound = GinDataPageGetRightBound(lpage);
509 *bound = btree->pitem.key;
511 /* set up right bound for right page */
512 bound = GinDataPageGetRightBound(rpage);
515 data.node = btree->index->rd_node;
516 data.rootBlkno = InvalidBlockNumber;
517 data.lblkno = BufferGetBlockNumber(lbuf);
518 data.rblkno = BufferGetBlockNumber(rbuf);
519 data.separator = separator;
522 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
523 data.isRootSplit = FALSE;
524 data.rightbound = oldbound;
526 rdata[0].buffer = InvalidBuffer;
527 rdata[0].data = (char *) &data;
528 rdata[0].len = sizeof(ginxlogSplit);
529 rdata[0].next = &rdata[1];
531 rdata[1].buffer = InvalidBuffer;
532 rdata[1].data = vector;
533 rdata[1].len = MAXALIGN(maxoff * sizeofitem);
534 rdata[1].next = NULL;
540 * Fills new root by right bound values from child.
541 * Also called from ginxlog, should not use btree
544 dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
546 Page page = BufferGetPage(root),
547 lpage = BufferGetPage(lbuf),
548 rpage = BufferGetPage(rbuf);
552 li.key = *GinDataPageGetRightBound(lpage);
553 PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
554 GinDataPageAddItem(page, &li, InvalidOffsetNumber);
556 ri.key = *GinDataPageGetRightBound(rpage);
557 PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
558 GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
562 prepareDataScan(GinBtree btree, Relation index)
564 memset(btree, 0, sizeof(GinBtreeData));
565 btree->index = index;
566 btree->isMoveRight = dataIsMoveRight;
567 btree->findChildPage = dataLocateItem;
568 btree->findItem = dataLocateLeafItem;
569 btree->findChildPtr = dataFindChildPtr;
570 btree->getLeftMostPage = dataGetLeftMostPage;
571 btree->isEnoughSpace = dataIsEnoughSpace;
572 btree->placeToPage = dataPlaceToPage;
573 btree->splitPage = dataSplitPage;
574 btree->fillRoot = dataFillRoot;
576 btree->searchMode = FALSE;
577 btree->isDelete = FALSE;
578 btree->fullScan = FALSE;
579 btree->isBuild = FALSE;
583 prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
585 GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
587 prepareDataScan(&gdi->btree, index);
589 gdi->btree.searchMode = searchMode;
590 gdi->btree.fullScan = searchMode;
592 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
598 * Inserts array of item pointers, may execute several tree scan (very rare)
601 insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
603 BlockNumber rootBlkno = gdi->stack->blkno;
605 gdi->btree.items = items;
606 gdi->btree.nitem = nitem;
607 gdi->btree.curitem = 0;
609 while (gdi->btree.curitem < gdi->btree.nitem)
612 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
614 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
616 if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
617 elog(ERROR, "item pointer (%u,%d) already exists",
618 ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
619 ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
621 ginInsertValue(&(gdi->btree), gdi->stack);
628 scanBeginPostingTree(GinPostingTreeScan *gdi)
630 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
631 return gdi->stack->buffer;