1 /*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.7 2007/06/04 15:56:28 teodor Exp $
12 *-------------------------------------------------------------------------
16 #include "access/gin.h"
19 compareItemPointers(ItemPointer a, ItemPointer b)
21 if (GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b))
23 if (GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b))
25 return (GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b)) ? 1 : -1;
28 return (GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b)) ? 1 : -1;
32 * Merge two ordered array of itempointer
35 MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb)
37 ItemPointerData *dptr = dst;
38 ItemPointerData *aptr = a,
41 while (aptr - a < na && bptr - b < nb)
43 if (compareItemPointers(aptr, bptr) > 0)
57 * Checks, should we move to right link...
58 * Compares inserting itemp pointer with right bound of current page
61 dataIsMoveRight(GinBtree btree, Page page)
63 ItemPointer iptr = GinDataPageGetRightBound(page);
65 if (GinPageRightMost(page))
68 return (compareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
72 * Find correct PostingItem in non-leaf page. It supposed that page
73 * correctly chosen and searching value SHOULD be on page
76 dataLocateItem(GinBtree btree, GinBtreeStack *stack)
81 PostingItem *pitem = NULL;
83 Page page = BufferGetPage(stack->buffer);
85 Assert(!GinPageIsLeaf(page));
86 Assert(GinPageIsData(page));
90 stack->off = FirstOffsetNumber;
91 stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
92 return btree->getLeftMostPage(btree, page);
95 low = FirstOffsetNumber;
96 maxoff = high = GinPageGetOpaque(page)->maxoff;
103 OffsetNumber mid = low + ((high - low) / 2);
105 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
110 * Right infinity, page already correctly chosen with a help of
116 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
117 result = compareItemPointers(btree->items + btree->curitem, &(pitem->key));
123 return PostingItemGetBlockNumber(pitem);
131 Assert(high >= FirstOffsetNumber && high <= maxoff);
134 pitem = (PostingItem *) GinDataPageGetItem(page, high);
135 return PostingItemGetBlockNumber(pitem);
139 * Searches correct position for value on leaf page.
140 * Page should be correctly chosen.
141 * Returns true if value found on page.
144 dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
146 Page page = BufferGetPage(stack->buffer);
151 Assert(GinPageIsLeaf(page));
152 Assert(GinPageIsData(page));
156 stack->off = FirstOffsetNumber;
160 low = FirstOffsetNumber;
161 high = GinPageGetOpaque(page)->maxoff;
165 stack->off = FirstOffsetNumber;
173 OffsetNumber mid = low + ((high - low) / 2);
175 result = compareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
193 * Finds links to blkno on non-leaf page, returns
194 * offset of PostingItem
197 dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
200 maxoff = GinPageGetOpaque(page)->maxoff;
203 Assert(!GinPageIsLeaf(page));
204 Assert(GinPageIsData(page));
206 /* if page isn't changed, we returns storedOff */
207 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
209 pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
210 if (PostingItemGetBlockNumber(pitem) == blkno)
214 * we hope, that needed pointer goes to right. It's true if there
217 for (i = storedOff + 1; i <= maxoff; i++)
219 pitem = (PostingItem *) GinDataPageGetItem(page, i);
220 if (PostingItemGetBlockNumber(pitem) == blkno)
224 maxoff = storedOff - 1;
228 for (i = FirstOffsetNumber; i <= maxoff; i++)
230 pitem = (PostingItem *) GinDataPageGetItem(page, i);
231 if (PostingItemGetBlockNumber(pitem) == blkno)
235 return InvalidOffsetNumber;
239 * returns blkno of leftmost child
242 dataGetLeftMostPage(GinBtree btree, Page page)
246 Assert(!GinPageIsLeaf(page));
247 Assert(GinPageIsData(page));
248 Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
250 pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
251 return PostingItemGetBlockNumber(pitem);
255 * add ItemPointer or PostingItem to page. data should point to
256 * correct value! depending on leaf or non-leaf page
259 GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
261 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
264 if (offset == InvalidOffsetNumber)
266 ptr = GinDataPageGetItem(page, maxoff + 1);
270 ptr = GinDataPageGetItem(page, offset);
271 if (maxoff + 1 - offset != 0)
272 memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page));
274 memcpy(ptr, data, GinSizeOfItem(page));
276 GinPageGetOpaque(page)->maxoff++;
280 * Deletes posting item from non-leaf page
283 PageDeletePostingItem(Page page, OffsetNumber offset)
285 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
287 Assert(!GinPageIsLeaf(page));
288 Assert(offset >= FirstOffsetNumber && offset <= maxoff);
290 if (offset != maxoff)
291 memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
292 sizeof(PostingItem) * (maxoff - offset));
294 GinPageGetOpaque(page)->maxoff--;
298 * checks space to install new value,
299 * item pointer never deletes!
302 dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
304 Page page = BufferGetPage(buf);
306 Assert(GinPageIsData(page));
307 Assert(!btree->isDelete);
309 if (GinPageIsLeaf(page))
311 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
313 if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
316 else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
319 else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
326 * In case of previous split update old child blkno to
328 * item pointer never deletes!
331 dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
333 BlockNumber ret = InvalidBlockNumber;
335 Assert(GinPageIsData(page));
337 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
339 PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
341 PostingItemSetBlockNumber(pitem, btree->rightblkno);
342 ret = btree->rightblkno;
345 btree->rightblkno = InvalidBlockNumber;
351 * Places keys to page and fills WAL record. In case leaf page and
352 * build mode puts all ItemPointers to page.
355 dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
357 Page page = BufferGetPage(buf);
358 static XLogRecData rdata[3];
359 int sizeofitem = GinSizeOfItem(page);
360 static ginxlogInsert data;
364 Assert(GinPageIsData(page));
366 data.updateBlkno = dataPrepareData(btree, page, off);
368 data.node = btree->index->rd_node;
369 data.blkno = BufferGetBlockNumber(buf);
372 data.isDelete = FALSE;
374 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
377 * Prevent full page write if child's split occurs. That is needed
378 * to remove incomplete splits while replaying WAL
380 * data.updateBlkno contains new block number (of newly created right page)
381 * for recently splited page.
383 if ( data.updateBlkno == InvalidBlockNumber )
385 rdata[0].buffer = buf;
386 rdata[0].buffer_std = FALSE;
387 rdata[0].data = NULL;
389 rdata[0].next = &rdata[1];
393 rdata[cnt].buffer = InvalidBuffer;
394 rdata[cnt].data = (char *) &data;
395 rdata[cnt].len = sizeof(ginxlogInsert);
396 rdata[cnt].next = &rdata[cnt+1];
399 rdata[cnt].buffer = InvalidBuffer;
400 rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
401 rdata[cnt].len = sizeofitem;
402 rdata[cnt].next = NULL;
404 if (GinPageIsLeaf(page))
406 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
408 /* usually, create index... */
409 uint32 savedPos = btree->curitem;
411 while (btree->curitem < btree->nitem)
413 GinDataPageAddItem(page, btree->items + btree->curitem, off);
417 data.nitem = btree->curitem - savedPos;
418 rdata[cnt].len = sizeofitem * data.nitem;
422 GinDataPageAddItem(page, btree->items + btree->curitem, off);
427 GinDataPageAddItem(page, &(btree->pitem), off);
431 * split page and fills WAL record. original buffer(lbuf) leaves untouched,
432 * returns shadow page of lbuf filled new data. In leaf page and build mode puts all
433 * ItemPointers to pages. Also, in build mode splits data by way to full fulled
437 dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
439 static ginxlogSplit data;
440 static XLogRecData rdata[4];
441 static char vector[2 * BLCKSZ];
443 OffsetNumber separator;
445 Page lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
446 ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
447 int sizeofitem = GinSizeOfItem(lpage);
448 OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
449 Page rpage = BufferGetPage(rbuf);
450 Size pageSize = PageGetPageSize(lpage);
454 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
455 freeSpace = GinDataPageGetFreeSpace(rpage);
458 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
459 InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
460 data.updateBlkno = dataPrepareData(btree, lpage, off);
462 memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
463 maxoff * sizeofitem);
465 if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
468 while (btree->curitem < btree->nitem && maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
470 memcpy(vector + maxoff * sizeof(ItemPointerData), btree->items + btree->curitem,
471 sizeof(ItemPointerData));
479 ptr = vector + (off - 1) * sizeofitem;
480 if (maxoff + 1 - off != 0)
481 memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
482 if (GinPageIsLeaf(lpage))
484 memcpy(ptr, btree->items + btree->curitem, sizeofitem);
488 memcpy(ptr, &(btree->pitem), sizeofitem);
494 * we suppose that during index creation table scaned from begin to end,
495 * so ItemPointers are monotonically increased..
497 if (btree->isBuild && GinPageRightMost(lpage))
498 separator = freeSpace / sizeofitem;
500 separator = maxoff / 2;
502 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
503 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
505 memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
506 GinPageGetOpaque(lpage)->maxoff = separator;
507 memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
508 vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
509 GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
511 PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
512 if (GinPageIsLeaf(lpage))
513 btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
514 GinPageGetOpaque(lpage)->maxoff);
516 btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
517 GinPageGetOpaque(lpage)->maxoff))->key;
518 btree->rightblkno = BufferGetBlockNumber(rbuf);
520 /* set up right bound for left page */
521 bound = GinDataPageGetRightBound(lpage);
522 *bound = btree->pitem.key;
524 /* set up right bound for right page */
525 bound = GinDataPageGetRightBound(rpage);
528 data.node = btree->index->rd_node;
529 data.rootBlkno = InvalidBlockNumber;
530 data.lblkno = BufferGetBlockNumber(lbuf);
531 data.rblkno = BufferGetBlockNumber(rbuf);
532 data.separator = separator;
535 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
536 data.isRootSplit = FALSE;
537 data.rightbound = oldbound;
539 rdata[0].buffer = InvalidBuffer;
540 rdata[0].data = (char *) &data;
541 rdata[0].len = sizeof(ginxlogSplit);
542 rdata[0].next = &rdata[1];
544 rdata[1].buffer = InvalidBuffer;
545 rdata[1].data = vector;
546 rdata[1].len = MAXALIGN(maxoff * sizeofitem);
547 rdata[1].next = NULL;
553 * Fills new root by right bound values from child.
554 * Also called from ginxlog, should not use btree
557 dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
559 Page page = BufferGetPage(root),
560 lpage = BufferGetPage(lbuf),
561 rpage = BufferGetPage(rbuf);
565 li.key = *GinDataPageGetRightBound(lpage);
566 PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
567 GinDataPageAddItem(page, &li, InvalidOffsetNumber);
569 ri.key = *GinDataPageGetRightBound(rpage);
570 PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
571 GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
575 prepareDataScan(GinBtree btree, Relation index)
577 memset(btree, 0, sizeof(GinBtreeData));
578 btree->index = index;
579 btree->isMoveRight = dataIsMoveRight;
580 btree->findChildPage = dataLocateItem;
581 btree->findItem = dataLocateLeafItem;
582 btree->findChildPtr = dataFindChildPtr;
583 btree->getLeftMostPage = dataGetLeftMostPage;
584 btree->isEnoughSpace = dataIsEnoughSpace;
585 btree->placeToPage = dataPlaceToPage;
586 btree->splitPage = dataSplitPage;
587 btree->fillRoot = dataFillRoot;
589 btree->searchMode = FALSE;
590 btree->isDelete = FALSE;
591 btree->fullScan = FALSE;
592 btree->isBuild = FALSE;
596 prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
598 GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
600 prepareDataScan(&gdi->btree, index);
602 gdi->btree.searchMode = searchMode;
603 gdi->btree.fullScan = searchMode;
605 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
611 * Inserts array of item pointers, may execute several tree scan (very rare)
614 insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
616 BlockNumber rootBlkno = gdi->stack->blkno;
618 gdi->btree.items = items;
619 gdi->btree.nitem = nitem;
620 gdi->btree.curitem = 0;
622 while (gdi->btree.curitem < gdi->btree.nitem)
625 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
627 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
629 if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
630 elog(ERROR, "item pointer (%u,%d) already exists",
631 ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
632 ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
634 ginInsertValue(&(gdi->btree), gdi->stack);
641 scanBeginPostingTree(GinPostingTreeScan *gdi)
643 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
644 return gdi->stack->buffer;