1 /*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/gindatapage.c
12 *-------------------------------------------------------------------------
17 #include "access/gin_private.h"
18 #include "storage/bufmgr.h"
19 #include "utils/rel.h"
22 ginCompareItemPointers(ItemPointer a, ItemPointer b)
24 BlockNumber ba = GinItemPointerGetBlockNumber(a);
25 BlockNumber bb = GinItemPointerGetBlockNumber(b);
29 OffsetNumber oa = GinItemPointerGetOffsetNumber(a);
30 OffsetNumber ob = GinItemPointerGetOffsetNumber(b);
34 return (oa > ob) ? 1 : -1;
37 return (ba > bb) ? 1 : -1;
41 * Merge two ordered arrays of itempointers, eliminating any duplicates.
42 * Returns the number of items in the result.
43 * Caller is responsible that there is enough space at *dst.
46 ginMergeItemPointers(ItemPointerData *dst,
47 ItemPointerData *a, uint32 na,
48 ItemPointerData *b, uint32 nb)
50 ItemPointerData *dptr = dst;
51 ItemPointerData *aptr = a,
54 while (aptr - a < na && bptr - b < nb)
56 int cmp = ginCompareItemPointers(aptr, bptr);
62 /* we want only one copy of the identical items */
80 * Checks, should we move to right link...
81 * Compares inserting itemp pointer with right bound of current page
84 dataIsMoveRight(GinBtree btree, Page page)
86 ItemPointer iptr = GinDataPageGetRightBound(page);
88 if (GinPageRightMost(page))
91 return (ginCompareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
95 * Find correct PostingItem in non-leaf page. It supposed that page
96 * correctly chosen and searching value SHOULD be on page
99 dataLocateItem(GinBtree btree, GinBtreeStack *stack)
104 PostingItem *pitem = NULL;
106 Page page = BufferGetPage(stack->buffer);
108 Assert(!GinPageIsLeaf(page));
109 Assert(GinPageIsData(page));
113 stack->off = FirstOffsetNumber;
114 stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
115 return btree->getLeftMostPage(btree, page);
118 low = FirstOffsetNumber;
119 maxoff = high = GinPageGetOpaque(page)->maxoff;
126 OffsetNumber mid = low + ((high - low) / 2);
128 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
133 * Right infinity, page already correctly chosen with a help of
140 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
141 result = ginCompareItemPointers(btree->items + btree->curitem, &(pitem->key));
147 return PostingItemGetBlockNumber(pitem);
155 Assert(high >= FirstOffsetNumber && high <= maxoff);
158 pitem = (PostingItem *) GinDataPageGetItem(page, high);
159 return PostingItemGetBlockNumber(pitem);
163 * Searches correct position for value on leaf page.
164 * Page should be correctly chosen.
165 * Returns true if value found on page.
168 dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
170 Page page = BufferGetPage(stack->buffer);
175 Assert(GinPageIsLeaf(page));
176 Assert(GinPageIsData(page));
180 stack->off = FirstOffsetNumber;
184 low = FirstOffsetNumber;
185 high = GinPageGetOpaque(page)->maxoff;
189 stack->off = FirstOffsetNumber;
197 OffsetNumber mid = low + ((high - low) / 2);
199 result = ginCompareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
217 * Finds links to blkno on non-leaf page, returns
218 * offset of PostingItem
221 dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
224 maxoff = GinPageGetOpaque(page)->maxoff;
227 Assert(!GinPageIsLeaf(page));
228 Assert(GinPageIsData(page));
230 /* if page isn't changed, we return storedOff */
231 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
233 pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
234 if (PostingItemGetBlockNumber(pitem) == blkno)
238 * we hope, that needed pointer goes to right. It's true if there
241 for (i = storedOff + 1; i <= maxoff; i++)
243 pitem = (PostingItem *) GinDataPageGetItem(page, i);
244 if (PostingItemGetBlockNumber(pitem) == blkno)
248 maxoff = storedOff - 1;
252 for (i = FirstOffsetNumber; i <= maxoff; i++)
254 pitem = (PostingItem *) GinDataPageGetItem(page, i);
255 if (PostingItemGetBlockNumber(pitem) == blkno)
259 return InvalidOffsetNumber;
263 * returns blkno of leftmost child
266 dataGetLeftMostPage(GinBtree btree, Page page)
270 Assert(!GinPageIsLeaf(page));
271 Assert(GinPageIsData(page));
272 Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
274 pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
275 return PostingItemGetBlockNumber(pitem);
279 * add ItemPointer or PostingItem to page. data should point to
280 * correct value! depending on leaf or non-leaf page
283 GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
285 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
288 if (offset == InvalidOffsetNumber)
290 ptr = GinDataPageGetItem(page, maxoff + 1);
294 ptr = GinDataPageGetItem(page, offset);
295 if (maxoff + 1 - offset != 0)
296 memmove(ptr + GinSizeOfDataPageItem(page),
298 (maxoff - offset + 1) * GinSizeOfDataPageItem(page));
300 memcpy(ptr, data, GinSizeOfDataPageItem(page));
302 GinPageGetOpaque(page)->maxoff++;
306 * Deletes posting item from non-leaf page
309 GinPageDeletePostingItem(Page page, OffsetNumber offset)
311 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
313 Assert(!GinPageIsLeaf(page));
314 Assert(offset >= FirstOffsetNumber && offset <= maxoff);
316 if (offset != maxoff)
317 memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
318 sizeof(PostingItem) * (maxoff - offset));
320 GinPageGetOpaque(page)->maxoff--;
324 * checks space to install new value,
325 * item pointer never deletes!
328 dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
330 Page page = BufferGetPage(buf);
332 Assert(GinPageIsData(page));
333 Assert(!btree->isDelete);
335 if (GinPageIsLeaf(page))
337 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
339 if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
342 else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
345 else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
352 * In case of previous split update old child blkno to
354 * item pointer never deletes!
357 dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
359 BlockNumber ret = InvalidBlockNumber;
361 Assert(GinPageIsData(page));
363 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
365 PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
367 PostingItemSetBlockNumber(pitem, btree->rightblkno);
368 ret = btree->rightblkno;
371 btree->rightblkno = InvalidBlockNumber;
377 * Places keys to page and fills WAL record. In case leaf page and
378 * build mode puts all ItemPointers to page.
381 dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
383 Page page = BufferGetPage(buf);
384 int sizeofitem = GinSizeOfDataPageItem(page);
387 /* these must be static so they can be returned to caller */
388 static XLogRecData rdata[3];
389 static ginxlogInsert data;
392 Assert(GinPageIsData(page));
394 data.updateBlkno = dataPrepareData(btree, page, off);
396 data.node = btree->index->rd_node;
397 data.blkno = BufferGetBlockNumber(buf);
400 data.isDelete = FALSE;
402 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
405 * Prevent full page write if child's split occurs. That is needed to
406 * remove incomplete splits while replaying WAL
408 * data.updateBlkno contains new block number (of newly created right
409 * page) for recently splited page.
411 if (data.updateBlkno == InvalidBlockNumber)
413 rdata[0].buffer = buf;
414 rdata[0].buffer_std = FALSE;
415 rdata[0].data = NULL;
417 rdata[0].next = &rdata[1];
421 rdata[cnt].buffer = InvalidBuffer;
422 rdata[cnt].data = (char *) &data;
423 rdata[cnt].len = sizeof(ginxlogInsert);
424 rdata[cnt].next = &rdata[cnt + 1];
427 rdata[cnt].buffer = InvalidBuffer;
428 rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
429 rdata[cnt].len = sizeofitem;
430 rdata[cnt].next = NULL;
432 if (GinPageIsLeaf(page))
434 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
436 /* usually, create index... */
437 uint32 savedPos = btree->curitem;
439 while (btree->curitem < btree->nitem)
441 GinDataPageAddItem(page, btree->items + btree->curitem, off);
445 data.nitem = btree->curitem - savedPos;
446 rdata[cnt].len = sizeofitem * data.nitem;
450 GinDataPageAddItem(page, btree->items + btree->curitem, off);
455 GinDataPageAddItem(page, &(btree->pitem), off);
459 * split page and fills WAL record. original buffer(lbuf) leaves untouched,
460 * returns shadow page of lbuf filled new data. In leaf page and build mode puts all
461 * ItemPointers to pages. Also, in build mode splits data by way to full fulled
465 dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
468 OffsetNumber separator;
470 Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
471 ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
472 int sizeofitem = GinSizeOfDataPageItem(lpage);
473 OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
474 Page rpage = BufferGetPage(rbuf);
475 Size pageSize = PageGetPageSize(lpage);
479 /* these must be static so they can be returned to caller */
480 static ginxlogSplit data;
481 static XLogRecData rdata[4];
482 static char vector[2 * BLCKSZ];
484 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
485 freeSpace = GinDataPageGetFreeSpace(rpage);
488 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
489 InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
490 data.updateBlkno = dataPrepareData(btree, lpage, off);
492 memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
493 maxoff * sizeofitem);
495 if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
498 while (btree->curitem < btree->nitem &&
499 maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
501 memcpy(vector + maxoff * sizeof(ItemPointerData),
502 btree->items + btree->curitem,
503 sizeof(ItemPointerData));
511 ptr = vector + (off - 1) * sizeofitem;
512 if (maxoff + 1 - off != 0)
513 memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
514 if (GinPageIsLeaf(lpage))
516 memcpy(ptr, btree->items + btree->curitem, sizeofitem);
520 memcpy(ptr, &(btree->pitem), sizeofitem);
526 * we suppose that during index creation table scaned from begin to end,
527 * so ItemPointers are monotonically increased..
529 if (btree->isBuild && GinPageRightMost(lpage))
530 separator = freeSpace / sizeofitem;
532 separator = maxoff / 2;
534 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
535 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
537 memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
538 GinPageGetOpaque(lpage)->maxoff = separator;
539 memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
540 vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
541 GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
543 PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
544 if (GinPageIsLeaf(lpage))
545 btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
546 GinPageGetOpaque(lpage)->maxoff);
548 btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
549 GinPageGetOpaque(lpage)->maxoff))->key;
550 btree->rightblkno = BufferGetBlockNumber(rbuf);
552 /* set up right bound for left page */
553 bound = GinDataPageGetRightBound(lpage);
554 *bound = btree->pitem.key;
556 /* set up right bound for right page */
557 bound = GinDataPageGetRightBound(rpage);
560 data.node = btree->index->rd_node;
561 data.rootBlkno = InvalidBlockNumber;
562 data.lblkno = BufferGetBlockNumber(lbuf);
563 data.rblkno = BufferGetBlockNumber(rbuf);
564 data.separator = separator;
567 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
568 data.isRootSplit = FALSE;
569 data.rightbound = oldbound;
571 rdata[0].buffer = InvalidBuffer;
572 rdata[0].data = (char *) &data;
573 rdata[0].len = sizeof(ginxlogSplit);
574 rdata[0].next = &rdata[1];
576 rdata[1].buffer = InvalidBuffer;
577 rdata[1].data = vector;
578 rdata[1].len = MAXALIGN(maxoff * sizeofitem);
579 rdata[1].next = NULL;
585 * Fills new root by right bound values from child.
586 * Also called from ginxlog, should not use btree
589 ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
591 Page page = BufferGetPage(root),
592 lpage = BufferGetPage(lbuf),
593 rpage = BufferGetPage(rbuf);
597 li.key = *GinDataPageGetRightBound(lpage);
598 PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
599 GinDataPageAddItem(page, &li, InvalidOffsetNumber);
601 ri.key = *GinDataPageGetRightBound(rpage);
602 PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
603 GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
607 ginPrepareDataScan(GinBtree btree, Relation index)
609 memset(btree, 0, sizeof(GinBtreeData));
611 btree->index = index;
613 btree->findChildPage = dataLocateItem;
614 btree->isMoveRight = dataIsMoveRight;
615 btree->findItem = dataLocateLeafItem;
616 btree->findChildPtr = dataFindChildPtr;
617 btree->getLeftMostPage = dataGetLeftMostPage;
618 btree->isEnoughSpace = dataIsEnoughSpace;
619 btree->placeToPage = dataPlaceToPage;
620 btree->splitPage = dataSplitPage;
621 btree->fillRoot = ginDataFillRoot;
623 btree->isData = TRUE;
624 btree->searchMode = FALSE;
625 btree->isDelete = FALSE;
626 btree->fullScan = FALSE;
627 btree->isBuild = FALSE;
631 ginPrepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
633 GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
635 ginPrepareDataScan(&gdi->btree, index);
637 gdi->btree.searchMode = searchMode;
638 gdi->btree.fullScan = searchMode;
640 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
646 * Inserts array of item pointers, may execute several tree scan (very rare)
649 ginInsertItemPointers(GinPostingTreeScan *gdi,
650 ItemPointerData *items, uint32 nitem,
651 GinStatsData *buildStats)
653 BlockNumber rootBlkno = gdi->stack->blkno;
655 gdi->btree.items = items;
656 gdi->btree.nitem = nitem;
657 gdi->btree.curitem = 0;
659 while (gdi->btree.curitem < gdi->btree.nitem)
662 gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
664 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
666 if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
669 * gdi->btree.items[gdi->btree.curitem] already exists in index
671 gdi->btree.curitem++;
672 LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
673 freeGinBtreeStack(gdi->stack);
676 ginInsertValue(&(gdi->btree), gdi->stack, buildStats);
683 ginScanBeginPostingTree(GinPostingTreeScan *gdi)
685 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
686 return gdi->stack->buffer;