1 /*-------------------------------------------------------------------------
4 * page utilities routines for the postgres inverted index access method.
7 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gin/gindatapage.c
12 *-------------------------------------------------------------------------
17 #include "access/gin_private.h"
18 #include "miscadmin.h"
19 #include "utils/rel.h"
22 * Checks, should we move to right link...
23 * Compares inserting itemp pointer with right bound of current page
26 dataIsMoveRight(GinBtree btree, Page page)
28 ItemPointer iptr = GinDataPageGetRightBound(page);
30 if (GinPageRightMost(page))
33 return (ginCompareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
37 * Find correct PostingItem in non-leaf page. It supposed that page
38 * correctly chosen and searching value SHOULD be on page
41 dataLocateItem(GinBtree btree, GinBtreeStack *stack)
46 PostingItem *pitem = NULL;
48 Page page = BufferGetPage(stack->buffer);
50 Assert(!GinPageIsLeaf(page));
51 Assert(GinPageIsData(page));
55 stack->off = FirstOffsetNumber;
56 stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
57 return btree->getLeftMostChild(btree, page);
60 low = FirstOffsetNumber;
61 maxoff = high = GinPageGetOpaque(page)->maxoff;
68 OffsetNumber mid = low + ((high - low) / 2);
70 pitem = GinDataPageGetPostingItem(page, mid);
75 * Right infinity, page already correctly chosen with a help of
82 pitem = GinDataPageGetPostingItem(page, mid);
83 result = ginCompareItemPointers(btree->items + btree->curitem, &(pitem->key));
89 return PostingItemGetBlockNumber(pitem);
97 Assert(high >= FirstOffsetNumber && high <= maxoff);
100 pitem = GinDataPageGetPostingItem(page, high);
101 return PostingItemGetBlockNumber(pitem);
105 * Searches correct position for value on leaf page.
106 * Page should be correctly chosen.
107 * Returns true if value found on page.
110 dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
112 Page page = BufferGetPage(stack->buffer);
117 Assert(GinPageIsLeaf(page));
118 Assert(GinPageIsData(page));
122 stack->off = FirstOffsetNumber;
126 low = FirstOffsetNumber;
127 high = GinPageGetOpaque(page)->maxoff;
131 stack->off = FirstOffsetNumber;
139 OffsetNumber mid = low + ((high - low) / 2);
141 result = ginCompareItemPointers(btree->items + btree->curitem,
142 GinDataPageGetItemPointer(page, mid));
160 * Finds links to blkno on non-leaf page, returns
161 * offset of PostingItem
164 dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
167 maxoff = GinPageGetOpaque(page)->maxoff;
170 Assert(!GinPageIsLeaf(page));
171 Assert(GinPageIsData(page));
173 /* if page isn't changed, we return storedOff */
174 if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
176 pitem = GinDataPageGetPostingItem(page, storedOff);
177 if (PostingItemGetBlockNumber(pitem) == blkno)
181 * we hope, that needed pointer goes to right. It's true if there
184 for (i = storedOff + 1; i <= maxoff; i++)
186 pitem = GinDataPageGetPostingItem(page, i);
187 if (PostingItemGetBlockNumber(pitem) == blkno)
191 maxoff = storedOff - 1;
195 for (i = FirstOffsetNumber; i <= maxoff; i++)
197 pitem = GinDataPageGetPostingItem(page, i);
198 if (PostingItemGetBlockNumber(pitem) == blkno)
202 return InvalidOffsetNumber;
206 * returns blkno of leftmost child
209 dataGetLeftMostPage(GinBtree btree, Page page)
213 Assert(!GinPageIsLeaf(page));
214 Assert(GinPageIsData(page));
215 Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
217 pitem = GinDataPageGetPostingItem(page, FirstOffsetNumber);
218 return PostingItemGetBlockNumber(pitem);
222 * add ItemPointer to a leaf page.
225 GinDataPageAddItemPointer(Page page, ItemPointer data, OffsetNumber offset)
227 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
230 Assert(GinPageIsLeaf(page));
232 if (offset == InvalidOffsetNumber)
234 ptr = (char *) GinDataPageGetItemPointer(page, maxoff + 1);
238 ptr = (char *) GinDataPageGetItemPointer(page, offset);
239 if (maxoff + 1 - offset != 0)
240 memmove(ptr + sizeof(ItemPointerData),
242 (maxoff - offset + 1) * sizeof(ItemPointerData));
244 memcpy(ptr, data, sizeof(ItemPointerData));
246 GinPageGetOpaque(page)->maxoff++;
250 * add PostingItem to a non-leaf page.
253 GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset)
255 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
258 Assert(!GinPageIsLeaf(page));
260 if (offset == InvalidOffsetNumber)
262 ptr = (char *) GinDataPageGetPostingItem(page, maxoff + 1);
266 ptr = (char *) GinDataPageGetPostingItem(page, offset);
267 if (maxoff + 1 - offset != 0)
268 memmove(ptr + sizeof(PostingItem),
270 (maxoff - offset + 1) * sizeof(PostingItem));
272 memcpy(ptr, data, sizeof(PostingItem));
274 GinPageGetOpaque(page)->maxoff++;
278 * Deletes posting item from non-leaf page
281 GinPageDeletePostingItem(Page page, OffsetNumber offset)
283 OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
285 Assert(!GinPageIsLeaf(page));
286 Assert(offset >= FirstOffsetNumber && offset <= maxoff);
288 if (offset != maxoff)
289 memmove(GinDataPageGetPostingItem(page, offset),
290 GinDataPageGetPostingItem(page, offset + 1),
291 sizeof(PostingItem) * (maxoff - offset));
293 GinPageGetOpaque(page)->maxoff--;
297 * checks space to install new value,
298 * item pointer never deletes!
301 dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
303 Page page = BufferGetPage(buf);
305 Assert(GinPageIsData(page));
306 Assert(!btree->isDelete);
308 if (GinPageIsLeaf(page))
310 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
312 if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
315 else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
318 else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
325 * In case of previous split update old child blkno to
327 * item pointer never deletes!
330 dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
332 BlockNumber ret = InvalidBlockNumber;
334 Assert(GinPageIsData(page));
336 if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
338 PostingItem *pitem = GinDataPageGetPostingItem(page, off);
340 PostingItemSetBlockNumber(pitem, btree->rightblkno);
341 ret = btree->rightblkno;
344 btree->rightblkno = InvalidBlockNumber;
350 * Places keys to page and fills WAL record. In case leaf page and
351 * build mode puts all ItemPointers to page.
353 * If none of the keys fit, returns false without modifying the page.
356 dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off,
357 XLogRecData **prdata)
359 Page page = BufferGetPage(buf);
360 int sizeofitem = GinSizeOfDataPageItem(page);
363 /* these must be static so they can be returned to caller */
364 static XLogRecData rdata[3];
365 static ginxlogInsert data;
367 /* quick exit if it doesn't fit */
368 if (!dataIsEnoughSpace(btree, buf, off))
372 Assert(GinPageIsData(page));
374 data.updateBlkno = dataPrepareData(btree, page, off);
376 data.node = btree->index->rd_node;
377 data.blkno = BufferGetBlockNumber(buf);
380 data.isDelete = FALSE;
382 data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
385 * Prevent full page write if child's split occurs. That is needed to
386 * remove incomplete splits while replaying WAL
388 * data.updateBlkno contains new block number (of newly created right
389 * page) for recently splited page.
391 if (data.updateBlkno == InvalidBlockNumber)
393 rdata[0].buffer = buf;
394 rdata[0].buffer_std = FALSE;
395 rdata[0].data = NULL;
397 rdata[0].next = &rdata[1];
401 rdata[cnt].buffer = InvalidBuffer;
402 rdata[cnt].data = (char *) &data;
403 rdata[cnt].len = sizeof(ginxlogInsert);
404 rdata[cnt].next = &rdata[cnt + 1];
407 rdata[cnt].buffer = InvalidBuffer;
408 rdata[cnt].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
409 rdata[cnt].len = sizeofitem;
410 rdata[cnt].next = NULL;
412 if (GinPageIsLeaf(page))
414 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
416 /* usually, create index... */
417 uint32 savedPos = btree->curitem;
419 while (btree->curitem < btree->nitem)
421 GinDataPageAddItemPointer(page, btree->items + btree->curitem, off);
425 data.nitem = btree->curitem - savedPos;
426 rdata[cnt].len = sizeofitem * data.nitem;
430 GinDataPageAddItemPointer(page, btree->items + btree->curitem, off);
435 GinDataPageAddPostingItem(page, &(btree->pitem), off);
441 * split page and fills WAL record. original buffer(lbuf) leaves untouched,
442 * returns shadow page of lbuf filled new data. In leaf page and build mode puts all
443 * ItemPointers to pages. Also, in build mode splits data by way to full fulled
447 dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
450 OffsetNumber separator;
452 Page lpage = PageGetTempPageCopy(BufferGetPage(lbuf));
453 bool isleaf = GinPageIsLeaf(lpage);
454 ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
455 int sizeofitem = GinSizeOfDataPageItem(lpage);
456 OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
457 Page rpage = BufferGetPage(rbuf);
458 Size pageSize = PageGetPageSize(lpage);
462 /* these must be static so they can be returned to caller */
463 static ginxlogSplit data;
464 static XLogRecData rdata[4];
465 static char vector[2 * BLCKSZ];
467 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
468 freeSpace = GinDataPageGetFreeSpace(rpage);
471 data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
472 InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
473 data.updateBlkno = dataPrepareData(btree, lpage, off);
478 GinDataPageGetItemPointer(lpage, FirstOffsetNumber),
479 maxoff * sizeof(ItemPointerData));
484 GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
485 maxoff * sizeof(PostingItem));
488 if (isleaf && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
491 while (btree->curitem < btree->nitem &&
492 maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
494 memcpy(vector + maxoff * sizeof(ItemPointerData),
495 btree->items + btree->curitem,
496 sizeof(ItemPointerData));
504 ptr = vector + (off - 1) * sizeofitem;
505 if (maxoff + 1 - off != 0)
506 memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
509 memcpy(ptr, btree->items + btree->curitem, sizeofitem);
513 memcpy(ptr, &(btree->pitem), sizeofitem);
519 * we assume that during index creation the table scanned from beginning
520 * to end, so ItemPointers are in monotonically increasing order.
522 if (btree->isBuild && GinPageRightMost(lpage))
523 separator = freeSpace / sizeofitem;
525 separator = maxoff / 2;
527 GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
528 GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
531 memcpy(GinDataPageGetItemPointer(lpage, FirstOffsetNumber),
532 vector, separator * sizeof(ItemPointerData));
534 memcpy(GinDataPageGetPostingItem(lpage, FirstOffsetNumber),
535 vector, separator * sizeof(PostingItem));
537 GinPageGetOpaque(lpage)->maxoff = separator;
539 memcpy(GinDataPageGetItemPointer(rpage, FirstOffsetNumber),
540 vector + separator * sizeof(ItemPointerData),
541 (maxoff - separator) * sizeof(ItemPointerData));
543 memcpy(GinDataPageGetPostingItem(rpage, FirstOffsetNumber),
544 vector + separator * sizeof(PostingItem),
545 (maxoff - separator) * sizeof(PostingItem));
547 GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
549 /* set up right bound for left page */
550 bound = GinDataPageGetRightBound(lpage);
551 if (GinPageIsLeaf(lpage))
552 *bound = *GinDataPageGetItemPointer(lpage,
553 GinPageGetOpaque(lpage)->maxoff);
555 *bound = GinDataPageGetPostingItem(lpage,
556 GinPageGetOpaque(lpage)->maxoff)->key;
558 /* set up right bound for right page */
559 bound = GinDataPageGetRightBound(rpage);
562 data.node = btree->index->rd_node;
563 data.rootBlkno = InvalidBlockNumber;
564 data.lblkno = BufferGetBlockNumber(lbuf);
565 data.rblkno = BufferGetBlockNumber(rbuf);
566 data.separator = separator;
569 data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
570 data.isRootSplit = FALSE;
571 data.rightbound = oldbound;
573 rdata[0].buffer = InvalidBuffer;
574 rdata[0].data = (char *) &data;
575 rdata[0].len = sizeof(ginxlogSplit);
576 rdata[0].next = &rdata[1];
578 rdata[1].buffer = InvalidBuffer;
579 rdata[1].data = vector;
580 rdata[1].len = MAXALIGN(maxoff * sizeofitem);
581 rdata[1].next = NULL;
583 /* Prepare a downlink tuple for insertion to the parent */
584 PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
585 btree->pitem.key = *GinDataPageGetRightBound(lpage);
586 btree->rightblkno = BufferGetBlockNumber(rbuf);
592 * Fills new root by right bound values from child.
593 * Also called from ginxlog, should not use btree
596 ginDataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
598 Page page = BufferGetPage(root),
599 lpage = BufferGetPage(lbuf),
600 rpage = BufferGetPage(rbuf);
604 li.key = *GinDataPageGetRightBound(lpage);
605 PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
606 GinDataPageAddPostingItem(page, &li, InvalidOffsetNumber);
608 ri.key = *GinDataPageGetRightBound(rpage);
609 PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
610 GinDataPageAddPostingItem(page, &ri, InvalidOffsetNumber);
614 * Creates new posting tree containing the given TIDs. Returns the page
615 * number of the root of the new posting tree.
617 * items[] must be in sorted order with no duplicates.
620 createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
621 GinStatsData *buildStats)
628 /* Calculate how many TIDs will fit on first page. */
629 nrootitems = Min(nitems, GinMaxLeafDataItems);
632 * Create the root page.
634 buffer = GinNewBuffer(index);
635 page = BufferGetPage(buffer);
636 blkno = BufferGetBlockNumber(buffer);
638 START_CRIT_SECTION();
640 GinInitBuffer(buffer, GIN_DATA | GIN_LEAF);
641 memcpy(GinDataPageGetData(page), items, sizeof(ItemPointerData) * nrootitems);
642 GinPageGetOpaque(page)->maxoff = nrootitems;
644 MarkBufferDirty(buffer);
646 if (RelationNeedsWAL(index))
649 XLogRecData rdata[2];
650 ginxlogCreatePostingTree data;
652 data.node = index->rd_node;
654 data.nitem = nrootitems;
656 rdata[0].buffer = InvalidBuffer;
657 rdata[0].data = (char *) &data;
658 rdata[0].len = sizeof(ginxlogCreatePostingTree);
659 rdata[0].next = &rdata[1];
661 rdata[1].buffer = InvalidBuffer;
662 rdata[1].data = (char *) items;
663 rdata[1].len = sizeof(ItemPointerData) * nrootitems;
664 rdata[1].next = NULL;
666 recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
667 PageSetLSN(page, recptr);
670 UnlockReleaseBuffer(buffer);
674 /* During index build, count the newly-added data page */
676 buildStats->nDataPages++;
679 * Add any remaining TIDs to the newly-created posting tree.
681 if (nitems > nrootitems)
683 ginInsertItemPointers(index, blkno,
693 ginPrepareDataScan(GinBtree btree, Relation index)
695 memset(btree, 0, sizeof(GinBtreeData));
697 btree->index = index;
699 btree->findChildPage = dataLocateItem;
700 btree->getLeftMostChild = dataGetLeftMostPage;
701 btree->isMoveRight = dataIsMoveRight;
702 btree->findItem = dataLocateLeafItem;
703 btree->findChildPtr = dataFindChildPtr;
704 btree->placeToPage = dataPlaceToPage;
705 btree->splitPage = dataSplitPage;
706 btree->fillRoot = ginDataFillRoot;
708 btree->isData = TRUE;
709 btree->isDelete = FALSE;
710 btree->fullScan = FALSE;
711 btree->isBuild = FALSE;
715 * Inserts array of item pointers, may execute several tree scan (very rare)
718 ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
719 ItemPointerData *items, uint32 nitem,
720 GinStatsData *buildStats)
723 GinBtreeStack *stack;
725 ginPrepareDataScan(&btree, index);
726 btree.isBuild = (buildStats != NULL);
731 while (btree.curitem < btree.nitem)
733 stack = ginFindLeafPage(&btree, rootBlkno, false);
735 if (btree.findItem(&btree, stack))
738 * btree.items[btree.curitem] already exists in index
741 LockBuffer(stack->buffer, GIN_UNLOCK);
742 freeGinBtreeStack(stack);
745 ginInsertValue(&btree, stack, buildStats);
750 * Starts a new scan on a posting tree.
753 ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno)
756 GinBtreeStack *stack;
758 ginPrepareDataScan(&btree, index);
760 btree.fullScan = TRUE;
762 stack = ginFindLeafPage(&btree, rootBlkno, TRUE);