]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/gindatapage.c
Fix some typos in comments.
[postgresql] / src / backend / access / gin / gindatapage.c
1 /*-------------------------------------------------------------------------
2  *
3  * gindatapage.c
4  *        page utilities routines for the postgres inverted index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                      $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.5 2006/11/12 06:55:53 neilc Exp $
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16 #include "access/gin.h"
17
18 int
19 compareItemPointers(ItemPointer a, ItemPointer b)
20 {
21         if (GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b))
22         {
23                 if (GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b))
24                         return 0;
25                 return (GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b)) ? 1 : -1;
26         }
27
28         return (GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b)) ? 1 : -1;
29 }
30
31 /*
32  * Merge two ordered array of itempointer
33  */
34 void
35 MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb)
36 {
37         ItemPointerData *dptr = dst;
38         ItemPointerData *aptr = a,
39                            *bptr = b;
40
41         while (aptr - a < na && bptr - b < nb)
42         {
43                 if (compareItemPointers(aptr, bptr) > 0)
44                         *dptr++ = *bptr++;
45                 else
46                         *dptr++ = *aptr++;
47         }
48
49         while (aptr - a < na)
50                 *dptr++ = *aptr++;
51
52         while (bptr - b < nb)
53                 *dptr++ = *bptr++;
54 }
55
56 /*
57  * Checks, should we move to right link...
58  * Compares inserting itemp pointer with right bound of current page
59  */
60 static bool
61 dataIsMoveRight(GinBtree btree, Page page)
62 {
63         ItemPointer iptr = GinDataPageGetRightBound(page);
64
65         if (GinPageRightMost(page))
66                 return FALSE;
67
68         return (compareItemPointers(btree->items + btree->curitem, iptr) > 0) ? TRUE : FALSE;
69 }
70
71 /*
72  * Find correct PostingItem in non-leaf page. It supposed that page
73  * correctly chosen and searching value SHOULD be on page
74  */
75 static BlockNumber
76 dataLocateItem(GinBtree btree, GinBtreeStack *stack)
77 {
78         OffsetNumber low,
79                                 high,
80                                 maxoff;
81         PostingItem *pitem = NULL;
82         int                     result;
83         Page            page = BufferGetPage(stack->buffer);
84
85         Assert(!GinPageIsLeaf(page));
86         Assert(GinPageIsData(page));
87
88         if (btree->fullScan)
89         {
90                 stack->off = FirstOffsetNumber;
91                 stack->predictNumber *= GinPageGetOpaque(page)->maxoff;
92                 return btree->getLeftMostPage(btree, page);
93         }
94
95         low = FirstOffsetNumber;
96         maxoff = high = GinPageGetOpaque(page)->maxoff;
97         Assert(high >= low);
98
99         high++;
100
101         while (high > low)
102         {
103                 OffsetNumber mid = low + ((high - low) / 2);
104
105                 pitem = (PostingItem *) GinDataPageGetItem(page, mid);
106
107                 if (mid == maxoff)
108
109                         /*
110                          * Right infinity, page already correctly chosen with a help of
111                          * dataIsMoveRight
112                          */
113                         result = -1;
114                 else
115                 {
116                         pitem = (PostingItem *) GinDataPageGetItem(page, mid);
117                         result = compareItemPointers(btree->items + btree->curitem, &(pitem->key));
118                 }
119
120                 if (result == 0)
121                 {
122                         stack->off = mid;
123                         return PostingItemGetBlockNumber(pitem);
124                 }
125                 else if (result > 0)
126                         low = mid + 1;
127                 else
128                         high = mid;
129         }
130
131         Assert(high >= FirstOffsetNumber && high <= maxoff);
132
133         stack->off = high;
134         pitem = (PostingItem *) GinDataPageGetItem(page, high);
135         return PostingItemGetBlockNumber(pitem);
136 }
137
138 /*
139  * Searches correct position for value on leaf page.
140  * Page should be correctly chosen.
141  * Returns true if value found on page.
142  */
143 static bool
144 dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack)
145 {
146         Page            page = BufferGetPage(stack->buffer);
147         OffsetNumber low,
148                                 high;
149         int                     result;
150
151         Assert(GinPageIsLeaf(page));
152         Assert(GinPageIsData(page));
153
154         if (btree->fullScan)
155         {
156                 stack->off = FirstOffsetNumber;
157                 return TRUE;
158         }
159
160         low = FirstOffsetNumber;
161         high = GinPageGetOpaque(page)->maxoff;
162
163         if (high < low)
164         {
165                 stack->off = FirstOffsetNumber;
166                 return false;
167         }
168
169         high++;
170
171         while (high > low)
172         {
173                 OffsetNumber mid = low + ((high - low) / 2);
174
175                 result = compareItemPointers(btree->items + btree->curitem, (ItemPointer) GinDataPageGetItem(page, mid));
176
177                 if (result == 0)
178                 {
179                         stack->off = mid;
180                         return true;
181                 }
182                 else if (result > 0)
183                         low = mid + 1;
184                 else
185                         high = mid;
186         }
187
188         stack->off = high;
189         return false;
190 }
191
192 /*
193  * Finds links to blkno on non-leaf page, returns
194  * offset of PostingItem
195  */
196 static OffsetNumber
197 dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff)
198 {
199         OffsetNumber i,
200                                 maxoff = GinPageGetOpaque(page)->maxoff;
201         PostingItem *pitem;
202
203         Assert(!GinPageIsLeaf(page));
204         Assert(GinPageIsData(page));
205
206         /* if page isn't changed, we returns storedOff */
207         if (storedOff >= FirstOffsetNumber && storedOff <= maxoff)
208         {
209                 pitem = (PostingItem *) GinDataPageGetItem(page, storedOff);
210                 if (PostingItemGetBlockNumber(pitem) == blkno)
211                         return storedOff;
212
213                 /*
214                  * we hope, that needed pointer goes to right. It's true if there
215                  * wasn't a deletion
216                  */
217                 for (i = storedOff + 1; i <= maxoff; i++)
218                 {
219                         pitem = (PostingItem *) GinDataPageGetItem(page, i);
220                         if (PostingItemGetBlockNumber(pitem) == blkno)
221                                 return i;
222                 }
223
224                 maxoff = storedOff - 1;
225         }
226
227         /* last chance */
228         for (i = FirstOffsetNumber; i <= maxoff; i++)
229         {
230                 pitem = (PostingItem *) GinDataPageGetItem(page, i);
231                 if (PostingItemGetBlockNumber(pitem) == blkno)
232                         return i;
233         }
234
235         return InvalidOffsetNumber;
236 }
237
238 /*
239  * returns blkno of leftmost child
240  */
241 static BlockNumber
242 dataGetLeftMostPage(GinBtree btree, Page page)
243 {
244         PostingItem *pitem;
245
246         Assert(!GinPageIsLeaf(page));
247         Assert(GinPageIsData(page));
248         Assert(GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber);
249
250         pitem = (PostingItem *) GinDataPageGetItem(page, FirstOffsetNumber);
251         return PostingItemGetBlockNumber(pitem);
252 }
253
254 /*
255  * add ItemPointer or PostingItem to page. data should point to
256  * correct value! depending on leaf or non-leaf page
257  */
258 void
259 GinDataPageAddItem(Page page, void *data, OffsetNumber offset)
260 {
261         OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
262         char       *ptr;
263
264         if (offset == InvalidOffsetNumber)
265         {
266                 ptr = GinDataPageGetItem(page, maxoff + 1);
267         }
268         else
269         {
270                 ptr = GinDataPageGetItem(page, offset);
271                 if (maxoff + 1 - offset != 0)
272                         memmove(ptr + GinSizeOfItem(page), ptr, (maxoff - offset + 1) * GinSizeOfItem(page));
273         }
274         memcpy(ptr, data, GinSizeOfItem(page));
275
276         GinPageGetOpaque(page)->maxoff++;
277 }
278
279 /*
280  * Deletes posting item from non-leaf page
281  */
282 void
283 PageDeletePostingItem(Page page, OffsetNumber offset)
284 {
285         OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
286
287         Assert(!GinPageIsLeaf(page));
288         Assert(offset >= FirstOffsetNumber && offset <= maxoff);
289
290         if (offset != maxoff)
291                 memmove(GinDataPageGetItem(page, offset), GinDataPageGetItem(page, offset + 1),
292                                 sizeof(PostingItem) * (maxoff - offset));
293
294         GinPageGetOpaque(page)->maxoff--;
295 }
296
297 /*
298  * checks space to install new value,
299  * item pointer never deletes!
300  */
301 static bool
302 dataIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off)
303 {
304         Page            page = BufferGetPage(buf);
305
306         Assert(GinPageIsData(page));
307         Assert(!btree->isDelete);
308
309         if (GinPageIsLeaf(page))
310         {
311                 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
312                 {
313                         if ((btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
314                                 return true;
315                 }
316                 else if (sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page))
317                         return true;
318         }
319         else if (sizeof(PostingItem) <= GinDataPageGetFreeSpace(page))
320                 return true;
321
322         return false;
323 }
324
325 /*
326  * In case of previous split update old child blkno to
327  * new right page
328  * item pointer never deletes!
329  */
330 static BlockNumber
331 dataPrepareData(GinBtree btree, Page page, OffsetNumber off)
332 {
333         BlockNumber ret = InvalidBlockNumber;
334
335         Assert(GinPageIsData(page));
336
337         if (!GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber)
338         {
339                 PostingItem *pitem = (PostingItem *) GinDataPageGetItem(page, off);
340
341                 PostingItemSetBlockNumber(pitem, btree->rightblkno);
342                 ret = btree->rightblkno;
343         }
344
345         btree->rightblkno = InvalidBlockNumber;
346
347         return ret;
348 }
349
350 /*
351  * Places keys to page and fills WAL record. In case leaf page and
352  * build mode puts all ItemPointers to page.
353  */
354 static void
355 dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata)
356 {
357         Page            page = BufferGetPage(buf);
358         static XLogRecData rdata[3];
359         int                     sizeofitem = GinSizeOfItem(page);
360         static ginxlogInsert data;
361
362         *prdata = rdata;
363         Assert(GinPageIsData(page));
364
365         data.updateBlkno = dataPrepareData(btree, page, off);
366
367         data.node = btree->index->rd_node;
368         data.blkno = BufferGetBlockNumber(buf);
369         data.offset = off;
370         data.nitem = 1;
371         data.isDelete = FALSE;
372         data.isData = TRUE;
373         data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
374
375         rdata[0].buffer = buf;
376         rdata[0].buffer_std = FALSE;
377         rdata[0].data = NULL;
378         rdata[0].len = 0;
379         rdata[0].next = &rdata[1];
380
381         rdata[1].buffer = InvalidBuffer;
382         rdata[1].data = (char *) &data;
383         rdata[1].len = sizeof(ginxlogInsert);
384         rdata[1].next = &rdata[2];
385
386         rdata[2].buffer = InvalidBuffer;
387         rdata[2].data = (GinPageIsLeaf(page)) ? ((char *) (btree->items + btree->curitem)) : ((char *) &(btree->pitem));
388         rdata[2].len = sizeofitem;
389         rdata[2].next = NULL;
390
391         if (GinPageIsLeaf(page))
392         {
393                 if (GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff)
394                 {
395                         /* usually, create index... */
396                         uint32          savedPos = btree->curitem;
397
398                         while (btree->curitem < btree->nitem)
399                         {
400                                 GinDataPageAddItem(page, btree->items + btree->curitem, off);
401                                 off++;
402                                 btree->curitem++;
403                         }
404                         data.nitem = btree->curitem - savedPos;
405                         rdata[2].len = sizeofitem * data.nitem;
406                 }
407                 else
408                 {
409                         GinDataPageAddItem(page, btree->items + btree->curitem, off);
410                         btree->curitem++;
411                 }
412         }
413         else
414                 GinDataPageAddItem(page, &(btree->pitem), off);
415 }
416
417 /*
418  * split page and fills WAL record. original buffer(lbuf) leaves untouched,
419  * returns shadow page of lbuf filled new data. In leaf page and build mode puts all
420  * ItemPointers to pages. Also, in build mode splits data by way to full fulled
421  * left page
422  */
423 static Page
424 dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata)
425 {
426         static ginxlogSplit data;
427         static XLogRecData rdata[4];
428         static char vector[2 * BLCKSZ];
429         char       *ptr;
430         OffsetNumber separator;
431         ItemPointer bound;
432         Page            lpage = GinPageGetCopyPage(BufferGetPage(lbuf));
433         ItemPointerData oldbound = *GinDataPageGetRightBound(lpage);
434         int                     sizeofitem = GinSizeOfItem(lpage);
435         OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff;
436         Page            rpage = BufferGetPage(rbuf);
437         Size            pageSize = PageGetPageSize(lpage);
438         Size            freeSpace;
439         uint32          nCopied = 1;
440
441         GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
442         freeSpace = GinDataPageGetFreeSpace(rpage);
443
444         *prdata = rdata;
445         data.leftChildBlkno = (GinPageIsLeaf(lpage)) ?
446                 InvalidOffsetNumber : PostingItemGetBlockNumber(&(btree->pitem));
447         data.updateBlkno = dataPrepareData(btree, lpage, off);
448
449         memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber),
450                    maxoff * sizeofitem);
451
452         if (GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff)
453         {
454                 nCopied = 0;
455                 while (btree->curitem < btree->nitem && maxoff * sizeof(ItemPointerData) < 2 * (freeSpace - sizeof(ItemPointerData)))
456                 {
457                         memcpy(vector + maxoff * sizeof(ItemPointerData), btree->items + btree->curitem,
458                                    sizeof(ItemPointerData));
459                         maxoff++;
460                         nCopied++;
461                         btree->curitem++;
462                 }
463         }
464         else
465         {
466                 ptr = vector + (off - 1) * sizeofitem;
467                 if (maxoff + 1 - off != 0)
468                         memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem);
469                 if (GinPageIsLeaf(lpage))
470                 {
471                         memcpy(ptr, btree->items + btree->curitem, sizeofitem);
472                         btree->curitem++;
473                 }
474                 else
475                         memcpy(ptr, &(btree->pitem), sizeofitem);
476
477                 maxoff++;
478         }
479
480         /*
481          * we suppose that during index creation table scaned from begin to end,
482          * so ItemPointers are monotonically increased..
483          */
484         if (btree->isBuild && GinPageRightMost(lpage))
485                 separator = freeSpace / sizeofitem;
486         else
487                 separator = maxoff / 2;
488
489         GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
490         GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
491
492         memcpy(GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem);
493         GinPageGetOpaque(lpage)->maxoff = separator;
494         memcpy(GinDataPageGetItem(rpage, FirstOffsetNumber),
495                  vector + separator * sizeofitem, (maxoff - separator) * sizeofitem);
496         GinPageGetOpaque(rpage)->maxoff = maxoff - separator;
497
498         PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf));
499         if (GinPageIsLeaf(lpage))
500                 btree->pitem.key = *(ItemPointerData *) GinDataPageGetItem(lpage,
501                                                                                         GinPageGetOpaque(lpage)->maxoff);
502         else
503                 btree->pitem.key = ((PostingItem *) GinDataPageGetItem(lpage,
504                                                                           GinPageGetOpaque(lpage)->maxoff))->key;
505         btree->rightblkno = BufferGetBlockNumber(rbuf);
506
507         /* set up right bound for left page */
508         bound = GinDataPageGetRightBound(lpage);
509         *bound = btree->pitem.key;
510
511         /* set up right bound for right page */
512         bound = GinDataPageGetRightBound(rpage);
513         *bound = oldbound;
514
515         data.node = btree->index->rd_node;
516         data.rootBlkno = InvalidBlockNumber;
517         data.lblkno = BufferGetBlockNumber(lbuf);
518         data.rblkno = BufferGetBlockNumber(rbuf);
519         data.separator = separator;
520         data.nitem = maxoff;
521         data.isData = TRUE;
522         data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
523         data.isRootSplit = FALSE;
524         data.rightbound = oldbound;
525
526         rdata[0].buffer = InvalidBuffer;
527         rdata[0].data = (char *) &data;
528         rdata[0].len = sizeof(ginxlogSplit);
529         rdata[0].next = &rdata[1];
530
531         rdata[1].buffer = InvalidBuffer;
532         rdata[1].data = vector;
533         rdata[1].len = MAXALIGN(maxoff * sizeofitem);
534         rdata[1].next = NULL;
535
536         return lpage;
537 }
538
539 /*
540  * Fills new root by right bound values from child.
541  * Also called from ginxlog, should not use btree
542  */
543 void
544 dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
545 {
546         Page            page = BufferGetPage(root),
547                                 lpage = BufferGetPage(lbuf),
548                                 rpage = BufferGetPage(rbuf);
549         PostingItem li,
550                                 ri;
551
552         li.key = *GinDataPageGetRightBound(lpage);
553         PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf));
554         GinDataPageAddItem(page, &li, InvalidOffsetNumber);
555
556         ri.key = *GinDataPageGetRightBound(rpage);
557         PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf));
558         GinDataPageAddItem(page, &ri, InvalidOffsetNumber);
559 }
560
561 void
562 prepareDataScan(GinBtree btree, Relation index)
563 {
564         memset(btree, 0, sizeof(GinBtreeData));
565         btree->index = index;
566         btree->isMoveRight = dataIsMoveRight;
567         btree->findChildPage = dataLocateItem;
568         btree->findItem = dataLocateLeafItem;
569         btree->findChildPtr = dataFindChildPtr;
570         btree->getLeftMostPage = dataGetLeftMostPage;
571         btree->isEnoughSpace = dataIsEnoughSpace;
572         btree->placeToPage = dataPlaceToPage;
573         btree->splitPage = dataSplitPage;
574         btree->fillRoot = dataFillRoot;
575
576         btree->searchMode = FALSE;
577         btree->isDelete = FALSE;
578         btree->fullScan = FALSE;
579         btree->isBuild = FALSE;
580 }
581
582 GinPostingTreeScan *
583 prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
584 {
585         GinPostingTreeScan *gdi = (GinPostingTreeScan *) palloc0(sizeof(GinPostingTreeScan));
586
587         prepareDataScan(&gdi->btree, index);
588
589         gdi->btree.searchMode = searchMode;
590         gdi->btree.fullScan = searchMode;
591
592         gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
593
594         return gdi;
595 }
596
597 /*
598  * Inserts array of item pointers, may execute several tree scan (very rare)
599  */
600 void
601 insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
602 {
603         BlockNumber rootBlkno = gdi->stack->blkno;
604
605         gdi->btree.items = items;
606         gdi->btree.nitem = nitem;
607         gdi->btree.curitem = 0;
608
609         while (gdi->btree.curitem < gdi->btree.nitem)
610         {
611                 if (!gdi->stack)
612                         gdi->stack = ginPrepareFindLeafPage(&gdi->btree, rootBlkno);
613
614                 gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
615
616                 if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
617                         elog(ERROR, "item pointer (%u,%d) already exists",
618                         ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
619                                  ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
620
621                 ginInsertValue(&(gdi->btree), gdi->stack);
622
623                 gdi->stack = NULL;
624         }
625 }
626
627 Buffer
628 scanBeginPostingTree(GinPostingTreeScan *gdi)
629 {
630         gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
631         return gdi->stack->buffer;
632 }