]> granicus.if.org Git - postgresql/blob - src/backend/access/gin/ginentrypage.c
GIN: Generalized Inverted iNdex.
[postgresql] / src / backend / access / gin / ginentrypage.c
1 /*-------------------------------------------------------------------------
2  *
3  * ginentrypage.c
4  *    page utilities routines for the postgres inverted index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *          $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16 #include "access/genam.h"
17 #include "access/gin.h"
18 #include "access/heapam.h"
19 #include "catalog/index.h"
20 #include "miscadmin.h"
21 #include "storage/freespace.h"
22 #include "access/tuptoaster.h"
23
24 /*
25  * forms tuple for entry tree. On leaf page, Index tuple has
26  * non-traditional layout. Tuple may contain posting list or
27  * root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno)
28  * 1) Posting list
29  *              - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usial
30  *              - ItemPointerGetBlockNumber(&itup->t_tid) contains original
31  *                size of tuple (without posting list). 
32  *                Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n)
33  *              - ItemPointerGetOffsetNumber(&itup->t_tid) contains number
34  *                of elements in posting list (number of heap itempointer)
35  *                Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n)
36  *      - After usial part of tuple there is a posting list
37  *                Macros: GinGetPosting(itup)
38  * 2) Posting tree
39  *              - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usial
40  *              - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of 
41  *                root of posting tree
42  *              - ItemPointerGetOffsetNumber(&itup->t_tid) contains magick number GIN_TREE_POSTING
43  */
44 IndexTuple
45 GinFormTuple(GinState *ginstate, Datum key, ItemPointerData *ipd, uint32 nipd) {
46         bool    isnull=FALSE;
47         IndexTuple      itup;
48
49         itup = index_form_tuple(ginstate->tupdesc, &key, &isnull);      
50
51         GinSetOrigSizePosting( itup, IndexTupleSize(itup) );
52
53         if ( nipd > 0 ) {
54                 uint32 newsize = MAXALIGN(SHORTALIGN(IndexTupleSize(itup)) + sizeof(ItemPointerData)*nipd);
55
56                 if ( newsize >= INDEX_SIZE_MASK )
57                         return NULL;
58
59                 if ( newsize > TOAST_INDEX_TARGET && nipd > 1 )
60                         return NULL;
61
62                 itup = repalloc( itup, newsize );
63
64                 /* set new size */
65                 itup->t_info &= ~INDEX_SIZE_MASK;  
66                 itup->t_info |= newsize;
67
68                 if ( ipd )
69                         memcpy( GinGetPosting(itup), ipd, sizeof(ItemPointerData)*nipd ); 
70                 GinSetNPosting(itup, nipd); 
71         } else {
72                 GinSetNPosting(itup, 0); 
73         }
74         return itup;
75 }
76
77 /*
78  * Entry tree is a "static", ie tuple never deletes from it,
79  * so we don't use right bound, we use rightest key instead.
80  */
81 static IndexTuple
82 getRightMostTuple(Page page) {
83         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
84         return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff));
85 }
86
87 Datum
88 ginGetHighKey(GinState *ginstate, Page page) {
89         IndexTuple  itup;
90         bool    isnull;
91
92         itup = getRightMostTuple(page);
93
94         return index_getattr(itup, FirstOffsetNumber, ginstate->tupdesc, &isnull); 
95 }
96
97 static bool 
98 entryIsMoveRight(GinBtree btree, Page page) {
99         Datum highkey;
100
101         if ( GinPageRightMost(page) )
102                 return FALSE;
103
104         highkey = ginGetHighKey(btree->ginstate, page);
105
106         if ( compareEntries(btree->ginstate, btree->entryValue, highkey) > 0 )
107                 return TRUE;
108
109         return FALSE;
110 }
111
112 /*
113  * Find correct tuple in non-leaf page. It supposed that
114  * page correctly choosen and searching value SHOULD be on page
115  */
116 static BlockNumber
117 entryLocateEntry(GinBtree btree, GinBtreeStack *stack) {
118         OffsetNumber low, high, maxoff;
119         IndexTuple  itup;
120         int result;
121         Page page = BufferGetPage( stack->buffer );
122
123         Assert( !GinPageIsLeaf(page) );
124         Assert( !GinPageIsData(page) );
125
126         if ( btree->fullScan ) {
127                 stack->off = FirstOffsetNumber;
128                 stack->predictNumber *= PageGetMaxOffsetNumber(page);
129                 return btree->getLeftMostPage(btree, page);
130         }
131
132         low = FirstOffsetNumber;
133         maxoff = high = PageGetMaxOffsetNumber(page);
134         Assert( high >= low );
135
136         high++;
137
138         while (high > low) {
139                 OffsetNumber mid = low + ((high - low) / 2);
140
141                 if ( mid == maxoff && GinPageRightMost(page) )
142                         /* Right infinity */
143                         result = -1;
144                 else {
145                         bool    isnull;
146
147                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
148                         result = compareEntries(btree->ginstate, btree->entryValue, 
149                                 index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) );
150                 }
151
152                 if ( result == 0 ) {
153                         stack->off = mid;
154                         Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO );
155                         return GinItemPointerGetBlockNumber(&(itup)->t_tid);
156                 } else if ( result > 0 )
157                         low = mid + 1;
158                 else
159                         high = mid;
160         }
161
162         Assert( high>=FirstOffsetNumber && high <= maxoff );
163
164         stack->off = high;
165         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high));
166         Assert( GinItemPointerGetBlockNumber(&(itup)->t_tid) != GIN_ROOT_BLKNO );
167         return GinItemPointerGetBlockNumber(&(itup)->t_tid);
168 }
169
170 /*
171  * Searches correct position for value on leaf page.
172  * Page should be corrrectly choosen.
173  * Returns true if value found on page.
174  */
175 static bool
176 entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) {
177         Page page = BufferGetPage( stack->buffer );
178         OffsetNumber low, high;
179         IndexTuple  itup;
180
181         Assert( GinPageIsLeaf(page) );
182         Assert( !GinPageIsData(page) );
183
184         if ( btree->fullScan ) {
185                 stack->off = FirstOffsetNumber;
186                 return TRUE;
187         }
188
189         low = FirstOffsetNumber;
190         high = PageGetMaxOffsetNumber(page);
191
192         if ( high < low ) {
193                 stack->off = FirstOffsetNumber;
194                 return false;
195         }
196
197         high++;
198
199         while (high > low) {
200                 OffsetNumber mid = low + ((high - low) / 2);
201                 bool    isnull;
202                 int             result;
203
204                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
205                 result = compareEntries(btree->ginstate, btree->entryValue,
206                         index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull) );
207
208                 if ( result == 0 ) {
209                         stack->off = mid;
210                         return true;
211                 } else if ( result > 0 ) 
212                         low = mid + 1;
213                 else
214                         high = mid;
215         }
216
217         stack->off = high;
218         return false;
219 }
220
221 static OffsetNumber
222 entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) {
223         OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page);
224         IndexTuple  itup;
225
226         Assert( !GinPageIsLeaf(page) );
227         Assert( !GinPageIsData(page) );
228
229         /* if page isn't changed, we returns storedOff */
230         if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) {
231                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff));
232                 if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
233                         return storedOff;
234
235                 /* we hope, that needed pointer goes to right. It's true
236                         if there wasn't a deletion */
237                 for( i=storedOff+1 ; i <= maxoff ; i++ ) {
238                         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
239                         if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
240                                 return i;
241                 }
242                 maxoff = storedOff-1;
243         }
244
245         /* last chance */
246         for( i=FirstOffsetNumber; i <= maxoff ; i++ ) { 
247                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
248                 if ( GinItemPointerGetBlockNumber(&(itup)->t_tid) == blkno )
249                         return i;
250         }
251
252         return InvalidOffsetNumber;
253 }
254
255 static BlockNumber
256 entryGetLeftMostPage(GinBtree btree, Page page) {
257         IndexTuple  itup;
258
259         Assert( !GinPageIsLeaf(page) );
260         Assert( !GinPageIsData(page) );
261         Assert( PageGetMaxOffsetNumber(page) >= FirstOffsetNumber );
262
263         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber));
264         return GinItemPointerGetBlockNumber(&(itup)->t_tid);    
265 }
266
267 static bool
268 entryIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) {
269         Size itupsz = 0;
270         Page page = BufferGetPage(buf);
271
272         Assert( btree->entry );
273         Assert( !GinPageIsData(page) );
274
275         if ( btree->isDelete ) {
276                 IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off));
277                 itupsz = MAXALIGN( IndexTupleSize( itup ) ) + sizeof(ItemIdData);
278         }
279
280         if (  PageGetFreeSpace(page) + itupsz >=  MAXALIGN(IndexTupleSize(btree->entry)) + sizeof(ItemIdData) )
281                 return true;
282
283         return false;
284 }
285
286 /*
287  * Delete tuple on leaf page if tuples was existed and we
288  * should update it, update old child blkno to new right page
289  * if child split is occured
290  */
291 static BlockNumber
292 entryPreparePage( GinBtree btree, Page page, OffsetNumber off) {
293         BlockNumber ret = InvalidBlockNumber;
294
295         Assert( btree->entry );
296         Assert( !GinPageIsData(page) );
297
298         if ( btree->isDelete ) {
299                 Assert( GinPageIsLeaf(page) );
300                 PageIndexTupleDelete(page, off);
301         }
302
303         if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber )  {
304                 IndexTuple itup = (IndexTuple)PageGetItem(page, PageGetItemId(page, off));
305                 ItemPointerSet(&itup->t_tid, btree->rightblkno, InvalidOffsetNumber);
306                 ret = btree->rightblkno;
307         }
308
309         btree->rightblkno = InvalidBlockNumber;
310
311         return ret;
312 }
313
314 /*
315  * Place tuple on page and fills WAL record
316  */
317 static void
318 entryPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) {
319         Page page = BufferGetPage(buf);
320         static XLogRecData rdata[3];
321         OffsetNumber    placed;
322         static  ginxlogInsert   data;
323
324         *prdata = rdata;
325         data.updateBlkno = entryPreparePage( btree, page, off );
326
327         placed = PageAddItem( page, (Item)btree->entry, IndexTupleSize(btree->entry), off, LP_USED);
328         if ( placed != off )
329                 elog(ERROR, "failed to add item to index page in \"%s\"",
330                         RelationGetRelationName(btree->index));
331
332         data.node = btree->index->rd_node;
333         data.blkno = BufferGetBlockNumber( buf );
334         data.offset = off;
335         data.nitem = 1;
336         data.isDelete = btree->isDelete;
337         data.isData = false;
338         data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE;
339
340         rdata[0].buffer = buf;
341         rdata[0].buffer_std = TRUE;
342         rdata[0].data = NULL;
343         rdata[0].len = 0;
344         rdata[0].next = &rdata[1];
345
346         rdata[1].buffer = InvalidBuffer;
347         rdata[1].data = (char *) &data;
348         rdata[1].len = sizeof(ginxlogInsert);
349         rdata[1].next = &rdata[2];
350
351         rdata[2].buffer = InvalidBuffer;
352         rdata[2].data = (char *) btree->entry;
353         rdata[2].len = IndexTupleSize(btree->entry);
354         rdata[2].next = NULL;
355
356         btree->entry = NULL;
357 }
358
359 /*
360  * Place tuple and split page, original buffer(lbuf) leaves untouched,
361  * returns shadow page of lbuf filled new data.
362  * Tuples are distributed between pages by equal size on its, not
363  * an equal number!
364  */
365 static Page
366 entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) {
367         static XLogRecData rdata[2];
368         OffsetNumber    i, maxoff, separator=InvalidOffsetNumber;
369         Size    totalsize=0;
370         Size    lsize = 0, size;
371         static char     tupstore[ 2*BLCKSZ ];
372         char    *ptr;
373         IndexTuple itup, leftrightmost=NULL;
374         static ginxlogSplit     data;
375         Datum   value;
376         bool    isnull;
377         Page    page;
378         Page    lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) );
379         Page    rpage = BufferGetPage( rbuf );
380         Size    pageSize = PageGetPageSize( lpage ); 
381
382         *prdata = rdata;
383         data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ? 
384                         InvalidOffsetNumber : GinItemPointerGetBlockNumber( &(btree->entry->t_tid) );
385         data.updateBlkno = entryPreparePage( btree, lpage, off );
386
387         maxoff = PageGetMaxOffsetNumber(lpage);
388         ptr = tupstore; 
389
390         for(i=FirstOffsetNumber; i<=maxoff; i++) {
391                 if ( i==off ) {
392                         size = MAXALIGN( IndexTupleSize(btree->entry) );
393                         memcpy(ptr, btree->entry, size);
394                         ptr+=size;
395                         totalsize += size + sizeof(ItemIdData);
396                 }
397
398                 itup = (IndexTuple)PageGetItem(lpage, PageGetItemId(lpage, i));
399                 size = MAXALIGN( IndexTupleSize(itup) );
400                 memcpy(ptr, itup, size);
401                 ptr+=size;
402                 totalsize += size + sizeof(ItemIdData);
403         }
404
405         if ( off==maxoff+1 ) {
406                 size = MAXALIGN( IndexTupleSize(btree->entry) );
407                 memcpy(ptr, btree->entry, size);
408                 ptr+=size;
409                 totalsize += size + sizeof(ItemIdData);
410         }
411
412         GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize ); 
413         GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize ); 
414
415         ptr = tupstore;
416         maxoff++;       
417         lsize = 0;
418
419         page = lpage;
420         for(i=FirstOffsetNumber; i<=maxoff; i++) {
421                 itup = (IndexTuple)ptr;
422
423                 if ( lsize > totalsize/2 ) {
424                         if ( separator==InvalidOffsetNumber )
425                                 separator = i-1;
426                         page = rpage;
427                 } else {
428                         leftrightmost = itup;
429                         lsize += MAXALIGN( IndexTupleSize(itup) ) + sizeof(ItemIdData);
430                 }
431
432                 if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
433                         elog(ERROR, "failed to add item to index page in \"%s\"",
434                                 RelationGetRelationName(btree->index));
435                 ptr += MAXALIGN( IndexTupleSize(itup) );
436         }
437         
438         value = index_getattr(leftrightmost, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
439         btree->entry = GinFormTuple( btree->ginstate, value, NULL, 0);
440         ItemPointerSet(&(btree->entry)->t_tid, BufferGetBlockNumber( lbuf ), InvalidOffsetNumber);
441         btree->rightblkno = BufferGetBlockNumber( rbuf );
442                 
443         data.node = btree->index->rd_node;
444         data.rootBlkno = InvalidBlockNumber;
445         data.lblkno = BufferGetBlockNumber( lbuf );
446         data.rblkno = BufferGetBlockNumber( rbuf );
447         data.separator = separator;
448         data.nitem = maxoff;
449         data.isData = FALSE;
450         data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE;
451         data.isRootSplit = FALSE;
452
453         rdata[0].buffer = InvalidBuffer;
454         rdata[0].data = (char *) &data;
455         rdata[0].len = sizeof(ginxlogSplit);
456         rdata[0].next = &rdata[1];
457
458         rdata[1].buffer = InvalidBuffer;
459         rdata[1].data = tupstore;
460         rdata[1].len = MAXALIGN(totalsize);
461         rdata[1].next = NULL;
462
463         return lpage;
464 }
465
466 /* 
467  * return newly allocate rightmost tuple
468  */
469 IndexTuple
470 ginPageGetLinkItup(Buffer buf) {
471         IndexTuple itup, nitup;
472         Page    page = BufferGetPage(buf);
473
474         itup = getRightMostTuple( page );
475         if ( GinPageIsLeaf(page) && !GinIsPostingTree(itup) ) {
476                 nitup = (IndexTuple)palloc( MAXALIGN(GinGetOrigSizePosting(itup)) );
477                 memcpy( nitup, itup, GinGetOrigSizePosting(itup) );
478                 nitup->t_info &= ~INDEX_SIZE_MASK;
479                 nitup->t_info |= GinGetOrigSizePosting(itup);
480         } else {
481                 nitup = (IndexTuple)palloc( MAXALIGN(IndexTupleSize(itup)) );
482                 memcpy( nitup, itup, IndexTupleSize(itup) );
483         }
484
485         ItemPointerSet(&nitup->t_tid, BufferGetBlockNumber(buf), InvalidOffsetNumber);
486         return nitup;
487 }
488
489 /*
490  * Fills new root by rightest values from child.
491  * Also called from ginxlog, should not use btree
492  */
493 void
494 entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) {
495         Page page;
496         IndexTuple itup;
497
498         page = BufferGetPage(root);
499
500         itup = ginPageGetLinkItup( lbuf );
501         if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
502                 elog(ERROR, "failed to add item to index root page");
503
504         itup = ginPageGetLinkItup( rbuf );
505         if ( PageAddItem( page, (Item)itup, IndexTupleSize(itup), InvalidOffsetNumber, LP_USED) == InvalidOffsetNumber )
506                 elog(ERROR, "failed to add item to index root page");
507 }
508
509 void
510 prepareEntryScan( GinBtree btree, Relation index, Datum value, GinState *ginstate) {
511         memset(btree, 0, sizeof(GinBtreeData));
512
513         btree->isMoveRight = entryIsMoveRight;
514         btree->findChildPage = entryLocateEntry;
515         btree->findItem = entryLocateLeafEntry;
516         btree->findChildPtr = entryFindChildPtr;
517         btree->getLeftMostPage = entryGetLeftMostPage;
518         btree->isEnoughSpace = entryIsEnoughSpace;
519         btree->placeToPage = entryPlaceToPage;
520         btree->splitPage = entrySplitPage;
521         btree->fillRoot = entryFillRoot;
522
523         btree->index = index;
524         btree->ginstate = ginstate;
525         btree->entryValue = value;
526
527         btree->isDelete = FALSE;
528         btree->searchMode = FALSE;
529         btree->fullScan = FALSE;
530         btree->isBuild = FALSE;
531 }
532