]> granicus.if.org Git - postgresql/blob - src/backend/access/gist/gistutil.c
a127627334e80704edeed38d3522e4000a695abb
[postgresql] / src / backend / access / gist / gistutil.c
1 /*-------------------------------------------------------------------------
2  *
3  * gistutil.c
4  *        utilities routines for the postgres GiST index access method.
5  *
6  *
7  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *                      src/backend/access/gist/gistutil.c
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include <math.h>
17
18 #include "access/gist_private.h"
19 #include "access/reloptions.h"
20 #include "storage/indexfsm.h"
21 #include "storage/lmgr.h"
22 #include "utils/builtins.h"
23
24 /*
25  * static *S used for temporary storage (saves stack and palloc() call)
26  */
27
28 static Datum attrS[INDEX_MAX_KEYS];
29 static bool isnullS[INDEX_MAX_KEYS];
30
31 /*
32  * Write itup vector to page, has no control of free space.
33  */
34 void
35 gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
36 {
37         OffsetNumber l = InvalidOffsetNumber;
38         int                     i;
39
40         if (off == InvalidOffsetNumber)
41                 off = (PageIsEmpty(page)) ? FirstOffsetNumber :
42                         OffsetNumberNext(PageGetMaxOffsetNumber(page));
43
44         for (i = 0; i < len; i++)
45         {
46                 Size            sz = IndexTupleSize(itup[i]);
47
48                 l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
49                 if (l == InvalidOffsetNumber)
50                         elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
51                                  i, len, (int) sz);
52                 off++;
53         }
54 }
55
56 /*
57  * Check space for itup vector on page
58  */
59 bool
60 gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
61 {
62         unsigned int size = freespace,
63                                 deleted = 0;
64         int                     i;
65
66         for (i = 0; i < len; i++)
67                 size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
68
69         if (todelete != InvalidOffsetNumber)
70         {
71                 IndexTuple      itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
72
73                 deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
74         }
75
76         return (PageGetFreeSpace(page) + deleted < size);
77 }
78
79 bool
80 gistfitpage(IndexTuple *itvec, int len)
81 {
82         int                     i;
83         Size            size = 0;
84
85         for (i = 0; i < len; i++)
86                 size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
87
88         /* TODO: Consider fillfactor */
89         return (size <= GiSTPageSize);
90 }
91
92 /*
93  * Read buffer into itup vector
94  */
95 IndexTuple *
96 gistextractpage(Page page, int *len /* out */ )
97 {
98         OffsetNumber i,
99                                 maxoff;
100         IndexTuple *itvec;
101
102         maxoff = PageGetMaxOffsetNumber(page);
103         *len = maxoff;
104         itvec = palloc(sizeof(IndexTuple) * maxoff);
105         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
106                 itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
107
108         return itvec;
109 }
110
111 /*
112  * join two vectors into one
113  */
114 IndexTuple *
115 gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
116 {
117         itvec = (IndexTuple *) repalloc((void *) itvec, sizeof(IndexTuple) * ((*len) + addlen));
118         memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
119         *len += addlen;
120         return itvec;
121 }
122
123 /*
124  * make plain IndexTupleVector
125  */
126
127 IndexTupleData *
128 gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
129 {
130         char       *ptr,
131                            *ret;
132         int                     i;
133
134         *memlen = 0;
135
136         for (i = 0; i < veclen; i++)
137                 *memlen += IndexTupleSize(vec[i]);
138
139         ptr = ret = palloc(*memlen);
140
141         for (i = 0; i < veclen; i++)
142         {
143                 memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
144                 ptr += IndexTupleSize(vec[i]);
145         }
146
147         return (IndexTupleData *) ret;
148 }
149
150 /*
151  * Make unions of keys in IndexTuple vector.
152  * Resulting Datums aren't compressed.
153  */
154
155 void
156 gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
157                                    Datum *attr, bool *isnull)
158 {
159         int                     i;
160         GistEntryVector *evec;
161         int                     attrsize;
162
163         evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
164
165         for (i = startkey; i < giststate->tupdesc->natts; i++)
166         {
167                 int                     j;
168
169                 evec->n = 0;
170                 if (!isnull[i])
171                 {
172                         gistentryinit(evec->vector[evec->n], attr[i],
173                                                   NULL, NULL, (OffsetNumber) 0,
174                                                   FALSE);
175                         evec->n++;
176                 }
177
178                 for (j = 0; j < len; j++)
179                 {
180                         Datum           datum;
181                         bool            IsNull;
182
183                         datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
184                         if (IsNull)
185                                 continue;
186
187                         gistdentryinit(giststate, i,
188                                                    evec->vector + evec->n,
189                                                    datum,
190                                                    NULL, NULL, (OffsetNumber) 0,
191                                                    FALSE, IsNull);
192                         evec->n++;
193                 }
194
195                 /* If this tuple vector was all NULLs, the union is NULL */
196                 if (evec->n == 0)
197                 {
198                         attr[i] = (Datum) 0;
199                         isnull[i] = TRUE;
200                 }
201                 else
202                 {
203                         if (evec->n == 1)
204                         {
205                                 evec->n = 2;
206                                 evec->vector[1] = evec->vector[0];
207                         }
208
209                         /* Make union and store in attr array */
210                         attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
211                                                                                 giststate->supportCollation[i],
212                                                                                 PointerGetDatum(evec),
213                                                                                 PointerGetDatum(&attrsize));
214
215                         isnull[i] = FALSE;
216                 }
217         }
218 }
219
220 /*
221  * Return an IndexTuple containing the result of applying the "union"
222  * method to the specified IndexTuple vector.
223  */
224 IndexTuple
225 gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
226 {
227         memset(isnullS, TRUE, sizeof(bool) * giststate->tupdesc->natts);
228
229         gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS);
230
231         return gistFormTuple(giststate, r, attrS, isnullS, false);
232 }
233
234 /*
235  * makes union of two key
236  */
237 void
238 gistMakeUnionKey(GISTSTATE *giststate, int attno,
239                                  GISTENTRY *entry1, bool isnull1,
240                                  GISTENTRY *entry2, bool isnull2,
241                                  Datum *dst, bool *dstisnull)
242 {
243
244         int                     dstsize;
245
246         static char storage[2 * sizeof(GISTENTRY) + GEVHDRSZ];
247         GistEntryVector *evec = (GistEntryVector *) storage;
248
249         evec->n = 2;
250
251         if (isnull1 && isnull2)
252         {
253                 *dstisnull = TRUE;
254                 *dst = (Datum) 0;
255         }
256         else
257         {
258                 if (isnull1 == FALSE && isnull2 == FALSE)
259                 {
260                         evec->vector[0] = *entry1;
261                         evec->vector[1] = *entry2;
262                 }
263                 else if (isnull1 == FALSE)
264                 {
265                         evec->vector[0] = *entry1;
266                         evec->vector[1] = *entry1;
267                 }
268                 else
269                 {
270                         evec->vector[0] = *entry2;
271                         evec->vector[1] = *entry2;
272                 }
273
274                 *dstisnull = FALSE;
275                 *dst = FunctionCall2Coll(&giststate->unionFn[attno],
276                                                                  giststate->supportCollation[attno],
277                                                                  PointerGetDatum(evec),
278                                                                  PointerGetDatum(&dstsize));
279         }
280 }
281
282 bool
283 gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
284 {
285         bool            result;
286
287         FunctionCall3Coll(&giststate->equalFn[attno],
288                                           giststate->supportCollation[attno],
289                                           a, b,
290                                           PointerGetDatum(&result));
291         return result;
292 }
293
294 /*
295  * Decompress all keys in tuple
296  */
297 void
298 gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
299                                   OffsetNumber o, GISTENTRY *attdata, bool *isnull)
300 {
301         int                     i;
302
303         for (i = 0; i < r->rd_att->natts; i++)
304         {
305                 Datum           datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
306
307                 gistdentryinit(giststate, i, &attdata[i],
308                                            datum, r, p, o,
309                                            FALSE, isnull[i]);
310         }
311 }
312
313 /*
314  * Forms union of oldtup and addtup, if union == oldtup then return NULL
315  */
316 IndexTuple
317 gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
318 {
319         bool            neednew = FALSE;
320         GISTENTRY       oldentries[INDEX_MAX_KEYS],
321                                 addentries[INDEX_MAX_KEYS];
322         bool            oldisnull[INDEX_MAX_KEYS],
323                                 addisnull[INDEX_MAX_KEYS];
324         IndexTuple      newtup = NULL;
325         int                     i;
326
327         gistDeCompressAtt(giststate, r, oldtup, NULL,
328                                           (OffsetNumber) 0, oldentries, oldisnull);
329
330         gistDeCompressAtt(giststate, r, addtup, NULL,
331                                           (OffsetNumber) 0, addentries, addisnull);
332
333         for (i = 0; i < r->rd_att->natts; i++)
334         {
335                 gistMakeUnionKey(giststate, i,
336                                                  oldentries + i, oldisnull[i],
337                                                  addentries + i, addisnull[i],
338                                                  attrS + i, isnullS + i);
339
340                 if (neednew)
341                         /* we already need new key, so we can skip check */
342                         continue;
343
344                 if (isnullS[i])
345                         /* union of key may be NULL if and only if both keys are NULL */
346                         continue;
347
348                 if (!addisnull[i])
349                 {
350                         if (oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i]) == false)
351                                 neednew = true;
352                 }
353         }
354
355         if (neednew)
356         {
357                 /* need to update key */
358                 newtup = gistFormTuple(giststate, r, attrS, isnullS, false);
359                 newtup->t_tid = oldtup->t_tid;
360         }
361
362         return newtup;
363 }
364
365 /*
366  * Search an upper index page for the entry with lowest penalty for insertion
367  * of the new index key contained in "it".
368  *
369  * Returns the index of the page entry to insert into.
370  */
371 OffsetNumber
372 gistchoose(Relation r, Page p, IndexTuple it,   /* it has compressed entry */
373                    GISTSTATE *giststate)
374 {
375         OffsetNumber result;
376         OffsetNumber maxoff;
377         OffsetNumber i;
378         float           best_penalty[INDEX_MAX_KEYS];
379         GISTENTRY       entry,
380                                 identry[INDEX_MAX_KEYS];
381         bool            isnull[INDEX_MAX_KEYS];
382         int                     keep_current_best;
383
384         Assert(!GistPageIsLeaf(p));
385
386         gistDeCompressAtt(giststate, r,
387                                           it, NULL, (OffsetNumber) 0,
388                                           identry, isnull);
389
390         /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
391         result = FirstOffsetNumber;
392
393         /*
394          * The index may have multiple columns, and there's a penalty value for
395          * each column.  The penalty associated with a column that appears earlier
396          * in the index definition is strictly more important than the penalty of
397          * a column that appears later in the index definition.
398          *
399          * best_penalty[j] is the best penalty we have seen so far for column j,
400          * or -1 when we haven't yet examined column j.  Array entries to the
401          * right of the first -1 are undefined.
402          */
403         best_penalty[0] = -1;
404
405         /*
406          * If we find a tuple that's exactly as good as the currently best one, we
407          * could use either one.  When inserting a lot of tuples with the same or
408          * similar keys, it's preferable to descend down the same path when
409          * possible, as that's more cache-friendly.  On the other hand, if all
410          * inserts land on the same leaf page after a split, we're never going to
411          * insert anything to the other half of the split, and will end up using
412          * only 50% of the available space.  Distributing the inserts evenly would
413          * lead to better space usage, but that hurts cache-locality during
414          * insertion.  To get the best of both worlds, when we find a tuple that's
415          * exactly as good as the previous best, choose randomly whether to stick
416          * to the old best, or use the new one.  Once we decide to stick to the
417          * old best, we keep sticking to it for any subsequent equally good tuples
418          * we might find.  This favors tuples with low offsets, but still allows
419          * some inserts to go to other equally-good subtrees.
420          *
421          * keep_current_best is -1 if we haven't yet had to make a random choice
422          * whether to keep the current best tuple.  If we have done so, and
423          * decided to keep it, keep_current_best is 1; if we've decided to
424          * replace, keep_current_best is 0.  (This state will be reset to -1 as
425          * soon as we've made the replacement, but sometimes we make the choice in
426          * advance of actually finding a replacement best tuple.)
427          */
428         keep_current_best = -1;
429
430         /*
431          * Loop over tuples on page.
432          */
433         maxoff = PageGetMaxOffsetNumber(p);
434         Assert(maxoff >= FirstOffsetNumber);
435
436         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
437         {
438                 IndexTuple      itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
439                 bool            zero_penalty;
440                 int                     j;
441
442                 zero_penalty = true;
443
444                 /* Loop over index attributes. */
445                 for (j = 0; j < r->rd_att->natts; j++)
446                 {
447                         Datum           datum;
448                         float           usize;
449                         bool            IsNull;
450
451                         /* Compute penalty for this column. */
452                         datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
453                         gistdentryinit(giststate, j, &entry, datum, r, p, i,
454                                                    FALSE, IsNull);
455                         usize = gistpenalty(giststate, j, &entry, IsNull,
456                                                                 &identry[j], isnull[j]);
457                         if (usize > 0)
458                                 zero_penalty = false;
459
460                         if (best_penalty[j] < 0 || usize < best_penalty[j])
461                         {
462                                 /*
463                                  * New best penalty for column.  Tentatively select this tuple
464                                  * as the target, and record the best penalty.  Then reset the
465                                  * next column's penalty to "unknown" (and indirectly, the
466                                  * same for all the ones to its right).  This will force us to
467                                  * adopt this tuple's penalty values as the best for all the
468                                  * remaining columns during subsequent loop iterations.
469                                  */
470                                 result = i;
471                                 best_penalty[j] = usize;
472
473                                 if (j < r->rd_att->natts - 1)
474                                         best_penalty[j + 1] = -1;
475
476                                 /* we have new best, so reset keep-it decision */
477                                 keep_current_best = -1;
478                         }
479                         else if (best_penalty[j] == usize)
480                         {
481                                 /*
482                                  * The current tuple is exactly as good for this column as the
483                                  * best tuple seen so far.      The next iteration of this loop
484                                  * will compare the next column.
485                                  */
486                         }
487                         else
488                         {
489                                 /*
490                                  * The current tuple is worse for this column than the best
491                                  * tuple seen so far.  Skip the remaining columns and move on
492                                  * to the next tuple, if any.
493                                  */
494                                 zero_penalty = false;   /* so outer loop won't exit */
495                                 break;
496                         }
497                 }
498
499                 /*
500                  * If we looped past the last column, and did not update "result",
501                  * then this tuple is exactly as good as the prior best tuple.
502                  */
503                 if (j == r->rd_att->natts && result != i)
504                 {
505                         if (keep_current_best == -1)
506                         {
507                                 /* we didn't make the random choice yet for this old best */
508                                 keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
509                         }
510                         if (keep_current_best == 0)
511                         {
512                                 /* we choose to use the new tuple */
513                                 result = i;
514                                 /* choose again if there are even more exactly-as-good ones */
515                                 keep_current_best = -1;
516                         }
517                 }
518
519                 /*
520                  * If we find a tuple with zero penalty for all columns, and we've
521                  * decided we don't want to search for another tuple with equal
522                  * penalty, there's no need to examine remaining tuples; just break
523                  * out of the loop and return it.
524                  */
525                 if (zero_penalty)
526                 {
527                         if (keep_current_best == -1)
528                         {
529                                 /* we didn't make the random choice yet for this old best */
530                                 keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
531                         }
532                         if (keep_current_best == 1)
533                                 break;
534                 }
535         }
536
537         return result;
538 }
539
540 /*
541  * initialize a GiST entry with a decompressed version of key
542  */
543 void
544 gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
545                            Datum k, Relation r, Page pg, OffsetNumber o,
546                            bool l, bool isNull)
547 {
548         if (!isNull)
549         {
550                 GISTENTRY  *dep;
551
552                 gistentryinit(*e, k, r, pg, o, l);
553                 dep = (GISTENTRY *)
554                         DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey],
555                                                                                    giststate->supportCollation[nkey],
556                                                                                           PointerGetDatum(e)));
557                 /* decompressFn may just return the given pointer */
558                 if (dep != e)
559                         gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
560                                                   dep->leafkey);
561         }
562         else
563                 gistentryinit(*e, (Datum) 0, r, pg, o, l);
564 }
565
566
567 /*
568  * initialize a GiST entry with a compressed version of key
569  */
570 void
571 gistcentryinit(GISTSTATE *giststate, int nkey,
572                            GISTENTRY *e, Datum k, Relation r,
573                            Page pg, OffsetNumber o, bool l, bool isNull)
574 {
575         if (!isNull)
576         {
577                 GISTENTRY  *cep;
578
579                 gistentryinit(*e, k, r, pg, o, l);
580                 cep = (GISTENTRY *)
581                         DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[nkey],
582                                                                                    giststate->supportCollation[nkey],
583                                                                                           PointerGetDatum(e)));
584                 /* compressFn may just return the given pointer */
585                 if (cep != e)
586                         gistentryinit(*e, cep->key, cep->rel, cep->page, cep->offset,
587                                                   cep->leafkey);
588         }
589         else
590                 gistentryinit(*e, (Datum) 0, r, pg, o, l);
591 }
592
593 IndexTuple
594 gistFormTuple(GISTSTATE *giststate, Relation r,
595                           Datum attdata[], bool isnull[], bool newValues)
596 {
597         GISTENTRY       centry[INDEX_MAX_KEYS];
598         Datum           compatt[INDEX_MAX_KEYS];
599         int                     i;
600         IndexTuple      res;
601
602         for (i = 0; i < r->rd_att->natts; i++)
603         {
604                 if (isnull[i])
605                         compatt[i] = (Datum) 0;
606                 else
607                 {
608                         gistcentryinit(giststate, i, &centry[i], attdata[i],
609                                                    r, NULL, (OffsetNumber) 0,
610                                                    newValues,
611                                                    FALSE);
612                         compatt[i] = centry[i].key;
613                 }
614         }
615
616         res = index_form_tuple(giststate->tupdesc, compatt, isnull);
617
618         /*
619          * The offset number on tuples on internal pages is unused. For historical
620          * reasons, it is set 0xffff.
621          */
622         ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
623         return res;
624 }
625
626 float
627 gistpenalty(GISTSTATE *giststate, int attno,
628                         GISTENTRY *orig, bool isNullOrig,
629                         GISTENTRY *add, bool isNullAdd)
630 {
631         float           penalty = 0.0;
632
633         if (giststate->penaltyFn[attno].fn_strict == FALSE ||
634                 (isNullOrig == FALSE && isNullAdd == FALSE))
635         {
636                 FunctionCall3Coll(&giststate->penaltyFn[attno],
637                                                   giststate->supportCollation[attno],
638                                                   PointerGetDatum(orig),
639                                                   PointerGetDatum(add),
640                                                   PointerGetDatum(&penalty));
641                 /* disallow negative or NaN penalty */
642                 if (isnan(penalty) || penalty < 0.0)
643                         penalty = 0.0;
644         }
645         else if (isNullOrig && isNullAdd)
646                 penalty = 0.0;
647         else
648         {
649                 /* try to prevent mixing null and non-null values */
650                 penalty = get_float4_infinity();
651         }
652
653         return penalty;
654 }
655
656 /*
657  * Initialize a new index page
658  */
659 void
660 GISTInitBuffer(Buffer b, uint32 f)
661 {
662         GISTPageOpaque opaque;
663         Page            page;
664         Size            pageSize;
665
666         pageSize = BufferGetPageSize(b);
667         page = BufferGetPage(b);
668         PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
669
670         opaque = GistPageGetOpaque(page);
671         /* page was already zeroed by PageInit, so this is not needed: */
672         /* memset(&(opaque->nsn), 0, sizeof(GistNSN)); */
673         opaque->rightlink = InvalidBlockNumber;
674         opaque->flags = f;
675         opaque->gist_page_id = GIST_PAGE_ID;
676 }
677
678 /*
679  * Verify that a freshly-read page looks sane.
680  */
681 void
682 gistcheckpage(Relation rel, Buffer buf)
683 {
684         Page            page = BufferGetPage(buf);
685
686         /*
687          * ReadBuffer verifies that every newly-read page passes
688          * PageHeaderIsValid, which means it either contains a reasonably sane
689          * page header or is all-zero.  We have to defend against the all-zero
690          * case, however.
691          */
692         if (PageIsNew(page))
693                 ereport(ERROR,
694                                 (errcode(ERRCODE_INDEX_CORRUPTED),
695                          errmsg("index \"%s\" contains unexpected zero page at block %u",
696                                         RelationGetRelationName(rel),
697                                         BufferGetBlockNumber(buf)),
698                                  errhint("Please REINDEX it.")));
699
700         /*
701          * Additionally check that the special area looks sane.
702          */
703         if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
704                 ereport(ERROR,
705                                 (errcode(ERRCODE_INDEX_CORRUPTED),
706                                  errmsg("index \"%s\" contains corrupted page at block %u",
707                                                 RelationGetRelationName(rel),
708                                                 BufferGetBlockNumber(buf)),
709                                  errhint("Please REINDEX it.")));
710 }
711
712
713 /*
714  * Allocate a new page (either by recycling, or by extending the index file)
715  *
716  * The returned buffer is already pinned and exclusive-locked
717  *
718  * Caller is responsible for initializing the page by calling GISTInitBuffer
719  */
720 Buffer
721 gistNewBuffer(Relation r)
722 {
723         Buffer          buffer;
724         bool            needLock;
725
726         /* First, try to get a page from FSM */
727         for (;;)
728         {
729                 BlockNumber blkno = GetFreeIndexPage(r);
730
731                 if (blkno == InvalidBlockNumber)
732                         break;                          /* nothing left in FSM */
733
734                 buffer = ReadBuffer(r, blkno);
735
736                 /*
737                  * We have to guard against the possibility that someone else already
738                  * recycled this page; the buffer may be locked if so.
739                  */
740                 if (ConditionalLockBuffer(buffer))
741                 {
742                         Page            page = BufferGetPage(buffer);
743
744                         if (PageIsNew(page))
745                                 return buffer;  /* OK to use, if never initialized */
746
747                         gistcheckpage(r, buffer);
748
749                         if (GistPageIsDeleted(page))
750                                 return buffer;  /* OK to use */
751
752                         LockBuffer(buffer, GIST_UNLOCK);
753                 }
754
755                 /* Can't use it, so release buffer and try again */
756                 ReleaseBuffer(buffer);
757         }
758
759         /* Must extend the file */
760         needLock = !RELATION_IS_LOCAL(r);
761
762         if (needLock)
763                 LockRelationForExtension(r, ExclusiveLock);
764
765         buffer = ReadBuffer(r, P_NEW);
766         LockBuffer(buffer, GIST_EXCLUSIVE);
767
768         if (needLock)
769                 UnlockRelationForExtension(r, ExclusiveLock);
770
771         return buffer;
772 }
773
774 Datum
775 gistoptions(PG_FUNCTION_ARGS)
776 {
777         Datum           reloptions = PG_GETARG_DATUM(0);
778         bool            validate = PG_GETARG_BOOL(1);
779         relopt_value *options;
780         GiSTOptions *rdopts;
781         int                     numoptions;
782         static const relopt_parse_elt tab[] = {
783                 {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
784                 {"buffering", RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)}
785         };
786
787         options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST,
788                                                           &numoptions);
789
790         /* if none set, we're done */
791         if (numoptions == 0)
792                 PG_RETURN_NULL();
793
794         rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions);
795
796         fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions,
797                                    validate, tab, lengthof(tab));
798
799         pfree(options);
800
801         PG_RETURN_BYTEA_P(rdopts);
802
803 }
804
805 /*
806  * Temporary GiST indexes are not WAL-logged, but we need LSNs to detect
807  * concurrent page splits anyway. GetXLogRecPtrForTemp() provides a fake
808  * sequence of LSNs for that purpose. Each call generates an LSN that is
809  * greater than any previous value returned by this function in the same
810  * session.
811  */
812 XLogRecPtr
813 GetXLogRecPtrForTemp(void)
814 {
815         static XLogRecPtr counter = 1;
816         counter++;
817         return counter;
818 }