1 /*-------------------------------------------------------------------------
4 * utilities routines for the postgres GiST index access method.
7 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/gist/gistutil.c
12 *-------------------------------------------------------------------------
18 #include "access/gist_private.h"
19 #include "access/reloptions.h"
20 #include "storage/indexfsm.h"
21 #include "storage/lmgr.h"
22 #include "utils/builtins.h"
25 * static *S used for temporary storage (saves stack and palloc() call)
/*
 * Scratch arrays reused by gistunion() and gistgetadjusted() below.
 * File-scope statics: NOT reentrant/thread-safe, but backend code is
 * single-threaded so sharing one buffer per process is safe.
 */
28 static Datum attrS[INDEX_MAX_KEYS];
29 static bool isnullS[INDEX_MAX_KEYS];
32 * Write itup vector to page, has no control of free space.
/*
 * Append each of the len tuples in itup[] to "page", starting at offset
 * "off".  Caller is responsible for having checked free space beforehand
 * (see gistnospace()); a PageAddItem failure here is treated as corruption
 * and raised via elog(ERROR).
 */
35 gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
37 OffsetNumber l = InvalidOffsetNumber;
/* InvalidOffsetNumber means "append after the current last item" */
40 if (off == InvalidOffsetNumber)
41 off = (PageIsEmpty(page)) ? FirstOffsetNumber :
42 OffsetNumberNext(PageGetMaxOffsetNumber(page));
44 for (i = 0; i < len; i++)
46 Size sz = IndexTupleSize(itup[i]);
48 l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
49 if (l == InvalidOffsetNumber)
50 elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
57 * Check space for itup vector on page
/*
 * Return true if the len tuples in itvec[] (plus "freespace" slop) would
 * NOT fit on "page".  If "todelete" is a valid offset, that existing item
 * is assumed to be removed first, so its space counts as available.
 */
60 gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
/* total bytes needed: requested slop + each tuple + its line pointer */
62 unsigned int size = freespace,
66 for (i = 0; i < len; i++)
67 size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
69 if (todelete != InvalidOffsetNumber)
71 IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
73 deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
/* true => does not fit */
76 return (PageGetFreeSpace(page) + deleted < size);
/*
 * Return true if the len tuples in itvec[] (tuple bytes plus one line
 * pointer each) fit on an empty GiST page of size GiSTPageSize.
 */
80 gistfitpage(IndexTuple *itvec, int len)
85 for (i = 0; i < len; i++)
86 size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
88 /* TODO: Consider fillfactor */
89 return (size <= GiSTPageSize);
93 * Read buffer into itup vector
/*
 * Build a palloc'd array of IndexTuple pointers for every item on "page",
 * returning the item count via *len.  NB: the pointers reference tuples
 * in place on the page; they are only valid while the page stays pinned
 * and unmodified.
 */
96 gistextractpage(Page page, int *len /* out */ )
102 maxoff = PageGetMaxOffsetNumber(page);
104 itvec = palloc(sizeof(IndexTuple) * maxoff);
105 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
106 itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
112 * join two vectors into one
/*
 * Grow itvec to hold addlen more entries and append additvec's pointers.
 * repalloc may move the array, so callers must use the returned pointer,
 * not the one passed in.  *len is updated to the combined length.
 */
115 gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
117 itvec = (IndexTuple *) repalloc((void *) itvec, sizeof(IndexTuple) * ((*len) + addlen));
118 memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
124 * make plain IndexTupleVector
/*
 * Flatten a vector of IndexTuple pointers into one contiguous palloc'd
 * buffer of concatenated tuples, returning the buffer; total byte size is
 * accumulated into *memlen.  Two passes: size first, then copy.
 */
128 gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
136 for (i = 0; i < veclen; i++)
137 *memlen += IndexTupleSize(vec[i]);
139 ptr = ret = palloc(*memlen);
141 for (i = 0; i < veclen; i++)
143 memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
144 ptr += IndexTupleSize(vec[i]);
147 return (IndexTupleData *) ret;
151 * Make unions of keys in IndexTuple vector.
152 * Resulting Datums aren't compressed.
/*
 * For each index column from "startkey" onward, decompress that column's
 * key from every tuple in itvec[] into a GistEntryVector and invoke the
 * opclass "union" support function, storing the resulting (uncompressed)
 * Datum in attr[i] / isnull[i].
 */
156 gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, int startkey,
157 Datum *attr, bool *isnull)
160 GistEntryVector *evec;
/* room for len entries plus 2 (see the one-entry duplication below) */
163 evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
165 for (i = startkey; i < giststate->tupdesc->natts; i++)
172 gistentryinit(evec->vector[evec->n], attr[i],
173 NULL, NULL, (OffsetNumber) 0,
/* collect the non-null decompressed keys of column i from each tuple */
178 for (j = 0; j < len; j++)
183 datum = index_getattr(itvec[j], i + 1, giststate->tupdesc, &IsNull);
187 gistdentryinit(giststate, i,
188 evec->vector + evec->n,
190 NULL, NULL, (OffsetNumber) 0,
195 /* If this tuple vector was all NULLs, the union is NULL */
/* some union functions dislike a one-entry vector: duplicate the entry */
206 evec->vector[1] = evec->vector[0];
209 /* Make union and store in attr array */
210 attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
211 giststate->supportCollation[i],
212 PointerGetDatum(evec),
213 PointerGetDatum(&attrsize));
221 * Return an IndexTuple containing the result of applying the "union"
222 * method to the specified IndexTuple vector.
/*
 * Thin wrapper over gistMakeUnionItVec(): unions all columns of all len
 * tuples into the shared attrS/isnullS scratch arrays, then forms a new
 * (compressed) index tuple from the result.
 */
225 gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
/* start from "all NULL"; gistMakeUnionItVec clears entries it fills */
227 memset(isnullS, TRUE, sizeof(bool) * giststate->tupdesc->natts);
229 gistMakeUnionItVec(giststate, itvec, len, 0, attrS, isnullS);
231 return gistFormTuple(giststate, r, attrS, isnullS, false);
235 * makes union of two key
/*
 * Union two already-decompressed keys of column "attno" into *dst /
 * *dstisnull.  NULL handling: both NULL => NULL result; exactly one NULL
 * => the union function is fed the non-null key twice (unioning a key
 * with itself), so a strict union function never sees a NULL.
 */
238 gistMakeUnionKey(GISTSTATE *giststate, int attno,
239 GISTENTRY *entry1, bool isnull1,
240 GISTENTRY *entry2, bool isnull2,
241 Datum *dst, bool *dstisnull)
/* static buffer: a 2-entry GistEntryVector, avoids a palloc per call */
246 static char storage[2 * sizeof(GISTENTRY) + GEVHDRSZ];
247 GistEntryVector *evec = (GistEntryVector *) storage;
251 if (isnull1 && isnull2)
258 if (isnull1 == FALSE && isnull2 == FALSE)
260 evec->vector[0] = *entry1;
261 evec->vector[1] = *entry2;
263 else if (isnull1 == FALSE)
/* only entry1 is valid: union it with itself */
265 evec->vector[0] = *entry1;
266 evec->vector[1] = *entry1;
/* only entry2 is valid: union it with itself */
270 evec->vector[0] = *entry2;
271 evec->vector[1] = *entry2;
275 *dst = FunctionCall2Coll(&giststate->unionFn[attno],
276 giststate->supportCollation[attno],
277 PointerGetDatum(evec),
278 PointerGetDatum(&dstsize));
/*
 * Compare two (uncompressed) keys of column "attno" using the opclass
 * "equal" support function; result is reported through the by-reference
 * bool that the function fills in.
 */
283 gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
287 FunctionCall3Coll(&giststate->equalFn[attno],
288 giststate->supportCollation[attno],
290 PointerGetDatum(&result));
295 * Decompress all keys in tuple
/*
 * Fetch every attribute of "tuple" and run it through gistdentryinit(),
 * filling attdata[0..natts-1] and isnull[0..natts-1].  p/o identify the
 * source page/offset for the entries (may be NULL/0 for a free-standing
 * tuple, as gistgetadjusted() does).
 */
298 gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
299 OffsetNumber o, GISTENTRY *attdata, bool *isnull)
303 for (i = 0; i < r->rd_att->natts; i++)
305 Datum datum = index_getattr(tuple, i + 1, giststate->tupdesc, &isnull[i]);
307 gistdentryinit(giststate, i, &attdata[i],
314 * Forms union of oldtup and addtup, if union == oldtup then return NULL
/*
 * Compute the per-column union of oldtup and addtup (into the shared
 * attrS/isnullS scratch arrays).  If any column's union differs from
 * oldtup's key, build and return a replacement tuple carrying oldtup's
 * t_tid; otherwise return NULL, meaning the parent key needs no update.
 */
317 gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
319 bool neednew = FALSE;
320 GISTENTRY oldentries[INDEX_MAX_KEYS],
321 addentries[INDEX_MAX_KEYS];
322 bool oldisnull[INDEX_MAX_KEYS],
323 addisnull[INDEX_MAX_KEYS];
324 IndexTuple newtup = NULL;
327 gistDeCompressAtt(giststate, r, oldtup, NULL,
328 (OffsetNumber) 0, oldentries, oldisnull);
330 gistDeCompressAtt(giststate, r, addtup, NULL,
331 (OffsetNumber) 0, addentries, addisnull);
333 for (i = 0; i < r->rd_att->natts; i++)
335 gistMakeUnionKey(giststate, i,
336 oldentries + i, oldisnull[i],
337 addentries + i, addisnull[i],
338 attrS + i, isnullS + i);
341 /* we already need new key, so we can skip check */
345 /* union of key may be NULL if and only if both keys are NULL */
/* old key NULL (union wasn't) or union != old key => must update */
350 if (oldisnull[i] || gistKeyIsEQ(giststate, i, oldentries[i].key, attrS[i]) == false)
357 /* need to update key */
358 newtup = gistFormTuple(giststate, r, attrS, isnullS, false);
/* keep the old tuple's downlink pointer */
359 newtup->t_tid = oldtup->t_tid;
366 * Search an upper index page for the entry with lowest penalty for insertion
367 * of the new index key contained in "it".
369 * Returns the index of the page entry to insert into.
/*
 * Penalties are compared column-by-column in index order (earlier columns
 * dominate); exact ties between tuples are broken by a coin flip, as the
 * long comment below explains.  "it" must already be in compressed form.
 */
372 gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
373 GISTSTATE *giststate)
378 float best_penalty[INDEX_MAX_KEYS];
380 identry[INDEX_MAX_KEYS];
381 bool isnull[INDEX_MAX_KEYS];
382 int keep_current_best;
/* only internal pages have downlinks to choose among */
384 Assert(!GistPageIsLeaf(p));
/* decompress the new key once, outside the per-tuple loop */
386 gistDeCompressAtt(giststate, r,
387 it, NULL, (OffsetNumber) 0,
390 /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
391 result = FirstOffsetNumber;
394 * The index may have multiple columns, and there's a penalty value for
395 * each column. The penalty associated with a column that appears earlier
396 * in the index definition is strictly more important than the penalty of
397 * a column that appears later in the index definition.
399 * best_penalty[j] is the best penalty we have seen so far for column j,
400 * or -1 when we haven't yet examined column j. Array entries to the
401 * right of the first -1 are undefined.
403 best_penalty[0] = -1;
406 * If we find a tuple that's exactly as good as the currently best one, we
407 * could use either one. When inserting a lot of tuples with the same or
408 * similar keys, it's preferable to descend down the same path when
409 * possible, as that's more cache-friendly. On the other hand, if all
410 * inserts land on the same leaf page after a split, we're never going to
411 * insert anything to the other half of the split, and will end up using
412 * only 50% of the available space. Distributing the inserts evenly would
413 * lead to better space usage, but that hurts cache-locality during
414 * insertion. To get the best of both worlds, when we find a tuple that's
415 * exactly as good as the previous best, choose randomly whether to stick
416 * to the old best, or use the new one. Once we decide to stick to the
417 * old best, we keep sticking to it for any subsequent equally good tuples
418 * we might find. This favors tuples with low offsets, but still allows
419 * some inserts to go to other equally-good subtrees.
421 * keep_current_best is -1 if we haven't yet had to make a random choice
422 * whether to keep the current best tuple. If we have done so, and
423 * decided to keep it, keep_current_best is 1; if we've decided to
424 * replace, keep_current_best is 0. (This state will be reset to -1 as
425 * soon as we've made the replacement, but sometimes we make the choice in
426 * advance of actually finding a replacement best tuple.)
428 keep_current_best = -1;
431 * Loop over tuples on page.
433 maxoff = PageGetMaxOffsetNumber(p);
434 Assert(maxoff >= FirstOffsetNumber);
436 for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
438 IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
444 /* Loop over index attributes. */
445 for (j = 0; j < r->rd_att->natts; j++)
451 /* Compute penalty for this column. */
452 datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
453 gistdentryinit(giststate, j, &entry, datum, r, p, i,
455 usize = gistpenalty(giststate, j, &entry, IsNull,
456 &identry[j], isnull[j]);
458 zero_penalty = false;
460 if (best_penalty[j] < 0 || usize < best_penalty[j])
463 * New best penalty for column. Tentatively select this tuple
464 * as the target, and record the best penalty. Then reset the
465 * next column's penalty to "unknown" (and indirectly, the
466 * same for all the ones to its right). This will force us to
467 * adopt this tuple's penalty values as the best for all the
468 * remaining columns during subsequent loop iterations.
471 best_penalty[j] = usize;
473 if (j < r->rd_att->natts - 1)
474 best_penalty[j + 1] = -1;
476 /* we have new best, so reset keep-it decision */
477 keep_current_best = -1;
479 else if (best_penalty[j] == usize)
482 * The current tuple is exactly as good for this column as the
483 * best tuple seen so far. The next iteration of this loop
484 * will compare the next column.
490 * The current tuple is worse for this column than the best
491 * tuple seen so far. Skip the remaining columns and move on
492 * to the next tuple, if any.
494 zero_penalty = false; /* so outer loop won't exit */
500 * If we looped past the last column, and did not update "result",
501 * then this tuple is exactly as good as the prior best tuple.
503 if (j == r->rd_att->natts && result != i)
505 if (keep_current_best == -1)
507 /* we didn't make the random choice yet for this old best */
508 keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
510 if (keep_current_best == 0)
512 /* we choose to use the new tuple */
514 /* choose again if there are even more exactly-as-good ones */
515 keep_current_best = -1;
520 * If we find a tuple with zero penalty for all columns, and we've
521 * decided we don't want to search for another tuple with equal
522 * penalty, there's no need to examine remaining tuples; just break
523 * out of the loop and return it.
527 if (keep_current_best == -1)
529 /* we didn't make the random choice yet for this old best */
530 keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0;
532 if (keep_current_best == 1)
541 * initialize a GiST entry with a decompressed version of key
/*
 * Initialize *e from key k (plus its rel/page/offset context), then run
 * the opclass "decompress" support function on it.  NULL keys skip the
 * support function and produce a zero-Datum entry.
 */
544 gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
545 Datum k, Relation r, Page pg, OffsetNumber o,
552 gistentryinit(*e, k, r, pg, o, l);
554 DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey],
555 giststate->supportCollation[nkey],
556 PointerGetDatum(e)));
557 /* decompressFn may just return the given pointer */
/* a new GISTENTRY was returned: copy its contents into *e */
559 gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
/* NULL key case: entry with a zero Datum */
563 gistentryinit(*e, (Datum) 0, r, pg, o, l);
568 * initialize a GiST entry with a compressed version of key
/*
 * Mirror image of gistdentryinit(): initialize *e from key k and run the
 * opclass "compress" support function.  NULL keys skip the support
 * function and produce a zero-Datum entry.
 */
571 gistcentryinit(GISTSTATE *giststate, int nkey,
572 GISTENTRY *e, Datum k, Relation r,
573 Page pg, OffsetNumber o, bool l, bool isNull)
579 gistentryinit(*e, k, r, pg, o, l);
581 DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[nkey],
582 giststate->supportCollation[nkey],
583 PointerGetDatum(e)));
584 /* compressFn may just return the given pointer */
/* a new GISTENTRY was returned: copy its contents into *e */
586 gistentryinit(*e, cep->key, cep->rel, cep->page, cep->offset,
/* NULL key case: entry with a zero Datum */
590 gistentryinit(*e, (Datum) 0, r, pg, o, l);
/*
 * Build an IndexTuple from per-column key Datums: each non-null key is
 * run through gistcentryinit() (opclass "compress"), then the compressed
 * values are assembled with index_form_tuple().
 *
 * Fix: the third argument to gistcentryinit() had been corrupted by an
 * HTML-entity mis-decoding ("&centry[i]" -> the cent-sign garble); the
 * intended address-of expression is restored.
 */
594 gistFormTuple(GISTSTATE *giststate, Relation r,
595 Datum attdata[], bool isnull[], bool newValues)
597 GISTENTRY centry[INDEX_MAX_KEYS];
598 Datum compatt[INDEX_MAX_KEYS];
/* compress each column's key; NULL columns get a zero Datum */
602 for (i = 0; i < r->rd_att->natts; i++)
605 compatt[i] = (Datum) 0;
608 gistcentryinit(giststate, i, &centry[i], attdata[i],
609 r, NULL, (OffsetNumber) 0,
612 compatt[i] = centry[i].key;
616 res = index_form_tuple(giststate->tupdesc, compatt, isnull);
619 * The offset number on tuples on internal pages is unused. For historical
620 * reasons, it is set 0xffff.
622 ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
/*
 * Compute the penalty of adding key "add" to existing key "orig" for
 * column attno via the opclass "penalty" support function.  NULL rules:
 * a strict penalty function is only called when both keys are non-null;
 * both-NULL costs nothing, while mixing NULL and non-NULL is priced at
 * +infinity to discourage it.
 */
627 gistpenalty(GISTSTATE *giststate, int attno,
628 GISTENTRY *orig, bool isNullOrig,
629 GISTENTRY *add, bool isNullAdd)
/* call the support function unless it is strict and an input is NULL */
633 if (giststate->penaltyFn[attno].fn_strict == FALSE ||
634 (isNullOrig == FALSE && isNullAdd == FALSE))
636 FunctionCall3Coll(&giststate->penaltyFn[attno],
637 giststate->supportCollation[attno],
638 PointerGetDatum(orig),
639 PointerGetDatum(add),
640 PointerGetDatum(&penalty));
641 /* disallow negative or NaN penalty */
642 if (isnan(penalty) || penalty < 0.0)
645 else if (isNullOrig && isNullAdd)
649 /* try to prevent mixing null and non-null values */
650 penalty = get_float4_infinity();
657 * Initialize a new index page
/*
 * PageInit the buffer's page with a GISTPageOpaqueData special area, then
 * fill in the opaque fields (flags "f", no right link, page-id marker).
 * The GIST_PAGE_ID stamp lets tools distinguish GiST pages from other
 * index types sharing the same special-area size.
 */
660 GISTInitBuffer(Buffer b, uint32 f)
662 GISTPageOpaque opaque;
666 pageSize = BufferGetPageSize(b);
667 page = BufferGetPage(b);
668 PageInit(page, pageSize, sizeof(GISTPageOpaqueData));
670 opaque = GistPageGetOpaque(page);
671 /* page was already zeroed by PageInit, so this is not needed: */
672 /* memset(&(opaque->nsn), 0, sizeof(GistNSN)); */
673 opaque->rightlink = InvalidBlockNumber;
675 opaque->gist_page_id = GIST_PAGE_ID;
679 * Verify that a freshly-read page looks sane.
/*
 * Sanity-check a just-read GiST page: reject all-zero pages (never
 * initialized) and pages whose special-area size does not match
 * GISTPageOpaqueData.  Both failures report ERRCODE_INDEX_CORRUPTED
 * with a REINDEX hint.
 */
682 gistcheckpage(Relation rel, Buffer buf)
684 Page page = BufferGetPage(buf);
687 * ReadBuffer verifies that every newly-read page passes
688 * PageHeaderIsValid, which means it either contains a reasonably sane
689 * page header or is all-zero. We have to defend against the all-zero
694 (errcode(ERRCODE_INDEX_CORRUPTED),
695 errmsg("index \"%s\" contains unexpected zero page at block %u",
696 RelationGetRelationName(rel),
697 BufferGetBlockNumber(buf)),
698 errhint("Please REINDEX it.")));
701 * Additionally check that the special area looks sane.
703 if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
705 (errcode(ERRCODE_INDEX_CORRUPTED),
706 errmsg("index \"%s\" contains corrupted page at block %u",
707 RelationGetRelationName(rel),
708 BufferGetBlockNumber(buf)),
709 errhint("Please REINDEX it.")));
714 * Allocate a new page (either by recycling, or by extending the index file)
716 * The returned buffer is already pinned and exclusive-locked
718 * Caller is responsible for initializing the page by calling GISTInitBuffer
/*
 * Strategy: loop pulling candidate blocks from the free-space map; a
 * candidate is usable if we can conditionally lock it and it is either
 * never-initialized or marked deleted.  If the FSM runs dry, extend the
 * relation instead, taking the relation extension lock unless the
 * relation is backend-local.
 */
721 gistNewBuffer(Relation r)
726 /* First, try to get a page from FSM */
729 BlockNumber blkno = GetFreeIndexPage(r);
731 if (blkno == InvalidBlockNumber)
732 break; /* nothing left in FSM */
734 buffer = ReadBuffer(r, blkno);
737 * We have to guard against the possibility that someone else already
738 * recycled this page; the buffer may be locked if so.
740 if (ConditionalLockBuffer(buffer))
742 Page page = BufferGetPage(buffer);
745 return buffer; /* OK to use, if never initialized */
747 gistcheckpage(r, buffer);
749 if (GistPageIsDeleted(page))
750 return buffer; /* OK to use */
/* page is live: drop the lock and keep scanning the FSM */
752 LockBuffer(buffer, GIST_UNLOCK);
755 /* Can't use it, so release buffer and try again */
756 ReleaseBuffer(buffer);
759 /* Must extend the file */
760 needLock = !RELATION_IS_LOCAL(r);
763 LockRelationForExtension(r, ExclusiveLock);
765 buffer = ReadBuffer(r, P_NEW);
766 LockBuffer(buffer, GIST_EXCLUSIVE);
769 UnlockRelationForExtension(r, ExclusiveLock);
/*
 * amoptions support function: parse and validate GiST reloptions
 * (fillfactor, buffering) into a palloc'd GiSTOptions struct, returned
 * as a bytea Datum.
 */
775 gistoptions(PG_FUNCTION_ARGS)
777 Datum reloptions = PG_GETARG_DATUM(0);
778 bool validate = PG_GETARG_BOOL(1);
779 relopt_value *options;
/* maps option names to GiSTOptions struct fields */
782 static const relopt_parse_elt tab[] = {
783 {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
784 {"buffering", RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)}
787 options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST,
790 /* if none set, we're done */
794 rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions);
796 fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions,
797 validate, tab, lengthof(tab));
801 PG_RETURN_BYTEA_P(rdopts);
806 * Temporary GiST indexes are not WAL-logged, but we need LSNs to detect
807 * concurrent page splits anyway. GetXLogRecPtrForTemp() provides a fake
808 * sequence of LSNs for that purpose. Each call generates an LSN that is
809 * greater than any previous value returned by this function in the same
813 GetXLogRecPtrForTemp(void)
815 static XLogRecPtr counter = 1;