/*-------------------------------------------------------------------------
 *
 * hashpage.c--
 *        Hash table page management code for the Postgres hash access method
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *        $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.12 1997/09/08 20:54:03 momjian Exp $
 *
 * NOTES
 *        Postgres hash pages look like ordinary relation pages.  The opaque
 *        data at high addresses includes information about the page including
 *        whether a page is an overflow page or a true bucket, the block
 *        numbers of the preceding and following pages, and the overflow
 *        address of the page if it is an overflow page.
 *
 *        The first page in a hash relation, page zero, is special -- it stores
 *        information describing the hash table; it is referred to as the
 *        "meta page."  Pages one and higher store the actual data.
 *
 *-------------------------------------------------------------------------
 */
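
/*
 * For reference, the opaque data described above lives in the special
 * space of each page as a HashPageOpaqueData struct.  The field names
 * below are the ones this file manipulates; the exact layout and types
 * are defined in access/hash.h, so treat this sketch as illustrative:
 *
 *        typedef struct HashPageOpaqueData
 *        {
 *                bits16          hasho_flag;        page type: LH_META_PAGE,
 *                                                   LH_BUCKET_PAGE, LH_OVERFLOW_PAGE
 *                Bucket          hasho_bucket;      bucket this page belongs to
 *                OverflowPageAddress hasho_oaddr;   ovfl address, if an ovfl page
 *                BlockNumber     hasho_nextblkno;   next page in the bucket chain
 *                BlockNumber     hasho_prevblkno;   previous page in the bucket chain
 *        } HashPageOpaqueData;
 */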

#include <postgres.h>

#include <access/hash.h>
#include <storage/bufmgr.h>
#include <miscadmin.h>
#include <utils/memutils.h>
#include <storage/lmgr.h>
#include <access/genam.h>

#ifndef HAVE_MEMMOVE
#include <regex/utils.h>
#else
#include <string.h>
#endif

static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access);
static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access);
static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket);

/*
 *      We use high-concurrency locking on hash indices.  There are two cases in
 *      which we don't do locking.  One is when we're building the index.
 *      Since the creating transaction has not committed, no one can see
 *      the index, and there's no reason to share locks.  The second case
 *      is when we're just starting up the database system.  We use some
 *      special-purpose initialization code in the relation cache manager
 *      (see utils/cache/relcache.c) to allow us to do indexed scans on
 *      the system catalogs before we'd normally be able to.  This happens
 *      before the lock table is fully initialized, so we can't use it.
 *      Strictly speaking, this violates 2pl, but we don't do 2pl on the
 *      system catalogs anyway.
 */


#define USELOCKING              (!BuildingHash && !IsInitProcessingMode())


/*
 *      _hash_metapinit() -- Initialize the metadata page of a hash index,
 *                              the two buckets that we begin with and the initial
 *                              bitmap page.
 */
void
_hash_metapinit(Relation rel)
{
        HashMetaPage metap;
        HashPageOpaque pageopaque;
        Buffer          metabuf;
        Buffer          buf;
        Page            pg;
        int                     nbuckets;
        uint32          nelem;                  /* number of elements */
        uint32          lg2nelem;               /* _hash_log2(nelem) */
        uint32          nblocks;
        uint16          i;

        /* can't be sharing this with anyone, now... */
        if (USELOCKING)
                RelationSetLockForWrite(rel);

        if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0)
        {
                elog(WARN, "Cannot initialize non-empty hash table %s",
                         RelationGetRelationName(rel));
        }

        metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
        pg = BufferGetPage(metabuf);
        metap = (HashMetaPage) pg;
        _hash_pageinit(pg, BufferGetPageSize(metabuf));

        metap->hashm_magic = HASH_MAGIC;
        metap->hashm_version = HASH_VERSION;
        metap->hashm_nkeys = 0;
        metap->hashm_nmaps = 0;
        metap->hashm_ffactor = DEFAULT_FFACTOR;
        metap->hashm_bsize = BufferGetPageSize(metabuf);
        metap->hashm_bshift = _hash_log2(metap->hashm_bsize);
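
        /*
         * Descriptive note on the loop below: starting from the page-size
         * shift, scan downward for the largest power of two that is still
         * smaller than the page's usable area (page size minus the page
         * header and the hash opaque data); that power of two becomes
         * hashm_bmsize.
         */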
        for (i = metap->hashm_bshift; i > 0; --i)
        {
                if ((1 << i) < (metap->hashm_bsize -
                                                (DOUBLEALIGN(sizeof(PageHeaderData)) +
                                                 DOUBLEALIGN(sizeof(HashPageOpaqueData)))))
                {
                        break;
                }
        }
        Assert(i);
        metap->hashm_bmsize = 1 << i;
        metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

        /*
         * Make nelem = 2 rather than 0 so that we end up allocating space for
         * the next greater power of two number of buckets.
         */
        nelem = 2;
        lg2nelem = 1;                           /* _hash_log2(MAX(nelem, 2)) */
        nbuckets = 2;                           /* 1 << lg2nelem */

        memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
        memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

        metap->hashm_spares[lg2nelem] = 2;      /* lg2nelem + 1 */
        metap->hashm_spares[lg2nelem + 1] = 2;  /* lg2nelem + 1 */
        metap->hashm_ovflpoint = 1;             /* lg2nelem */
        metap->hashm_lastfreed = 2;

        metap->hashm_maxbucket = metap->hashm_lowmask = 1;      /* nbuckets - 1 */
        metap->hashm_highmask = 3;      /* (nbuckets << 1) - 1 */
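
        /*
         * How these masks are used (a sketch of the standard linear-hashing
         * bucket computation, as in _hash_call): a key's bucket is
         * (hashvalue & highmask); if that exceeds maxbucket -- i.e. it names
         * a bucket that hasn't been split off yet -- fall back to
         * (hashvalue & lowmask).  With the two initial buckets this starts
         * out as lowmask = 1, highmask = 3.
         */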

        pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
        pageopaque->hasho_oaddr = InvalidOvflAddress;
        pageopaque->hasho_prevblkno = InvalidBlockNumber;
        pageopaque->hasho_nextblkno = InvalidBlockNumber;
        pageopaque->hasho_flag = LH_META_PAGE;
        pageopaque->hasho_bucket = -1;

        /*
         * The first bitmap page is at split point lg2nelem, page offset 1,
         * which works out to page 3.  We couldn't initialize page 3 until
         * we had created the first two buckets above.
         */
        if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0))
                elog(WARN, "Problem with _hash_initbitmap.");

        /* all done */
        _hash_wrtnorelbuf(rel, metabuf);

        /*
         * initialize the first two buckets
         */
        for (i = 0; i <= 1; i++)
        {
                buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
                pg = BufferGetPage(buf);
                _hash_pageinit(pg, BufferGetPageSize(buf));
                pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
                pageopaque->hasho_oaddr = InvalidOvflAddress;
                pageopaque->hasho_prevblkno = InvalidBlockNumber;
                pageopaque->hasho_nextblkno = InvalidBlockNumber;
                pageopaque->hasho_flag = LH_BUCKET_PAGE;
                pageopaque->hasho_bucket = i;
                _hash_wrtbuf(rel, buf);
        }

        _hash_relbuf(rel, metabuf, HASH_WRITE);

        if (USELOCKING)
                RelationUnsetLockForWrite(rel);
}

/*
 *      _hash_getbuf() -- Get a buffer by block number for read or write.
 *
 *              When this routine returns, the appropriate lock is set on the
 *              requested buffer and its reference count is correct.
 *
 *              XXX P_NEW is not used because, unlike the tree structures, we
 *              need the bucket blocks to be at certain block numbers.  We must
 *              depend on the caller to call _hash_pageinit on the block if it
 *              knows that this is a new block.
 */
Buffer
_hash_getbuf(Relation rel, BlockNumber blkno, int access)
{
        Buffer          buf;

        if (blkno == P_NEW)
        {
                elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW");
        }
        switch (access)
        {
                case HASH_WRITE:
                case HASH_READ:
                        _hash_setpagelock(rel, blkno, access);
                        break;
                default:
                        elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %s",
                                 access, RelationGetRelationName(rel));
                        break;
        }
        buf = ReadBuffer(rel, blkno);

        /* ref count and lock type are correct */
        return (buf);
}

/*
 *      _hash_relbuf() -- release a locked buffer.
 */
void
_hash_relbuf(Relation rel, Buffer buf, int access)
{
        BlockNumber blkno;

        blkno = BufferGetBlockNumber(buf);

        switch (access)
        {
                case HASH_WRITE:
                case HASH_READ:
                        _hash_unsetpagelock(rel, blkno, access);
                        break;
                default:
                        elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %s",
                                 access, blkno, RelationGetRelationName(rel));
        }

        ReleaseBuffer(buf);
}

/*
 *      _hash_wrtbuf() -- write a hash page to disk.
 *
 *              This routine releases the lock held on the buffer and our reference
 *              to it.  It is an error to call _hash_wrtbuf() without a write lock
 *              or a reference to the buffer.
 */
void
_hash_wrtbuf(Relation rel, Buffer buf)
{
        BlockNumber blkno;

        blkno = BufferGetBlockNumber(buf);
        WriteBuffer(buf);
        _hash_unsetpagelock(rel, blkno, HASH_WRITE);
}

/*
 *      _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
 *                              our reference or lock.
 *
 *              It is an error to call _hash_wrtnorelbuf() without a write lock
 *              or a reference to the buffer.
 */
void
_hash_wrtnorelbuf(Relation rel, Buffer buf)
{
        WriteNoReleaseBuffer(buf);
}

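/*
 *      _hash_chgbufaccess() -- switch the lock held on a buffer from one
 *                              access mode to another.
 *
 *              Releases the buffer under 'from_access' (writing it out first
 *              when that mode is HASH_WRITE) and re-acquires the same block
 *              under 'to_access'.  The buffer may change across the call, so
 *              the caller passes it by reference and must use the returned
 *              page pointer.
 */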
Page
_hash_chgbufaccess(Relation rel,
                                   Buffer *bufp,
                                   int from_access,
                                   int to_access)
{
        BlockNumber blkno;

        blkno = BufferGetBlockNumber(*bufp);

        switch (from_access)
        {
                case HASH_WRITE:
                        _hash_wrtbuf(rel, *bufp);
                        break;
                case HASH_READ:
                        _hash_relbuf(rel, *bufp, from_access);
                        break;
                default:
                        elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %s",
                                 from_access, blkno, RelationGetRelationName(rel));
                        break;
        }
        *bufp = _hash_getbuf(rel, blkno, to_access);
        return (BufferGetPage(*bufp));
}

/*
 *      _hash_pageinit() -- Initialize a new page.
 */
void
_hash_pageinit(Page page, Size size)
{
        Assert(((PageHeader) page)->pd_lower == 0);
        Assert(((PageHeader) page)->pd_upper == 0);
        Assert(((PageHeader) page)->pd_special == 0);

        /*
         * Cargo-cult programming -- don't really need this to be zero, but
         * creating new pages is an infrequent occurrence and it makes me feel
         * good when I know they're empty.
         */
        memset(page, 0, size);

        PageInit(page, size, sizeof(HashPageOpaqueData));
}

static void
_hash_setpagelock(Relation rel,
                                  BlockNumber blkno,
                                  int access)
{
        ItemPointerData iptr;

        if (USELOCKING)
        {
                ItemPointerSet(&iptr, blkno, 1);

                switch (access)
                {
                        case HASH_WRITE:
                                RelationSetSingleWLockPage(rel, &iptr);
                                break;
                        case HASH_READ:
                                RelationSetSingleRLockPage(rel, &iptr);
                                break;
                        default:
                                elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %s",
                                         access, blkno, RelationGetRelationName(rel));
                                break;
                }
        }
}

static void
_hash_unsetpagelock(Relation rel,
                                        BlockNumber blkno,
                                        int access)
{
        ItemPointerData iptr;

        if (USELOCKING)
        {
                ItemPointerSet(&iptr, blkno, 1);

                switch (access)
                {
                        case HASH_WRITE:
                                RelationUnsetSingleWLockPage(rel, &iptr);
                                break;
                        case HASH_READ:
                                RelationUnsetSingleRLockPage(rel, &iptr);
                                break;
                        default:
                                elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %s",
                                         access, blkno, RelationGetRelationName(rel));
                                break;
                }
        }
}

void
_hash_pagedel(Relation rel, ItemPointer tid)
{
        Buffer          buf;
        Buffer          metabuf;
        Page            page;
        BlockNumber blkno;
        OffsetNumber offno;
        HashMetaPage metap;
        HashPageOpaque opaque;

        blkno = ItemPointerGetBlockNumber(tid);
        offno = ItemPointerGetOffsetNumber(tid);

        buf = _hash_getbuf(rel, blkno, HASH_WRITE);
        page = BufferGetPage(buf);
        _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
        opaque = (HashPageOpaque) PageGetSpecialPointer(page);

        PageIndexTupleDelete(page, offno);
        _hash_wrtnorelbuf(rel, buf);

        if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
        {
                buf = _hash_freeovflpage(rel, buf);
                if (BufferIsValid(buf))
                {
                        _hash_relbuf(rel, buf, HASH_WRITE);
                }
        }
        else
        {
                _hash_relbuf(rel, buf, HASH_WRITE);
        }

        metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
        metap = (HashMetaPage) BufferGetPage(metabuf);
        _hash_checkpage((Page) metap, LH_META_PAGE);
        --metap->hashm_nkeys;           /* we just deleted a tuple, so decrement */
        _hash_wrtbuf(rel, metabuf);
}

void
_hash_expandtable(Relation rel, Buffer metabuf)
{
        HashMetaPage metap;
        Bucket          old_bucket;
        Bucket          new_bucket;
        uint32          spare_ndx;

/*        elog(DEBUG, "_hash_expandtable: expanding..."); */

        metap = (HashMetaPage) BufferGetPage(metabuf);
        _hash_checkpage((Page) metap, LH_META_PAGE);

        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
        new_bucket = ++metap->MAX_BUCKET;
        metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
        old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);

        /*
         * If the split point is increasing (MAX_BUCKET's log base 2
         * increases), we need to copy the current contents of the spare split
         * bucket to the next bucket.
         */
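        /*
         * Worked example (illustrative numbers): when MAX_BUCKET goes from
         * 3 to 4, _hash_log2(4 + 1) = 3 exceeds the old OVFL_POINT of 2, so
         * SPARES[3] is seeded from SPARES[2] and OVFL_POINT advances to 3.
         */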
        spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
        if (spare_ndx > metap->OVFL_POINT)
        {

                metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
                metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
                metap->OVFL_POINT = spare_ndx;
                metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
        }

        if (new_bucket > metap->HIGH_MASK)
        {

                /* Starting a new doubling */
                metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
                metap->LOW_MASK = metap->HIGH_MASK;
                metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
                metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);

        }
        /* Relocate records to the new bucket */
        _hash_splitpage(rel, metabuf, old_bucket, new_bucket);
}


/*
 * _hash_splitpage -- split 'obucket' into 'obucket' and 'nbucket'
 *
 * this routine is actually misnamed -- we are splitting a bucket that
 * consists of a base bucket page and zero or more overflow (bucket
 * chain) pages.
 */
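
/*
 * Why the partition below is exhaustive (a sketch of the standard
 * linear-hashing argument, not spelled out in the original comments):
 * 'obucket' and 'nbucket' differ only in the single high-order bit
 * introduced by the current split point, so a tuple that hashed to
 * 'obucket' under the old masks can only rehash to 'obucket' or
 * 'nbucket' under the new masks, never to a third bucket.
 */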
static void
_hash_splitpage(Relation rel,
                                Buffer metabuf,
                                Bucket obucket,
                                Bucket nbucket)
{
        Bucket          bucket;
        Buffer          obuf;
        Buffer          nbuf;
        Buffer          ovflbuf;
        BlockNumber oblkno;
        BlockNumber nblkno;
        bool            null;
        Datum           datum;
        HashItem        hitem;
        HashPageOpaque oopaque;
        HashPageOpaque nopaque;
        HashMetaPage metap;
        IndexTuple      itup;
        int                     itemsz;
        OffsetNumber ooffnum;
        OffsetNumber noffnum;
        OffsetNumber omaxoffnum;
        Page            opage;
        Page            npage;
        TupleDesc       itupdesc;

/*        elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d",
                 obucket, obucket, nbucket);
*/
        metap = (HashMetaPage) BufferGetPage(metabuf);
        _hash_checkpage((Page) metap, LH_META_PAGE);

        /* get the buffers & pages */
        oblkno = BUCKET_TO_BLKNO(obucket);
        nblkno = BUCKET_TO_BLKNO(nbucket);
        obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
        nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
        opage = BufferGetPage(obuf);
        npage = BufferGetPage(nbuf);

        /* initialize the new bucket */
        _hash_pageinit(npage, BufferGetPageSize(nbuf));
        nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
        nopaque->hasho_prevblkno = InvalidBlockNumber;
        nopaque->hasho_nextblkno = InvalidBlockNumber;
        nopaque->hasho_flag = LH_BUCKET_PAGE;
        nopaque->hasho_oaddr = InvalidOvflAddress;
        nopaque->hasho_bucket = nbucket;
        _hash_wrtnorelbuf(rel, nbuf);

        /*
         * make sure the old bucket isn't empty.  advance 'opage' and friends
         * through the overflow bucket chain until we find a non-empty page.
         *
         * XXX we should only need this once, if we are careful to preserve the
         * invariant that overflow pages are never empty.
         */
        _hash_checkpage(opage, LH_BUCKET_PAGE);
        oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
        if (PageIsEmpty(opage))
        {
                oblkno = oopaque->hasho_nextblkno;
                _hash_relbuf(rel, obuf, HASH_WRITE);
                if (!BlockNumberIsValid(oblkno))
                {

                        /*
                         * the old bucket is completely empty; of course, the new
                         * bucket will be as well, but since it's a base bucket page
                         * we don't care.
                         */
                        _hash_relbuf(rel, nbuf, HASH_WRITE);
                        return;
                }
                obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
                opage = BufferGetPage(obuf);
                _hash_checkpage(opage, LH_OVERFLOW_PAGE);
                if (PageIsEmpty(opage))
                {
                        elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno);
                }
                oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
        }

        /*
         * we are now guaranteed that 'opage' is not empty.  partition the
         * tuples in the old bucket between the old bucket and the new bucket,
         * advancing along their respective overflow bucket chains and adding
         * overflow pages as needed.
         */
        ooffnum = FirstOffsetNumber;
        omaxoffnum = PageGetMaxOffsetNumber(opage);
        for (;;)
        {

                /*
                 * at each iteration through this loop, each of these variables
                 * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
                 */

                /* check if we're at the end of the page */
                if (ooffnum > omaxoffnum)
                {
                        /* at end of page, but check for overflow page */
                        oblkno = oopaque->hasho_nextblkno;
                        if (BlockNumberIsValid(oblkno))
                        {

                                /*
                                 * we ran out of tuples on this particular page, but we
                                 * have more overflow pages; re-init values.
                                 */
                                _hash_wrtbuf(rel, obuf);
                                obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
                                opage = BufferGetPage(obuf);
                                _hash_checkpage(opage, LH_OVERFLOW_PAGE);
                                oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);

                                /* we're guaranteed that an ovfl page has at least 1 tuple */
                                if (PageIsEmpty(opage))
                                {
                                        elog(WARN, "_hash_splitpage: empty ovfl page %d!",
                                                 oblkno);
                                }
                                ooffnum = FirstOffsetNumber;
                                omaxoffnum = PageGetMaxOffsetNumber(opage);
                        }
                        else
                        {

                                /*
                                 * we're at the end of the bucket chain, so now we're
                                 * really done with everything.  before quitting, call
                                 * _hash_squeezebucket to ensure the tuples in the bucket
                                 * (including the overflow pages) are packed as tightly as
                                 * possible.
                                 */
                                _hash_wrtbuf(rel, obuf);
                                _hash_wrtbuf(rel, nbuf);
                                _hash_squeezebucket(rel, metap, obucket);
                                return;
                        }
                }

                /* hash on the tuple */
                hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
                itup = &(hitem->hash_itup);
                itupdesc = RelationGetTupleDescriptor(rel);
                datum = index_getattr(itup, 1, itupdesc, &null);
                bucket = _hash_call(rel, metap, datum);

                if (bucket == nbucket)
                {

                        /*
                         * insert the tuple into the new bucket.  if it doesn't fit on
                         * the current page in the new bucket, we must allocate a new
                         * overflow page and place the tuple on that page instead.
                         */
                        itemsz = IndexTupleDSize(hitem->hash_itup)
                                + (sizeof(HashItemData) - sizeof(IndexTupleData));

                        itemsz = DOUBLEALIGN(itemsz);

                        if (PageGetFreeSpace(npage) < itemsz)
                        {
                                ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf);
                                _hash_wrtbuf(rel, nbuf);
                                nbuf = ovflbuf;
                                npage = BufferGetPage(nbuf);
                                _hash_checkpage(npage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
                        }

                        noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
                        PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED);
                        _hash_wrtnorelbuf(rel, nbuf);

                        /*
                         * now delete the tuple from the old bucket.  after this
                         * section of code, 'ooffnum' will actually point to the
                         * ItemId to which we would point if we had advanced it before
                         * the deletion (PageIndexTupleDelete repacks the ItemId
                         * array).  this also means that 'omaxoffnum' is exactly one
                         * less than it used to be, so we really can just decrement it
                         * instead of calling PageGetMaxOffsetNumber.
                         */
                        PageIndexTupleDelete(opage, ooffnum);
                        _hash_wrtnorelbuf(rel, obuf);
                        omaxoffnum = OffsetNumberPrev(omaxoffnum);

                        /*
                         * tidy up.  if the old page was an overflow page and it is
                         * now empty, we must free it (we want to preserve the
                         * invariant that overflow pages cannot be empty).
                         */
                        if (PageIsEmpty(opage) &&
                                (oopaque->hasho_flag & LH_OVERFLOW_PAGE))
                        {
                                obuf = _hash_freeovflpage(rel, obuf);

                                /* check that we're not through the bucket chain */
                                if (BufferIsInvalid(obuf))
                                {
                                        _hash_wrtbuf(rel, nbuf);
                                        _hash_squeezebucket(rel, metap, obucket);
                                        return;
                                }

                                /*
                                 * re-init. again, we're guaranteed that an ovfl page has
                                 * at least one tuple.
                                 */
                                opage = BufferGetPage(obuf);
                                _hash_checkpage(opage, LH_OVERFLOW_PAGE);
                                oblkno = BufferGetBlockNumber(obuf);
                                oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
                                if (PageIsEmpty(opage))
                                {
                                        elog(WARN, "_hash_splitpage: empty overflow page %d",
                                                 oblkno);
                                }
                                ooffnum = FirstOffsetNumber;
                                omaxoffnum = PageGetMaxOffsetNumber(opage);
                        }
                }
                else
                {

                        /*
                         * the tuple stays on this page.  we didn't move anything, so
                         * we didn't delete anything and therefore we don't have to
                         * change 'omaxoffnum'.
                         *
                         * XXX any hash value from [0, nbucket-1] will map to this
                         * bucket, which doesn't make sense to me.
                         */
                        ooffnum = OffsetNumberNext(ooffnum);
                }
        }
        /* NOTREACHED */
}