1 /*-------------------------------------------------------------------------
4 * Item insertion in hash tables for Postgres.
6 * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/hash/hashinsert.c
13 *-------------------------------------------------------------------------
18 #include "access/hash.h"
19 #include "utils/rel.h"
23 * _hash_doinsert() -- Handle insertion of a single index tuple.
25 * This routine is called by the public interface routines, hashbuild
26 * and hashinsert. By here, itup is completely filled in.
29 _hash_doinsert(Relation rel, IndexTuple itup)
/*
 * NOTE(review): this excerpt is elided.  The return type, braces, and
 * several local declarations (hashkey, itemsz, metabuf, metap, buf, page,
 * blkno, bucket, do_expand) plus parts of the retry-loop control flow are
 * not visible here -- confirm against the full source before editing.
 */
35 BlockNumber oldblkno = InvalidBlockNumber;
38 HashPageOpaque pageopaque;
45 * Get the hash key for the item (it's stored in the index tuple itself).
47 hashkey = _hash_get_indextuple_hashkey(itup);
49 /* compute item size too */
50 itemsz = IndexTupleDSize(*itup);
51 itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
52 * need to be consistent */
54 /* Read the metapage */
55 metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
56 metap = HashPageGetMeta(BufferGetPage(metabuf));
59 * Check whether the item can fit on a hash page at all. (Eventually, we
60 * ought to try to apply TOAST methods if not.) Note that at this point,
61 * itemsz doesn't include the ItemId.
63 * XXX this is useless code if we are only storing hash keys.
/* Oversized tuples are rejected outright rather than TOASTed (see XXX above). */
65 if (itemsz > HashMaxItemSize((Page) metap))
67 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
68 errmsg("index row size %zu exceeds hash maximum %zu",
69 itemsz, HashMaxItemSize((Page) metap)),
70 errhint("Values larger than a buffer page cannot be indexed.")));
73 * Loop until we get a lock on the correct target bucket.
/* NOTE(review): the enclosing for(;;) and its exit (break) are elided here. */
78 * Compute the target bucket number, and convert to block number.
80 bucket = _hash_hashkey2bucket(hashkey,
81 metap->hashm_maxbucket,
82 metap->hashm_highmask,
83 metap->hashm_lowmask);
85 blkno = BUCKET_TO_BLKNO(metap, bucket);
87 /* Release metapage lock, but keep pin. */
88 _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);
91 * If the previous iteration of this loop locked what is still the
92 * correct target bucket, we are done. Otherwise, drop any old lock
93 * and lock what now appears to be the correct bucket.
/* NOTE(review): the branch structure around these three lines is elided;
 * as written in the full source, equality breaks out of the loop and the
 * droplock applies only when a stale lock is held. */
97 if (oldblkno == blkno)
99 _hash_droplock(rel, oldblkno, HASH_SHARE);
101 _hash_getlock(rel, blkno, HASH_SHARE);
104 * Reacquire metapage lock and check that no bucket split has taken
105 * place while we were awaiting the bucket lock.
107 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_READ);
112 /* Fetch the primary bucket page for the bucket */
113 buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BUCKET_PAGE);
114 page = BufferGetPage(buf);
115 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
116 Assert(pageopaque->hasho_bucket == bucket);
118 /* Do the insertion */
/* Walk the bucket's overflow chain until a page with room is found. */
119 while (PageGetFreeSpace(page) < itemsz)
122 * no space on this page; check for an overflow page
124 BlockNumber nextblkno = pageopaque->hasho_nextblkno;
126 if (BlockNumberIsValid(nextblkno))
129 * ovfl page exists; go get it. if it doesn't have room, we'll
130 * find out next pass through the loop test above.
132 _hash_relbuf(rel, buf);
133 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
134 page = BufferGetPage(buf);
139 * we're at the end of the bucket chain and we haven't found a
140 * page with enough room. allocate a new overflow page.
143 /* release our write lock without modifying buffer */
144 _hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);
146 /* chain to a new overflow page */
/* presumably _hash_addovflpage returns the new page write-locked -- the
 * Assert below relies on it having room; verify in hashovfl.c */
147 buf = _hash_addovflpage(rel, metabuf, buf);
148 page = BufferGetPage(buf);
150 /* should fit now, given test above */
151 Assert(PageGetFreeSpace(page) >= itemsz);
153 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
154 Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE);
155 Assert(pageopaque->hasho_bucket == bucket);
158 /* found page with enough space, so add the item here */
159 (void) _hash_pgaddtup(rel, buf, itemsz, itup);
161 /* write and release the modified page */
162 _hash_wrtbuf(rel, buf);
164 /* We can drop the bucket lock now */
165 _hash_droplock(rel, blkno, HASH_SHARE);
168 * Write-lock the metapage so we can increment the tuple count. After
169 * incrementing it, check to see if it's time for a split.
171 _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
173 metap->hashm_ntuples += 1;
175 /* Make sure this stays in sync with _hash_expandtable() */
176 do_expand = metap->hashm_ntuples >
177 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
179 /* Write out the metapage and drop lock, but keep pin */
180 _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
182 /* Attempt to split if a split is needed */
/* NOTE(review): the `if (do_expand)` guard is elided in this excerpt. */
184 _hash_expandtable(rel, metabuf);
186 /* Finally drop our pin on the metapage */
187 _hash_dropbuf(rel, metabuf);
191 * _hash_pgaddtup() -- add a tuple to a particular page in the index.
193 * This routine adds the tuple to the page as requested; it does not write out
194 the page. It is an error to call pgaddtup() without pin and write lock on the buffer.
197 * Returns the offset number at which the tuple was inserted. This function
198 * is responsible for preserving the condition that tuples in a hash index
199 * page are sorted by hashkey value.
202 _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
/*
 * NOTE(review): elided excerpt -- the OffsetNumber return type, braces,
 * the declarations of `page` and `hashkey`, and the trailing
 * `return itup_off;` are not visible here.
 */
204 OffsetNumber itup_off;
/* Caller must hold pin and write lock; checkpage verifies page sanity. */
208 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
209 page = BufferGetPage(buf);
211 /* Find where to insert the tuple (preserving page's hashkey ordering) */
212 hashkey = _hash_get_indextuple_hashkey(itup);
213 itup_off = _hash_binsearch(page, hashkey);
/* PageAddItem returns InvalidOffsetNumber on failure; since the caller
 * already verified free space, failure here indicates corruption. */
215 if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
216 == InvalidOffsetNumber)
217 elog(ERROR, "failed to add index item to \"%s\"",
218 RelationGetRelationName(rel));