1 /*-------------------------------------------------------------------------
4 * Item insertion in hash tables for Postgres.
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/access/hash/hashinsert.c
13 *-------------------------------------------------------------------------
18 #include "access/hash.h"
19 #include "utils/rel.h"
23 * _hash_doinsert() -- Handle insertion of a single index tuple.
25 * This routine is called by the public interface routines, hashbuild
26 * and hashinsert. By here, itup is completely filled in.
/*
 * _hash_doinsert -- insert one fully-formed index tuple into the hash index.
 *
 * Locking protocol (visible here): the metapage is read with HASH_NOLOCK and
 * only pinned until the final count update; the target bucket's primary page
 * is taken with a write lock and its pin is held for the whole insertion.
 *
 * NOTE(review): this listing is elided (original line numbers are not
 * contiguous); comments below describe only the visible code.
 */
29 _hash_doinsert(Relation rel, IndexTuple itup)
31 Buffer buf = InvalidBuffer;
35 HashMetaPage usedmetap = NULL;
38 HashPageOpaque pageopaque;
45 * Get the hash key for the item (it's stored in the index tuple itself).
47 hashkey = _hash_get_indextuple_hashkey(itup);
49 /* compute item size too */
50 itemsz = IndexTupleDSize(*itup);
51 itemsz = MAXALIGN(itemsz); /* be safe, PageAddItem will do this but we
52 * need to be consistent */
57 * Read the metapage. We don't lock it yet; HashMaxItemSize() will
58 * examine pd_pagesize_version, but that can't change so we can examine
61 metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
62 metapage = BufferGetPage(metabuf);
65 * Check whether the item can fit on a hash page at all. (Eventually, we
66 * ought to try to apply TOAST methods if not.) Note that at this point,
67 * itemsz doesn't include the ItemId.
69 * XXX this is useless code if we are only storing hash keys.
/* Oversized tuples are a hard error, not a retryable condition. */
71 if (itemsz > HashMaxItemSize(metapage))
73 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
74 errmsg("index row size %zu exceeds hash maximum %zu",
75 itemsz, HashMaxItemSize(metapage)),
76 errhint("Values larger than a buffer page cannot be indexed.")));
78 /* Lock the primary bucket page for the target bucket. */
/* This also fills in usedmetap (a cached copy of the metapage contents). */
79 buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_WRITE,
81 Assert(usedmetap != NULL);
83 /* remember the primary bucket buffer to release the pin on it at end. */
86 page = BufferGetPage(buf);
87 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
88 bucket = pageopaque->hasho_bucket;
91 * If this bucket is in the process of being split, try to finish the
92 * split before inserting, because that might create room for the
93 * insertion to proceed without allocating an additional overflow page.
94 * It's only interesting to finish the split if we're trying to insert
95 * into the bucket from which we're removing tuples (the "old" bucket),
96 * not if we're trying to insert into the bucket into which tuples are
97 * being moved (the "new" bucket).
/*
 * IsBufferCleanupOK guards against finishing the split while other
 * backends hold pins on the bucket page.
 */
99 if (H_BUCKET_BEING_SPLIT(pageopaque) && IsBufferCleanupOK(buf))
101 /* release the lock on bucket buffer, before completing the split. */
102 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
104 _hash_finish_split(rel, metabuf, buf, bucket,
105 usedmetap->hashm_maxbucket,
106 usedmetap->hashm_highmask,
107 usedmetap->hashm_lowmask);
109 /* release the pin on old and meta buffer. retry for insert. */
/* NOTE(review): the retry jump back to the top is on an elided line. */
110 _hash_dropbuf(rel, buf);
111 _hash_dropbuf(rel, metabuf);
115 /* Do the insertion */
/*
 * Walk the bucket's overflow-page chain, write-locking one page at a
 * time, until a page with room for itemsz is found (or one is created).
 */
116 while (PageGetFreeSpace(page) < itemsz)
119 * no space on this page; check for an overflow page
121 BlockNumber nextblkno = pageopaque->hasho_nextblkno;
123 if (BlockNumberIsValid(nextblkno))
126 * ovfl page exists; go get it. if it doesn't have room, we'll
127 * find out next pass through the loop test above. we always
128 * release both the lock and pin if this is an overflow page, but
129 * only the lock if this is the primary bucket page, since the pin
130 * on the primary bucket must be retained throughout the scan.
132 if (buf != bucket_buf)
133 _hash_relbuf(rel, buf);
135 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
136 buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
137 page = BufferGetPage(buf);
142 * we're at the end of the bucket chain and we haven't found a
143 * page with enough room. allocate a new overflow page.
146 /* release our write lock without modifying buffer */
147 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
149 /* chain to a new overflow page */
/* 4th arg tells _hash_addovflpage whether 'buf' is the primary page. */
150 buf = _hash_addovflpage(rel, metabuf, buf, (buf == bucket_buf) ? true : false);
151 page = BufferGetPage(buf);
153 /* should fit now, given test above */
154 Assert(PageGetFreeSpace(page) >= itemsz);
156 pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
157 Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE);
158 Assert(pageopaque->hasho_bucket == bucket);
161 /* found page with enough space, so add the item here */
162 (void) _hash_pgaddtup(rel, buf, itemsz, itup);
165 * dirty and release the modified page. if the page we modified was an
166 * overflow page, we also need to separately drop the pin we retained on
167 * the primary bucket page.
169 MarkBufferDirty(buf);
170 _hash_relbuf(rel, buf);
171 if (buf != bucket_buf)
172 _hash_dropbuf(rel, bucket_buf);
175 * Write-lock the metapage so we can increment the tuple count. After
176 * incrementing it, check to see if it's time for a split.
178 LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
180 metap = HashPageGetMeta(metapage);
181 metap->hashm_ntuples += 1;
183 /* Make sure this stays in sync with _hash_expandtable() */
184 do_expand = metap->hashm_ntuples >
185 (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
187 /* Write out the metapage and drop lock, but keep pin */
188 MarkBufferDirty(metabuf);
189 LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
191 /* Attempt to split if a split is needed */
/* NOTE(review): the 'if (do_expand)' guard is presumably on an elided line. */
193 _hash_expandtable(rel, metabuf);
195 /* Finally drop our pin on the metapage */
196 _hash_dropbuf(rel, metabuf);
200 * _hash_pgaddtup() -- add a tuple to a particular page in the index.
202 * This routine adds the tuple to the page as requested; it does not write out
203 * the page. It is an error to call pgaddtup() without pin and write lock on
206 * Returns the offset number at which the tuple was inserted. This function
207 * is responsible for preserving the condition that tuples in a hash index
208 * page are sorted by hashkey value.
/*
 * _hash_pgaddtup -- place 'itup' on the given page at its hashkey-ordered
 * position (found by binary search), keeping the page sorted.
 *
 * Caller must already hold pin and write lock on 'buf'; the page is not
 * marked dirty or written here.  NOTE(review): listing is elided; the
 * 'return itup_off;' and local declarations are on lines not shown.
 */
211 _hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
213 OffsetNumber itup_off;
/* Assert-check that 'buf' really is a bucket or overflow page of 'rel'. */
217 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
218 page = BufferGetPage(buf);
220 /* Find where to insert the tuple (preserving page's hashkey ordering) */
221 hashkey = _hash_get_indextuple_hashkey(itup);
222 itup_off = _hash_binsearch(page, hashkey);
/* Caller guaranteed free space, so failure here indicates corruption. */
224 if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
225 == InvalidOffsetNumber)
226 elog(ERROR, "failed to add index item to \"%s\"",
227 RelationGetRelationName(rel));
233 * _hash_pgaddmultitup() -- add a tuple vector to a particular page in the
236 * This routine has same requirements for locking and tuple ordering as
239 * Returns the offset number array at which the tuples were inserted.
/*
 * _hash_pgaddmultitup -- add 'nitups' tuples to the page, each at its own
 * hashkey-ordered position, recording each insertion offset in
 * itup_offsets[i].
 *
 * Same locking requirements as _hash_pgaddtup: caller holds pin and write
 * lock on 'buf' and is responsible for dirtying/writing the page.
 * NOTE(review): listing is elided; loop braces and some declarations are
 * on lines not shown.
 */
242 _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
243 OffsetNumber *itup_offsets, uint16 nitups)
245 OffsetNumber itup_off;
250 _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
251 page = BufferGetPage(buf);
253 for (i = 0; i < nitups; i++)
/* MAXALIGN each tuple's size, matching what PageAddItem will consume. */
257 itemsize = IndexTupleDSize(*itups[i]);
258 itemsize = MAXALIGN(itemsize);
260 /* Find where to insert the tuple (preserving page's hashkey ordering) */
261 hashkey = _hash_get_indextuple_hashkey(itups[i]);
262 itup_off = _hash_binsearch(page, hashkey);
/* Report the chosen offset back to the caller before inserting. */
264 itup_offsets[i] = itup_off;
/* Caller must have ensured enough free space for all nitups tuples. */
266 if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
267 == InvalidOffsetNumber)
268 elog(ERROR, "failed to add index item to \"%s\"",
269 RelationGetRelationName(rel));