+ nblkno = npageopaque->hasho_nextblkno;
+
+ /*
+ * release our write lock without modifying buffer and ensure to
+ * retain the pin on primary bucket.
+ */
+ if (nbuf == bucket_nbuf)
+ LockBuffer(nbuf, BUFFER_LOCK_UNLOCK);
+ else
+ _hash_relbuf(rel, nbuf);
+
+ /* Exit loop if no more overflow pages in new bucket */
+ if (!BlockNumberIsValid(nblkno))
+ break;
+ }
+
+ /*
+ * Conditionally get the cleanup lock on old and new buckets to perform
+ * the split operation. If we don't get the cleanup locks, silently give
+ * up and next insertion on old bucket will try again to complete the
+ * split.
+ */
+ if (!ConditionalLockBufferForCleanup(obuf))
+ {
+ hash_destroy(tidhtab);
+ return;
+ }
+ if (!ConditionalLockBufferForCleanup(bucket_nbuf))
+ {
+ LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
+ hash_destroy(tidhtab);
+ return;
+ }
+
+ npage = BufferGetPage(bucket_nbuf);
+ npageopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
+ nbucket = npageopaque->hasho_bucket;
+
+ _hash_splitbucket(rel, metabuf, obucket,
+ nbucket, obuf, bucket_nbuf, tidhtab,
+ maxbucket, highmask, lowmask);
+
+ _hash_dropbuf(rel, bucket_nbuf);
+ hash_destroy(tidhtab);
+}
+
+/*
+ * log_split_page() -- Log the split operation
+ *
+ * We log the split operation when the new page in new bucket gets full,
+ * so we log the entire page.
+ *
+ * 'buf' must be locked by the caller which is also responsible for unlocking
+ * it.
+ */
+static void
+log_split_page(Relation rel, Buffer buf)
+{
+ if (RelationNeedsWAL(rel))
+ {
+ XLogRecPtr recptr;
+
+ XLogBeginInsert();
+
+ XLogRegisterBuffer(0, buf, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_PAGE);
+
+ PageSetLSN(BufferGetPage(buf), recptr);
+ }
+}
+
+/*
+ * _hash_getcachedmetap() -- Returns cached metapage data.
+ *
+ * If metabuf is not InvalidBuffer, caller must hold a pin, but no lock, on
+ * the metapage. If not set, we'll set it before returning if we have to
+ * refresh the cache, and return with a pin but no lock on it; caller is
+ * responsible for releasing the pin.
+ *
+ * We refresh the cache if it's not initialized yet or force_refresh is true.
+ */
+HashMetaPage
+_hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh)
+{
+ Page page;
+
+ Assert(metabuf);
+ if (force_refresh || rel->rd_amcache == NULL)
+ {
+ char *cache = NULL;
+
+ /*
+ * It's important that we don't set rd_amcache to an invalid value.
+ * Either MemoryContextAlloc or _hash_getbuf could fail, so don't
+ * install a pointer to the newly-allocated storage in the actual
+ * relcache entry until both have succeeeded.
+ */
+ if (rel->rd_amcache == NULL)
+ cache = MemoryContextAlloc(rel->rd_indexcxt,
+ sizeof(HashMetaPageData));
+
+ /* Read the metapage. */
+ if (BufferIsValid(*metabuf))
+ LockBuffer(*metabuf, BUFFER_LOCK_SHARE);
+ else
+ *metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ,
+ LH_META_PAGE);
+ page = BufferGetPage(*metabuf);
+
+ /* Populate the cache. */
+ if (rel->rd_amcache == NULL)
+ rel->rd_amcache = cache;
+ memcpy(rel->rd_amcache, HashPageGetMeta(page),
+ sizeof(HashMetaPageData));
+
+ /* Release metapage lock, but keep the pin. */
+ LockBuffer(*metabuf, BUFFER_LOCK_UNLOCK);
+ }
+
+ return (HashMetaPage) rel->rd_amcache;
+}
+
+/*
+ * _hash_getbucketbuf_from_hashkey() -- Get the bucket's buffer for the given
+ * hashkey.
+ *
+ * Bucket pages do not move or get removed once they are allocated. This give
+ * us an opportunity to use the previously saved metapage contents to reach
+ * the target bucket buffer, instead of reading from the metapage every time.
+ * This saves one buffer access every time we want to reach the target bucket
+ * buffer, which is very helpful savings in bufmgr traffic and contention.
+ *
+ * The access type parameter (HASH_READ or HASH_WRITE) indicates whether the
+ * bucket buffer has to be locked for reading or writing.
+ *
+ * The out parameter cachedmetap is set with metapage contents used for
+ * hashkey to bucket buffer mapping. Some callers need this info to reach the
+ * old bucket in case of bucket split, see _hash_doinsert().
+ */
+Buffer
+_hash_getbucketbuf_from_hashkey(Relation rel, uint32 hashkey, int access,
+ HashMetaPage *cachedmetap)
+{
+ HashMetaPage metap;
+ Buffer buf;
+ Buffer metabuf = InvalidBuffer;
+ Page page;
+ Bucket bucket;
+ BlockNumber blkno;
+ HashPageOpaque opaque;
+
+ /* We read from target bucket buffer, hence locking is must. */
+ Assert(access == HASH_READ || access == HASH_WRITE);
+
+ metap = _hash_getcachedmetap(rel, &metabuf, false);
+ Assert(metap != NULL);
+
+ /*
+ * Loop until we get a lock on the correct target bucket.
+ */
+ for (;;)
+ {
+ /*
+ * Compute the target bucket number, and convert to block number.
+ */
+ bucket = _hash_hashkey2bucket(hashkey,
+ metap->hashm_maxbucket,
+ metap->hashm_highmask,
+ metap->hashm_lowmask);
+
+ blkno = BUCKET_TO_BLKNO(metap, bucket);
+
+ /* Fetch the primary bucket page for the bucket */
+ buf = _hash_getbuf(rel, blkno, access, LH_BUCKET_PAGE);
+ page = BufferGetPage(buf);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(opaque->hasho_bucket == bucket);
+ Assert(opaque->hasho_prevblkno != InvalidBlockNumber);
+
+ /*
+ * If this bucket hasn't been split, we're done.
+ */
+ if (opaque->hasho_prevblkno <= metap->hashm_maxbucket)
+ break;
+
+ /* Drop lock on this buffer, update cached metapage, and retry. */
+ _hash_relbuf(rel, buf);
+ metap = _hash_getcachedmetap(rel, &metabuf, true);
+ Assert(metap != NULL);
+ }
+
+ if (BufferIsValid(metabuf))
+ _hash_dropbuf(rel, metabuf);
+
+ if (cachedmetap)
+ *cachedmetap = metap;
+
+ return buf;