From 21a3cf41284c08307ef9abe3400be5dc53723519 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Wed, 1 Mar 2017 14:43:38 +0530 Subject: [PATCH] hash: Refactor and clean up bucket split code. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit As with commit 30df93f698d016d086e8961aa6c6076b37ea0ef4 and commit b0f18cb77f50a54e997d857d592f6a511617f52c, the goal here is to move all of the related page modifications to a single section of code, in preparation for adding write-ahead logging. Amit Kapila, with slight changes by me. The larger patch series of which this is a part has been reviewed and tested by Álvaro Herrera, Ashutosh Sharma, Mark Kirkwood, Jeff Janes, and Jesper Pedersen. --- src/backend/access/hash/hashpage.c | 160 ++++++++++++----------------- 1 file changed, 67 insertions(+), 93 deletions(-) diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 00f3ea81a7..bb1ce75634 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -40,12 +40,9 @@ static void _hash_splitbucket(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, Buffer obuf, Buffer nbuf, + HTAB *htab, uint32 maxbucket, uint32 highmask, uint32 lowmask); -static void _hash_splitbucket_guts(Relation rel, Buffer metabuf, - Bucket obucket, Bucket nbucket, Buffer obuf, - Buffer nbuf, HTAB *htab, uint32 maxbucket, - uint32 highmask, uint32 lowmask); /* @@ -497,7 +494,9 @@ _hash_expandtable(Relation rel, Buffer metabuf) Buffer buf_nblkno; Buffer buf_oblkno; Page opage; + Page npage; HashPageOpaque oopaque; + HashPageOpaque nopaque; uint32 maxbucket; uint32 highmask; uint32 lowmask; @@ -685,18 +684,18 @@ restart_expand: goto fail; } - /* - * Okay to proceed with split. Update the metapage bucket mapping info. - * - * Since we are scribbling on the metapage data right in the shared - * buffer, any failure in this next little bit leaves us with a big + * Since we are scribbling on the pages in the shared buffers, establish a + * critical section. Any failure in this next code leaves us with a big * problem: the metapage is effectively corrupt but could get written back * to disk. We don't really expect any failure, but just to be sure, * establish a critical section. */ START_CRIT_SECTION(); + /* + * Okay to proceed with split. Update the metapage bucket mapping info. + */ metap->hashm_maxbucket = new_bucket; if (new_bucket > metap->hashm_highmask) @@ -718,8 +717,7 @@ restart_expand: metap->hashm_ovflpoint = spare_ndx; } - /* Done mucking with metapage */ - END_CRIT_SECTION(); + MarkBufferDirty(metabuf); /* * Copy bucket mapping info now; this saves re-accessing the meta page @@ -732,16 +730,51 @@ restart_expand: highmask = metap->hashm_highmask; lowmask = metap->hashm_lowmask; - /* Write out the metapage and drop lock, but keep pin */ - MarkBufferDirty(metabuf); + opage = BufferGetPage(buf_oblkno); + oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); + + /* + * Mark the old bucket to indicate that split is in progress. (At + * operation end, we will clear the split-in-progress flag.) Also, + * for a primary bucket page, hasho_prevblkno stores the number of + * buckets that existed as of the last split, so we must update that + * value here. + */ + oopaque->hasho_flag |= LH_BUCKET_BEING_SPLIT; + oopaque->hasho_prevblkno = maxbucket; + + MarkBufferDirty(buf_oblkno); + + npage = BufferGetPage(buf_nblkno); + + /* + * initialize the new bucket's primary page and mark it to indicate that + * split is in progress. + */ + nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); + nopaque->hasho_prevblkno = maxbucket; + nopaque->hasho_nextblkno = InvalidBlockNumber; + nopaque->hasho_bucket = new_bucket; + nopaque->hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_BEING_POPULATED; + nopaque->hasho_page_id = HASHO_PAGE_ID; + + MarkBufferDirty(buf_nblkno); + + END_CRIT_SECTION(); + + /* drop lock, but keep pin */ LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); /* Relocate records to the new bucket */ _hash_splitbucket(rel, metabuf, old_bucket, new_bucket, - buf_oblkno, buf_nblkno, + buf_oblkno, buf_nblkno, NULL, maxbucket, highmask, lowmask); + /* all done, now release the locks and pins on primary buckets. */ + _hash_relbuf(rel, buf_oblkno); + _hash_relbuf(rel, buf_nblkno); + return; /* Here if decide not to split or fail to acquire old bucket lock */ @@ -803,10 +836,16 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) /* * _hash_splitbucket -- split 'obucket' into 'obucket' and 'nbucket' * + * This routine is used to partition the tuples between old and new bucket and + * is used to finish the incomplete split operations. To finish the previously + * interrupted split operation, the caller needs to fill htab. If htab is set, + * then we skip the movement of tuples that exists in htab, otherwise NULL + * value of htab indicates movement of all the tuples that belong to the new + * bucket. + * * We are splitting a bucket that consists of a base bucket page and zero * or more overflow (bucket chain) pages. We must relocate tuples that - * belong in the new bucket, and compress out any free space in the old - * bucket. + * belong in the new bucket. * * The caller must hold cleanup locks on both buckets to ensure that * no one else is trying to access them (see README). @@ -832,72 +871,10 @@ _hash_splitbucket(Relation rel, Bucket nbucket, Buffer obuf, Buffer nbuf, + HTAB *htab, uint32 maxbucket, uint32 highmask, uint32 lowmask) -{ - Page opage; - Page npage; - HashPageOpaque oopaque; - HashPageOpaque nopaque; - - opage = BufferGetPage(obuf); - oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); - - /* - * Mark the old bucket to indicate that split is in progress. (At - * operation end, we will clear the split-in-progress flag.) Also, - * for a primary bucket page, hasho_prevblkno stores the number of - * buckets that existed as of the last split, so we must update that - * value here. - */ - oopaque->hasho_flag |= LH_BUCKET_BEING_SPLIT; - oopaque->hasho_prevblkno = maxbucket; - - npage = BufferGetPage(nbuf); - - /* - * initialize the new bucket's primary page and mark it to indicate that - * split is in progress. - */ - nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); - nopaque->hasho_prevblkno = maxbucket; - nopaque->hasho_nextblkno = InvalidBlockNumber; - nopaque->hasho_bucket = nbucket; - nopaque->hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_BEING_POPULATED; - nopaque->hasho_page_id = HASHO_PAGE_ID; - - _hash_splitbucket_guts(rel, metabuf, obucket, - nbucket, obuf, nbuf, NULL, - maxbucket, highmask, lowmask); - - /* all done, now release the locks and pins on primary buckets. */ - _hash_relbuf(rel, obuf); - _hash_relbuf(rel, nbuf); -} - -/* - * _hash_splitbucket_guts -- Helper function to perform the split operation - * - * This routine is used to partition the tuples between old and new bucket and - * to finish incomplete split operations. To finish the previously - * interrupted split operation, caller needs to fill htab. If htab is set, then - * we skip the movement of tuples that exists in htab, otherwise NULL value of - * htab indicates movement of all the tuples that belong to new bucket. - * - * Caller needs to lock and unlock the old and new primary buckets. - */ -static void -_hash_splitbucket_guts(Relation rel, - Buffer metabuf, - Bucket obucket, - Bucket nbucket, - Buffer obuf, - Buffer nbuf, - HTAB *htab, - uint32 maxbucket, - uint32 highmask, - uint32 lowmask) { Buffer bucket_obuf; Buffer bucket_nbuf; @@ -987,6 +964,7 @@ _hash_splitbucket_guts(Relation rel, { /* write out nbuf and drop lock, but keep pin */ MarkBufferDirty(nbuf); + /* drop lock, but keep pin */ LockBuffer(nbuf, BUFFER_LOCK_UNLOCK); /* chain to a new overflow page */ nbuf = _hash_addovflpage(rel, metabuf, nbuf, (nbuf == bucket_nbuf) ? true : false); @@ -1025,7 +1003,14 @@ _hash_splitbucket_guts(Relation rel, /* Exit loop if no more overflow pages in old bucket */ if (!BlockNumberIsValid(oblkno)) + { + MarkBufferDirty(nbuf); + if (nbuf == bucket_nbuf) + LockBuffer(nbuf, BUFFER_LOCK_UNLOCK); + else + _hash_relbuf(rel, nbuf); break; + } /* Else, advance to next old page */ obuf = _hash_getbuf(rel, oblkno, HASH_READ, LH_OVERFLOW_PAGE); @@ -1041,17 +1026,6 @@ _hash_splitbucket_guts(Relation rel, * To avoid deadlocks due to locking order of buckets, first lock the old * bucket and then the new bucket. */ - if (nbuf == bucket_nbuf) - { - MarkBufferDirty(bucket_nbuf); - LockBuffer(bucket_nbuf, BUFFER_LOCK_UNLOCK); - } - else - { - MarkBufferDirty(nbuf); - _hash_relbuf(rel, nbuf); - } - LockBuffer(bucket_obuf, BUFFER_LOCK_EXCLUSIVE); opage = BufferGetPage(bucket_obuf); oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); @@ -1192,9 +1166,9 @@ _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, npageopaque = (HashPageOpaque) PageGetSpecialPointer(npage); nbucket = npageopaque->hasho_bucket; - _hash_splitbucket_guts(rel, metabuf, obucket, - nbucket, obuf, bucket_nbuf, tidhtab, - maxbucket, highmask, lowmask); + _hash_splitbucket(rel, metabuf, obucket, + nbucket, obuf, bucket_nbuf, tidhtab, + maxbucket, highmask, lowmask); _hash_relbuf(rel, bucket_nbuf); LockBuffer(obuf, BUFFER_LOCK_UNLOCK); -- 2.40.0