From 3ae5133b1cf478d516666f2003bc68ba0edb84c7 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Sun, 8 Apr 2012 23:04:07 -0400
Subject: [PATCH] Teach SLRU code to avoid replacing I/O-busy pages.

Patch by me; review by Tom Lane and others.
---
 src/backend/access/transam/slru.c | 92 +++++++++++++++++++------------
 1 file changed, 57 insertions(+), 35 deletions(-)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 3049e01e1c..a8e3f19119 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -403,12 +403,6 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
 		/* Acquire per-buffer lock (cannot deadlock, see notes at top) */
 		LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);
 
-		/*
-		 * Temporarily mark page as recently-used to discourage
-		 * SlruSelectLRUPage from selecting it again for someone else.
-		 */
-		SlruRecentlyUsed(shared, slotno);
-
 		/* Release control lock while doing I/O */
 		LWLockRelease(shared->ControlLock);
 
@@ -909,9 +903,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 	{
 		int			slotno;
 		int			cur_count;
-		int			bestslot;
-		int			best_delta;
-		int			best_page_number;
+		int			bestvalidslot = 0;	/* keep compiler quiet */
+		int			best_valid_delta = -1;
+		int			best_valid_page_number = 0;	/* keep compiler quiet */
+		int			bestinvalidslot = 0;	/* keep compiler quiet */
+		int			best_invalid_delta = -1;
+		int			best_invalid_page_number = 0;	/* keep compiler quiet */
 
 		/* See if page already has a buffer assigned */
 		for (slotno = 0; slotno < shared->num_slots; slotno++)
@@ -922,8 +919,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 		}
 
 		/*
-		 * If we find any EMPTY slot, just select that one. Else locate the
-		 * least-recently-used slot to replace.
+		 * If we find any EMPTY slot, just select that one. Else choose a
+		 * victim page to replace. We normally take the least recently used
+		 * valid page, but we will never take the slot containing
+		 * latest_page_number, even if it appears least recently used. We
+		 * will select a slot that is already I/O busy only if there is no
+		 * other choice: a read-busy slot will not be least recently used once
+		 * the read finishes, and waiting for an I/O on a write-busy slot is
+		 * inferior to just picking some other slot. Testing shows the slot
+		 * we pick instead will often be clean, allowing us to begin a read
+		 * at once.
 		 *
 		 * Normally the page_lru_count values will all be different and so
 		 * there will be a well-defined LRU page. But since we allow
@@ -932,9 +937,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 		 * acquire the same lru_count values. In that case we break ties by
 		 * choosing the furthest-back page.
 		 *
-		 * In no case will we select the slot containing latest_page_number
-		 * for replacement, even if it appears least recently used.
-		 *
 		 * Notice that this next line forcibly advances cur_lru_count to a
 		 * value that is certainly beyond any value that will be in the
 		 * page_lru_count array after the loop finishes. This ensures that
@@ -944,9 +946,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 		 * multiple pages with the same lru_count.
 		 */
 		cur_count = (shared->cur_lru_count)++;
-		best_delta = -1;
-		bestslot = 0;			/* no-op, just keeps compiler quiet */
-		best_page_number = 0;	/* ditto */
 		for (slotno = 0; slotno < shared->num_slots; slotno++)
 		{
 			int			this_delta;
@@ -968,34 +967,57 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 				this_delta = 0;
 			}
 			this_page_number = shared->page_number[slotno];
-			if ((this_delta > best_delta ||
-				 (this_delta == best_delta &&
-				  ctl->PagePrecedes(this_page_number, best_page_number))) &&
-				this_page_number != shared->latest_page_number)
+			if (this_page_number == shared->latest_page_number)
+				continue;
+			if (shared->page_status[slotno] == SLRU_PAGE_VALID)
+			{
+				if (this_delta > best_valid_delta ||
+					(this_delta == best_valid_delta &&
+					 ctl->PagePrecedes(this_page_number,
+									   best_valid_page_number)))
+				{
+					bestvalidslot = slotno;
+					best_valid_delta = this_delta;
+					best_valid_page_number = this_page_number;
+				}
+			}
+			else
 			{
-				bestslot = slotno;
-				best_delta = this_delta;
-				best_page_number = this_page_number;
+				if (this_delta > best_invalid_delta ||
+					(this_delta == best_invalid_delta &&
+					 ctl->PagePrecedes(this_page_number,
+									   best_invalid_page_number)))
+				{
+					bestinvalidslot = slotno;
+					best_invalid_delta = this_delta;
+					best_invalid_page_number = this_page_number;
+				}
 			}
 		}
 
+		/*
+		 * If all pages (except possibly the latest one) are I/O busy, we'll
+		 * have to wait for an I/O to complete and then retry. In that unhappy
+		 * case, we choose to wait for the I/O on the least recently used slot,
+		 * on the assumption that it was likely initiated first of all the I/Os
+		 * in progress and may therefore finish first.
+		 */
+		if (best_valid_delta < 0)
+		{
+			SimpleLruWaitIO(ctl, bestinvalidslot);
+			continue;
+		}
+
 		/*
 		 * If the selected page is clean, we're set.
 		 */
-		if (shared->page_status[bestslot] == SLRU_PAGE_VALID &&
-			!shared->page_dirty[bestslot])
-			return bestslot;
+		if (!shared->page_dirty[bestvalidslot])
+			return bestvalidslot;
 
 		/*
-		 * We need to wait for I/O. Normal case is that it's dirty and we
-		 * must initiate a write, but it's possible that the page is already
-		 * write-busy, or in the worst case still read-busy. In those cases
-		 * we wait for the existing I/O to complete.
+		 * Write the page.
 		 */
-		if (shared->page_status[bestslot] == SLRU_PAGE_VALID)
-			SlruInternalWritePage(ctl, bestslot, NULL);
-		else
-			SimpleLruWaitIO(ctl, bestslot);
+		SlruInternalWritePage(ctl, bestvalidslot, NULL);
 
 		/*
 		 * Now loop back and try again. This is the easiest way of dealing
-- 
2.40.0