granicus.if.org Git - postgresql/commitdiff
Teach SLRU code to avoid replacing I/O-busy pages.
author: Robert Haas <rhaas@postgresql.org>
Mon, 9 Apr 2012 03:04:07 +0000 (23:04 -0400)
committer: Robert Haas <rhaas@postgresql.org>
Mon, 9 Apr 2012 03:05:55 +0000 (23:05 -0400)
Patch by me; review by Tom Lane and others.

src/backend/access/transam/slru.c

index 3049e01e1cd3063bccdfce26e67402dc6872db3e..a8e3f19119a568858e8107bf53196bffa5cfa2c5 100644 (file)
@@ -403,12 +403,6 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
                /* Acquire per-buffer lock (cannot deadlock, see notes at top) */
                LWLockAcquire(shared->buffer_locks[slotno], LW_EXCLUSIVE);
 
-               /*
-                * Temporarily mark page as recently-used to discourage
-                * SlruSelectLRUPage from selecting it again for someone else.
-                */
-               SlruRecentlyUsed(shared, slotno);
-
                /* Release control lock while doing I/O */
                LWLockRelease(shared->ControlLock);
 
@@ -909,9 +903,12 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
        {
                int                     slotno;
                int                     cur_count;
-               int                     bestslot;
-               int                     best_delta;
-               int                     best_page_number;
+               int                     bestvalidslot = 0;                              /* keep compiler quiet */
+               int                     best_valid_delta = -1;
+               int                     best_valid_page_number = 0;             /* keep compiler quiet */
+               int                     bestinvalidslot = 0;                    /* keep compiler quiet */
+               int                     best_invalid_delta = -1;
+               int                     best_invalid_page_number = 0;   /* keep compiler quiet */
 
                /* See if page already has a buffer assigned */
                for (slotno = 0; slotno < shared->num_slots; slotno++)
@@ -922,8 +919,16 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
                }
 
                /*
-                * If we find any EMPTY slot, just select that one. Else locate the
-                * least-recently-used slot to replace.
+                * If we find any EMPTY slot, just select that one. Else choose a
+                * victim page to replace.  We normally take the least recently used
+                * valid page, but we will never take the slot containing
+                * latest_page_number, even if it appears least recently used.  We
+                * will select a slot that is already I/O busy only if there is no
+                * other choice: a read-busy slot will not be least recently used once
+                * the read finishes, and waiting for an I/O on a write-busy slot is
+                * inferior to just picking some other slot.  Testing shows the slot
+                * we pick instead will often be clean, allowing us to begin a read
+                * at once.
                 *
                 * Normally the page_lru_count values will all be different and so
                 * there will be a well-defined LRU page.  But since we allow
@@ -932,9 +937,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
                 * acquire the same lru_count values.  In that case we break ties by
                 * choosing the furthest-back page.
                 *
-                * In no case will we select the slot containing latest_page_number
-                * for replacement, even if it appears least recently used.
-                *
                 * Notice that this next line forcibly advances cur_lru_count to a
                 * value that is certainly beyond any value that will be in the
                 * page_lru_count array after the loop finishes.  This ensures that
@@ -944,9 +946,6 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
                 * multiple pages with the same lru_count.
                 */
                cur_count = (shared->cur_lru_count)++;
-               best_delta = -1;
-               bestslot = 0;                   /* no-op, just keeps compiler quiet */
-               best_page_number = 0;   /* ditto */
                for (slotno = 0; slotno < shared->num_slots; slotno++)
                {
                        int                     this_delta;
@@ -968,34 +967,57 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
                                this_delta = 0;
                        }
                        this_page_number = shared->page_number[slotno];
-                       if ((this_delta > best_delta ||
-                                (this_delta == best_delta &&
-                                 ctl->PagePrecedes(this_page_number, best_page_number))) &&
-                               this_page_number != shared->latest_page_number)
+                       if (this_page_number == shared->latest_page_number)
+                               continue;
+                       if (shared->page_status[slotno] == SLRU_PAGE_VALID)
+                       {
+                               if (this_delta > best_valid_delta ||
+                                       (this_delta == best_valid_delta &&
+                                        ctl->PagePrecedes(this_page_number,
+                                                                          best_valid_page_number)))
+                               {
+                                       bestvalidslot = slotno;
+                                       best_valid_delta = this_delta;
+                                       best_valid_page_number = this_page_number;
+                               }
+                       }
+                       else
                        {
-                               bestslot = slotno;
-                               best_delta = this_delta;
-                               best_page_number = this_page_number;
+                               if (this_delta > best_invalid_delta ||
+                                       (this_delta == best_invalid_delta &&
+                                        ctl->PagePrecedes(this_page_number,
+                                                                          best_invalid_page_number)))
+                               {
+                                       bestinvalidslot = slotno;
+                                       best_invalid_delta = this_delta;
+                                       best_invalid_page_number = this_page_number;
+                               }
                        }
                }
 
+               /*
+                * If all pages (except possibly the latest one) are I/O busy, we'll
+                * have to wait for an I/O to complete and then retry.  In that unhappy
+                * case, we choose to wait for the I/O on the least recently used slot,
+                * on the assumption that it was likely initiated first of all the I/Os
+                * in progress and may therefore finish first.
+                */
+               if (best_valid_delta < 0)
+               {
+                       SimpleLruWaitIO(ctl, bestinvalidslot);
+                       continue;
+               }
+
                /*
                 * If the selected page is clean, we're set.
                 */
-               if (shared->page_status[bestslot] == SLRU_PAGE_VALID &&
-                       !shared->page_dirty[bestslot])
-                       return bestslot;
+               if (!shared->page_dirty[bestvalidslot])
+                       return bestvalidslot;
 
                /*
-                * We need to wait for I/O.  Normal case is that it's dirty and we
-                * must initiate a write, but it's possible that the page is already
-                * write-busy, or in the worst case still read-busy.  In those cases
-                * we wait for the existing I/O to complete.
+                * Write the page.
                 */
-               if (shared->page_status[bestslot] == SLRU_PAGE_VALID)
-                       SlruInternalWritePage(ctl, bestslot, NULL);
-               else
-                       SimpleLruWaitIO(ctl, bestslot);
+               SlruInternalWritePage(ctl, bestvalidslot, NULL);
 
                /*
                 * Now loop back and try again.  This is the easiest way of dealing