]> granicus.if.org Git - postgresql/commitdiff
Arrange for read-only accesses to SLRU page buffers to take only a shared
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 6 Dec 2005 18:10:06 +0000 (18:10 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 6 Dec 2005 18:10:06 +0000 (18:10 +0000)
lock, not exclusive, if the desired page is already in memory.  This can
be demonstrated to be a significant win on the pg_subtrans cache when there
is a large window of open transactions.  It should be useful for pg_clog
as well.  I didn't try to make GetMultiXactIdMembers() use the code, as
that would have taken some restructuring, and what with the local cache
for multixact contents it probably wouldn't really make a difference.
Per my recent proposal.

src/backend/access/transam/clog.c
src/backend/access/transam/multixact.c
src/backend/access/transam/slru.c
src/backend/access/transam/subtrans.c
src/include/access/slru.h

index 44e1b9b7d3be3da561736a5221875f90959173c1..22e2c0541e1f1a5a4522a5d06b3901d04875598a 100644 (file)
@@ -24,7 +24,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.34 2005/11/05 21:19:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.35 2005/12/06 18:10:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -128,9 +128,9 @@ TransactionIdGetStatus(TransactionId xid)
        char       *byteptr;
        XidStatus       status;
 
-       LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
+       /* lock is acquired by SimpleLruReadPage_ReadOnly */
 
-       slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
+       slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid);
        byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
 
        status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
index 798d9dc61b69b1f5f2d78a4181a0ceb35915c3c8..3e8f86818f3dc76ddbff795ce648d9048c9b5fe9 100644 (file)
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.13 2005/11/22 18:17:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.14 2005/12/06 18:10:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1690,12 +1690,12 @@ TruncateMultiXact(void)
                int                     entryno;
                MultiXactOffset *offptr;
 
-               LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
+               /* lock is acquired by SimpleLruReadPage_ReadOnly */
 
                pageno = MultiXactIdToOffsetPage(oldestMXact);
                entryno = MultiXactIdToOffsetEntry(oldestMXact);
 
-               slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, oldestMXact);
+               slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, oldestMXact);
                offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
                offptr += entryno;
                oldestOffset = *offptr;
index 6997ce1fc520add78c6d46c46b7ecede5b8f9b0d..901a5e57da727dd9f90f9b29429b6d57dcc7f7b2 100644 (file)
  * reading in or writing out a page buffer does not hold the control lock,
  * only the per-buffer lock for the buffer it is working on.
  *
+ * "Holding the control lock" means exclusive lock in all cases except for
+ * SimpleLruReadPage_ReadOnly(); see comments for SlruRecentlyUsed() for
+ * the implications of that.
+ *
  * When initiating I/O on a buffer, we acquire the per-buffer lock exclusively
  * before releasing the control lock.  The per-buffer lock is released after
  * completing the I/O, re-acquiring the control lock, and updating the shared
@@ -37,7 +41,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.31 2005/11/22 18:17:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.32 2005/12/06 18:10:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -91,15 +95,30 @@ typedef struct SlruFlushData
 } SlruFlushData;
 
 /*
- * Macro to mark a buffer slot "most recently used".
+ * Macro to mark a buffer slot "most recently used".  Note multiple evaluation
+ * of arguments!
+ *
+ * The reason for the if-test is that there are often many consecutive
+ * accesses to the same page (particularly the latest page).  By suppressing
+ * useless increments of cur_lru_count, we reduce the probability that old
+ * pages' counts will "wrap around" and make them appear recently used.
+ *
+ * We allow this code to be executed concurrently by multiple processes within
+ * SimpleLruReadPage_ReadOnly().  As long as int reads and writes are atomic,
+ * this should not cause any completely-bogus values to enter the computation.
+ * However, it is possible for either cur_lru_count or individual
+ * page_lru_count entries to be "reset" to lower values than they should have,
+ * in case a process is delayed while it executes this macro.  With care in
+ * SlruSelectLRUPage(), this does little harm, and in any case the absolute
+ * worst possible consequence is a nonoptimal choice of page to evict.  The
+ * gain from allowing concurrent reads of SLRU pages seems worth it.
  */
 #define SlruRecentlyUsed(shared, slotno)       \
        do { \
-               if ((shared)->page_lru_count[slotno] != 0) { \
-                       int             iilru; \
-                       for (iilru = 0; iilru < NUM_SLRU_BUFFERS; iilru++) \
-                               (shared)->page_lru_count[iilru]++; \
-                       (shared)->page_lru_count[slotno] = 0; \
+               int             new_lru_count = (shared)->cur_lru_count; \
+               if (new_lru_count != (shared)->page_lru_count[slotno]) { \
+                       (shared)->cur_lru_count = ++new_lru_count; \
+                       (shared)->page_lru_count[slotno] = new_lru_count; \
                } \
        } while (0)
 
@@ -165,7 +184,7 @@ SimpleLruInit(SlruCtl ctl, const char *name,
                        shared->page_buffer[slotno] = bufptr;
                        shared->page_status[slotno] = SLRU_PAGE_EMPTY;
                        shared->page_dirty[slotno] = false;
-                       shared->page_lru_count[slotno] = 1;
+                       shared->page_lru_count[slotno] = 0;
                        shared->buffer_locks[slotno] = LWLockAssign();
                        bufptr += BLCKSZ;
                }
@@ -351,6 +370,49 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid)
        }
 }
 
+/*
+ * Find a page in a shared buffer, reading it in if necessary.
+ * The page number must correspond to an already-initialized page.
+ * The caller must intend only read-only access to the page.
+ *
+ * The passed-in xid is used only for error reporting, and may be
+ * InvalidTransactionId if no specific xid is associated with the action.
+ *
+ * Return value is the shared-buffer slot number now holding the page.
+ * The buffer's LRU access info is updated.
+ *
+ * Control lock must NOT be held at entry, but will be held at exit.
+ * It is unspecified whether the lock will be shared or exclusive.
+ */
+int
+SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
+{
+       SlruShared      shared = ctl->shared;
+       int                     slotno;
+
+       /* Try to find the page while holding only shared lock */
+       LWLockAcquire(shared->ControlLock, LW_SHARED);
+
+       /* See if page is already in a buffer */
+       for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
+       {
+               if (shared->page_number[slotno] == pageno &&
+                       shared->page_status[slotno] != SLRU_PAGE_EMPTY &&
+                       shared->page_status[slotno] != SLRU_PAGE_READ_IN_PROGRESS)
+               {
+                       /* See comments for SlruRecentlyUsed macro */
+                       SlruRecentlyUsed(shared, slotno);
+                       return slotno;
+               }
+       }
+
+       /* No luck, so switch to normal exclusive lock and do regular read */
+       LWLockRelease(shared->ControlLock);
+       LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
+
+       return SimpleLruReadPage(ctl, pageno, xid);
+}
+
 /*
  * Write a page from a shared buffer, if necessary.
  * Does nothing if the specified slot is not dirty.
@@ -729,8 +791,10 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
        for (;;)
        {
                int                     slotno;
-               int                     bestslot = 0;
-               unsigned int bestcount = 0;
+               int                     cur_count;
+               int                     bestslot;
+               int                     best_delta;
+               int                     best_page_number;
 
                /* See if page already has a buffer assigned */
                for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
@@ -742,17 +806,59 @@ SlruSelectLRUPage(SlruCtl ctl, int pageno)
 
                /*
                 * If we find any EMPTY slot, just select that one. Else locate the
-                * least-recently-used slot that isn't the latest page.
+                * least-recently-used slot to replace.
+                *
+                * Normally the page_lru_count values will all be different and so
+                * there will be a well-defined LRU page.  But since we allow
+                * concurrent execution of SlruRecentlyUsed() within
+                * SimpleLruReadPage_ReadOnly(), it is possible that multiple pages
+                * acquire the same lru_count values.  In that case we break ties by
+                * choosing the furthest-back page.
+                *
+                * In no case will we select the slot containing latest_page_number
+                * for replacement, even if it appears least recently used.
+                *
+                * Notice that this next line forcibly advances cur_lru_count to a
+                * value that is certainly beyond any value that will be in the
+                * page_lru_count array after the loop finishes.  This ensures that
+                * the next execution of SlruRecentlyUsed will mark the page newly
+                * used, even if it's for a page that has the current counter value.
+                * That gets us back on the path to having good data when there are
+                * multiple pages with the same lru_count.
                 */
+               cur_count = (shared->cur_lru_count)++;
+               best_delta = -1;
+               bestslot = 0;                   /* no-op, just keeps compiler quiet */
+               best_page_number = 0;   /* ditto */
                for (slotno = 0; slotno < NUM_SLRU_BUFFERS; slotno++)
                {
+                       int                     this_delta;
+                       int                     this_page_number;
+
                        if (shared->page_status[slotno] == SLRU_PAGE_EMPTY)
                                return slotno;
-                       if (shared->page_lru_count[slotno] > bestcount &&
-                               shared->page_number[slotno] != shared->latest_page_number)
+                       this_delta = cur_count - shared->page_lru_count[slotno];
+                       if (this_delta < 0)
+                       {
+                               /*
+                                * Clean up in case shared updates have caused cur_count
+                                * increments to get "lost".  We back off the page counts,
+                                * rather than trying to increase cur_count, to avoid any
+                                * question of infinite loops or failure in the presence of
+                                * wrapped-around counts.
+                                */
+                               shared->page_lru_count[slotno] = cur_count;
+                               this_delta = 0;
+                       }
+                       this_page_number = shared->page_number[slotno];
+                       if ((this_delta > best_delta ||
+                                (this_delta == best_delta &&
+                                 ctl->PagePrecedes(this_page_number, best_page_number))) &&
+                               this_page_number != shared->latest_page_number)
                        {
                                bestslot = slotno;
-                               bestcount = shared->page_lru_count[slotno];
+                               best_delta = this_delta;
+                               best_page_number = this_page_number;
                        }
                }
 
index e81ec6f62d5b2a41d8ce6e4bf48b1f88e08fc38e..bb32550cfc4101a8e569ac2b1270d0f005626b5b 100644 (file)
@@ -22,7 +22,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.13 2005/11/22 18:17:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.14 2005/12/06 18:10:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -110,9 +110,9 @@ SubTransGetParent(TransactionId xid)
        if (!TransactionIdIsNormal(xid))
                return InvalidTransactionId;
 
-       LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
+       /* lock is acquired by SimpleLruReadPage_ReadOnly */
 
-       slotno = SimpleLruReadPage(SubTransCtl, pageno, xid);
+       slotno = SimpleLruReadPage_ReadOnly(SubTransCtl, pageno, xid);
        ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
        ptr += entryno;
 
index def801e34a587a7dcb512dcc5774bc02d5d9f3db..62acb43c4e913e1ed82dd23533e213b427890f17 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/slru.h,v 1.15 2005/11/05 21:19:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/slru.h,v 1.16 2005/12/06 18:10:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,17 +46,26 @@ typedef struct SlruSharedData
 
        /*
         * Info for each buffer slot.  Page number is undefined when status is
-        * EMPTY.  lru_count is essentially the number of page switches since last
-        * use of this page; the page with highest lru_count is the best candidate
-        * to replace.
+        * EMPTY.
         */
        char       *page_buffer[NUM_SLRU_BUFFERS];
        SlruPageStatus page_status[NUM_SLRU_BUFFERS];
        bool            page_dirty[NUM_SLRU_BUFFERS];
        int                     page_number[NUM_SLRU_BUFFERS];
-       unsigned int page_lru_count[NUM_SLRU_BUFFERS];
+       int                     page_lru_count[NUM_SLRU_BUFFERS];
        LWLockId        buffer_locks[NUM_SLRU_BUFFERS];
 
+       /*----------
+        * We mark a page "most recently used" by setting
+        *              page_lru_count[slotno] = ++cur_lru_count;
+        * The oldest page is therefore the one with the highest value of
+        *              cur_lru_count - page_lru_count[slotno]
+        * The counts will eventually wrap around, but this calculation still
+        * works as long as no page's age exceeds INT_MAX counts.
+        *----------
+        */
+       int                     cur_lru_count;
+
        /*
         * latest_page_number is the page number of the current end of the log;
         * this is not critical data, since we use it only to avoid swapping out
@@ -106,6 +115,8 @@ extern void SimpleLruInit(SlruCtl ctl, const char *name,
                          LWLockId ctllock, const char *subdir);
 extern int     SimpleLruZeroPage(SlruCtl ctl, int pageno);
 extern int     SimpleLruReadPage(SlruCtl ctl, int pageno, TransactionId xid);
+extern int     SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno,
+                                                                          TransactionId xid);
 extern void SimpleLruWritePage(SlruCtl ctl, int slotno, SlruFlush fdata);
 extern void SimpleLruFlush(SlruCtl ctl, bool checkpoint);
 extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage);