]> granicus.if.org Git - postgresql/commitdiff
Wake up WALWriter as needed for asynchronous commit performance.
author Simon Riggs <simon@2ndQuadrant.com>
Sun, 13 Nov 2011 09:00:57 +0000 (09:00 +0000)
committer Simon Riggs <simon@2ndQuadrant.com>
Sun, 13 Nov 2011 09:00:57 +0000 (09:00 +0000)
Previously we waited for wal_writer_delay before flushing WAL. Now
we also wake WALWriter as soon as a WAL buffer page has filled.
Robert Haas observed a significant improvement in the performance of
asynchronous commits, attributed to the ability to set hint bits on tuples
earlier, which reduces contention caused by clog lookups.

src/backend/access/transam/xlog.c
src/backend/postmaster/walwriter.c
src/include/access/xlog.h

index 0d494e2e3bfdef46dad20a5da1b5295e4a079a07..20c04240b70513de0694ab25d57844ca33a92167 100644 (file)
@@ -432,6 +432,11 @@ typedef struct XLogCtlData
         */
        Latch           recoveryWakeupLatch;
 
+       /*
+        * WALWriterLatch is used to wake up the WALWriter to write some WAL.
+        */
+       Latch           WALWriterLatch;
+
        /*
         * During recovery, we keep a copy of the latest checkpoint record here.
         * Used by the background writer when it wants to create a restartpoint.
@@ -1916,19 +1921,35 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
 }
 
 /*
- * Record the LSN for an asynchronous transaction commit/abort.
+ * Record the LSN for an asynchronous transaction commit/abort
+ * and nudge the WALWriter if there is a complete page to write.
  * (This should not be called for synchronous commits.)
  */
 void
 XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
 {
+       XLogRecPtr      WriteRqstPtr = asyncXactLSN;
+
        /* use volatile pointer to prevent code rearrangement */
        volatile XLogCtlData *xlogctl = XLogCtl;
 
        SpinLockAcquire(&xlogctl->info_lck);
+       LogwrtResult = xlogctl->LogwrtResult;
        if (XLByteLT(xlogctl->asyncXactLSN, asyncXactLSN))
                xlogctl->asyncXactLSN = asyncXactLSN;
        SpinLockRelease(&xlogctl->info_lck);
+
+       /* back off to last completed page boundary */
+       WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
+
+       /* if we have already flushed that far, we're done */
+       if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+               return;
+
+       /*
+        * Nudge the WALWriter if we have a full page of WAL to write.
+        */
+       SetLatch(&XLogCtl->WALWriterLatch);
 }
 
 /*
@@ -5072,6 +5093,7 @@ XLOGShmemInit(void)
        XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
        SpinLockInit(&XLogCtl->info_lck);
        InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
+       InitSharedLatch(&XLogCtl->WALWriterLatch);
 
        /*
         * If we are not in bootstrap mode, pg_control should already exist. Read
@@ -10013,3 +10035,12 @@ WakeupRecovery(void)
 {
        SetLatch(&XLogCtl->recoveryWakeupLatch);
 }
+
+/*
+ * Return a pointer to the WALWriterLatch held in XLogCtl
+ */
+Latch *
+WALWriterLatch(void)
+{
+       return &XLogCtl->WALWriterLatch;
+}
index 23c4aacd7da9c5bc6c5a55eb61cdd43ce513de41..157728e20e7b3eb80a71bb07d701ff99f5d71832 100644 (file)
@@ -11,7 +11,8 @@
  *
  * Note that as with the bgwriter for shared buffers, regular backends are
  * still empowered to issue WAL writes and fsyncs when the walwriter doesn't
- * keep up.
+ * keep up. This means that the WALWriter is not an essential process and
+ * can shut down quickly when requested.
  *
  * Because the walwriter's cycle is directly linked to the maximum delay
  * before async-commit transactions are guaranteed committed, it's probably
@@ -76,7 +77,6 @@ static void wal_quickdie(SIGNAL_ARGS);
 static void WalSigHupHandler(SIGNAL_ARGS);
 static void WalShutdownHandler(SIGNAL_ARGS);
 
-
 /*
  * Main entry point for walwriter process
  *
@@ -89,6 +89,8 @@ WalWriterMain(void)
        sigjmp_buf      local_sigjmp_buf;
        MemoryContext walwriter_context;
 
+       InitLatch(WALWriterLatch()); /* initialize latch used in main loop */
+
        /*
         * If possible, make this process a group leader, so that the postmaster
         * can signal any child processes too.  (walwriter probably never has any
@@ -220,7 +222,7 @@ WalWriterMain(void)
         */
        for (;;)
        {
-               long            udelay;
+               ResetLatch(WALWriterLatch());
 
                /*
                 * Emergency bailout if postmaster has died.  This is to avoid the
@@ -248,20 +250,9 @@ WalWriterMain(void)
                 */
                XLogBackgroundFlush();
 
-               /*
-                * Delay until time to do something more, but fall out of delay
-                * reasonably quickly if signaled.
-                */
-               udelay = WalWriterDelay * 1000L;
-               while (udelay > 999999L)
-               {
-                       if (got_SIGHUP || shutdown_requested)
-                               break;
-                       pg_usleep(1000000L);
-                       udelay -= 1000000L;
-               }
-               if (!(got_SIGHUP || shutdown_requested))
-                       pg_usleep(udelay);
+               (void) WaitLatch(WALWriterLatch(),
+                                                          WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                                          WalWriterDelay /* ms */);
        }
 }
 
@@ -308,6 +299,7 @@ static void
 WalSigHupHandler(SIGNAL_ARGS)
 {
        got_SIGHUP = true;
+       SetLatch(WALWriterLatch());
 }
 
 /* SIGTERM: set flag to exit normally */
@@ -315,4 +307,5 @@ static void
 WalShutdownHandler(SIGNAL_ARGS)
 {
        shutdown_requested = true;
+       SetLatch(WALWriterLatch());
 }
index 83106b80e4ec4b11b62a980673a73588b0b763dd..6344a850dcd20a90c418b311177d7a1cf9491d6e 100644 (file)
@@ -16,6 +16,7 @@
 #include "datatype/timestamp.h"
 #include "lib/stringinfo.h"
 #include "storage/buf.h"
+#include "storage/latch.h"
 #include "utils/pg_crc.h"
 
 /*
@@ -319,6 +320,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
 
 extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
+extern Latch *WALWriterLatch(void);
 
 /*
  * Starting/stopping a base backup