granicus.if.org Git - postgresql/commitdiff
Fix an issue in recent walwriter hibernation patch.
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 9 May 2012 03:05:58 +0000 (23:05 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 9 May 2012 03:06:40 +0000 (23:06 -0400)
Users of asynchronous-commit mode expect there to be a guaranteed maximum
delay before an async commit's WAL records get flushed to disk.  The
original version of the walwriter hibernation patch broke that.  Add an
extra shared-memory flag to allow async commits to kick the walwriter out
of hibernation mode, without adding any noticeable overhead in cases where
no action is needed.

src/backend/access/transam/xlog.c
src/backend/postmaster/walwriter.c
src/include/access/xlog.h

index 6d3a4cd3dfe2494ccb3bff704fe7838cc236599d..d3650bdf051e7d34a6d194c500ef4b01be64baf4 100644 (file)
@@ -426,6 +426,13 @@ typedef struct XLogCtlData
         */
        bool            SharedHotStandbyActive;
 
+       /*
+        * WalWriterSleeping indicates whether the WAL writer is currently in
+        * low-power mode (and hence should be nudged if an async commit occurs).
+        * Protected by info_lck.
+        */
+       bool            WalWriterSleeping;
+
        /*
         * recoveryWakeupLatch is used to wake up the startup process to continue
         * WAL replay, if it is waiting for WAL to arrive or failover trigger file
@@ -1903,32 +1910,44 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
 
 /*
  * Record the LSN for an asynchronous transaction commit/abort
- * and nudge the WALWriter if there is a complete page to write.
+ * and nudge the WALWriter if there is work for it to do.
  * (This should not be called for synchronous commits.)
  */
 void
 XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
 {
        XLogRecPtr      WriteRqstPtr = asyncXactLSN;
+       bool            sleeping;
 
        /* use volatile pointer to prevent code rearrangement */
        volatile XLogCtlData *xlogctl = XLogCtl;
 
        SpinLockAcquire(&xlogctl->info_lck);
        LogwrtResult = xlogctl->LogwrtResult;
+       sleeping = xlogctl->WalWriterSleeping;
        if (XLByteLT(xlogctl->asyncXactLSN, asyncXactLSN))
                xlogctl->asyncXactLSN = asyncXactLSN;
        SpinLockRelease(&xlogctl->info_lck);
 
-       /* back off to last completed page boundary */
-       WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
+       /*
+        * If the WALWriter is sleeping, we should kick it to make it come out of
+        * low-power mode.  Otherwise, determine whether there's a full page of
+        * WAL available to write.
+        */
+       if (!sleeping)
+       {
+               /* back off to last completed page boundary */
+               WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
 
-       /* if we have already flushed that far, we're done */
-       if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
-               return;
+               /* if we have already flushed that far, we're done */
+               if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+                       return;
+       }
 
        /*
-        * Nudge the WALWriter if we have a full page of WAL to write.
+        * Nudge the WALWriter: it has a full page of WAL to write, or we want
+        * it to come out of low-power mode so that this async commit will reach
+        * disk within the expected amount of time.
         */
        if (ProcGlobal->walwriterLatch)
                SetLatch(ProcGlobal->walwriterLatch);
@@ -5100,6 +5119,7 @@ XLOGShmemInit(void)
        XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
        XLogCtl->SharedRecoveryInProgress = true;
        XLogCtl->SharedHotStandbyActive = false;
+       XLogCtl->WalWriterSleeping = false;
        XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
        SpinLockInit(&XLogCtl->info_lck);
        InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
@@ -10479,3 +10499,17 @@ WakeupRecovery(void)
 {
        SetLatch(&XLogCtl->recoveryWakeupLatch);
 }
+
+/*
+ * Set the shared WalWriterSleeping flag (read by async commits) under info_lck.
+ */
+void
+SetWalWriterSleeping(bool sleeping)
+{
+       /* use volatile pointer to prevent code rearrangement */
+       volatile XLogCtlData *xlogctl = XLogCtl;
+
+       SpinLockAcquire(&xlogctl->info_lck);
+       xlogctl->WalWriterSleeping = sleeping;
+       SpinLockRelease(&xlogctl->info_lck);
+}
index cd41dbbc8c3d3f9b7b0d8a8aba511806cbf752ec..733d01fd5b3249d13ee637d8e2d8ec68a4555faa 100644 (file)
@@ -99,6 +99,7 @@ WalWriterMain(void)
        sigjmp_buf      local_sigjmp_buf;
        MemoryContext walwriter_context;
        int                     left_till_hibernate;
+       bool            hibernating;
 
        /*
         * If possible, make this process a group leader, so that the postmaster
@@ -230,6 +231,8 @@ WalWriterMain(void)
         * Reset hibernation state after any error.
         */
        left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
+       hibernating = false;
+       SetWalWriterSleeping(false);
 
        /*
         * Advertise our latch that backends can use to wake us up while we're
@@ -244,6 +247,21 @@ WalWriterMain(void)
        {
                long    cur_timeout;
 
+               /*
+                * Advertise whether we might hibernate in this cycle.  We do this
+                * before resetting the latch to ensure that any async commits will
+                * see the flag set if they might possibly need to wake us up, and
+                * that we won't miss any signal they send us.  (If we discover work
+                * to do in the last cycle before we would hibernate, the global flag
+                * will be set unnecessarily, but little harm is done.)  But avoid
+                * touching the global flag if it doesn't need to change.
+                */
+               if (hibernating != (left_till_hibernate <= 1))
+               {
+                       hibernating = (left_till_hibernate <= 1);
+                       SetWalWriterSleeping(hibernating);
+               }
+
                /* Clear any already-pending wakeups */
                ResetLatch(&MyProc->procLatch);
 
index 129712e7b9c9c8aa0e2554da698a0b893d3ff087..df5f232eeea44684b25ce81673b54e4ad47cc33c 100644 (file)
@@ -316,6 +316,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
 
 extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
+extern void SetWalWriterSleeping(bool sleeping);
 
 /*
  * Starting/stopping a base backup