granicus.if.org Git - postgresql/commitdiff
Reduce idle power consumption of walwriter and checkpointer processes.
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 9 May 2012 00:03:26 +0000 (20:03 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 9 May 2012 00:03:26 +0000 (20:03 -0400)
This patch modifies the walwriter process so that, when it has not found
anything useful to do for many consecutive wakeup cycles, it extends its
sleep time to reduce the server's idle power consumption.  It reverts to
normal as soon as it's done any successful flushes.  It's still true that
during any async commit, backends check for completed, unflushed pages of
WAL and signal the walwriter if there are any; so that in practice the
walwriter can get awakened and returned to normal operation sooner than the
sleep time might suggest.

Also, improve the checkpointer so that it uses a latch and a computed delay
time to not wake up at all except when it has something to do, replacing a
previous hardcoded 0.5 sec wakeup cycle.  This also is primarily useful for
reducing the server's power consumption when idle.

In passing, get rid of the dedicated latch for signaling the walwriter in
favor of using its procLatch, since that comports better with possible
generic signal handlers using that latch.  Also, fix a pre-existing bug
with failure to save/restore errno in walwriter's signal handlers.

Peter Geoghegan, somewhat simplified by Tom

src/backend/access/transam/xlog.c
src/backend/postmaster/checkpointer.c
src/backend/postmaster/walwriter.c
src/backend/storage/lmgr/proc.c
src/include/access/xlog.h
src/include/storage/latch.h
src/include/storage/proc.h

index b584cb0d0ba8deada78eefd2ad9e78385ac7bb8a..6d3a4cd3dfe2494ccb3bff704fe7838cc236599d 100644 (file)
@@ -433,11 +433,6 @@ typedef struct XLogCtlData
         */
        Latch           recoveryWakeupLatch;
 
-       /*
-        * WALWriterLatch is used to wake up the WALWriter to write some WAL.
-        */
-       Latch           WALWriterLatch;
-
        /*
         * During recovery, we keep a copy of the latest checkpoint record here.
         * Used by the background writer when it wants to create a restartpoint.
@@ -1935,7 +1930,8 @@ XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
        /*
         * Nudge the WALWriter if we have a full page of WAL to write.
         */
-       SetLatch(&XLogCtl->WALWriterLatch);
+       if (ProcGlobal->walwriterLatch)
+               SetLatch(ProcGlobal->walwriterLatch);
 }
 
 /*
@@ -2167,22 +2163,25 @@ XLogFlush(XLogRecPtr record)
  * block, and flush through the latest one of those.  Thus, if async commits
  * are not being used, we will flush complete blocks only.     We can guarantee
  * that async commits reach disk after at most three cycles; normally only
- * one or two. (We allow XLogWrite to write "flexibly", meaning it can stop
- * at the end of the buffer ring; this makes a difference only with very high
- * load or long wal_writer_delay, but imposes one extra cycle for the worst
- * case for async commits.)
+ * one or two.  (When flushing complete blocks, we allow XLogWrite to write
+ * "flexibly", meaning it can stop at the end of the buffer ring; this makes a
+ * difference only with very high load or long wal_writer_delay, but imposes
+ * one extra cycle for the worst case for async commits.)
  *
  * This routine is invoked periodically by the background walwriter process.
+ *
+ * Returns TRUE if we flushed anything.
  */
-void
+bool
 XLogBackgroundFlush(void)
 {
        XLogRecPtr      WriteRqstPtr;
        bool            flexible = true;
+       bool            wrote_something = false;
 
        /* XLOG doesn't need flushing during recovery */
        if (RecoveryInProgress())
-               return;
+               return false;
 
        /* read LogwrtResult and update local state */
        {
@@ -2224,7 +2223,7 @@ XLogBackgroundFlush(void)
                                XLogFileClose();
                        }
                }
-               return;
+               return false;
        }
 
 #ifdef WAL_DEBUG
@@ -2247,10 +2246,13 @@ XLogBackgroundFlush(void)
                WriteRqst.Write = WriteRqstPtr;
                WriteRqst.Flush = WriteRqstPtr;
                XLogWrite(WriteRqst, flexible, false);
+               wrote_something = true;
        }
        LWLockRelease(WALWriteLock);
 
        END_CRIT_SECTION();
+
+       return wrote_something;
 }
 
 /*
@@ -5101,7 +5103,6 @@ XLOGShmemInit(void)
        XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
        SpinLockInit(&XLogCtl->info_lck);
        InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
-       InitSharedLatch(&XLogCtl->WALWriterLatch);
 
        /*
         * If we are not in bootstrap mode, pg_control should already exist. Read
@@ -10478,12 +10479,3 @@ WakeupRecovery(void)
 {
        SetLatch(&XLogCtl->recoveryWakeupLatch);
 }
-
-/*
- * Manage the WALWriterLatch
- */
-Latch *
-WALWriterLatch(void)
-{
-       return &XLogCtl->WALWriterLatch;
-}
index 2329b1a9a910d77c5c58b9eb5cb804cd386df39f..2731eb8f24bbf80005417b8a77d01dd60c0f3a71 100644 (file)
@@ -51,6 +51,7 @@
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
 #include "storage/pmsignal.h"
+#include "storage/proc.h"
 #include "storage/shmem.h"
 #include "storage/smgr.h"
 #include "storage/spin.h"
@@ -178,6 +179,7 @@ static void UpdateSharedMemoryConfig(void);
 static void chkpt_quickdie(SIGNAL_ARGS);
 static void ChkptSigHupHandler(SIGNAL_ARGS);
 static void ReqCheckpointHandler(SIGNAL_ARGS);
+static void chkpt_sigusr1_handler(SIGNAL_ARGS);
 static void ReqShutdownHandler(SIGNAL_ARGS);
 
 
@@ -224,7 +226,7 @@ CheckpointerMain(void)
        pqsignal(SIGQUIT, chkpt_quickdie);              /* hard crash time */
        pqsignal(SIGALRM, SIG_IGN);
        pqsignal(SIGPIPE, SIG_IGN);
-       pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
+       pqsignal(SIGUSR1, chkpt_sigusr1_handler);
        pqsignal(SIGUSR2, ReqShutdownHandler);          /* request shutdown */
 
        /*
@@ -359,6 +361,12 @@ CheckpointerMain(void)
         */
        UpdateSharedMemoryConfig();
 
+       /*
+        * Advertise our latch that backends can use to wake us up while we're
+        * sleeping.
+        */
+       ProcGlobal->checkpointerLatch = &MyProc->procLatch;
+
        /*
         * Loop forever
         */
@@ -368,6 +376,10 @@ CheckpointerMain(void)
                int                     flags = 0;
                pg_time_t       now;
                int                     elapsed_secs;
+               int                     cur_timeout;
+
+               /* Clear any already-pending wakeups */
+               ResetLatch(&MyProc->procLatch);
 
                /*
                 * Emergency bailout if postmaster has died.  This is to avoid the
@@ -387,15 +399,15 @@ CheckpointerMain(void)
                        ProcessConfigFile(PGC_SIGHUP);
 
                        /*
-                        * Checkpointer is the last process to shutdown, so we ask
+                        * Checkpointer is the last process to shut down, so we ask
                         * it to hold the keys for a range of other tasks required
                         * most of which have nothing to do with checkpointing at all.
                         *
-                        * For various reasons, some config values can change
-                        * dynamically so are the primary copy of them is held in
-                        * shared memory to make sure all backends see the same value.
-                        * We make Checkpointer responsible for updating the shared
-                        * memory copy if the parameter setting changes because of SIGHUP.
+                        * For various reasons, some config values can change dynamically
+                        * so the primary copy of them is held in shared memory to make
+                        * sure all backends see the same value.  We make Checkpointer
+                        * responsible for updating the shared memory copy if the
+                        * parameter setting changes because of SIGHUP.
                         */
                        UpdateSharedMemoryConfig();
                }
@@ -488,7 +500,7 @@ CheckpointerMain(void)
                                                 errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));
 
                        /*
-                        * Initialize checkpointer-private variables used during checkpoint.
+                        * Initialize checkpointer-private variables used during checkpoint
                         */
                        ckpt_active = true;
                        if (!do_restartpoint)
@@ -543,20 +555,34 @@ CheckpointerMain(void)
                        ckpt_active = false;
                }
 
+               /* Check for archive_timeout and switch xlog files if necessary. */
+               CheckArchiveTimeout();
+
                /*
                 * Send off activity statistics to the stats collector
                 */
                pgstat_send_bgwriter();
 
                /*
-                * Nap for a while and then loop again. Later patches will replace
-                * this with a latch loop. Keep it simple now for clarity.
-                * Relatively long sleep because the bgwriter does cleanup now.
+                * Sleep until we are signaled or it's time for another checkpoint
+                * or xlog file switch.
                 */
-               pg_usleep(500000L);
+               now = (pg_time_t) time(NULL);
+               elapsed_secs = now - last_checkpoint_time;
+               if (elapsed_secs >= CheckPointTimeout)
+                       continue;                       /* no sleep for us ... */
+               cur_timeout = CheckPointTimeout - elapsed_secs;
+               if (XLogArchiveTimeout > 0 && !RecoveryInProgress())
+               {
+                       elapsed_secs = now - last_xlog_switch_time;
+                       if (elapsed_secs >= XLogArchiveTimeout)
+                               continue;               /* no sleep for us ... */
+                       cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs);
+               }
 
-               /* Check for archive_timeout and switch xlog files if necessary. */
-               CheckArchiveTimeout();
+               (void) WaitLatch(&MyProc->procLatch,
+                                                WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                                cur_timeout * 1000L /* convert to ms */);
        }
 }
 
@@ -814,21 +840,50 @@ chkpt_quickdie(SIGNAL_ARGS)
 static void
 ChkptSigHupHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        got_SIGHUP = true;
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
+       errno = save_errno;
 }
 
 /* SIGINT: set flag to run a normal checkpoint right away */
 static void
 ReqCheckpointHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        checkpoint_requested = true;
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
+       errno = save_errno;
+}
+
+/* SIGUSR1: used for latch wakeups */
+static void
+chkpt_sigusr1_handler(SIGNAL_ARGS)
+{
+       int                     save_errno = errno;
+
+       latch_sigusr1_handler();
+
+       errno = save_errno;
 }
 
 /* SIGUSR2: set flag to run a shutdown checkpoint and exit */
 static void
 ReqShutdownHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        shutdown_requested = true;
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
+       errno = save_errno;
 }
 
 
@@ -1055,6 +1110,7 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
                                        BlockNumber segno)
 {
        BgWriterRequest *request;
+       bool            too_full;
 
        if (!IsUnderPostmaster)
                return false;                   /* probably shouldn't even get here */
@@ -1068,14 +1124,13 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
        BgWriterShmem->num_backend_writes++;
 
        /*
-        * If the background writer isn't running or the request queue is full,
+        * If the checkpointer isn't running or the request queue is full,
         * the backend will have to perform its own fsync request.      But before
-        * forcing that to happen, we can try to compact the background writer
-        * request queue.
+        * forcing that to happen, we can try to compact the request queue.
         */
        if (BgWriterShmem->checkpointer_pid == 0 ||
-               (BgWriterShmem->num_requests >= BgWriterShmem->max_requests
-                && !CompactCheckpointerRequestQueue()))
+               (BgWriterShmem->num_requests >= BgWriterShmem->max_requests &&
+                !CompactCheckpointerRequestQueue()))
        {
                /*
                 * Count the subset of writes where backends have to do their own
@@ -1085,11 +1140,23 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
                LWLockRelease(BgWriterCommLock);
                return false;
        }
+
+       /* OK, insert request */
        request = &BgWriterShmem->requests[BgWriterShmem->num_requests++];
        request->rnode = rnode;
        request->forknum = forknum;
        request->segno = segno;
+
+       /* If queue is more than half full, nudge the checkpointer to empty it */
+       too_full = (BgWriterShmem->num_requests >=
+                               BgWriterShmem->max_requests / 2);
+
        LWLockRelease(BgWriterCommLock);
+
+       /* ... but not till after we release the lock */
+       if (too_full && ProcGlobal->checkpointerLatch)
+               SetLatch(ProcGlobal->checkpointerLatch);
+
        return true;
 }
 
@@ -1109,7 +1176,7 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
  * practice: there's one queue entry per shared buffer.
  */
 static bool
-CompactCheckpointerRequestQueue()
+CompactCheckpointerRequestQueue(void)
 {
        struct BgWriterSlotMapping
        {
@@ -1230,7 +1297,7 @@ AbsorbFsyncRequests(void)
         */
        LWLockAcquire(BgWriterCommLock, LW_EXCLUSIVE);
 
-       /* Transfer write count into pending pgstats message */
+       /* Transfer stats counts into pending pgstats message */
        BgWriterStats.m_buf_written_backend += BgWriterShmem->num_backend_writes;
        BgWriterStats.m_buf_fsync_backend += BgWriterShmem->num_backend_fsync;
 
index 08ef946ee6c5e078207dc418fa81525a96d439c3..cd41dbbc8c3d3f9b7b0d8a8aba511806cbf752ec 100644 (file)
@@ -54,6 +54,7 @@
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
 #include "storage/pmsignal.h"
+#include "storage/proc.h"
 #include "storage/smgr.h"
 #include "utils/guc.h"
 #include "utils/hsearch.h"
  */
 int                    WalWriterDelay = 200;
 
+/*
+ * Number of do-nothing loops before lengthening the delay time, and the
+ * multiplier to apply to WalWriterDelay when we do decide to hibernate.
+ * (Perhaps these need to be configurable?)
+ */
+#define LOOPS_UNTIL_HIBERNATE          50
+#define HIBERNATE_FACTOR                       25
+
 /*
  * Flags set by interrupt handlers for later service in the main loop.
  */
@@ -76,6 +85,7 @@ static volatile sig_atomic_t shutdown_requested = false;
 static void wal_quickdie(SIGNAL_ARGS);
 static void WalSigHupHandler(SIGNAL_ARGS);
 static void WalShutdownHandler(SIGNAL_ARGS);
+static void walwriter_sigusr1_handler(SIGNAL_ARGS);
 
 /*
  * Main entry point for walwriter process
@@ -88,8 +98,7 @@ WalWriterMain(void)
 {
        sigjmp_buf      local_sigjmp_buf;
        MemoryContext walwriter_context;
-
-       InitLatch(WALWriterLatch()); /* initialize latch used in main loop */
+       int                     left_till_hibernate;
 
        /*
         * If possible, make this process a group leader, so that the postmaster
@@ -114,7 +123,7 @@ WalWriterMain(void)
        pqsignal(SIGQUIT, wal_quickdie);        /* hard crash time */
        pqsignal(SIGALRM, SIG_IGN);
        pqsignal(SIGPIPE, SIG_IGN);
-       pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
+       pqsignal(SIGUSR1, walwriter_sigusr1_handler);
        pqsignal(SIGUSR2, SIG_IGN); /* not used */
 
        /*
@@ -217,12 +226,26 @@ WalWriterMain(void)
         */
        PG_SETMASK(&UnBlockSig);
 
+       /*
+        * Reset hibernation state after any error.
+        */
+       left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
+
+       /*
+        * Advertise our latch that backends can use to wake us up while we're
+        * sleeping.
+        */
+       ProcGlobal->walwriterLatch = &MyProc->procLatch;
+
        /*
         * Loop forever
         */
        for (;;)
        {
-               ResetLatch(WALWriterLatch());
+               long    cur_timeout;
+
+               /* Clear any already-pending wakeups */
+               ResetLatch(&MyProc->procLatch);
 
                /*
                 * Emergency bailout if postmaster has died.  This is to avoid the
@@ -246,13 +269,27 @@ WalWriterMain(void)
                }
 
                /*
-                * Do what we're here for...
+                * Do what we're here for; then, if XLogBackgroundFlush() found useful
+                * work to do, reset hibernation counter.
                 */
-               XLogBackgroundFlush();
+               if (XLogBackgroundFlush())
+                       left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
+               else if (left_till_hibernate > 0)
+                       left_till_hibernate--;
 
-               (void) WaitLatch(WALWriterLatch(),
-                                                          WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
-                                                          WalWriterDelay /* ms */);
+               /*
+                * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
+                * haven't done anything useful for quite some time, lengthen the
+                * sleep time so as to reduce the server's idle power consumption.
+                */
+               if (left_till_hibernate > 0)
+                       cur_timeout = WalWriterDelay; /* in ms */
+               else
+                       cur_timeout = WalWriterDelay * HIBERNATE_FACTOR;
+
+               (void) WaitLatch(&MyProc->procLatch,
+                                                WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                                                cur_timeout);
        }
 }
 
@@ -298,14 +335,35 @@ wal_quickdie(SIGNAL_ARGS)
 static void
 WalSigHupHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        got_SIGHUP = true;
-       SetLatch(WALWriterLatch());
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
+       errno = save_errno;
 }
 
 /* SIGTERM: set flag to exit normally */
 static void
 WalShutdownHandler(SIGNAL_ARGS)
 {
+       int                     save_errno = errno;
+
        shutdown_requested = true;
-       SetLatch(WALWriterLatch());
+       if (MyProc)
+               SetLatch(&MyProc->procLatch);
+
+       errno = save_errno;
+}
+
+/* SIGUSR1: used for latch wakeups */
+static void
+walwriter_sigusr1_handler(SIGNAL_ARGS)
+{
+       int                     save_errno = errno;
+
+       latch_sigusr1_handler();
+
+       errno = save_errno;
 }
index 20ed5de75e73151fa41d6c6462de63919c2a97d2..8e309f8a0b4d7dd1f7e3e0d97c1c6a3f3b3726a0 100644 (file)
@@ -187,6 +187,8 @@ InitProcGlobal(void)
        ProcGlobal->startupProcPid = 0;
        ProcGlobal->startupBufferPinWaitBufId = -1;
        ProcGlobal->bgwriterLatch = NULL;
+       ProcGlobal->walwriterLatch = NULL;
+       ProcGlobal->checkpointerLatch = NULL;
 
        /*
         * Create and initialize all the PGPROC structures we'll need (except for
index f8aecef665b897d47b44510a05cd1587c5971501..129712e7b9c9c8aa0e2554da698a0b893d3ff087 100644 (file)
@@ -16,7 +16,6 @@
 #include "datatype/timestamp.h"
 #include "lib/stringinfo.h"
 #include "storage/buf.h"
-#include "storage/latch.h"
 #include "utils/pg_crc.h"
 
 /*
@@ -266,7 +265,7 @@ extern CheckpointStatsData CheckpointStats;
 
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern void XLogFlush(XLogRecPtr RecPtr);
-extern void XLogBackgroundFlush(void);
+extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 extern int XLogFileInit(uint32 log, uint32 seg,
                         bool *use_existent, bool use_lock);
@@ -317,7 +316,6 @@ extern TimeLineID GetRecoveryTargetTLI(void);
 
 extern bool CheckPromoteSignal(void);
 extern void WakeupRecovery(void);
-extern Latch *WALWriterLatch(void);
 
 /*
  * Starting/stopping a base backup
index f97fedfdf649c390a2fb40af90fbc4ff1b798998..6a7df38d1a2591599afe3602f3fdcc3276114be5 100644 (file)
  * will be lifted in future by inserting suitable memory barriers into
  * SetLatch and ResetLatch.
  *
+ * Note that use of the process latch (PGPROC.procLatch) is generally better
+ * than an ad-hoc shared latch for signaling auxiliary processes.  This is
+ * because generic signal handlers will call SetLatch on the process latch
+ * only, so using any latch other than the process latch effectively precludes
+ * ever registering a generic handler.  Since signals have the potential to
+ * invalidate the latch timeout on some platforms, resulting in a
+ * denial-of-service, it is important to verify that all signal handlers
+ * within all WaitLatch-calling processes call SetLatch.
+ *
  *
  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
index 987bc0826082897af8c8d5e0c9701354a3254996..c4808f44a0887b572abc05933316ba34a87228ce 100644 (file)
@@ -188,8 +188,12 @@ typedef struct PROC_HDR
        PGPROC     *freeProcs;
        /* Head of list of autovacuum's free PGPROC structures */
        PGPROC     *autovacFreeProcs;
-       /* BGWriter process latch */
+       /* BGWriter process's latch */
        Latch      *bgwriterLatch;
+       /* WALWriter process's latch */
+       Latch      *walwriterLatch;
+       /* Checkpointer process's latch */
+       Latch      *checkpointerLatch;
        /* Current shared estimate of appropriate spins_per_delay value */
        int                     spins_per_delay;
        /* The proc of the Startup process, since not in ProcArray */