*
* If the bgwriter exits unexpectedly, the postmaster treats that the same
* as a backend crash: shared memory may be corrupted, so remaining backends
- * should be killed by SIGQUIT and then a recovery cycle started. (Even if
- * shared memory isn't corrupted, we have lost information about which
- * files need to be fsync'd for the next checkpoint, and so a system
- * restart needs to be forced.)
+ * should be killed by SIGQUIT and then a recovery cycle started.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
#include "pgstat.h"
#include "postmaster/bgwriter.h"
#include "storage/bufmgr.h"
+#include "storage/buf_internals.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
int BgWriterDelay = 200;
/*
- * Time to sleep between bgwriter rounds, when it has no work to do.
+ * Multiplier to apply to BgWriterDelay when we decide to hibernate.
+ * (Perhaps this needs to be configurable?)
*/
-#define BGWRITER_HIBERNATE_MS 10000
+#define HIBERNATE_FACTOR 50
/*
* Flags set by interrupt handlers for later service in the main loop.
*/
static bool am_bg_writer = false;
-/* Prototypes for private functions */
-
-static void BgWriterNap(bool hibernating);
-
/* Signal handlers */
static void bg_quickdie(SIGNAL_ARGS);
{
sigjmp_buf local_sigjmp_buf;
MemoryContext bgwriter_context;
- bool hibernating;
+ bool prev_hibernate;
am_bg_writer = true;
* handler is still needed for latch wakeups.
*/
pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */
- pqsignal(SIGINT, SIG_IGN); /* as of 9.2 no longer requests checkpoint */
+ pqsignal(SIGINT, SIG_IGN);
pqsignal(SIGTERM, ReqShutdownHandler); /* shutdown */
pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
/* We allow SIGQUIT (quickdie) at all times */
sigdelset(&BlockSig, SIGQUIT);
- /*
- * Advertise our latch that backends can use to wake us up while we're
- * sleeping.
- */
- ProcGlobal->bgwriterLatch = &MyProc->procLatch;
-
/*
* Create a resource owner to keep track of our resources (currently only
* buffer pins).
if (RecoveryInProgress())
ThisTimeLineID = GetRecoveryTargetTLI();
+ /*
+ * Reset hibernation state after any error.
+ */
+ prev_hibernate = false;
+
/*
* Loop forever
*/
- hibernating = false;
for (;;)
{
- bool lapped;
+ bool can_hibernate;
+ int rc;
- /*
- * Emergency bailout if postmaster has died. This is to avoid the
- * necessity for manual cleanup of all postmaster children.
- */
- if (!PostmasterIsAlive())
- exit(1);
+ /* Clear any already-pending wakeups */
+ ResetLatch(&MyProc->procLatch);
if (got_SIGHUP)
{
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
- /* update global shmem state for sync rep */
}
if (shutdown_requested)
{
/*
* Do one cycle of dirty-buffer writing.
*/
- if (hibernating && bgwriter_lru_maxpages > 0)
- ResetLatch(&MyProc->procLatch);
- lapped = BgBufferSync();
-
- if (lapped && !hibernating)
- {
- /*
- * BgBufferSync did nothing. Since there doesn't seem to be any
- * work for the bgwriter to do, go into slower-paced
- * "hibernation" mode, where we sleep for much longer times than
- * bgwriter_delay says. Fewer wakeups saves electricity. If a
- * backend starts dirtying pages again, it will wake us up by
- * setting our latch.
- *
- * The latch is kept set during productive cycles where buffers
- * are written, and only reset before going into a longer sleep.
- * That ensures that when there's a constant trickle of activity,
- * the SetLatch() calls that backends have to do will see the
- * latch as already set, and are not slowed down by having to
- * actually set the latch and signal us.
- */
- hibernating = true;
-
- /*
- * Take one more short nap and perform one more bgwriter cycle -
- * someone might've dirtied a buffer just after we finished the
- * previous bgwriter cycle, while the latch was still set. If
- * we still find nothing to do after this cycle, the next sleep
- * will be longer.
- */
- BgWriterNap(false);
- continue;
- }
- else if (!lapped && hibernating)
- {
- /*
- * Woken up from hibernation. Set the latch just in case it's
- * not set yet (usually we wake up from hibernation because a
- * backend already set the latch, but not necessarily).
- */
- SetLatch(&MyProc->procLatch);
- hibernating = false;
- }
+ can_hibernate = BgBufferSync();
/*
- * Take a short or long nap, depending on whether there was any work
- * to do.
+ * Send off activity statistics to the stats collector
*/
- BgWriterNap(hibernating);
- }
-}
+ pgstat_send_bgwriter();
-/*
- * BgWriterNap -- Nap for the configured time or until a signal is received.
- *
- * If 'hibernating' is false, sleeps for bgwriter_delay milliseconds.
- * Otherwise sleeps longer, but also wakes up if the process latch is set.
- */
-static void
-BgWriterNap(bool hibernating)
-{
- long udelay;
-
- /*
- * Send off activity statistics to the stats collector
- */
- pgstat_send_bgwriter();
-
- /*
- * If there was no work to do in the previous bgwriter cycle, take a
- * longer nap.
- */
- if (hibernating)
- {
/*
- * We wake on a buffer being dirtied. It's possible that some
- * useful work will become available for the bgwriter to do without
- * a buffer actually being dirtied, like when a dirty buffer's usage
- * count is decremented to zero or it's unpinned. This corner case
- * is judged as too marginal to justify adding additional SetLatch()
- * calls in very hot code paths, cheap though those calls may be.
+ * Sleep until we are signaled or BgWriterDelay has elapsed.
*
- * We still wake up periodically, so that BufferAlloc stats are
- * updated reasonably promptly.
+ * Note: the feedback control loop in BgBufferSync() expects that we
+ * will call it every BgWriterDelay msec. While it's not critical for
+ * correctness that the interval be exact, the feedback loop might
+ * misbehave if we stray too far from it. Hence, avoid loading this
+ * process down with latch events that are likely to happen frequently
+ * during normal operation.
*/
- int res = WaitLatch(&MyProc->procLatch,
- WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
- BGWRITER_HIBERNATE_MS);
+ rc = WaitLatch(&MyProc->procLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ BgWriterDelay /* ms */);
/*
- * Only do a quick return if timeout was reached (or postmaster died)
- * to ensure that no less than BgWriterDelay ms has passed between
- * BgBufferSyncs - WaitLatch() might have returned instantaneously.
+ * If no latch event and BgBufferSync says nothing's happening, extend
+ * the sleep in "hibernation" mode, where we sleep for much longer
+ * than bgwriter_delay says. Fewer wakeups save electricity. When a
+ * backend starts using buffers again, it will wake us up by setting
+ * our latch. Because the extra sleep will persist only as long as no
+ * buffer allocations happen, this should not distort the behavior of
+ * BgBufferSync's control loop too badly; essentially, it will think
+ * that the system-wide idle interval didn't exist.
+ *
+ * There is a race condition here, in that a backend might allocate a
+ * buffer between the time BgBufferSync saw the alloc count as zero
+ * and the time we call StrategyNotifyBgWriter. Hibernating anyway in
+ * that case is not critical, but we try to reduce the odds of it by
+ * only hibernating when BgBufferSync says nothing's happening for two
+ * consecutive cycles. Also, we mitigate any possible consequences of
+ * a missed wakeup by not hibernating forever.
*/
- if (res & (WL_TIMEOUT | WL_POSTMASTER_DEATH))
- return;
- }
+ if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate)
+ {
+ /* Ask for notification at next buffer allocation */
+ StrategyNotifyBgWriter(&MyProc->procLatch);
+ /* Sleep ... */
+ rc = WaitLatch(&MyProc->procLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ BgWriterDelay * HIBERNATE_FACTOR);
+ /* Reset the notification request in case we timed out */
+ StrategyNotifyBgWriter(NULL);
+ }
- /*
- * Nap for the configured time.
- *
- * On some platforms, signals won't interrupt the sleep. To ensure we
- * respond reasonably promptly when someone signals us, break down the
- * sleep into 1-second increments, and check for interrupts after each
- * nap.
- */
- udelay = BgWriterDelay * 1000L;
+ /*
+ * Emergency bailout if postmaster has died. This is to avoid the
+ * necessity for manual cleanup of all postmaster children.
+ */
+ if (rc & WL_POSTMASTER_DEATH)
+ exit(1);
- while (udelay > 999999L)
- {
- if (got_SIGHUP || shutdown_requested)
- break;
- pg_usleep(1000000L);
- udelay -= 1000000L;
+ prev_hibernate = can_hibernate;
}
-
- if (!(got_SIGHUP || shutdown_requested))
- pg_usleep(udelay);
}
+
/* --------------------------------
* signal handler routines
* --------------------------------
MarkBufferDirty(Buffer buffer)
{
volatile BufferDesc *bufHdr;
- bool dirtied = false;
if (!BufferIsValid(buffer))
elog(ERROR, "bad buffer ID: %d", buffer);
Assert(bufHdr->refcount > 0);
- if (!(bufHdr->flags & BM_DIRTY))
- dirtied = true;
-
- bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
-
- UnlockBufHdr(bufHdr);
-
/*
- * If the buffer was not dirty already, do vacuum accounting, and
- * nudge bgwriter.
+ * If the buffer was not dirty already, do vacuum accounting.
*/
- if (dirtied)
+ if (!(bufHdr->flags & BM_DIRTY))
{
VacuumPageDirty++;
pgBufferUsage.shared_blks_dirtied++;
if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageDirty;
- if (ProcGlobal->bgwriterLatch)
- SetLatch(ProcGlobal->bgwriterLatch);
}
+
+ bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+
+ UnlockBufHdr(bufHdr);
}
/*
*
* This is called periodically by the background writer process.
*
- * Returns true if the clocksweep has been "lapped", so that there's nothing
- * to do. Also returns true if there's nothing to do because bgwriter was
- * effectively disabled by setting bgwriter_lru_maxpages to 0.
+ * Returns true if it's appropriate for the bgwriter process to go into
+ * low-power hibernation mode. (This happens if the strategy clock sweep
+ * has been "lapped" and no buffer allocations have occurred recently,
+ * or if the bgwriter has been effectively disabled by setting
+ * bgwriter_lru_maxpages to 0.)
*/
bool
BgBufferSync(void)
int num_written;
int reusable_buffers;
+ /* Variables for final smoothed_density update */
+ long new_strategy_delta;
+ uint32 new_recent_alloc;
+
/*
* Find out where the freelist clock sweep currently is, and how many
* buffer allocations have happened since our last call.
* which is helpful because a long memory isn't as desirable on the
* density estimates.
*/
- strategy_delta = bufs_to_lap - num_to_scan;
- recent_alloc = reusable_buffers - reusable_buffers_est;
- if (strategy_delta > 0 && recent_alloc > 0)
+ new_strategy_delta = bufs_to_lap - num_to_scan;
+ new_recent_alloc = reusable_buffers - reusable_buffers_est;
+ if (new_strategy_delta > 0 && new_recent_alloc > 0)
{
- scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
+ scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc;
smoothed_density += (scans_per_alloc - smoothed_density) /
smoothing_samples;
#ifdef BGW_DEBUG
elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f",
- recent_alloc, strategy_delta, scans_per_alloc, smoothed_density);
+ new_recent_alloc, new_strategy_delta,
+ scans_per_alloc, smoothed_density);
#endif
}
- return (bufs_to_lap == 0);
+ /* Return true if OK to hibernate */
+ return (bufs_to_lap == 0 && recent_alloc == 0);
}
/*
if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
(BM_DIRTY | BM_JUST_DIRTIED))
{
- bool dirtied = false;
-
LockBufHdr(bufHdr);
Assert(bufHdr->refcount > 0);
if (!(bufHdr->flags & BM_DIRTY))
- dirtied = true;
- bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
- UnlockBufHdr(bufHdr);
-
- if (dirtied)
{
+ /* Do vacuum cost accounting */
VacuumPageDirty++;
if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageDirty;
- /* The bgwriter may need to be woken. */
- if (ProcGlobal->bgwriterLatch)
- SetLatch(ProcGlobal->bgwriterLatch);
}
+ bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
+ UnlockBufHdr(bufHdr);
}
}
*/
uint32 completePasses; /* Complete cycles of the clock sweep */
uint32 numBufferAllocs; /* Buffers allocated since last reset */
+
+ /*
+ * Notification latch, or NULL if none. See StrategyNotifyBgWriter.
+ */
+ Latch *bgwriterLatch;
} BufferStrategyControl;
/* Pointers to shared state */
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
volatile BufferDesc *buf;
+ Latch *bgwriterLatch;
int trycounter;
/*
*/
StrategyControl->numBufferAllocs++;
+ /*
+ * If bgwriterLatch is set, we need to waken the bgwriter, but we should
+ * not do so while holding BufFreelistLock; so release and re-grab. This
+ * is annoyingly tedious, but it happens at most once per bgwriter cycle,
+ * so the performance hit is minimal.
+ */
+ bgwriterLatch = StrategyControl->bgwriterLatch;
+ if (bgwriterLatch)
+ {
+ StrategyControl->bgwriterLatch = NULL;
+ LWLockRelease(BufFreelistLock);
+ SetLatch(bgwriterLatch);
+ LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
+ }
+
/*
* Try to get a buffer from the freelist. Note that the freeNext fields
* are considered to be protected by the BufFreelistLock not the
return result;
}
+/*
+ * StrategyNotifyBgWriter -- set or clear allocation notification latch
+ *
+ * If bgwriterLatch isn't NULL, the next invocation of StrategyGetBuffer will
+ * set that latch. Pass NULL to clear the pending notification before it
+ * happens. This feature is used by the bgwriter process to wake itself up
+ * from hibernation, and is not meant for anybody else to use.
+ *
+ * The request is one-shot: StrategyGetBuffer resets the stored pointer to
+ * NULL before setting the latch, so the caller must call this function
+ * again to be notified of a later allocation.
+ */
+void
+StrategyNotifyBgWriter(Latch *bgwriterLatch)
+{
+ /*
+ * We acquire the BufFreelistLock just to ensure that the store appears
+ * atomic to StrategyGetBuffer. The bgwriter should call this rather
+ * infrequently, so there's no performance penalty from being safe.
+ */
+ LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
+ StrategyControl->bgwriterLatch = bgwriterLatch;
+ LWLockRelease(BufFreelistLock);
+}
+
/*
* StrategyShmemSize
/* Clear statistics */
StrategyControl->completePasses = 0;
StrategyControl->numBufferAllocs = 0;
+
+ /* No pending notification */
+ StrategyControl->bgwriterLatch = NULL;
}
else
Assert(!init);