-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.132 2007/07/24 01:53:55 alvherre Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.133 2007/07/24 04:54:08 tgl Exp $ -->
<chapter Id="runtime-config">
<title>Server Configuration</title>
</para>
</listitem>
</varlistentry>
-
+
<varlistentry id="guc-wal-buffers" xreflabel="wal_buffers">
<term><varname>wal_buffers</varname> (<type>integer</type>)</term>
<indexterm>
</para>
</listitem>
</varlistentry>
-
+
+ <varlistentry id="guc-wal-writer-delay" xreflabel="wal_writer_delay">
+ <term><varname>wal_writer_delay</varname> (<type>integer</type>)</term>
+ <indexterm>
+ <primary><varname>wal_writer_delay</> configuration parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ Specifies the delay between activity rounds for the WAL writer.
+ In each round the writer will flush WAL to disk. It then sleeps for
+ <varname>wal_writer_delay</> milliseconds, and repeats. The default
+ value is 200 milliseconds (<literal>200ms</>). Note that on many
+ systems, the effective resolution of sleep delays is 10 milliseconds;
+ setting <varname>wal_writer_delay</> to a value that is not a multiple
+ of 10 might have the same results as setting it to the next higher
+ multiple of 10. This parameter can only be set in the
+ <filename>postgresql.conf</> file or on the server command line.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-commit-delay" xreflabel="commit_delay">
<term><varname>commit_delay</varname> (<type>integer</type>)</term>
<indexterm>
</indexterm>
<listitem>
<para>
- Specifies the target length of checkpoints, as a fraction of
+ Specifies the target length of checkpoints, as a fraction of
the checkpoint interval. The default is 0.5.
This parameter can only be set in the <filename>postgresql.conf</>
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.274 2007/06/30 19:12:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.275 2007/07/24 04:54:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
uint32 len,
write_len;
unsigned i;
- XLogwrtRqst LogwrtRqst;
bool updrqst;
bool doPageWrites;
bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
START_CRIT_SECTION();
- /* update LogwrtResult before doing cache fill check */
- {
- /* use volatile pointer to prevent code rearrangement */
- volatile XLogCtlData *xlogctl = XLogCtl;
-
- SpinLockAcquire(&xlogctl->info_lck);
- LogwrtRqst = xlogctl->LogwrtRqst;
- LogwrtResult = xlogctl->LogwrtResult;
- SpinLockRelease(&xlogctl->info_lck);
- }
-
- /*
- * If cache is half filled then try to acquire write lock and do
- * XLogWrite. Ignore any fractional blocks in performing this check.
- */
- LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % XLOG_BLCKSZ;
- if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
- (LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
- XLogCtl->XLogCacheByte / 2))
- {
- if (LWLockConditionalAcquire(WALWriteLock, LW_EXCLUSIVE))
- {
- /*
- * Since the amount of data we write here is completely optional
- * anyway, tell XLogWrite it can be "flexible" and stop at a
- * convenient boundary. This allows writes triggered by this
- * mechanism to synchronize with the cache boundaries, so that in
- * a long transaction we'll basically dump alternating halves of
- * the buffer array.
- */
- LogwrtResult = XLogCtl->Write.LogwrtResult;
- if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write))
- XLogWrite(LogwrtRqst, true, false);
- LWLockRelease(WALWriteLock);
- }
- }
-
/* Now wait to get insert lock */
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
}
+/*
+ * Flush xlog, but without specifying exactly where to flush to.
+ *
+ * We normally flush only completed blocks; but if there is nothing to do on
+ * that basis, we check for unflushed async commits in the current incomplete
+ * block, and flush through the latest one of those. Thus, if async commits
+ * are not being used, we will flush complete blocks only. We can guarantee
+ * that async commits reach disk after at most three cycles; normally only
+ * one or two. (We allow XLogWrite to write "flexibly", meaning it can stop
+ * at the end of the buffer ring; this makes a difference only with very high
+ * load or long wal_writer_delay, but imposes one extra cycle for the worst
+ * case for async commits.)
+ *
+ * NB: the async-commit check described above is compiled only when NOT_YET
+ * is defined (the async commit patch is still to come). Without it, the
+ * request is always the last completed page boundary and "flexible" stays
+ * true.
+ *
+ * The request is computed from a snapshot of shared state taken under
+ * info_lck. The write itself is done inside a critical section while
+ * holding WALWriteLock exclusively, after rechecking the flush position
+ * with the lock held.
+ *
+ * This routine is invoked periodically by the background walwriter process.
+ */
+void
+XLogBackgroundFlush(void)
+{
+ XLogRecPtr WriteRqstPtr;
+ bool flexible = true;
+
+ /* snapshot LogwrtResult and the current write request under info_lck */
+ {
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+
+ SpinLockAcquire(&xlogctl->info_lck);
+ LogwrtResult = xlogctl->LogwrtResult;
+ WriteRqstPtr = xlogctl->LogwrtRqst.Write;
+ SpinLockRelease(&xlogctl->info_lck);
+ }
+
+ /* back off to last completed page boundary */
+ WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
+
+#ifdef NOT_YET /* async commit patch is still to come */
+ /* if we have already flushed that far, consider async commit records */
+ if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+ {
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+
+ SpinLockAcquire(&xlogctl->async_commit_lck);
+ WriteRqstPtr = xlogctl->asyncCommitLSN;
+ SpinLockRelease(&xlogctl->async_commit_lck);
+ flexible = false; /* ensure it all gets written */
+ }
+#endif
+
+ /* Done if already known flushed (cheap exit without taking any LWLock) */
+ if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+ return;
+
+#ifdef WAL_DEBUG
+ if (XLOG_DEBUG)
+ elog(LOG, "xlog bg flush request %X/%X; write %X/%X; flush %X/%X",
+ WriteRqstPtr.xlogid, WriteRqstPtr.xrecoff,
+ LogwrtResult.Write.xlogid, LogwrtResult.Write.xrecoff,
+ LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
+#endif
+
+ START_CRIT_SECTION();
+
+ /*
+ * now wait for the write lock, then reload LogwrtResult and recheck the
+ * flush position before deciding to write
+ */
+ LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+ LogwrtResult = XLogCtl->Write.LogwrtResult;
+ if (!XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
+ {
+ XLogwrtRqst WriteRqst;
+
+ WriteRqst.Write = WriteRqstPtr;
+ WriteRqst.Flush = WriteRqstPtr;
+ XLogWrite(WriteRqst, flexible, false);
+ }
+ LWLockRelease(WALWriteLock);
+
+ END_CRIT_SECTION();
+}
+
/*
* Test whether XLOG data has been flushed up to (at least) the given position.
*
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.234 2007/06/28 00:02:37 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.235 2007/07/24 04:54:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "postmaster/bgwriter.h"
+#include "postmaster/walwriter.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/proc.h"
* AuxiliaryProcessMain
*
* The main entry point for auxiliary processes, such as the bgwriter,
- * bootstrapper and the shared memory checker code.
+ * walwriter, bootstrapper and the shared memory checker code.
*
* This code is here just because of historical reasons.
*/
case BgWriterProcess:
statmsg = "writer process";
break;
+ case WalWriterProcess:
+ statmsg = "wal writer process";
+ break;
default:
statmsg = "??? process";
break;
InitXLOGAccess();
BackgroundWriterMain();
proc_exit(1); /* should never return */
+
+ case WalWriterProcess:
+ /* don't set signals, walwriter has its own agenda */
+ InitXLOGAccess();
+ WalWriterMain();
+ proc_exit(1); /* should never return */
default:
elog(PANIC, "unrecognized process type: %d", auxType);
# Makefile for src/backend/postmaster
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.22 2007/01/20 17:16:12 petere Exp $
+# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.23 2007/07/24 04:54:09 tgl Exp $
#
#-------------------------------------------------------------------------
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-OBJS = bgwriter.o autovacuum.o pgarch.o pgstat.o postmaster.o syslogger.o \
- fork_process.o
+OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \
+ syslogger.o walwriter.o
all: SUBSYS.o
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.534 2007/07/23 10:16:54 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.535 2007/07/24 04:54:09 tgl Exp $
*
* NOTES
*
{
pid_t pid; /* process id of backend */
long cancel_key; /* cancel key for cancels for this backend */
- bool is_autovacuum; /* is it an autovacuum process */
+ bool is_autovacuum; /* is it an autovacuum process? */
} Backend;
static Dllist *BackendList;
#ifdef EXEC_BACKEND
/*
* Number of entries in the backend table. Twice the number of backends,
- * plus four other subprocesses (stats, bgwriter, autovac, logger).
+ * plus five other subprocesses (stats, bgwriter, walwriter, autovac, logger).
*/
-#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends + 4)
+#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends + 5)
static Backend *ShmemBackendArray;
#endif
/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
BgWriterPID = 0,
+ WalWriterPID = 0,
AutoVacPID = 0,
PgArchPID = 0,
PgStatPID = 0,
bool redirection_done = false;
/* received START_AUTOVAC_LAUNCHER signal */
-static bool start_autovac_launcher = false;
+static volatile sig_atomic_t start_autovac_launcher = false;
/*
* State for assigning random salts and cancel keys.
#define StartupDataBase() StartChildProcess(StartupProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
+#define StartWalWriter() StartChildProcess(WalWriterProcess)
/* Macros to check exit status of a child process */
#define EXIT_STATUS_0(st) ((st) == 0)
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
- * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/autovacuum.c,
- * postmaster/pgarch.c, postmaster/pgstat.c, and postmaster/syslogger.c.
+ * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
+ * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, and
+ * postmaster/syslogger.c.
*/
pqinitmask();
PG_SETMASK(&BlockSig);
signal_child(BgWriterPID, SIGUSR2);
}
+ /*
+ * Likewise, if we have lost the walwriter process, try to start a
+ * new one. We don't need walwriter to complete a shutdown, so
+ * don't start it if shutdown already initiated.
+ */
+ if (WalWriterPID == 0 &&
+ StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
+ WalWriterPID = StartWalWriter();
+
/* If we have lost the autovacuum launcher, try to start a new one */
if (AutoVacPID == 0 &&
(AutoVacuumingActive() || start_autovac_launcher) &&
{
AutoVacPID = StartAutoVacLauncher();
if (AutoVacPID != 0)
- start_autovac_launcher = false; /* signal successfully processed */
+ start_autovac_launcher = false; /* signal processed */
}
/* If we have lost the archiver, try to start a new one */
SignalChildren(SIGHUP);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGHUP);
+ if (WalWriterPID != 0)
+ signal_child(WalWriterPID, SIGHUP);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGHUP);
if (PgArchPID != 0)
/* and the autovac launcher too */
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
+ /* and the walwriter too */
+ if (WalWriterPID != 0)
+ signal_child(WalWriterPID, SIGTERM);
- if (DLGetHead(BackendList) || AutoVacPID != 0)
+ if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0)
break; /* let reaper() handle this */
/*
ereport(LOG,
(errmsg("received fast shutdown request")));
- if (DLGetHead(BackendList) || AutoVacPID != 0)
+ if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0)
{
if (!FatalError)
{
SignalChildren(SIGTERM);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
+ if (WalWriterPID != 0)
+ signal_child(WalWriterPID, SIGTERM);
/* reaper() does the rest */
}
break;
*
* Note: if we previously got SIGTERM then we may send SIGUSR2 to
* the bgwriter a second time here. This should be harmless.
+ * Ditto for the signals to the other special children.
*/
if (StartupPID != 0)
{
signal_child(StartupPID, SIGQUIT);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGQUIT);
+ if (WalWriterPID != 0)
+ signal_child(WalWriterPID, SIGQUIT);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGQUIT);
if (PgArchPID != 0)
/*
* Go to shutdown mode if a shutdown request was pending.
- * Otherwise, try to start the archiver, stats collector and
- * autovacuum launcher.
+ * Otherwise, try to start the other special children.
*/
if (Shutdown > NoShutdown && BgWriterPID != 0)
signal_child(BgWriterPID, SIGUSR2);
else if (Shutdown == NoShutdown)
{
+ if (WalWriterPID == 0)
+ WalWriterPID = StartWalWriter();
if (XLogArchivingActive() && PgArchPID == 0)
PgArchPID = pgarch_start();
if (PgStatPID == 0)
BgWriterPID = 0;
if (EXIT_STATUS_0(exitstatus) &&
Shutdown > NoShutdown && !FatalError &&
- !DLGetHead(BackendList) && AutoVacPID == 0)
+ !DLGetHead(BackendList) &&
+ WalWriterPID == 0 && AutoVacPID == 0)
{
/*
* Normal postmaster exit is here: we've seen normal exit of
* required will happen on next postmaster start.
*/
if (Shutdown > NoShutdown &&
- !DLGetHead(BackendList) && AutoVacPID == 0)
+ !DLGetHead(BackendList) &&
+ WalWriterPID == 0 && AutoVacPID == 0)
{
ereport(LOG,
(errmsg("abnormal database system shutdown")));
continue;
}
+ /*
+ * Was it the wal writer? Normal exit can be ignored; we'll
+ * start a new one at the next iteration of the postmaster's main loop,
+ * if necessary. Any other exit condition is treated as a crash.
+ */
+ if (WalWriterPID != 0 && pid == WalWriterPID)
+ {
+ WalWriterPID = 0;
+ if (!EXIT_STATUS_0(exitstatus))
+ HandleChildCrash(pid, exitstatus,
+ _("wal writer process"));
+ continue;
+ }
+
/*
* Was it the autovacuum launcher? Normal exit can be ignored; we'll
* start a new one at the next iteration of the postmaster's main loop,
* StartupDataBase. (We can ignore the archiver and stats processes
* here since they are not connected to shmem.)
*/
- if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0 ||
+ if (DLGetHead(BackendList) || StartupPID != 0 ||
+ BgWriterPID != 0 || WalWriterPID != 0 ||
AutoVacPID != 0)
goto reaper_done;
ereport(LOG,
if (Shutdown > NoShutdown)
{
- if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0)
+ if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0 ||
+ WalWriterPID != 0)
goto reaper_done;
/* Start the bgwriter if not running */
if (BgWriterPID == 0)
}
/*
- * HandleChildCrash -- cleanup after failed backend, bgwriter, or autovacuum.
+ * HandleChildCrash -- cleanup after failed backend, bgwriter, walwriter,
+ * or autovacuum.
*
* The objectives here are to clean up our local state about the child
* process, and to signal all other remaining children to quickdie.
signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
+ /* Take care of the walwriter too */
+ if (pid == WalWriterPID)
+ WalWriterPID = 0;
+ else if (WalWriterPID != 0 && !FatalError)
+ {
+ ereport(DEBUG2,
+ (errmsg_internal("sending %s to process %d",
+ (SendStop ? "SIGSTOP" : "SIGQUIT"),
+ (int) WalWriterPID)));
+ signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
+ }
+
/* Take care of the autovacuum launcher too */
if (pid == AutoVacPID)
AutoVacPID = 0;
start_autovac_launcher = true;
}
- /* The autovacuum launcher wants us to start a worker process. */
if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER))
+ {
+ /* The autovacuum launcher wants us to start a worker process. */
StartAutovacuumWorker();
+ }
PG_SETMASK(&UnBlockSig);
ereport(LOG,
(errmsg("could not fork background writer process: %m")));
break;
+ case WalWriterProcess:
+ ereport(LOG,
+ (errmsg("could not fork wal writer process: %m")));
+ break;
default:
ereport(LOG,
(errmsg("could not fork process: %m")));
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * walwriter.c
+ *
+ * The WAL writer background process is new as of Postgres 8.3. It attempts
+ * to keep regular backends from having to write out (and fsync) WAL pages.
+ * Also, it guarantees that transaction commit records that weren't synced
+ * to disk immediately upon commit (ie, were "asynchronously committed")
+ * will reach disk within a knowable time --- which, as it happens, is at
+ * most three times the wal_writer_delay cycle time.
+ *
+ * Note that as with the bgwriter for shared buffers, regular backends are
+ * still empowered to issue WAL writes and fsyncs when the walwriter doesn't
+ * keep up.
+ *
+ * Because the walwriter's cycle is directly linked to the maximum delay
+ * before async-commit transactions are guaranteed committed, it's probably
+ * unwise to load additional functionality onto it. For instance, if you've
+ * got a yen to create xlog segments further in advance, that'd be better done
+ * in bgwriter than in walwriter.
+ *
+ * The walwriter is started by the postmaster as soon as the startup subprocess
+ * finishes. It remains alive until the postmaster commands it to terminate.
+ * Normal termination is by SIGTERM, which instructs the walwriter to exit(0).
+ * Emergency termination is by SIGQUIT; like any backend, the walwriter will
+ * simply abort and exit on SIGQUIT.
+ *
+ * If the walwriter exits unexpectedly, the postmaster treats that the same
+ * as a backend crash: shared memory may be corrupted, so remaining backends
+ * should be killed by SIGQUIT and then a recovery cycle started.
+ *
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/postmaster/walwriter.c,v 1.1 2007/07/24 04:54:09 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <signal.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "access/xlog.h"
+#include "libpq/pqsignal.h"
+#include "miscadmin.h"
+#include "postmaster/walwriter.h"
+#include "storage/bufmgr.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pmsignal.h"
+#include "storage/smgr.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+
+
+/*
+ * GUC parameters
+ *
+ * WalWriterDelay is the wal_writer_delay setting: the walwriter's sleep
+ * time between WAL flush rounds, in milliseconds.
+ */
+int WalWriterDelay = 200;
+
+/*
+ * Flags set by interrupt handlers for later service in the main loop.
+ * got_SIGHUP asks for the configuration file to be re-read;
+ * shutdown_requested asks for a normal exit.
+ */
+static volatile sig_atomic_t got_SIGHUP = false;
+static volatile sig_atomic_t shutdown_requested = false;
+
+/* Signal handlers */
+static void wal_quickdie(SIGNAL_ARGS);
+static void WalSigHupHandler(SIGNAL_ARGS);
+static void WalShutdownHandler(SIGNAL_ARGS);
+
+
+/*
+ * Main entry point for walwriter process
+ *
+ * This is invoked from AuxiliaryProcessMain, which has already created the
+ * basic execution environment, but not enabled signals yet.
+ *
+ * The function never returns: it loops until a shutdown is requested (normal
+ * exit via proc_exit(0)) or the postmaster is found to have died (exit(1)).
+ */
+void
+WalWriterMain(void)
+{
+ sigjmp_buf local_sigjmp_buf;
+ MemoryContext walwriter_context;
+
+ /*
+ * If possible, make this process a group leader, so that the postmaster
+ * can signal any child processes too. (walwriter probably never has
+ * any child processes, but for consistency we make all postmaster
+ * child processes do this.)
+ */
+#ifdef HAVE_SETSID
+ if (setsid() < 0)
+ elog(FATAL, "setsid() failed: %m");
+#endif
+
+ /*
+ * Properly accept or ignore signals the postmaster might send us
+ *
+ * We have no particular use for SIGINT at the moment, but seems
+ * reasonable to treat like SIGTERM.
+ */
+ pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
+ pqsignal(SIGINT, WalShutdownHandler); /* request shutdown */
+ pqsignal(SIGTERM, WalShutdownHandler); /* request shutdown */
+ pqsignal(SIGQUIT, wal_quickdie); /* hard crash time */
+ pqsignal(SIGALRM, SIG_IGN);
+ pqsignal(SIGPIPE, SIG_IGN);
+ pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
+ pqsignal(SIGUSR2, SIG_IGN); /* not used */
+
+ /*
+ * Reset some signals that are accepted by postmaster but not here
+ */
+ pqsignal(SIGCHLD, SIG_DFL);
+ pqsignal(SIGTTIN, SIG_DFL);
+ pqsignal(SIGTTOU, SIG_DFL);
+ pqsignal(SIGCONT, SIG_DFL);
+ pqsignal(SIGWINCH, SIG_DFL);
+
+ /* We allow SIGQUIT (quickdie) at all times */
+#ifdef HAVE_SIGPROCMASK
+ sigdelset(&BlockSig, SIGQUIT);
+#else
+ BlockSig &= ~(sigmask(SIGQUIT));
+#endif
+
+ /*
+ * Create a resource owner to keep track of our resources (not clear
+ * that we need this, but may as well have one).
+ */
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");
+
+ /*
+ * Create a memory context that we will do all our work in. We do this so
+ * that we can reset the context during error recovery and thereby avoid
+ * possible memory leaks. (Working directly in TopMemoryContext is not an
+ * option, because resetting that would be a really bad idea.)
+ */
+ walwriter_context = AllocSetContextCreate(TopMemoryContext,
+ "Wal Writer",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ MemoryContextSwitchTo(walwriter_context);
+
+ /*
+ * If an exception is encountered, processing resumes here.
+ *
+ * This code is heavily based on bgwriter.c, q.v.
+ */
+ if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+ {
+ /* Since not using PG_TRY, must reset error stack by hand */
+ error_context_stack = NULL;
+
+ /* Prevent interrupts while cleaning up */
+ HOLD_INTERRUPTS();
+
+ /* Report the error to the server log */
+ EmitErrorReport();
+
+ /*
+ * These operations are really just a minimal subset of
+ * AbortTransaction(). We don't have very many resources to worry
+ * about in walwriter, but we do have LWLocks, and perhaps buffers?
+ */
+ LWLockReleaseAll();
+ AbortBufferIO();
+ UnlockBuffers();
+ /* buffer pins are released here: */
+ ResourceOwnerRelease(CurrentResourceOwner,
+ RESOURCE_RELEASE_BEFORE_LOCKS,
+ false, true);
+ /* we needn't bother with the other ResourceOwnerRelease phases */
+ AtEOXact_Buffers(false);
+
+ /*
+ * Now return to normal top-level context and clear ErrorContext for
+ * next time.
+ */
+ MemoryContextSwitchTo(walwriter_context);
+ FlushErrorState();
+
+ /* Flush any leaked data in the top-level context */
+ MemoryContextResetAndDeleteChildren(walwriter_context);
+
+ /* Now we can allow interrupts again */
+ RESUME_INTERRUPTS();
+
+ /*
+ * Sleep at least 1 second after any error. A write error is likely
+ * to be repeated, and we don't want to be filling the error logs as
+ * fast as we can.
+ */
+ pg_usleep(1000000L);
+
+ /*
+ * Close all open files after any error. This is helpful on Windows,
+ * where holding deleted files open causes various strange errors.
+ * It's not clear we need it elsewhere, but shouldn't hurt.
+ */
+ smgrcloseall();
+ }
+
+ /* We can now handle ereport(ERROR) */
+ PG_exception_stack = &local_sigjmp_buf;
+
+ /*
+ * Unblock signals (they were blocked when the postmaster forked us)
+ */
+ PG_SETMASK(&UnBlockSig);
+
+ /*
+ * Loop forever
+ */
+ for (;;)
+ {
+ long udelay;
+
+ /*
+ * Emergency bailout if postmaster has died. This is to avoid the
+ * necessity for manual cleanup of all postmaster children.
+ */
+ if (!PostmasterIsAlive(true))
+ exit(1);
+
+ /*
+ * Process any requests or signals received recently.
+ */
+ if (got_SIGHUP)
+ {
+ got_SIGHUP = false;
+ ProcessConfigFile(PGC_SIGHUP);
+ }
+ if (shutdown_requested)
+ {
+ /* Normal exit from the walwriter is here */
+ proc_exit(0); /* done */
+ }
+
+ /*
+ * Do what we're here for...
+ */
+ XLogBackgroundFlush();
+
+ /*
+ * Delay until time to do something more, but fall out of delay
+ * reasonably quickly if signaled. WalWriterDelay is in milliseconds;
+ * we convert it to microseconds and sleep in slices of at most one
+ * second, checking the signal flags before each slice.
+ */
+ udelay = WalWriterDelay * 1000L;
+ while (udelay > 999999L)
+ {
+ if (got_SIGHUP || shutdown_requested)
+ break;
+ pg_usleep(1000000L);
+ udelay -= 1000000L;
+ }
+ if (!(got_SIGHUP || shutdown_requested))
+ pg_usleep(udelay);
+ }
+}
+
+
+/* --------------------------------
+ * signal handler routines
+ * --------------------------------
+ */
+
+/*
+ * wal_quickdie() occurs when signalled SIGQUIT by the postmaster.
+ *
+ * Some backend has bought the farm,
+ * so we need to stop what we're doing and exit.
+ *
+ * This runs as a signal handler and does not return: it blocks further
+ * signals and terminates the process immediately with exit status 2.
+ */
+static void
+wal_quickdie(SIGNAL_ARGS)
+{
+ PG_SETMASK(&BlockSig);
+
+ /*
+ * DO NOT proc_exit() -- we're here because shared memory may be
+ * corrupted, so we don't want to try to clean up our transaction. Just
+ * nail the windows shut and get out of town.
+ *
+ * We call _exit() rather than exit() because we are inside a signal
+ * handler: exit() is not async-signal-safe, since it runs atexit
+ * callbacks and flushes stdio buffers, either of which may have been
+ * interrupted in mid-operation by this very signal.
+ *
+ * Note we use exit status 2 not 0. This is to force the postmaster into
+ * a system reset cycle if someone sends a manual SIGQUIT to a random
+ * backend. This is necessary precisely because we don't clean up our
+ * shared memory state.
+ */
+ _exit(2);
+}
+
+/*
+ * SIGHUP: set flag to re-read config file at next convenient time.
+ * The main loop in WalWriterMain notices the flag and calls
+ * ProcessConfigFile().
+ */
+static void
+WalSigHupHandler(SIGNAL_ARGS)
+{
+ got_SIGHUP = true;
+}
+
+/*
+ * SIGTERM or SIGINT: set flag to exit normally.
+ * The main loop in WalWriterMain notices the flag and calls proc_exit(0).
+ */
+static void
+WalShutdownHandler(SIGNAL_ARGS)
+{
+ shutdown_requested = true;
+}
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.406 2007/07/24 01:53:56 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.407 2007/07/24 04:54:09 tgl Exp $
*
*--------------------------------------------------------------------
*/
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
+#include "postmaster/walwriter.h"
#include "storage/fd.h"
#include "storage/freespace.h"
#include "tcop/tcopprot.h"
8, 4, INT_MAX, NULL, NULL
},
+ {
+ {"wal_writer_delay", PGC_SIGHUP, WAL_SETTINGS,
+ gettext_noop("WAL writer sleep time between WAL flushes."),
+ NULL,
+ GUC_UNIT_MS
+ },
+ &WalWriterDelay,
+ 200, 1, 10000, NULL, NULL
+ },
+
{
{"commit_delay", PGC_USERSET, WAL_CHECKPOINTS,
gettext_noop("Sets the delay in microseconds between transaction commit and "
#full_page_writes = on # recover from partial page writes
#wal_buffers = 64kB # min 32kB
# (change requires restart)
+#wal_writer_delay = 200ms # range 1-10000, in milliseconds
+
#commit_delay = 0 # range 0-100000, in microseconds
#commit_siblings = 5 # range 1-1000
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.80 2007/06/30 19:12:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.81 2007/07/24 04:54:09 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern void XLogFlush(XLogRecPtr RecPtr);
+extern void XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/bootstrap/bootstrap.h,v 1.46 2007/03/07 13:35:03 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/bootstrap/bootstrap.h,v 1.47 2007/07/24 04:54:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
CheckerProcess,
BootstrapProcess,
StartupProcess,
- BgWriterProcess
+ BgWriterProcess,
+ WalWriterProcess
} AuxProcType;
#endif /* BOOTSTRAP_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * walwriter.h
+ * Exports from postmaster/walwriter.c.
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ * $PostgreSQL: pgsql/src/include/postmaster/walwriter.h,v 1.1 2007/07/24 04:54:09 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _WALWRITER_H
+#define _WALWRITER_H
+
+/* GUC options: wal_writer_delay, the walwriter's sleep time in milliseconds */
+extern int WalWriterDelay;
+
+extern void WalWriterMain(void);
+
+#endif /* _WALWRITER_H */