#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
+#include "postmaster/bgworker.h"
#include "postmaster/fork_process.h"
#include "postmaster/pgarch.h"
#include "postmaster/postmaster.h"
#endif
+/*
+ * Possible types of a backend. Beyond being the possible bkend_type values in
+ * struct bkend, these are OR-able request flag bits for SignalSomeChildren()
+ * and CountChildren().
+ *
+ * Each type occupies its own bit, so BACKEND_TYPE_ALL must be kept equal to
+ * the OR of every individual type when a new one is added.
+ */
+#define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
+#define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
+#define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
+#define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
+#define BACKEND_TYPE_ALL 0x000F /* OR of all the above */
+
+/* Convenience mask: any worker-type child (autovacuum worker or bgworker) */
+#define BACKEND_TYPE_WORKER (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
+
/*
* List of active backends (or child processes anyway; we don't actually
* know whether a given child has become a backend or is still in the
* children we have and send them appropriate signals when necessary.
*
* "Special" children such as the startup, bgwriter and autovacuum launcher
- * tasks are not in this list. Autovacuum worker and walsender processes are
- * in it. Also, "dead_end" children are in it: these are children launched just
- * for the purpose of sending a friendly rejection message to a would-be
- * client. We must track them because they are attached to shared memory,
- * but we know they will never become live backends. dead_end children are
- * not assigned a PMChildSlot.
+ * tasks are not in this list. Autovacuum worker and walsender processes are in it.
+ * Also, "dead_end" children are in it: these are children launched just for
+ * the purpose of sending a friendly rejection message to a would-be client.
+ * We must track them because they are attached to shared memory, but we know
+ * they will never become live backends. dead_end children are not assigned a
+ * PMChildSlot.
+ *
+ * Background workers that request shared memory access during registration are
+ * in this list, too.
*/
typedef struct bkend
{
pid_t pid; /* process id of backend */
long cancel_key; /* cancel key for cancels for this backend */
int child_slot; /* PMChildSlot for this backend, if any */
- bool is_autovacuum; /* is it an autovacuum process? */
+
+ /*
+ * Flavor of backend or auxiliary process; holds one of the
+ * BACKEND_TYPE_* values. Note that BACKEND_TYPE_WALSND backends
+ * initially announce themselves as BACKEND_TYPE_NORMAL, so if
+ * bkend_type is normal, you should check for a recent transition
+ * (SignalSomeChildren and CountChildren do so by calling
+ * IsPostmasterChildWalSender on child_slot).
+ */
+ int bkend_type;
bool dead_end; /* is it going to send an error and quit? */
dlist_node elem; /* list link in BackendList */
} Backend;
static Backend *ShmemBackendArray;
#endif
+
+/*
+ * List of background workers.
+ *
+ * One RegisteredBgWorker is allocated per successful call to
+ * RegisterBackgroundWorker and linked into BackgroundWorkerList.
+ *
+ * A worker that requests a database connection during registration will have
+ * rw_backend set, and will be present in BackendList. Note: do not rely on
+ * rw_backend being non-NULL for shmem-connected workers!
+ */
+typedef struct RegisteredBgWorker
+{
+ BackgroundWorker rw_worker; /* its registry entry */
+ Backend *rw_backend; /* its BackendList entry, or NULL */
+ pid_t rw_pid; /* 0 if not running */
+ /* slot handed to ReleasePostmasterChildSlot at exit; reset to 0 afterwards */
+ int rw_child_slot;
+ TimestampTz rw_crashed_at; /* if not 0, time it last crashed */
+#ifdef EXEC_BACKEND
+ /* unique id (starting at 1) passed to the child as --forkbgworker=N */
+ int rw_cookie;
+#endif
+ slist_node rw_lnode; /* list link */
+} RegisteredBgWorker;
+
+static slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);
+
+BackgroundWorker *MyBgworkerEntry = NULL;
+
+
+
/* The socket number we are listening for connections on */
int PostPortNumber;
/* The directory names for Unix socket(s) */
/* the launcher needs to be signalled to communicate some condition */
static volatile bool avlauncher_needs_signal = false;
+/* set when there's a worker that needs to be started up */
+static volatile bool StartWorkerNeeded = true;
+static volatile bool HaveCrashedWorker = false;
+
/*
* State for assigning random salts and cancel keys.
* Also, the global MyCancelKey passes the cancel key assigned to a given
static void sigusr1_handler(SIGNAL_ARGS);
static void startup_die(SIGNAL_ARGS);
static void dummy_handler(SIGNAL_ARGS);
+static int GetNumRegisteredBackgroundWorkers(int flags);
static void StartupPacketTimeoutHandler(void);
static void CleanupBackend(int pid, int exitstatus);
+static bool CleanupBackgroundWorker(int pid, int exitstatus);
+static void do_start_bgworker(void);
static void HandleChildCrash(int pid, int exitstatus, const char *procname);
static void LogChildExit(int lev, const char *procname,
int pid, int exitstatus);
static void RandomSalt(char *md5Salt);
static void signal_child(pid_t pid, int signal);
static bool SignalSomeChildren(int signal, int targets);
+static bool SignalUnconnectedWorkers(int signal);
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
-/*
- * Possible types of a backend. These are OR-able request flag bits
- * for SignalSomeChildren() and CountChildren().
- */
-#define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
-#define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
-#define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
-#define BACKEND_TYPE_ALL 0x0007 /* OR of all the above */
-
static int CountChildren(int target);
+static int CountUnconnectedWorkers(void);
+static void StartOneBackgroundWorker(void);
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
static pid_t StartChildProcess(AuxProcType type);
static void StartAutovacuumWorker(void);
static void ShmemBackendArrayAdd(Backend *bn);
static void ShmemBackendArrayRemove(Backend *bn);
+
+static BackgroundWorker *find_bgworker_entry(int cookie);
#endif /* EXEC_BACKEND */
#define StartupDataBase() StartChildProcess(StartupProcess)
*/
process_shared_preload_libraries();
+ /*
+ * If loadable modules have added background workers, MaxBackends needs to
+ * be updated. Do so now by forcing a no-op update of max_connections.
+ * XXX This is a pretty ugly way to do it, but it doesn't seem worth
+ * introducing a new entry point in guc.c to do it in a cleaner fashion.
+ */
+ if (GetNumShmemAttachedBgworkers() > 0)
+ SetConfigOption("max_connections",
+ GetConfigOption("max_connections", false, false),
+ PGC_POSTMASTER, PGC_S_OVERRIDE);
+
/*
* Establish input sockets.
*/
* handling setup of child processes. See tcop/postgres.c,
* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
* postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
- * postmaster/syslogger.c and postmaster/checkpointer.c.
+ * postmaster/syslogger.c, postmaster/bgworker.c and
+ * postmaster/checkpointer.c.
*/
pqinitmask();
PG_SETMASK(&BlockSig);
Assert(StartupPID != 0);
pmState = PM_STARTUP;
+ /* Some workers may be scheduled to start now */
+ StartOneBackgroundWorker();
+
status = ServerLoop();
/*
FreeFile(fp);
}
+/*
+ * Determine how long we should let ServerLoop sleep.
+ *
+ * In normal conditions we wait at most one minute, to ensure that the other
+ * background tasks handled by ServerLoop get done even when no requests are
+ * arriving. However, if there are background workers waiting to be started,
+ * we don't actually sleep so that they are quickly serviced. If the only
+ * pending work is restarting crashed workers, we sleep until the earliest
+ * requested restart time, still capped at one minute.
+ *
+ * The computed interval is stored in *timeout; there is no return value.
+ */
+static void
+DetermineSleepTime(struct timeval *timeout)
+{
+    TimestampTz next_wakeup = 0;
+
+    /*
+     * Normal case: either there are no background workers at all, or we're in
+     * a shutdown sequence (during which we ignore bgworkers altogether).
+     */
+    if (Shutdown > NoShutdown ||
+        (!StartWorkerNeeded && !HaveCrashedWorker))
+    {
+        timeout->tv_sec = 60;
+        timeout->tv_usec = 0;
+        return;
+    }
+
+    /* A worker is waiting to be started: don't sleep at all. */
+    if (StartWorkerNeeded)
+    {
+        timeout->tv_sec = 0;
+        timeout->tv_usec = 0;
+        return;
+    }
+
+    if (HaveCrashedWorker)
+    {
+        slist_iter siter;
+
+        /*
+         * When there are crashed bgworkers, we sleep just long enough that
+         * they are restarted when they request to be. Scan the list to
+         * determine the minimum of all wakeup times according to most recent
+         * crash time and requested restart interval.
+         */
+        slist_foreach(siter, &BackgroundWorkerList)
+        {
+            RegisteredBgWorker *rw;
+            TimestampTz this_wakeup;
+
+            rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+
+            /* not crashed, or crashed but never to be restarted */
+            if (rw->rw_crashed_at == 0)
+                continue;
+
+            if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
+                continue;
+
+            /* bgw_restart_time is in seconds; convert to milliseconds */
+            this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
+                                                      1000L * rw->rw_worker.bgw_restart_time);
+            if (next_wakeup == 0 || this_wakeup < next_wakeup)
+                next_wakeup = this_wakeup;
+        }
+    }
+
+    if (next_wakeup != 0)
+    {
+        /*
+         * TimestampDifference wants a "long *" and an "int *". Use locals of
+         * exactly those types rather than pointing it at the timeval fields,
+         * whose types (time_t, suseconds_t) are not guaranteed to match.
+         */
+        long wait_secs;
+        int wait_usecs;
+
+        TimestampDifference(GetCurrentTimestamp(), next_wakeup,
+                            &wait_secs, &wait_usecs);
+        timeout->tv_sec = wait_secs;
+        timeout->tv_usec = wait_usecs;
+
+        /* Ensure we don't exceed one minute */
+        if (timeout->tv_sec > 60)
+        {
+            timeout->tv_sec = 60;
+            timeout->tv_usec = 0;
+        }
+    }
+    else
+    {
+        /* no restartable crashed worker found: fall back to the default */
+        timeout->tv_sec = 60;
+        timeout->tv_usec = 0;
+    }
+}
+
+
/*
* Main idle loop of postmaster
*/
/*
* Wait for a connection request to arrive.
*
- * We wait at most one minute, to ensure that the other background
- * tasks handled below get done even when no requests are arriving.
- *
* If we are in PM_WAIT_DEAD_END state, then we don't want to accept
* any new connections, so we don't call select() at all; just sleep
* for a little bit with signals unblocked.
/* must set timeout each time; some OSes change it! */
struct timeval timeout;
- timeout.tv_sec = 60;
- timeout.tv_usec = 0;
+ DetermineSleepTime(&timeout);
selres = select(nSockets, &rmask, NULL, NULL, &timeout);
}
kill(AutoVacPID, SIGUSR2);
}
+ /* Get other worker processes running, if needed */
+ if (StartWorkerNeeded || HaveCrashedWorker)
+ StartOneBackgroundWorker();
+
/*
* Touch Unix socket and lock files every 58 minutes, to ensure that
* they are not removed by overzealous /tmp-cleaning tasks. We assume
}
}
-
/*
* Initialise the masks for select() for the ports we are listening on.
* Return the number of sockets to listen on.
Backend *bp;
#ifndef EXEC_BACKEND
- dlist_iter iter;
+ dlist_iter iter;
#else
int i;
#endif
if (pmState == PM_RUN || pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
{
- /* autovacuum workers are told to shut down immediately */
- SignalSomeChildren(SIGTERM, BACKEND_TYPE_AUTOVAC);
+ /* autovac workers are told to shut down immediately */
+ /* and bgworkers too; does this need tweaking? */
+ SignalSomeChildren(SIGTERM,
+ BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER);
+ SignalUnconnectedWorkers(SIGTERM);
/* and the autovac launcher too */
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
signal_child(BgWriterPID, SIGTERM);
if (WalReceiverPID != 0)
signal_child(WalReceiverPID, SIGTERM);
+ SignalUnconnectedWorkers(SIGTERM);
if (pmState == PM_RECOVERY)
{
/*
- * Only startup, bgwriter, walreceiver, and/or checkpointer
- * should be active in this state; we just signaled the first
- * three, and we don't want to kill checkpointer yet.
+ * Only startup, bgwriter, walreceiver, unconnected bgworkers,
+ * and/or checkpointer should be active in this state; we just
+ * signaled the first four, and we don't want to kill
+ * checkpointer yet.
*/
pmState = PM_WAIT_BACKENDS;
}
{
ereport(LOG,
(errmsg("aborting any active transactions")));
- /* shut down all backends and autovac workers */
+ /* shut down all backends and workers */
SignalSomeChildren(SIGTERM,
- BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC);
+ BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC |
+ BACKEND_TYPE_BGWORKER);
/* and the autovac launcher too */
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
signal_child(PgArchPID, SIGQUIT);
if (PgStatPID != 0)
signal_child(PgStatPID, SIGQUIT);
+ SignalUnconnectedWorkers(SIGQUIT);
ExitPostmaster(0);
break;
}
if (PgStatPID == 0)
PgStatPID = pgstat_start();
+ /* some workers may be scheduled to start now */
+ StartOneBackgroundWorker();
+
/* at this point we are really open for business */
ereport(LOG,
(errmsg("database system is ready to accept connections")));
continue;
}
+ /* Was it one of our background workers? */
+ if (CleanupBackgroundWorker(pid, exitstatus))
+ {
+ /* have it be restarted */
+ HaveCrashedWorker = true;
+ continue;
+ }
+
/*
* Else do standard backend child cleanup.
*/
errno = save_errno;
}
+/*
+ * Scan the bgworkers list and see if the given PID (which has just stopped
+ * or crashed) is in it. Handle its shutdown if so, and return true. If not a
+ * bgworker, return false.
+ *
+ * This is heavily based on CleanupBackend. One important difference is that
+ * we don't know yet that the dying process is a bgworker, so we must be silent
+ * until we're sure it is.
+ *
+ * For a matching entry, rw_crashed_at is set to the current time when the
+ * exit status is nonzero and cleared when it is zero. HandleChildCrash is
+ * invoked (and true is still returned) when a worker registered with
+ * BGWORKER_SHMEM_ACCESS exits with a status other than 0 or 1, or when
+ * ReleasePostmasterChildSlot reports failure. Otherwise the entry's
+ * BackendList link, pid and child slot are cleared and the exit is logged.
+ */
+static bool
+CleanupBackgroundWorker(int pid,
+ int exitstatus) /* child's exit status */
+{
+ char namebuf[MAXPGPATH];
+ slist_iter iter;
+
+ slist_foreach(iter, &BackgroundWorkerList)
+ {
+ RegisteredBgWorker *rw;
+
+ rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
+
+ /* not the process we are looking for */
+ if (rw->rw_pid != pid)
+ continue;
+
+#ifdef WIN32
+ /* see CleanupBackend */
+ if (exitstatus == ERROR_WAIT_NO_CHILDREN)
+ exitstatus = 0;
+#endif
+
+ /* Build the process description used in log and crash messages */
+ snprintf(namebuf, MAXPGPATH, "%s: %s", _("worker process"),
+ rw->rw_worker.bgw_name);
+
+ /* Delay restarting any bgworker that exits with a nonzero status. */
+ if (!EXIT_STATUS_0(exitstatus))
+ rw->rw_crashed_at = GetCurrentTimestamp();
+ else
+ rw->rw_crashed_at = 0;
+
+ /*
+ * Additionally, for shared-memory-connected workers, just like a
+ * backend, any exit status other than 0 or 1 is considered a crash
+ * and causes a system-wide restart.
+ */
+ if (rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS)
+ {
+ if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
+ {
+ rw->rw_crashed_at = GetCurrentTimestamp();
+ HandleChildCrash(pid, exitstatus, namebuf);
+ return true;
+ }
+ }
+
+ if (!ReleasePostmasterChildSlot(rw->rw_child_slot))
+ {
+ /*
+ * Uh-oh, the child failed to clean itself up. Treat as a crash
+ * after all.
+ */
+ rw->rw_crashed_at = GetCurrentTimestamp();
+ HandleChildCrash(pid, exitstatus, namebuf);
+ return true;
+ }
+
+ /* Get it out of the BackendList and clear out remaining data */
+ if (rw->rw_backend)
+ {
+ Assert(rw->rw_worker.bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION);
+ dlist_delete(&rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+ ShmemBackendArrayRemove(rw->rw_backend);
+#endif
+ free(rw->rw_backend);
+ rw->rw_backend = NULL;
+ }
+ /* mark the registry entry as "not running" */
+ rw->rw_pid = 0;
+ rw->rw_child_slot = 0;
+
+ /* Now that we know it was a bgworker, it is safe to log the exit */
+ LogChildExit(LOG, namebuf, pid, exitstatus);
+
+ return true;
+ }
+
+ /* pid does not belong to any registered bgworker */
+ return false;
+}
/*
* CleanupBackend -- cleanup after terminated backend.
*
* Remove all local state associated with backend.
+ *
+ * If you change this, see also CleanupBackgroundWorker.
*/
static void
CleanupBackend(int pid,
/*
* HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
- * walwriter or autovacuum.
+ * walwriter, autovacuum, or background worker.
*
* The objectives here are to clean up our local state about the child
* process, and to signal all other remaining children to quickdie.
HandleChildCrash(int pid, int exitstatus, const char *procname)
{
dlist_mutable_iter iter;
+ slist_iter siter;
Backend *bp;
/*
(errmsg("terminating any other active server processes")));
}
+ /* Process background workers. */
+ slist_foreach(siter, &BackgroundWorkerList)
+ {
+ RegisteredBgWorker *rw;
+
+ rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
+ if (rw->rw_pid == 0)
+ continue; /* not running */
+ if (rw->rw_pid == pid)
+ {
+ /*
+ * Found entry for freshly-dead worker, so remove it.
+ */
+ (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
+ if (rw->rw_backend)
+ {
+ dlist_delete(&rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+ ShmemBackendArrayRemove(rw->rw_backend);
+#endif
+ free(rw->rw_backend);
+ rw->rw_backend = NULL;
+ }
+ rw->rw_pid = 0;
+ rw->rw_child_slot = 0;
+ /* don't reset crashed_at */
+ /* Keep looping so we can signal remaining workers */
+ }
+ else
+ {
+ /*
+ * This worker is still alive. Unless we did so already, tell it
+ * to commit hara-kiri.
+ *
+ * SIGQUIT is the special signal that says exit without proc_exit
+ * and let the user know what's going on. But if SendStop is set
+ * (-s on command line), then we send SIGSTOP instead, so that we
+ * can get core dumps from all backends by hand.
+ */
+ if (!FatalError)
+ {
+ ereport(DEBUG2,
+ (errmsg_internal("sending %s to process %d",
+ (SendStop ? "SIGSTOP" : "SIGQUIT"),
+ (int) rw->rw_pid)));
+ signal_child(rw->rw_pid, (SendStop ? SIGSTOP : SIGQUIT));
+ }
+ }
+ }
+
/* Process regular backends */
dlist_foreach_modify(iter, &BackendList)
{
*
* We could exclude dead_end children here, but at least in the
* SIGSTOP case it seems better to include them.
+ *
+ * Background workers were already processed above; ignore them
+ * here.
*/
+ if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
+ continue;
+
if (!FatalError)
{
ereport(DEBUG2,
{
/*
* PM_WAIT_BACKENDS state ends when we have no regular backends
- * (including autovac workers) and no walwriter, autovac launcher or
- * bgwriter. If we are doing crash recovery then we expect the
- * checkpointer to exit as well, otherwise not. The archiver, stats,
- * and syslogger processes are disregarded since they are not
- * connected to shared memory; we also disregard dead_end children
- * here. Walsenders are also disregarded, they will be terminated
- * later after writing the checkpoint record, like the archiver
- * process.
+ * (including autovac workers), no bgworkers (including unconnected
+ * ones), and no walwriter, autovac launcher or bgwriter. If we are
+ * doing crash recovery then we expect the checkpointer to exit as
+ * well, otherwise not. The archiver, stats, and syslogger processes
+ * are disregarded since they are not connected to shared memory; we
+ * also disregard dead_end children here. Walsenders are also
+ * disregarded, they will be terminated later after writing the
+ * checkpoint record, like the archiver process.
*/
- if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 &&
+ if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_WORKER) == 0 &&
+ CountUnconnectedWorkers() == 0 &&
StartupPID == 0 &&
WalReceiverPID == 0 &&
BgWriterPID == 0 &&
#endif
}
+/*
+ * Deliver a signal to every running bgworker that has no BackendList entry.
+ *
+ * Workers that do have a BackendList entry (rw_backend != NULL) are reached
+ * through SignalChildren/SignalSomeChildren instead, so this routine is the
+ * complement of those for the remaining workers.
+ *
+ * Returns true if at least one process was signaled.
+ */
+static bool
+SignalUnconnectedWorkers(int signal)
+{
+    bool any_signaled = false;
+    slist_iter cursor;
+
+    slist_foreach(cursor, &BackgroundWorkerList)
+    {
+        RegisteredBgWorker *entry =
+            slist_container(RegisteredBgWorker, rw_lnode, cursor.cur);
+
+        /* act only on workers that are running and not in BackendList */
+        if (entry->rw_pid != 0 && entry->rw_backend == NULL)
+        {
+            ereport(DEBUG4,
+                    (errmsg_internal("sending signal %d to process %d",
+                                     signal, (int) entry->rw_pid)));
+            signal_child(entry->rw_pid, signal);
+            any_signaled = true;
+        }
+    }
+
+    return any_signaled;
+}
+
+
/*
* Send a signal to the targeted children (but NOT special children;
* dead_end children are never signaled, either).
static bool
SignalSomeChildren(int signal, int target)
{
- dlist_iter iter;
+ dlist_iter iter;
bool signaled = false;
dlist_foreach(iter, &BackendList)
*/
if (target != BACKEND_TYPE_ALL)
{
- int child;
+ /*
+ * Assign bkend_type for any recently announced WAL Sender
+ * processes.
+ */
+ if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
+ IsPostmasterChildWalSender(bp->child_slot))
+ bp->bkend_type = BACKEND_TYPE_WALSND;
- if (bp->is_autovacuum)
- child = BACKEND_TYPE_AUTOVAC;
- else if (IsPostmasterChildWalSender(bp->child_slot))
- child = BACKEND_TYPE_WALSND;
- else
- child = BACKEND_TYPE_NORMAL;
- if (!(target & child))
+ if (!(target & bp->bkend_type))
continue;
}
* of backends.
*/
bn->pid = pid;
- bn->is_autovacuum = false;
+ bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
dlist_push_head(&BackendList, &bn->elem);
#ifdef EXEC_BACKEND
fp = AllocateFile(tmpfilename, PG_BINARY_W);
if (!fp)
{
- /* As in OpenTemporaryFile, try to make the temp-file directory */
+ /*
+ * As in OpenTemporaryFileInTablespace, try to make the temp-file
+ * directory
+ */
mkdir(PG_TEMP_FILES_DIR, S_IRWXU);
fp = AllocateFile(tmpfilename, PG_BINARY_W);
if (strcmp(argv[1], "--forkbackend") == 0 ||
strcmp(argv[1], "--forkavlauncher") == 0 ||
strcmp(argv[1], "--forkavworker") == 0 ||
- strcmp(argv[1], "--forkboot") == 0)
+ strcmp(argv[1], "--forkboot") == 0 ||
+ strncmp(argv[1], "--forkbgworker=", 15) == 0)
PGSharedMemoryReAttach();
/* autovacuum needs this set before calling InitProcess */
AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
}
+ if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
+ {
+ int cookie;
+
+ /* Close the postmaster's sockets */
+ ClosePostmasterPorts(false);
+
+ /* Restore basic shared memory pointers */
+ InitShmemAccess(UsedShmemSegAddr);
+
+ /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
+ InitProcess();
+
+ /* Attach process to shared data structures */
+ CreateSharedMemoryAndSemaphores(false, 0);
+
+ cookie = atoi(argv[1] + 15);
+ MyBgworkerEntry = find_bgworker_entry(cookie);
+ do_start_bgworker();
+ }
if (strcmp(argv[1], "--forkarch") == 0)
{
/* Close the postmaster's sockets */
(errmsg("database system is ready to accept read only connections")));
pmState = PM_HOT_STANDBY;
+
+ /* Some workers may be scheduled to start now */
+ StartOneBackgroundWorker();
}
if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
return random();
}
+/*
+ * Count the running worker processes that have no BackendList entry.
+ *
+ * Workers with a BackendList entry are counted by CountChildren; see
+ * SignalUnconnectedWorkers for why the distinction is interesting.
+ */
+static int
+CountUnconnectedWorkers(void)
+{
+    int nworkers = 0;
+    slist_iter cursor;
+
+    slist_foreach(cursor, &BackgroundWorkerList)
+    {
+        RegisteredBgWorker *entry =
+            slist_container(RegisteredBgWorker, rw_lnode, cursor.cur);
+
+        /* count only workers that are running and not in BackendList */
+        if (entry->rw_pid != 0 && entry->rw_backend == NULL)
+            nworkers++;
+    }
+
+    return nworkers;
+}
+
+
/*
* Count up number of child processes of specified types (dead_end chidren
* are always excluded).
static int
CountChildren(int target)
{
- dlist_iter iter;
+ dlist_iter iter;
int cnt = 0;
dlist_foreach(iter, &BackendList)
*/
if (target != BACKEND_TYPE_ALL)
{
- int child;
+ /*
+ * Assign bkend_type for any recently announced WAL Sender
+ * processes.
+ */
+ if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
+ IsPostmasterChildWalSender(bp->child_slot))
+ bp->bkend_type = BACKEND_TYPE_WALSND;
- if (bp->is_autovacuum)
- child = BACKEND_TYPE_AUTOVAC;
- else if (IsPostmasterChildWalSender(bp->child_slot))
- child = BACKEND_TYPE_WALSND;
- else
- child = BACKEND_TYPE_NORMAL;
- if (!(target & child))
+ if (!(target & bp->bkend_type))
continue;
}
bn->pid = StartAutoVacWorker();
if (bn->pid > 0)
{
- bn->is_autovacuum = true;
+ bn->bkend_type = BACKEND_TYPE_AUTOVAC;
dlist_push_head(&BackendList, &bn->elem);
#ifdef EXEC_BACKEND
ShmemBackendArrayAdd(bn);
*
* This reports the number of entries needed in per-child-process arrays
* (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
- * These arrays include regular backends, autovac workers and walsenders,
- * but not special children nor dead_end children. This allows the arrays
- * to have a fixed maximum size, to wit the same too-many-children limit
- * enforced by canAcceptConnections(). The exact value isn't too critical
- * as long as it's more than MaxBackends.
+ * These arrays include regular backends, autovac workers, walsenders
+ * and background workers, but not special children nor dead_end children.
+ * This allows the arrays to have a fixed maximum size, to wit the same
+ * too-many-children limit enforced by canAcceptConnections(). The exact value
+ * isn't too critical as long as it's more than MaxBackends.
*/
int
MaxLivePostmasterChildren(void)
{
- return 2 * MaxBackends;
+ return 2 * (MaxConnections + autovacuum_max_workers + 1 +
+ GetNumRegisteredBackgroundWorkers(0));
}
+/*
+ * Register a new background worker.
+ *
+ * This can only be called in the _PG_init function of a module library
+ * that's loaded by shared_preload_libraries; otherwise it has no effect.
+ *
+ * "worker" is copied (including its name string) into a newly malloc'd
+ * RegisteredBgWorker, which is pushed onto BackgroundWorkerList; the caller's
+ * struct is not retained.
+ *
+ * There is no return value. A request that fails validation, or for which
+ * memory cannot be allocated, is reported at LOG level and dropped. Except
+ * for the out-of-memory report, messages are emitted only when
+ * !IsUnderPostmaster.
+ */
+void
+RegisterBackgroundWorker(BackgroundWorker *worker)
+{
+ RegisteredBgWorker *rw;
+ int namelen = strlen(worker->bgw_name);
+
+#ifdef EXEC_BACKEND
+
+ /*
+ * Use 1 here, not 0, to avoid confusing a possible bogus cookie read by
+ * atoi() in SubPostmasterMain.
+ */
+ static int BackgroundWorkerCookie = 1;
+#endif
+
+ /* Note: this is logged before any of the validity checks below */
+ if (!IsUnderPostmaster)
+ ereport(LOG,
+ (errmsg("registering background worker: %s", worker->bgw_name)));
+
+ /* Registration is accepted only while preload libraries are being loaded */
+ if (!process_shared_preload_libraries_in_progress)
+ {
+ if (!IsUnderPostmaster)
+ ereport(LOG,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
+ worker->bgw_name)));
+ return;
+ }
+
+ /* sanity check for flags */
+ if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
+ {
+ /* a database connection requires shared memory access */
+ if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
+ {
+ if (!IsUnderPostmaster)
+ ereport(LOG,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection",
+ worker->bgw_name)));
+ return;
+ }
+
+ /* ... and is incompatible with starting at postmaster start */
+ if (worker->bgw_start_time == BgWorkerStart_PostmasterStart)
+ {
+ if (!IsUnderPostmaster)
+ ereport(LOG,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
+ worker->bgw_name)));
+ return;
+ }
+
+ /* XXX other checks? */
+ }
+
+ /*
+ * The restart interval must be BGW_NEVER_RESTART or a non-negative value
+ * no larger than USECS_PER_DAY / 1000.
+ */
+ if ((worker->bgw_restart_time < 0 &&
+ worker->bgw_restart_time != BGW_NEVER_RESTART) ||
+ (worker->bgw_restart_time > USECS_PER_DAY / 1000))
+ {
+ if (!IsUnderPostmaster)
+ ereport(LOG,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("background worker \"%s\": invalid restart interval",
+ worker->bgw_name)));
+ return;
+ }
+
+ /*
+ * Copy the registration data into the registered workers list.
+ *
+ * The struct and the name string share one allocation: the name is stored
+ * immediately after the RegisteredBgWorker.
+ */
+ rw = malloc(sizeof(RegisteredBgWorker) + namelen + 1);
+ if (rw == NULL)
+ {
+ ereport(LOG,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+ return;
+ }
+
+ rw->rw_worker = *worker;
+ /* repoint bgw_name at our private copy rather than the caller's string */
+ rw->rw_worker.bgw_name = ((char *) rw) + sizeof(RegisteredBgWorker);
+ strlcpy(rw->rw_worker.bgw_name, worker->bgw_name, namelen + 1);
+
+ /* the worker starts out not running and never crashed */
+ rw->rw_backend = NULL;
+ rw->rw_pid = 0;
+ rw->rw_child_slot = 0;
+ rw->rw_crashed_at = 0;
+#ifdef EXEC_BACKEND
+ rw->rw_cookie = BackgroundWorkerCookie++;
+#endif
+
+ slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
+}
+
+
+/*
+ * Connect the calling background worker to a database.
+ *
+ * The worker (MyBgworkerEntry) must have been registered with
+ * BGWORKER_BACKEND_DATABASE_CONNECTION; otherwise a FATAL error is raised.
+ * dbname and username are handed to InitPostgres, after which the process
+ * is switched to normal processing mode.
+ */
+void
+BackgroundWorkerInitializeConnection(char *dbname, char *username)
+{
+    /* XXX is this the right errcode? */
+    if ((MyBgworkerEntry->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION) == 0)
+        ereport(FATAL,
+                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                 errmsg("database connection requirement not indicated during registration")));
+
+    InitPostgres(dbname, InvalidOid, username, NULL);
+
+    /* we are expected to still be in "init" mode at this point */
+    if (!IsInitProcessingMode())
+        ereport(ERROR,
+                (errmsg("invalid processing mode in bgworker")));
+
+    SetProcessingMode(NormalProcessing);
+}
+
+
+/*
+ * Block/unblock signals in a background worker
+ *
+ * BackgroundWorkerBlockSignals installs the BlockSig mask via PG_SETMASK.
+ */
+void
+BackgroundWorkerBlockSignals(void)
+{
+ PG_SETMASK(&BlockSig);
+}
+
+/*
+ * Unblock signals in a background worker by installing the UnBlockSig mask
+ * via PG_SETMASK.
+ */
+void
+BackgroundWorkerUnblockSignals(void)
+{
+ PG_SETMASK(&UnBlockSig);
+}
+
+#ifdef EXEC_BACKEND
+/*
+ * Look up the registered worker whose rw_cookie equals "cookie".
+ *
+ * Returns a pointer to that entry's BackgroundWorker, or NULL when no
+ * registered worker carries the given cookie.
+ */
+static BackgroundWorker *
+find_bgworker_entry(int cookie)
+{
+    BackgroundWorker *match = NULL;
+    slist_iter cursor;
+
+    slist_foreach(cursor, &BackgroundWorkerList)
+    {
+        RegisteredBgWorker *candidate =
+            slist_container(RegisteredBgWorker, rw_lnode, cursor.cur);
+
+        if (candidate->rw_cookie == cookie)
+        {
+            match = &candidate->rw_worker;
+            break;
+        }
+    }
+
+    return match;
+}
+#endif
+
+/*
+ * SIGQUIT handler for background workers: exit immediately with status 0,
+ * without running any registered exit callbacks.
+ */
+static void
+bgworker_quickdie(SIGNAL_ARGS)
+{
+ sigaddset(&BlockSig, SIGQUIT); /* prevent nested calls */
+ PG_SETMASK(&BlockSig);
+
+ /*
+ * We DO NOT want to run proc_exit() callbacks -- we're here because
+ * shared memory may be corrupted, so we don't want to try to clean up our
+ * transaction. Just nail the windows shut and get out of town. Now that
+ * there's an atexit callback to prevent third-party code from breaking
+ * things by calling exit() directly, we have to reset the callbacks
+ * explicitly to make this work as intended.
+ */
+ on_exit_reset();
+
+ /*
+ * Note we do exit(0) here, not exit(2) like quickdie. The reason is that
+ * we don't want this worker to be seen as independently crashed, because
+ * then postmaster would delay restarting it again afterwards. If some
+ * idiot DBA manually sends SIGQUIT to a random bgworker, the "dead man
+ * switch" will ensure that postmaster sees this as a crash.
+ */
+ exit(0);
+}
+
+/*
+ * Standard SIGTERM handler for background workers
+ *
+ * Installed by do_start_bgworker when the worker did not supply its own
+ * bgw_sigterm handler. Blocks signals and then reports a FATAL
+ * "administrator command" error naming the worker.
+ */
+static void
+bgworker_die(SIGNAL_ARGS)
+{
+ PG_SETMASK(&BlockSig);
+
+ ereport(FATAL,
+ (errcode(ERRCODE_ADMIN_SHUTDOWN),
+ errmsg("terminating background worker \"%s\" due to administrator command",
+ MyBgworkerEntry->bgw_name)));
+}
+
+/*
+ * Main routine of a background worker child process.
+ *
+ * Expects MyBgworkerEntry to point at the worker's registry entry (FATAL if
+ * it is NULL). Sets up per-process state, the ps display and signal handlers,
+ * establishes the top-level error recovery point, performs early
+ * initialization, and finally calls the worker's bgw_main function.
+ *
+ * The function ends by calling proc_exit(): with status 0 if bgw_main
+ * returns, or with status 1 if an error is caught at the sigsetjmp point.
+ */
+static void
+do_start_bgworker(void)
+{
+ sigjmp_buf local_sigjmp_buf;
+ char buf[MAXPGPATH];
+ BackgroundWorker *worker = MyBgworkerEntry;
+
+ if (worker == NULL)
+ elog(FATAL, "unable to find bgworker entry");
+
+ /* we are a postmaster subprocess now */
+ IsUnderPostmaster = true;
+ IsBackgroundWorker = true;
+
+ /* reset MyProcPid */
+ MyProcPid = getpid();
+
+ /* record Start Time for logging */
+ MyStartTime = time(NULL);
+
+ /* Identify myself via ps */
+ snprintf(buf, MAXPGPATH, "bgworker: %s", worker->bgw_name);
+ init_ps_display(buf, "", "", "");
+
+ SetProcessingMode(InitProcessing);
+
+ /* Apply PostAuthDelay */
+ if (PostAuthDelay > 0)
+ pg_usleep(PostAuthDelay * 1000000L);
+
+ /*
+ * If possible, make this process a group leader, so that the postmaster
+ * can signal any child processes too.
+ */
+#ifdef HAVE_SETSID
+ if (setsid() < 0)
+ elog(FATAL, "setsid() failed: %m");
+#endif
+
+ /*
+ * Set up signal handlers.
+ *
+ * Workers that requested a database connection get handlers for SIGINT,
+ * SIGUSR1 and SIGFPE; all other workers ignore those three signals.
+ */
+ if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
+ {
+ /*
+ * SIGINT is used to signal canceling the current action
+ */
+ pqsignal(SIGINT, StatementCancelHandler);
+ pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+ pqsignal(SIGFPE, FloatExceptionHandler);
+
+ /* XXX Any other handlers needed here? */
+ }
+ else
+ {
+ pqsignal(SIGINT, SIG_IGN);
+ pqsignal(SIGUSR1, SIG_IGN);
+ pqsignal(SIGFPE, SIG_IGN);
+ }
+
+ /*
+ * SIGTERM and SIGHUP are configurable: a handler supplied at registration
+ * wins; otherwise SIGTERM gets bgworker_die and SIGHUP is ignored.
+ */
+ if (worker->bgw_sigterm)
+ pqsignal(SIGTERM, worker->bgw_sigterm);
+ else
+ pqsignal(SIGTERM, bgworker_die);
+
+ if (worker->bgw_sighup)
+ pqsignal(SIGHUP, worker->bgw_sighup);
+ else
+ pqsignal(SIGHUP, SIG_IGN);
+
+ pqsignal(SIGQUIT, bgworker_quickdie);
+ InitializeTimeouts(); /* establishes SIGALRM handler */
+
+ pqsignal(SIGPIPE, SIG_IGN);
+ pqsignal(SIGUSR2, SIG_IGN);
+ pqsignal(SIGCHLD, SIG_DFL);
+
+ /*
+ * If an exception is encountered, processing resumes here.
+ *
+ * See notes in postgres.c about the design of this coding.
+ */
+ if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+ {
+ /* Since not using PG_TRY, must reset error stack by hand */
+ error_context_stack = NULL;
+
+ /* Prevent interrupts while cleaning up */
+ HOLD_INTERRUPTS();
+
+ /* Report the error to the server log */
+ EmitErrorReport();
+
+ /*
+ * Do we need more cleanup here? For shmem-connected bgworkers, we
+ * will call InitProcess below, which will install ProcKill as exit
+ * callback. That will take care of releasing locks, etc.
+ */
+
+ /* and go away */
+ proc_exit(1);
+ }
+
+ /* We can now handle ereport(ERROR) */
+ PG_exception_stack = &local_sigjmp_buf;
+
+ /* Early initialization */
+ BaseInit();
+
+ /*
+ * If necessary, create a per-backend PGPROC struct in shared memory,
+ * except in the EXEC_BACKEND case where this was done in
+ * SubPostmasterMain. We must do this before we can use LWLocks (and in
+ * the EXEC_BACKEND case we already had to do some stuff with LWLocks).
+ */
+#ifndef EXEC_BACKEND
+ if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS)
+ InitProcess();
+#endif
+
+ /*
+ * Note that in normal processes, we would call InitPostgres here. For a
+ * worker, however, we don't know what database to connect to, yet; so we
+ * need to wait until the user code does it via
+ * BackgroundWorkerInitializeConnection().
+ */
+
+ /*
+ * Now invoke the user-defined worker code
+ */
+ worker->bgw_main(worker->bgw_main_arg);
+
+ /* ... and if it returns, we're done */
+ proc_exit(0);
+}
+
+
+/*
+ * Count the registered background workers whose bgw_flags share at least
+ * one bit with "flags".
+ *
+ * Passing flags == 0 disables the filter, so every registered worker is
+ * counted.
+ */
+static int
+GetNumRegisteredBackgroundWorkers(int flags)
+{
+	int			nworkers = 0;
+	slist_iter	iter;
+
+	slist_foreach(iter, &BackgroundWorkerList)
+	{
+		RegisteredBgWorker *rw = slist_container(RegisteredBgWorker,
+												 rw_lnode, iter.cur);
+
+		/* count it when unfiltered, or when any requested bit is set */
+		if (flags == 0 || (rw->rw_worker.bgw_flags & flags) != 0)
+			nworkers++;
+	}
+
+	return nworkers;
+}
+
+/*
+ * Number of registered bgworkers that requested BGWORKER_SHMEM_ACCESS;
+ * those are the ones that need to have PGPROC entries.
+ */
+int
+GetNumShmemAttachedBgworkers(void)
+{
+	int			nshmem;
+
+	nshmem = GetNumRegisteredBackgroundWorkers(BGWORKER_SHMEM_ACCESS);
+
+	return nshmem;
+}
+
+#ifdef EXEC_BACKEND
+/*
+ * Launch a bgworker child through postmaster_forkexec(), identifying the
+ * worker to the new process with a "--forkbgworker=<cookie>" argument.
+ *
+ * Returns whatever postmaster_forkexec() returns.
+ */
+static pid_t
+bgworker_forkexec(int cookie)
+{
+	char		forkav[MAXPGPATH];
+	char	   *av[10];
+	int			ac;
+
+	snprintf(forkav, sizeof(forkav), "--forkbgworker=%d", cookie);
+
+	av[0] = "postgres";
+	av[1] = forkav;
+	av[2] = NULL;				/* filled in by postmaster_forkexec */
+	ac = 3;
+	av[ac] = NULL;				/* argv terminator */
+
+	Assert(ac < lengthof(av));
+
+	return postmaster_forkexec(ac, av);
+}
+#endif
+
+/*
+ * Start a new bgworker.
+ * Starting time conditions must have been checked already.
+ *
+ * On success, the child's PID is stored in rw->rw_pid and, if the worker
+ * has a Backend entry, in rw->rw_backend->pid as well.
+ *
+ * If the fork fails, rw->rw_pid is left untouched and everything the caller
+ * reserved for the child is given back: the postmaster child slot is
+ * released and the Backend struct (if any) is freed, with rw->rw_backend
+ * reset to NULL so that the caller does not link an entry without a valid
+ * pid into the backend list.  The worker is also marked as crashed, so that
+ * it is retried only after its restart interval instead of immediately;
+ * this mirrors what assign_backendlist_entry does on out-of-memory.
+ *
+ * This code is heavily based on autovacuum.c, q.v.
+ */
+static void
+start_bgworker(RegisteredBgWorker *rw)
+{
+	pid_t		worker_pid;
+
+	ereport(LOG,
+			(errmsg("starting background worker process \"%s\"",
+					rw->rw_worker.bgw_name)));
+
+#ifdef EXEC_BACKEND
+	switch ((worker_pid = bgworker_forkexec(rw->rw_cookie)))
+#else
+	switch ((worker_pid = fork_process()))
+#endif
+	{
+		case -1:
+			/* in postmaster, fork failed ... */
+			ereport(LOG,
+					(errmsg("could not fork worker process: %m")));
+
+			/* undo the reservations made on behalf of the child */
+			(void) ReleasePostmasterChildSlot(rw->rw_child_slot);
+			rw->rw_child_slot = 0;
+			free(rw->rw_backend);	/* free(NULL) is a no-op */
+			rw->rw_backend = NULL;
+
+			/*
+			 * The worker didn't really crash, but setting this nonzero
+			 * keeps the postmaster from retrying the fork right away.
+			 */
+			rw->rw_crashed_at = GetCurrentTimestamp();
+			return;
+
+#ifndef EXEC_BACKEND
+		case 0:
+			/* in postmaster child ... */
+			/* Close the postmaster's sockets */
+			ClosePostmasterPorts(false);
+
+			/* Lose the postmaster's on-exit routines */
+			on_exit_reset();
+
+			/* Do NOT release postmaster's working memory context */
+
+			MyBgworkerEntry = &rw->rw_worker;
+			do_start_bgworker();
+			break;
+#endif
+		default:
+			/* in postmaster, fork successful: remember the child's PID */
+			rw->rw_pid = worker_pid;
+			if (rw->rw_backend)
+				rw->rw_backend->pid = rw->rw_pid;
+	}
+}
+
+/*
+ * Decide whether a worker registered with the given start_time may be
+ * launched in the current postmaster state (pmState).
+ *
+ * The start times are cumulative: each later run state also accepts the
+ * start times accepted by the earlier ones.  In the shutdown/wait states
+ * nothing is started.
+ */
+static bool
+bgworker_should_start_now(BgWorkerStartTime start_time)
+{
+	switch (pmState)
+	{
+		case PM_RUN:
+			/* normal running: every start time qualifies */
+			return (start_time == BgWorkerStart_RecoveryFinished ||
+					start_time == BgWorkerStart_ConsistentState ||
+					start_time == BgWorkerStart_PostmasterStart);
+
+		case PM_HOT_STANDBY:
+			return (start_time == BgWorkerStart_ConsistentState ||
+					start_time == BgWorkerStart_PostmasterStart);
+
+		case PM_RECOVERY:
+		case PM_STARTUP:
+		case PM_INIT:
+			return (start_time == BgWorkerStart_PostmasterStart);
+
+		case PM_WAIT_BACKUP:
+		case PM_WAIT_READONLY:
+		case PM_WAIT_BACKENDS:
+		case PM_SHUTDOWN:
+		case PM_SHUTDOWN_2:
+		case PM_WAIT_DEAD_END:
+		case PM_NO_CHILDREN:
+			/* never start workers in these states */
+			break;
+	}
+
+	return false;
+}
+
+/*
+ * Prepare the Backend struct for a connected background worker.  The struct
+ * is only allocated and filled in here; it is not added to the list of
+ * backends.
+ *
+ * On success, rw->rw_backend and rw->rw_child_slot are set and true is
+ * returned.  If memory cannot be allocated, the problem is logged,
+ * rw->rw_crashed_at is set to the current time, and false is returned.
+ */
+static bool
+assign_backendlist_entry(RegisteredBgWorker *rw)
+{
+	Backend    *backend;
+
+	backend = malloc(sizeof(Backend));
+	if (backend != NULL)
+	{
+		/*
+		 * Compute the cancel key that will be assigned to this session.  We
+		 * probably don't need cancel keys for background workers, but we'd
+		 * better have something random in the field to prevent unfriendly
+		 * people from sending cancels to them.
+		 */
+		backend->cancel_key = MyCancelKey = PostmasterRandom();
+
+		MyPMChildSlot = AssignPostmasterChildSlot();
+		backend->child_slot = MyPMChildSlot;
+		backend->bkend_type = BACKEND_TYPE_BGWORKER;
+		backend->dead_end = false;
+
+		/* copy what the caller needs into the registration entry */
+		rw->rw_backend = backend;
+		rw->rw_child_slot = backend->child_slot;
+
+		return true;
+	}
+
+	ereport(LOG,
+			(errcode(ERRCODE_OUT_OF_MEMORY),
+			 errmsg("out of memory")));
+
+	/*
+	 * The worker didn't really crash, but setting this nonzero makes the
+	 * postmaster wait a bit before attempting to start it again; if it tried
+	 * again right away, most likely it'd find itself under the same memory
+	 * pressure.
+	 */
+	rw->rw_crashed_at = GetCurrentTimestamp();
+
+	return false;
+}
+
+/*
+ * Launch at most one background worker, if one is eligible right now.
+ *
+ * The registered-worker list is scanned for the first entry that is not
+ * running, is not waiting out its post-crash restart delay, and whose start
+ * time matches the current postmaster state.
+ *
+ * Side effects on the bgworker control variables:
+ *	- StartWorkerNeeded is set after a launch attempt (so that we get called
+ *	  again), and cleared when no runnable worker is found or while
+ *	  FatalError is set.  It is left alone if the Backend allocation fails.
+ *	- HaveCrashedWorker is cleared on entry and set again if the scan meets
+ *	  a crashed worker whose restart delay has not yet elapsed.
+ */
+static void
+StartOneBackgroundWorker(void)
+{
+	slist_iter	iter;
+	TimestampTz curtime = 0;	/* fetched lazily; 0 means "not fetched" */
+
+	if (FatalError)
+	{
+		/* not yet */
+		StartWorkerNeeded = false;
+		HaveCrashedWorker = false;
+		return;
+	}
+
+	HaveCrashedWorker = false;
+
+	slist_foreach(iter, &BackgroundWorkerList)
+	{
+		RegisteredBgWorker *rw = slist_container(RegisteredBgWorker,
+												 rw_lnode, iter.cur);
+
+		/* skip workers that are already running */
+		if (rw->rw_pid != 0)
+			continue;
+
+		/*
+		 * A worker that crashed earlier may need restarting, unless it
+		 * asked at registration never to be restarted.  If the crash was
+		 * too recent, leave it for a later call, once enough time has
+		 * passed.
+		 */
+		if (rw->rw_crashed_at != 0)
+		{
+			if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
+				continue;
+
+			if (curtime == 0)
+				curtime = GetCurrentTimestamp();
+
+			if (!TimestampDifferenceExceeds(rw->rw_crashed_at, curtime,
+											rw->rw_worker.bgw_restart_time * 1000))
+			{
+				HaveCrashedWorker = true;
+				continue;
+			}
+		}
+
+		/* wrong postmaster state for this worker? try the next one */
+		if (!bgworker_should_start_now(rw->rw_worker.bgw_start_time))
+			continue;
+
+		/* reset crash time before calling assign_backendlist_entry */
+		rw->rw_crashed_at = 0;
+
+		/*
+		 * If necessary, allocate and assign the Backend element.  This must
+		 * happen before forking, so that out of memory can be handled
+		 * properly.
+		 *
+		 * A worker without a database connection needs no Backend element,
+		 * but it still needs a PMChildSlot.
+		 */
+		if (rw->rw_worker.bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
+		{
+			if (!assign_backendlist_entry(rw))
+				return;
+		}
+		else
+		{
+			MyPMChildSlot = AssignPostmasterChildSlot();
+			rw->rw_child_slot = MyPMChildSlot;
+		}
+
+		start_bgworker(rw);		/* sets rw->rw_pid */
+
+		if (rw->rw_backend)
+		{
+			dlist_push_head(&BackendList, &rw->rw_backend->elem);
+#ifdef EXEC_BACKEND
+			ShmemBackendArrayAdd(rw->rw_backend);
+#endif
+		}
+
+		/*
+		 * Have ServerLoop call us again.  There might not actually *be*
+		 * another runnable worker, but we don't care all that much; we will
+		 * find out the next time we run.
+		 */
+		StartWorkerNeeded = true;
+		return;
+	}
+
+	/* no runnable worker found */
+	StartWorkerNeeded = false;
+}
#ifdef EXEC_BACKEND