#define NoShutdown 0
#define SmartShutdown 1
#define FastShutdown 2
+#define ImmediateShutdown 3
static int Shutdown = NoShutdown;
static PMState pmState = PM_INIT;
+/* Start time of abort processing at immediate shutdown or child crash */
+static time_t AbortStartTime;
+#define SIGKILL_CHILDREN_AFTER_SECS 5
+
static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
bool ClientAuthInProgress = false; /* T during new-client
static void signal_child(pid_t pid, int signal);
static bool SignalSomeChildren(int signal, int targets);
static bool SignalUnconnectedWorkers(int signal);
+static void TerminateChildren(int signal);
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
if (Shutdown > NoShutdown ||
(!StartWorkerNeeded && !HaveCrashedWorker))
{
- timeout->tv_sec = 60;
- timeout->tv_usec = 0;
+ if (AbortStartTime > 0)
+ {
+ /* remaining time, but at least 1 second */
+ timeout->tv_sec = Min(SIGKILL_CHILDREN_AFTER_SECS -
+ (time(NULL) - AbortStartTime), 1);
+ timeout->tv_usec = 0;
+ }
+ else
+ {
+ timeout->tv_sec = 60;
+ timeout->tv_usec = 0;
+ }
return;
}
TouchSocketLockFiles();
last_touch_time = now;
}
+
+ /*
+ * If we already sent SIGQUIT to children and they are slow to shut
+ * down, it's time to send them SIGKILL. This doesn't happen normally,
+ * but under certain conditions backends can get stuck while shutting
+ * down. This is a last measure to get them unwedged.
+ *
+ * Note we also do this during recovery from a process crash.
+ */
+ if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
+ now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS)
+ {
+ /* We were gentle with them before. Not anymore */
+ TerminateChildren(SIGKILL);
+
+ /*
+ * Additionally, unless we're recovering from a process crash, it's
+ * now the time for postmaster to abandon ship.
+ */
+ if (!FatalError)
+ ExitPostmaster(1);
+ }
}
}
/*
* Immediate Shutdown:
*
- * abort all children with SIGQUIT and exit without attempt to
- * properly shut down data base system.
+ * abort all children with SIGQUIT, wait for them to exit,
+ * terminate remaining ones with SIGKILL, then exit without
+ * attempt to properly shut down the data base system.
*/
+ if (Shutdown >= ImmediateShutdown)
+ break;
+ Shutdown = ImmediateShutdown;
ereport(LOG,
(errmsg("received immediate shutdown request")));
- SignalChildren(SIGQUIT);
- if (StartupPID != 0)
- signal_child(StartupPID, SIGQUIT);
- if (BgWriterPID != 0)
- signal_child(BgWriterPID, SIGQUIT);
- if (CheckpointerPID != 0)
- signal_child(CheckpointerPID, SIGQUIT);
- if (WalWriterPID != 0)
- signal_child(WalWriterPID, SIGQUIT);
- if (WalReceiverPID != 0)
- signal_child(WalReceiverPID, SIGQUIT);
- if (AutoVacPID != 0)
- signal_child(AutoVacPID, SIGQUIT);
- if (PgArchPID != 0)
- signal_child(PgArchPID, SIGQUIT);
- if (PgStatPID != 0)
- signal_child(PgStatPID, SIGQUIT);
- SignalUnconnectedWorkers(SIGQUIT);
- ExitPostmaster(0);
+
+ TerminateChildren(SIGQUIT);
+ pmState = PM_WAIT_BACKENDS;
+
+ /* set stopwatch for them to die */
+ AbortStartTime = time(NULL);
+
+ /*
+ * Now wait for backends to exit. If there are none,
+ * PostmasterStateMachine will take the next step.
+ */
+ PostmasterStateMachine();
break;
}
dlist_mutable_iter iter;
slist_iter siter;
Backend *bp;
+ bool take_action;
/*
- * Make log entry unless there was a previous crash (if so, nonzero exit
- * status is to be expected in SIGQUIT response; don't clutter log)
+ * We only log messages and send signals if this is the first process crash
+ * and we're not doing an immediate shutdown; otherwise, we're only here to
+ * update postmaster's idea of live processes. If we have already signalled
+ * children, nonzero exit status is to be expected, so don't clutter log.
*/
- if (!FatalError)
+ take_action = !FatalError && Shutdown != ImmediateShutdown;
+
+ if (take_action)
{
LogChildExit(LOG, procname, pid, exitstatus);
ereport(LOG,
* (-s on command line), then we send SIGSTOP instead, so that we
* can get core dumps from all backends by hand.
*/
- if (!FatalError)
+ if (take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
continue;
- if (!FatalError)
+ if (take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
/* Take care of the startup process too */
if (pid == StartupPID)
StartupPID = 0;
- else if (StartupPID != 0 && !FatalError)
+ else if (StartupPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
/* Take care of the bgwriter too */
if (pid == BgWriterPID)
BgWriterPID = 0;
- else if (BgWriterPID != 0 && !FatalError)
+ else if (BgWriterPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
/* Take care of the checkpointer too */
if (pid == CheckpointerPID)
CheckpointerPID = 0;
- else if (CheckpointerPID != 0 && !FatalError)
+ else if (CheckpointerPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
/* Take care of the walwriter too */
if (pid == WalWriterPID)
WalWriterPID = 0;
- else if (WalWriterPID != 0 && !FatalError)
+ else if (WalWriterPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
/* Take care of the walreceiver too */
if (pid == WalReceiverPID)
WalReceiverPID = 0;
- else if (WalReceiverPID != 0 && !FatalError)
+ else if (WalReceiverPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
/* Take care of the autovacuum launcher too */
if (pid == AutoVacPID)
AutoVacPID = 0;
- else if (AutoVacPID != 0 && !FatalError)
+ else if (AutoVacPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
* simplifies the state-machine logic in the case where a shutdown request
* arrives during crash processing.)
*/
- if (PgArchPID != 0 && !FatalError)
+ if (PgArchPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
* simplifies the state-machine logic in the case where a shutdown request
* arrives during crash processing.)
*/
- if (PgStatPID != 0 && !FatalError)
+ if (PgStatPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
/* We do NOT restart the syslogger */
- FatalError = true;
+ if (Shutdown != ImmediateShutdown)
+ FatalError = true;
+
/* We now transit into a state of waiting for children to die */
if (pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY ||
pmState == PM_WAIT_READONLY ||
pmState == PM_SHUTDOWN)
pmState = PM_WAIT_BACKENDS;
+
+ /*
+ * .. and if this doesn't happen quickly enough, now the clock is ticking
+ * for us to kill them without mercy.
+ */
+ if (AbortStartTime == 0)
+ AbortStartTime = time(NULL);
}
/*
WalWriterPID == 0 &&
AutoVacPID == 0)
{
- if (FatalError)
+ if (Shutdown >= ImmediateShutdown || FatalError)
{
/*
* Start waiting for dead_end children to die. This state
/*
* We already SIGQUIT'd the archiver and stats processes, if
- * any, when we entered FatalError state.
+ * any, when we started immediate shutdown or entered
+ * FatalError state.
*/
}
else
case SIGTERM:
case SIGQUIT:
case SIGSTOP:
+ case SIGKILL:
if (kill(-pid, signal) < 0)
elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
break;
return signaled;
}
+/*
+ * Send a termination signal to children. This considers all of our children
+ * processes, except syslogger and dead_end backends.
+ */
+static void
+TerminateChildren(int signal)
+{
+ SignalChildren(signal);
+ if (StartupPID != 0)
+ signal_child(StartupPID, signal);
+ if (BgWriterPID != 0)
+ signal_child(BgWriterPID, signal);
+ if (CheckpointerPID != 0)
+ signal_child(CheckpointerPID, signal);
+ if (WalWriterPID != 0)
+ signal_child(WalWriterPID, signal);
+ if (WalReceiverPID != 0)
+ signal_child(WalReceiverPID, signal);
+ if (AutoVacPID != 0)
+ signal_child(AutoVacPID, signal);
+ if (PgArchPID != 0)
+ signal_child(PgArchPID, signal);
+ if (PgStatPID != 0)
+ signal_child(PgStatPID, signal);
+ SignalUnconnectedWorkers(signal);
+}
+
/*
* BackendStartup -- start backend process
*