Send SIGKILL to children if they don't die quickly in immediate shutdown

author Alvaro Herrera <alvherre@alvh.no-ip.org>

Fri, 28 Jun 2013 21:20:53 +0000 (17:20 -0400)

committer Alvaro Herrera <alvherre@alvh.no-ip.org>

Fri, 28 Jun 2013 21:49:46 +0000 (17:49 -0400)
author Alvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 28 Jun 2013 21:20:53 +0000 (17:20 -0400)
committer Alvaro Herrera <alvherre@alvh.no-ip.org>
Fri, 28 Jun 2013 21:49:46 +0000 (17:49 -0400)
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml

index 7dada6b67f970406581c2d85196dac7bf9e9ac9f..47059d303cfd8c41803929f6bce81d6332ffcc5d 100644 (file)
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1362,11 +1362,11 @@ echo -1000 > /proc/self/oom_score_adj
       <listitem>
        <para>
        This is the <firstterm>Immediate Shutdown</firstterm> mode.
-      The master <command>postgres</command> process will send a
-      <systemitem>SIGQUIT</systemitem> to all child processes and exit
-      immediately, without properly shutting itself down. The child processes
-      likewise exit immediately upon receiving
-      <systemitem>SIGQUIT</systemitem>. This will lead to recovery (by
+      The server will send <systemitem>SIGQUIT</systemitem> to all child
+      processes and wait for them to terminate.  Those that don't terminate
+      within 5 seconds will be sent <systemitem>SIGKILL</systemitem> by the
+      master <command>postgres</command> process, which will then terminate
+      without further waiting.  This will lead to recovery (by
        replaying the WAL log) upon next start-up. This is recommended
        only in emergencies.
        </para>
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c

index 87e60621396714a937052948c592d600a84e95cf..64467cb53ca829d362e8890a971ef42b52035aed 100644 (file)
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -275,6 +275,7 @@ static pid_t StartupPID = 0,
  #define                        NoShutdown              0
  #define                        SmartShutdown   1
  #define                        FastShutdown    2
+#define                        ImmediateShutdown       3
  
  static int     Shutdown = NoShutdown;
  
@@ -345,6 +346,10 @@ typedef enum
  
  static PMState pmState = PM_INIT;
  
+/* Start time of abort processing at immediate shutdown or child crash */
+static time_t AbortStartTime;
+#define SIGKILL_CHILDREN_AFTER_SECS            5
+
  static bool ReachedNormalRunning = false;              /* T if we've reached PM_RUN */
  
  bool           ClientAuthInProgress = false;           /* T during new-client
@@ -421,6 +426,7 @@ static void RandomSalt(char *md5Salt);
  static void signal_child(pid_t pid, int signal);
  static bool SignalSomeChildren(int signal, int targets);
  static bool SignalUnconnectedWorkers(int signal);
+static void TerminateChildren(int signal);
  
  #define SignalChildren(sig)                       SignalSomeChildren(sig, BACKEND_TYPE_ALL)
  
@@ -1427,8 +1433,18 @@ DetermineSleepTime(struct timeval * timeout)
         if (Shutdown > NoShutdown ||
                 (!StartWorkerNeeded && !HaveCrashedWorker))
         {
-               timeout->tv_sec = 60;
-               timeout->tv_usec = 0;
+               if (AbortStartTime > 0)
+               {
+                       /* time left until SIGKILL is due, but never less than 1 second */
+                       timeout->tv_sec = Max(SIGKILL_CHILDREN_AFTER_SECS -
+                                                                 (time(NULL) - AbortStartTime), 1);
+                       timeout->tv_usec = 0;
+               }
+               else
+               {
+                       timeout->tv_sec = 60;
+                       timeout->tv_usec = 0;
+               }
                 return;
         }
  
@@ -1660,6 +1676,28 @@ ServerLoop(void)
                         TouchSocketLockFiles();
                         last_touch_time = now;
                 }
+
+               /*
+                * If we already sent SIGQUIT to children and they are slow to shut
+                * down, it's time to send them SIGKILL.  This doesn't happen normally,
+                * but under certain conditions backends can get stuck while shutting
+                * down.  This is a last measure to get them unwedged.
+                *
+                * Note we also do this during recovery from a process crash.
+                */
+               if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
+                       now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS)
+               {
+                       /* We were gentle with them before. Not anymore */
+                       TerminateChildren(SIGKILL);
+
+                       /*
+                        * Additionally, unless we're recovering from a process crash, it's
+                        * now the time for postmaster to abandon ship.
+                        */
+                       if (!FatalError)
+                               ExitPostmaster(1);
+               }
         }
  }
  
@@ -2455,30 +2493,27 @@ pmdie(SIGNAL_ARGS)
                         /*
                          * Immediate Shutdown:
                          *
-                        * abort all children with SIGQUIT and exit without attempt to
-                        * properly shut down data base system.
+                        * abort all children with SIGQUIT, wait for them to exit,
+                        * terminate remaining ones with SIGKILL, then exit without
+                        * attempt to properly shut down the data base system.
                          */
+                       if (Shutdown >= ImmediateShutdown)
+                               break;
+                       Shutdown = ImmediateShutdown;
                         ereport(LOG,
                                         (errmsg("received immediate shutdown request")));
-                       SignalChildren(SIGQUIT);
-                       if (StartupPID != 0)
-                               signal_child(StartupPID, SIGQUIT);
-                       if (BgWriterPID != 0)
-                               signal_child(BgWriterPID, SIGQUIT);
-                       if (CheckpointerPID != 0)
-                               signal_child(CheckpointerPID, SIGQUIT);
-                       if (WalWriterPID != 0)
-                               signal_child(WalWriterPID, SIGQUIT);
-                       if (WalReceiverPID != 0)
-                               signal_child(WalReceiverPID, SIGQUIT);
-                       if (AutoVacPID != 0)
-                               signal_child(AutoVacPID, SIGQUIT);
-                       if (PgArchPID != 0)
-                               signal_child(PgArchPID, SIGQUIT);
-                       if (PgStatPID != 0)
-                               signal_child(PgStatPID, SIGQUIT);
-                       SignalUnconnectedWorkers(SIGQUIT);
-                       ExitPostmaster(0);
+
+                       TerminateChildren(SIGQUIT);
+                       pmState = PM_WAIT_BACKENDS;
+
+                       /* set stopwatch for them to die */
+                       AbortStartTime = time(NULL);
+
+                       /*
+                        * Now wait for backends to exit.  If there are none,
+                        * PostmasterStateMachine will take the next step.
+                        */
+                       PostmasterStateMachine();
                         break;
         }
  
@@ -2952,12 +2987,17 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         dlist_mutable_iter iter;
         slist_iter      siter;
         Backend    *bp;
+       bool            take_action;
  
         /*
-        * Make log entry unless there was a previous crash (if so, nonzero exit
-        * status is to be expected in SIGQUIT response; don't clutter log)
+        * We only log messages and send signals if this is the first process crash
+        * and we're not doing an immediate shutdown; otherwise, we're only here to
+        * update postmaster's idea of live processes.  If we have already signalled
+        * children, nonzero exit status is to be expected, so don't clutter log.
          */
-       if (!FatalError)
+       take_action = !FatalError && Shutdown != ImmediateShutdown;
+
+       if (take_action)
         {
                 LogChildExit(LOG, procname, pid, exitstatus);
                 ereport(LOG,
@@ -3003,7 +3043,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
                          * (-s on command line), then we send SIGSTOP instead, so that we
                          * can get core dumps from all backends by hand.
                          */
-                       if (!FatalError)
+                       if (take_action)
                         {
                                 ereport(DEBUG2,
                                                 (errmsg_internal("sending %s to process %d",
@@ -3055,7 +3095,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
                         if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
                                 continue;
  
-                       if (!FatalError)
+                       if (take_action)
                         {
                                 ereport(DEBUG2,
                                                 (errmsg_internal("sending %s to process %d",
@@ -3069,7 +3109,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         /* Take care of the startup process too */
         if (pid == StartupPID)
                 StartupPID = 0;
-       else if (StartupPID != 0 && !FatalError)
+       else if (StartupPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3081,7 +3121,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         /* Take care of the bgwriter too */
         if (pid == BgWriterPID)
                 BgWriterPID = 0;
-       else if (BgWriterPID != 0 && !FatalError)
+       else if (BgWriterPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3093,7 +3133,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         /* Take care of the checkpointer too */
         if (pid == CheckpointerPID)
                 CheckpointerPID = 0;
-       else if (CheckpointerPID != 0 && !FatalError)
+       else if (CheckpointerPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3105,7 +3145,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         /* Take care of the walwriter too */
         if (pid == WalWriterPID)
                 WalWriterPID = 0;
-       else if (WalWriterPID != 0 && !FatalError)
+       else if (WalWriterPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3117,7 +3157,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         /* Take care of the walreceiver too */
         if (pid == WalReceiverPID)
                 WalReceiverPID = 0;
-       else if (WalReceiverPID != 0 && !FatalError)
+       else if (WalReceiverPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3129,7 +3169,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         /* Take care of the autovacuum launcher too */
         if (pid == AutoVacPID)
                 AutoVacPID = 0;
-       else if (AutoVacPID != 0 && !FatalError)
+       else if (AutoVacPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3144,7 +3184,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
          * simplifies the state-machine logic in the case where a shutdown request
          * arrives during crash processing.)
          */
-       if (PgArchPID != 0 && !FatalError)
+       if (PgArchPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3159,7 +3199,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
          * simplifies the state-machine logic in the case where a shutdown request
          * arrives during crash processing.)
          */
-       if (PgStatPID != 0 && !FatalError)
+       if (PgStatPID != 0 && take_action)
         {
                 ereport(DEBUG2,
                                 (errmsg_internal("sending %s to process %d",
@@ -3171,7 +3211,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
  
         /* We do NOT restart the syslogger */
  
-       FatalError = true;
+       if (Shutdown != ImmediateShutdown)
+               FatalError = true;
+
         /* We now transit into a state of waiting for children to die */
         if (pmState == PM_RECOVERY ||
                 pmState == PM_HOT_STANDBY ||
@@ -3180,6 +3222,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
                 pmState == PM_WAIT_READONLY ||
                 pmState == PM_SHUTDOWN)
                 pmState = PM_WAIT_BACKENDS;
+
+       /*
+        * .. and if this doesn't happen quickly enough, now the clock is ticking
+        * for us to kill them without mercy.
+        */
+       if (AbortStartTime == 0)
+               AbortStartTime = time(NULL);
  }
  
  /*
@@ -3316,7 +3365,7 @@ PostmasterStateMachine(void)
                         WalWriterPID == 0 &&
                         AutoVacPID == 0)
                 {
-                       if (FatalError)
+                       if (Shutdown >= ImmediateShutdown || FatalError)
                         {
                                 /*
                                  * Start waiting for dead_end children to die.  This state
@@ -3326,7 +3375,8 @@ PostmasterStateMachine(void)
  
                                 /*
                                  * We already SIGQUIT'd the archiver and stats processes, if
-                                * any, when we entered FatalError state.
+                                * any, when we started immediate shutdown or entered
+                                * FatalError state.
                                  */
                         }
                         else
@@ -3511,6 +3561,7 @@ signal_child(pid_t pid, int signal)
                 case SIGTERM:
                 case SIGQUIT:
                 case SIGSTOP:
+               case SIGKILL:
                         if (kill(-pid, signal) < 0)
                                 elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
                         break;
@@ -3597,6 +3648,33 @@ SignalSomeChildren(int signal, int target)
         return signaled;
  }
  
+/*
+ * Send the given signal to all of our child processes, except the syslogger
+ * and dead_end backends.  Used with SIGQUIT first, then SIGKILL to escalate.
+ */
+static void
+TerminateChildren(int signal)
+{
+       SignalChildren(signal);
+       if (StartupPID != 0)
+               signal_child(StartupPID, signal);
+       if (BgWriterPID != 0)
+               signal_child(BgWriterPID, signal);
+       if (CheckpointerPID != 0)
+               signal_child(CheckpointerPID, signal);
+       if (WalWriterPID != 0)
+               signal_child(WalWriterPID, signal);
+       if (WalReceiverPID != 0)
+               signal_child(WalReceiverPID, signal);
+       if (AutoVacPID != 0)
+               signal_child(AutoVacPID, signal);
+       if (PgArchPID != 0)
+               signal_child(PgArchPID, signal);
+       if (PgStatPID != 0)
+               signal_child(PgStatPID, signal);
+       SignalUnconnectedWorkers(signal);
+}
+
  /*
   * BackendStartup -- start backend process
   *
diff --git a/src/port/kill.c b/src/port/kill.c

index 0a810cd22528ab2680d14173069f0adb1820e6a1..d0f23efb1779e62b5c0c1a6d814c89f9dd993c4c 100644 (file)
--- a/src/port/kill.c
+++ b/src/port/kill.c
@@ -38,6 +38,26 @@ pgkill(int pid, int sig)
                 errno = EINVAL;
                 return -1;
         }
+
+       /* special case for SIGKILL: just ask the system to terminate the target */
+       if (sig == SIGKILL)
+       {
+               HANDLE prochandle;
+
+               if ((prochandle = OpenProcess(PROCESS_TERMINATE, FALSE, (DWORD) pid)) == NULL)
+               {
+                       errno = ESRCH;
+                       return -1;
+               }
+               if (!TerminateProcess(prochandle, 255))
+               {
+                       _dosmaperr(GetLastError());
+                       CloseHandle(prochandle);
+                       return -1;
+               }
+               CloseHandle(prochandle);
+               return 0;
+       }
         snprintf(pipename, sizeof(pipename), "\\\\.\\pipe\\pgsignal_%u", pid);
  
         if (CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, 1000))
author	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Fri, 28 Jun 2013 21:20:53 +0000 (17:20 -0400)
committer	Alvaro Herrera <alvherre@alvh.no-ip.org>
	Fri, 28 Jun 2013 21:49:46 +0000 (17:49 -0400)
doc/src/sgml/runtime.sgml		patch \| blob \| history
src/backend/postmaster/postmaster.c		patch \| blob \| history
src/port/kill.c		patch \| blob \| history