From: Tom Lane Date: Wed, 20 Dec 2000 21:51:52 +0000 (+0000) Subject: Prevent freshly-started backend from ignoring SIGUSR1, per race condition X-Git-Tag: REL7_1_BETA2~119 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=39b547f43006938e93bd781ccef6f8c3610b9509;p=postgresql Prevent freshly-started backend from ignoring SIGUSR1, per race condition observed by Inoue. Also, don't call ProcRemove() from postmaster if we have detected a backend crash --- too risky if shared memory is corrupted. It's not needed anyway, considering we are going to reinitialize shared memory and semaphores as soon as the last child is dead. --- diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index e1c8021ff0..ef2896b3c2 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.200 2000/12/18 18:45:04 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.201 2000/12/20 21:51:52 tgl Exp $ * * NOTES * @@ -180,7 +180,7 @@ static time_t checkpointed = 0; static int Shutdown = NoShutdown; -static bool FatalError = false; +static bool FatalError = false; /* T if recovering from backend crash */ /* * State for assigning random salts and cancel keys. @@ -649,7 +649,7 @@ PostmasterMain(int argc, char *argv[]) pqsignal(SIGTERM, pmdie); /* wait for children and ShutdownDataBase */ pqsignal(SIGALRM, SIG_IGN); /* ignored */ pqsignal(SIGPIPE, SIG_IGN); /* ignored */ - pqsignal(SIGUSR1, SIG_IGN); /* ignored */ + pqsignal(SIGUSR1, pmdie); /* currently ignored, but see note in pmdie */ pqsignal(SIGUSR2, pmdie); /* send SIGUSR2, don't die */ pqsignal(SIGCHLD, reaper); /* handle child termination */ pqsignal(SIGTTIN, SIG_IGN); /* ignored */ @@ -1329,6 +1329,18 @@ pmdie(SIGNAL_ARGS) switch (postgres_signal_arg) { + case SIGUSR1: + /* + * Currently the postmaster ignores SIGUSR1 (maybe it should + * do something useful instead?) But we must have some handler + * installed for SIGUSR1, not just set it to SIG_IGN. Else, a + * freshly spawned backend would likewise have it set to SIG_IGN, + * which would mean the backend would ignore any attempt to kill + * it before it had gotten as far as setting up its own handler. + */ + errno = save_errno; + return; + case SIGUSR2: /* @@ -1511,7 +1523,7 @@ reaper(SIGNAL_ARGS) ExitPostmaster(1); } StartupPID = 0; - FatalError = false; + FatalError = false; /* done with recovery */ if (Shutdown > NoShutdown) { if (ShutdownPID > 0) @@ -1539,12 +1551,7 @@ reaper(SIGNAL_ARGS) /* * Wait for all children exit, then reset shmem and StartupDataBase. */ - if (DLGetHead(BackendList)) - { - errno = save_errno; - return; - } - if (StartupPID > 0 || ShutdownPID > 0) + if (DLGetHead(BackendList) || StartupPID > 0 || ShutdownPID > 0) { errno = save_errno; return; @@ -1595,13 +1602,10 @@ CleanupProc(int pid, Dlelem *curr, *next; Backend *bp; - int sig; if (DebugLvl) - { fprintf(stderr, "%s: CleanupProc: pid %d exited with status %d\n", progname, pid, exitstatus); - } /* * If a backend dies in an ugly way (i.e. exit status not 0) then we @@ -1609,7 +1613,7 @@ CleanupProc(int pid, * we assume everything is hunky dory and simply remove the backend * from the active backend list. */ - if (!exitstatus) + if (exitstatus == 0) { curr = DLGetHead(BackendList); while (curr) @@ -1628,66 +1632,64 @@ CleanupProc(int pid, if (pid == CheckPointPID) { CheckPointPID = 0; - checkpointed = time(NULL); + if (!FatalError) + checkpointed = time(NULL); } else - ProcRemove(pid); + { + /* Why is this done here, and not by the backend itself? */ + if (!FatalError) + ProcRemove(pid); + } return; } if (!FatalError) { + /* Make log entry unless we did so already */ tnow = time(NULL); fprintf(stderr, "Server process (pid %d) exited with status %d at %s" "Terminating any active server processes...\n", pid, exitstatus, ctime(&tnow)); fflush(stderr); } - FatalError = true; + curr = DLGetHead(BackendList); while (curr) { next = DLGetSucc(curr); bp = (Backend *) DLE_VAL(curr); - - /* - * SIGUSR1 is the special signal that says exit without proc_exit - * and let the user know what's going on. ProcSemaphoreKill() - * cleans up the backends semaphore. If SendStop is set (-s on - * command line), then we send a SIGSTOP so that we can core dumps - * from all backends by hand. - */ - sig = (SendStop) ? SIGSTOP : SIGUSR1; if (bp->pid != pid) { - if (DebugLvl) - fprintf(stderr, "%s: CleanupProc: sending %s to process %d\n", - progname, - (sig == SIGUSR1) - ? "SIGUSR1" : "SIGSTOP", - bp->pid); - kill(bp->pid, sig); + /* + * This backend is still alive. Unless we did so already, + * tell it to commit hara-kiri. + * + * SIGUSR1 is the special signal that says exit without proc_exit + * and let the user know what's going on. But if SendStop is set + * (-s on command line), then we send SIGSTOP instead, so that we + * can get core dumps from all backends by hand. + */ + if (!FatalError) + { + if (DebugLvl) + fprintf(stderr, "%s: CleanupProc: sending %s to process %d\n", + progname, + (SendStop ? "SIGSTOP" : "SIGUSR1"), + bp->pid); + kill(bp->pid, (SendStop ? SIGSTOP : SIGUSR1)); + } } else { - /* - * I don't like that we call ProcRemove() here, assuming that - * shmem may be corrupted! But is there another way to free - * backend semaphores? Actually, I believe that we need not in - * per backend semaphore at all (we use them to wait on lock - * only, couldn't we just sigpause?), so probably we'll remove - * this call from here someday. -- vadim 04-10-1999 + * Found entry for freshly-dead backend, so remove it. + * + * Don't call ProcRemove() here, since shmem may be corrupted! + * We are going to reinitialize shmem and semaphores anyway + * once all the children are dead, so no need for it. */ - if (pid == CheckPointPID) - { - CheckPointPID = 0; - checkpointed = 0; - } - else - ProcRemove(pid); - DLRemove(curr); free(bp); DLFreeElem(curr); @@ -1695,6 +1697,13 @@ CleanupProc(int pid, curr = next; } + if (pid == CheckPointPID) + { + CheckPointPID = 0; + checkpointed = 0; + } + + FatalError = true; } /* diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 2946c9a5ef..a8b7b4ec1b 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.197 2000/12/18 18:45:05 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.198 2000/12/20 21:51:52 tgl Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -1462,21 +1462,12 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[], const cha Assert(DataDir); /* - * 1. Set BlockSig and UnBlockSig masks. 2. Set up signal handlers. 3. - * Allow only SIGUSR1 signal (we never block it) during - * initialization. + * Set up signal handlers and masks. * - * Note that postmaster already blocked ALL signals to make us happy. + * Note that postmaster blocked all signals before forking child process, + * so there is no race condition whereby we might receive a signal before + * we have set up the handler. */ - pqinitmask(); - -#ifdef HAVE_SIGPROCMASK - sigdelset(&BlockSig, SIGUSR1); -#else - BlockSig &= ~(sigmask(SIGUSR1)); -#endif - - PG_SETMASK(&BlockSig); /* block everything except SIGUSR1 */ pqsignal(SIGHUP, SigHupHandler); /* set flag to read config file */ pqsignal(SIGINT, QueryCancelHandler); /* cancel current query */ @@ -1499,6 +1490,17 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[], const cha pqsignal(SIGTTOU, SIG_DFL); pqsignal(SIGCONT, SIG_DFL); + pqinitmask(); + + /* We allow SIGUSR1 (quickdie) at all times */ +#ifdef HAVE_SIGPROCMASK + sigdelset(&BlockSig, SIGUSR1); +#else + BlockSig &= ~(sigmask(SIGUSR1)); +#endif + + PG_SETMASK(&BlockSig); /* block everything except SIGUSR1 */ + if (IsUnderPostmaster) { @@ -1649,7 +1651,7 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[], const cha if (!IsUnderPostmaster) { puts("\nPOSTGRES backend interactive interface "); - puts("$Revision: 1.197 $ $Date: 2000/12/18 18:45:05 $\n"); + puts("$Revision: 1.198 $ $Date: 2000/12/20 21:51:52 $\n"); } /*