]> granicus.if.org Git - postgresql/commitdiff
Adjust elog.c so that elog(FATAL) exits (including cases where ERROR is
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 21 Nov 2006 00:49:55 +0000 (00:49 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 21 Nov 2006 00:49:55 +0000 (00:49 +0000)
promoted to FATAL) end in exit(1) not exit(0).  Then change the postmaster to
allow exit(1) without a system-wide panic, but not for the startup subprocess
or the bgwriter.  There were a couple of places that were using exit(1) to
deliberately force a system-wide panic; adjust these to be exit(2) instead.
This fixes the problem noted back in July that if the startup process exits
with elog(ERROR), the postmaster would think everything is hunky-dory and
proceed to start up.  Alternative solutions such as trying to run the entire
startup process as a critical section seem less clean, primarily because of
the fact that a fair amount of startup code is shared by all postmaster
children in the EXEC_BACKEND case.  We'd need an ugly special case somewhere
near the head of main.c to make it work if it's the child process's
responsibility to determine what happens; and what's the point when the
postmaster already treats different children differently?

src/backend/bootstrap/bootstrap.c
src/backend/postmaster/bgwriter.c
src/backend/postmaster/postmaster.c
src/backend/tcop/postgres.c
src/backend/utils/error/elog.c

index 70b9172cd7fcd8649d4da0aa62f9f73b4a91e0a1..b44c501cc69629e31fa45f17da3c5e67bd36c346 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.225 2006/10/04 00:29:49 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.226 2006/11/21 00:49:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -421,15 +421,8 @@ BootstrapMain(int argc, char *argv[])
                case BS_XLOG_STARTUP:
                        bootstrap_signals();
                        StartupXLOG();
-
-                       /*
-                        * These next two functions don't consider themselves critical,
-                        * but we'd best PANIC anyway if they fail.
-                        */
-                       START_CRIT_SECTION();
                        LoadFreeSpaceMap();
                        BuildFlatFiles(false);
-                       END_CRIT_SECTION();
                        proc_exit(0);           /* startup done */
 
                case BS_XLOG_BGWRITER:
index e3a4925b91181edba5585e0ca592c909ad84147d..f2cb3ff68de6cef729c3d3d74683b1d30e02c720 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.29 2006/10/06 17:13:59 petere Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.30 2006/11/21 00:49:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -503,12 +503,12 @@ bg_quickdie(SIGNAL_ARGS)
         * corrupted, so we don't want to try to clean up our transaction. Just
         * nail the windows shut and get out of town.
         *
-        * Note we do exit(1) not exit(0).      This is to force the postmaster into a
+        * Note we do exit(2) not exit(0).      This is to force the postmaster into a
         * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
         * backend.  This is necessary precisely because we don't clean up our
         * shared memory state.
         */
-       exit(1);
+       exit(2);
 }
 
 /* SIGHUP: set flag to re-read config file at next convenient time */
index caf7d9a82d47663adc3d42e8e525466108804d88..9d7b6065f46a496b8ae925dcf2f78304e63d3c48 100644 (file)
@@ -37,7 +37,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.501 2006/11/05 22:42:09 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.502 2006/11/21 00:49:55 tgl Exp $
  *
  * NOTES
  *
@@ -358,6 +358,10 @@ static void ShmemBackendArrayRemove(pid_t pid);
 #define StartupDataBase()              StartChildProcess(BS_XLOG_STARTUP)
 #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
 
+/* Macros to check exit status of a child process */
+#define EXIT_STATUS_0(st)  ((st) == 0)
+#define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)
+
 
 /*
  * Postmaster main entry point
@@ -2025,7 +2029,8 @@ reaper(SIGNAL_ARGS)
                if (StartupPID != 0 && pid == StartupPID)
                {
                        StartupPID = 0;
-                       if (exitstatus != 0)
+                       /* Note: FATAL exit of startup is treated as catastrophic */
+                       if (!EXIT_STATUS_0(exitstatus))
                        {
                                LogChildExit(LOG, _("startup process"),
                                                         pid, exitstatus);
@@ -2078,7 +2083,8 @@ reaper(SIGNAL_ARGS)
                if (BgWriterPID != 0 && pid == BgWriterPID)
                {
                        BgWriterPID = 0;
-                       if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError &&
+                       if (EXIT_STATUS_0(exitstatus) &&
+                               Shutdown > NoShutdown && !FatalError &&
                                !DLGetHead(BackendList) && AutoVacPID == 0)
                        {
                                /*
@@ -2096,7 +2102,8 @@ reaper(SIGNAL_ARGS)
                        }
 
                        /*
-                        * Any unexpected exit of the bgwriter is treated as a crash.
+                        * Any unexpected exit of the bgwriter (including FATAL exit)
+                        * is treated as a crash.
                         */
                        HandleChildCrash(pid, exitstatus,
                                                         _("background writer process"));
@@ -2104,15 +2111,16 @@ reaper(SIGNAL_ARGS)
                }
 
                /*
-                * Was it the autovacuum process?  Normal exit can be ignored; we'll
-                * start a new one at the next iteration of the postmaster's main
-                * loop, if necessary.  An unexpected exit is treated as a crash.
+                * Was it the autovacuum process?  Normal or FATAL exit can be
+                * ignored; we'll start a new one at the next iteration of the
+                * postmaster's main loop, if necessary.  Any other exit condition
+                * is treated as a crash.
                 */
                if (AutoVacPID != 0 && pid == AutoVacPID)
                {
                        AutoVacPID = 0;
                        autovac_stopped();
-                       if (exitstatus != 0)
+                       if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
                                HandleChildCrash(pid, exitstatus,
                                                                 _("autovacuum process"));
                        continue;
@@ -2126,7 +2134,7 @@ reaper(SIGNAL_ARGS)
                if (PgArchPID != 0 && pid == PgArchPID)
                {
                        PgArchPID = 0;
-                       if (exitstatus != 0)
+                       if (!EXIT_STATUS_0(exitstatus))
                                LogChildExit(LOG, _("archiver process"),
                                                         pid, exitstatus);
                        if (XLogArchivingActive() &&
@@ -2143,7 +2151,7 @@ reaper(SIGNAL_ARGS)
                if (PgStatPID != 0 && pid == PgStatPID)
                {
                        PgStatPID = 0;
-                       if (exitstatus != 0)
+                       if (!EXIT_STATUS_0(exitstatus))
                                LogChildExit(LOG, _("statistics collector process"),
                                                         pid, exitstatus);
                        if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
@@ -2157,7 +2165,7 @@ reaper(SIGNAL_ARGS)
                        SysLoggerPID = 0;
                        /* for safety's sake, launch new logger *first* */
                        SysLoggerPID = SysLogger_Start();
-                       if (exitstatus != 0)
+                       if (!EXIT_STATUS_0(exitstatus))
                                LogChildExit(LOG, _("system logger process"),
                                                         pid, exitstatus);
                        continue;
@@ -2229,12 +2237,12 @@ CleanupBackend(int pid,
        LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
 
        /*
-        * If a backend dies in an ugly way (i.e. exit status not 0) then we must
-        * signal all other backends to quickdie.  If exit status is zero we
-        * assume everything is hunky dory and simply remove the backend from the
+        * If a backend dies in an ugly way then we must signal all other backends
+        * to quickdie.  If exit status is zero (normal) or one (FATAL exit), we
+        * assume everything is all right and simply remove the backend from the
         * active backend list.
         */
-       if (exitstatus != 0)
+       if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
        {
                HandleChildCrash(pid, exitstatus, _("server process"));
                return;
index d019864fea47210722ee79826005caaf971ad72a..8b014c887f50fc7162acc5dce8c6e7899ba9f5ba 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.516 2006/10/19 19:52:22 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.517 2006/11/21 00:49:55 tgl Exp $
  *
  * NOTES
  *       this is the "main" module of the postgres backend and
@@ -2327,12 +2327,12 @@ quickdie(SIGNAL_ARGS)
         * corrupted, so we don't want to try to clean up our transaction. Just
         * nail the windows shut and get out of town.
         *
-        * Note we do exit(1) not exit(0).      This is to force the postmaster into a
+        * Note we do exit(2) not exit(0).      This is to force the postmaster into a
         * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
         * backend.  This is necessary precisely because we don't clean up our
         * shared memory state.
         */
-       exit(1);
+       exit(2);
 }
 
 /*
@@ -2374,7 +2374,7 @@ die(SIGNAL_ARGS)
 
 /*
  * Timeout or shutdown signal from postmaster during client authentication.
- * Simply exit(0).
+ * Simply exit(1).
  *
  * XXX: possible future improvement: try to send a message indicating
  * why we are disconnecting.  Problem is to be sure we don't block while
@@ -2383,7 +2383,7 @@ die(SIGNAL_ARGS)
 void
 authdie(SIGNAL_ARGS)
 {
-       exit(0);
+       exit(1);
 }
 
 /*
index 61c413b4be32f8719cdb8ec39a72f3e4f9c7a166..6b6a9c51cf70a08b3a07c818a8c401af8294c259 100644 (file)
@@ -42,7 +42,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.175 2006/10/01 22:08:18 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.176 2006/11/21 00:49:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -421,25 +421,23 @@ errfinish(int dummy,...)
                 * fflush here is just to improve the odds that we get to see the
                 * error message, in case things are so hosed that proc_exit crashes.
                 * Any other code you might be tempted to add here should probably be
-                * in an on_proc_exit callback instead.
+                * in an on_proc_exit or on_shmem_exit callback instead.
                 */
                fflush(stdout);
                fflush(stderr);
 
                /*
-                * If proc_exit is already running, we exit with nonzero exit code to
-                * indicate that something's pretty wrong.  We also want to exit with
-                * nonzero exit code if not running under the postmaster (for example,
-                * if we are being run from the initdb script, we'd better return an
-                * error status).
+                * Do normal process-exit cleanup, then return exit code 1 to indicate
+                * FATAL termination.  The postmaster may or may not consider this
+                * worthy of panic, depending on which subprocess returns it.
                 */
-               proc_exit(proc_exit_inprogress || !IsUnderPostmaster);
+               proc_exit(1);
        }
 
        if (elevel >= PANIC)
        {
                /*
-                * Serious crash time. Postmaster will observe nonzero process exit
+                * Serious crash time. Postmaster will observe SIGABRT process exit
                 * status and kill the other backends too.
                 *
                 * XXX: what if we are *in* the postmaster?  abort() won't kill our