granicus.if.org Git - postgresql/commitdiff
Hack pg_ctl to report postmaster's exit status.
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 7 Oct 2019 14:39:07 +0000 (10:39 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 7 Oct 2019 14:39:07 +0000 (10:39 -0400)
Temporarily change pg_ctl so that the postmaster's exit status will
be printed (to the postmaster's stdout).  This is to help identify
the cause of intermittent "postmaster exited during a parallel
transaction" failures seen on a couple of buildfarm members.  This
change degrades pg_ctl's functionality in a couple of minor ways,
so we'll revert it once we've obtained the desired info.

Discussion: https://postgr.es/m/18537.1570421268@sss.pgh.pa.us

src/bin/pg_ctl/pg_ctl.c

index dd76be6dd2eda6e7a2d37da425ee79fefb330189..316651ced28b07b25997cf5093ccc3ef784975e5 100644 (file)
@@ -106,6 +106,7 @@ static char promote_file[MAXPGPATH];
 static char logrotate_file[MAXPGPATH];
 
 static volatile pgpid_t postmasterPID = -1;
+static pgpid_t old_postmaster_pid = 0;
 
 #ifdef WIN32
 static DWORD pgctl_start_type = SERVICE_AUTO_START;
@@ -490,16 +491,17 @@ start_postmaster(void)
 
        /*
         * Since there might be quotes to handle here, it is easier simply to pass
-        * everything to a shell to process them.  Use exec so that the postmaster
-        * has the same PID as the current child process.
+        * everything to a shell to process them.
+        *
+        * Since we aren't telling the shell to directly exec the postmaster,
+        * the returned PID is a parent process, the same as on Windows.
         */
        if (log_file != NULL)
-               snprintf(cmd, MAXPGPATH, "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1",
-                                exec_path, pgdata_opt, post_opts,
-                                DEVNULL, log_file);
+               snprintf(cmd, MAXPGPATH, "exec < \"%s\" >> \"%s\" 2>&1; \"%s\" %s%s; echo postmaster exit status is $?",
+                                DEVNULL, log_file, exec_path, pgdata_opt, post_opts);
        else
-               snprintf(cmd, MAXPGPATH, "exec \"%s\" %s%s < \"%s\" 2>&1",
-                                exec_path, pgdata_opt, post_opts, DEVNULL);
+               snprintf(cmd, MAXPGPATH, "exec < \"%s\" 2>&1; \"%s\" %s%s; echo postmaster exit status is $?",
+                                DEVNULL, exec_path, pgdata_opt, post_opts);
 
        (void) execl("/bin/sh", "/bin/sh", "-c", cmd, (char *) NULL);
 
@@ -586,12 +588,8 @@ wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint)
                        pmpid = atol(optlines[LOCK_FILE_LINE_PID - 1]);
                        pmstart = atol(optlines[LOCK_FILE_LINE_START_TIME - 1]);
                        if (pmstart >= start_time - 2 &&
-#ifndef WIN32
-                               pmpid == pm_pid
-#else
-                       /* Windows can only reject standalone-backend PIDs */
-                               pmpid > 0
-#endif
+                       /* If pid is the value we saw before starting, assume it's stale */
+                               pmpid > 0 && pmpid != old_postmaster_pid
                                )
                        {
                                /*
@@ -621,7 +619,7 @@ wait_for_postmaster(pgpid_t pm_pid, bool do_checkpoint)
                 * Check whether the child postmaster process is still alive.  This
                 * lets us exit early if the postmaster fails during startup.
                 *
-                * On Windows, we may be checking the postmaster's parent shell, but
+                * We may be checking the postmaster's parent shell, but
                 * that's fine for this purpose.
                 */
 #ifndef WIN32
@@ -823,13 +821,12 @@ do_init(void)
 static void
 do_start(void)
 {
-       pgpid_t         old_pid = 0;
        pgpid_t         pm_pid;
 
        if (ctl_command != RESTART_COMMAND)
        {
-               old_pid = get_pgpid(false);
-               if (old_pid != 0)
+               old_postmaster_pid = get_pgpid(false);
+               if (old_postmaster_pid != 0)
                        write_stderr(_("%s: another server might be running; "
                                                   "trying to start server anyway\n"),
                                                 progname);