/*
* Since there might be quotes to handle here, it is easier simply to pass
* everything to a shell to process them.
+ *
+ * XXX it would be better to fork and exec so that we would know the
+ * child postmaster's PID directly; then test_postmaster_connection could
+ * use the PID without having to rely on reading it back from the pidfile.
*/
if (log_file != NULL)
snprintf(cmd, MAXPGPATH, SYSTEMQUOTE "\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 &" SYSTEMQUOTE,
test_postmaster_connection(bool do_checkpoint)
{
PGPing ret = PQPING_NO_RESPONSE;
+ bool found_stale_pidfile = false;
+ pgpid_t pm_pid = 0;
char connstr[MAXPGPATH * 2 + 256];
int i;
if (optlines[3] == NULL)
{
/* File is exactly three lines, must be pre-9.1 */
- write_stderr(_("%s: -w option is not supported when starting a pre-9.1 server\n"),
+ write_stderr(_("\n%s: -w option is not supported when starting a pre-9.1 server\n"),
progname);
return PQPING_NO_ATTEMPT;
}
optlines[5] != NULL)
{
/* File is complete enough for us, parse it */
+ long pmpid;
time_t pmstart;
- int portnum;
- char *sockdir;
- char *hostaddr;
- char host_str[MAXPGPATH];
/*
- * Easy cross-check that we are looking at the right data
- * directory: is the postmaster older than this execution
- * of pg_ctl? Subtract 2 seconds to allow for possible
- * clock skew between pg_ctl and the postmaster.
+ * Make sanity checks. If it's for a standalone backend
+ * (negative PID), or the recorded start time is before
+ * pg_ctl started, then either we are looking at the wrong
+ * data directory, or this is a pre-existing pidfile that
+ * hasn't (yet?) been overwritten by our child postmaster.
+ * Allow 2 seconds slop for possible cross-process clock
+ * skew.
*/
+ pmpid = atol(optlines[LOCK_FILE_LINE_PID - 1]);
pmstart = atol(optlines[LOCK_FILE_LINE_START_TIME - 1]);
- if (pmstart < start_time - 2)
+ if (pmpid <= 0 || pmstart < start_time - 2)
{
- write_stderr(_("%s: this data directory is running a pre-existing postmaster\n"),
- progname);
- return PQPING_NO_ATTEMPT;
+ /*
+ * Set flag to report stale pidfile if it doesn't
+ * get overwritten before we give up waiting.
+ */
+ found_stale_pidfile = true;
}
+ else
+ {
+ /*
+ * OK, seems to be a valid pidfile from our child.
+ */
+ int portnum;
+ char *sockdir;
+ char *hostaddr;
+ char host_str[MAXPGPATH];
- /*
- * OK, extract port number and host string to use. Prefer
- * using Unix socket if available.
- */
- portnum = atoi(optlines[LOCK_FILE_LINE_PORT - 1]);
+ found_stale_pidfile = false;
+ pm_pid = (pgpid_t) pmpid;
- sockdir = optlines[LOCK_FILE_LINE_SOCKET_DIR - 1];
- hostaddr = optlines[LOCK_FILE_LINE_LISTEN_ADDR - 1];
+ /*
+ * Extract port number and host string to use. Prefer
+ * using Unix socket if available.
+ */
+ portnum = atoi(optlines[LOCK_FILE_LINE_PORT - 1]);
+ sockdir = optlines[LOCK_FILE_LINE_SOCKET_DIR - 1];
+ hostaddr = optlines[LOCK_FILE_LINE_LISTEN_ADDR - 1];
- /*
- * While unix_socket_directory can accept relative
- * directories, libpq's host parameter must have a leading
- * slash to indicate a socket directory. So, ignore
- * sockdir if it's relative, and try to use TCP instead.
- */
- if (sockdir[0] == '/')
- strlcpy(host_str, sockdir, sizeof(host_str));
- else
- strlcpy(host_str, hostaddr, sizeof(host_str));
+ /*
+ * While unix_socket_directory can accept relative
+ * directories, libpq's host parameter must have a
+ * leading slash to indicate a socket directory. So,
+ * ignore sockdir if it's relative, and try to use TCP
+ * instead.
+ */
+ if (sockdir[0] == '/')
+ strlcpy(host_str, sockdir, sizeof(host_str));
+ else
+ strlcpy(host_str, hostaddr, sizeof(host_str));
- /* remove trailing newline */
- if (strchr(host_str, '\n') != NULL)
- *strchr(host_str, '\n') = '\0';
+ /* remove trailing newline */
+ if (strchr(host_str, '\n') != NULL)
+ *strchr(host_str, '\n') = '\0';
- /* Fail if we couldn't get either sockdir or host addr */
- if (host_str[0] == '\0')
- {
- write_stderr(_("%s: -w option cannot use a relative socket directory specification\n"),
- progname);
- return PQPING_NO_ATTEMPT;
- }
+ /* Fail if couldn't get either sockdir or host addr */
+ if (host_str[0] == '\0')
+ {
+ write_stderr(_("\n%s: -w option cannot use a relative socket directory specification\n"),
+ progname);
+ return PQPING_NO_ATTEMPT;
+ }
- /* If postmaster is listening on "*", use "localhost" */
- if (strcmp(host_str, "*") == 0)
- strcpy(host_str, "localhost");
+ /* If postmaster is listening on "*", use localhost */
+ if (strcmp(host_str, "*") == 0)
+ strcpy(host_str, "localhost");
- /*
- * We need to set connect_timeout otherwise on Windows the
- * Service Control Manager (SCM) will probably timeout
- * first.
- */
- snprintf(connstr, sizeof(connstr),
- "dbname=postgres port=%d host='%s' connect_timeout=5",
- portnum, host_str);
+ /*
+ * We need to set connect_timeout otherwise on Windows
+ * the Service Control Manager (SCM) will probably
+ * timeout first.
+ */
+ snprintf(connstr, sizeof(connstr),
+ "dbname=postgres port=%d host='%s' connect_timeout=5",
+ portnum, host_str);
+ }
}
}
}
/*
* The postmaster should create postmaster.pid very soon after being
* started. If it's not there after we've waited 5 or more seconds,
- * assume startup failed and give up waiting.
+ * assume startup failed and give up waiting. (Note this covers
+ * both cases where the pidfile was never created, and where it was
+ * created and then removed during postmaster exit.) Also, if there
+ * *is* a file there but it appears stale, issue a suitable warning
+ * and give up waiting.
*/
if (i >= 5)
{
if (stat(pid_file, &statbuf) != 0)
return PQPING_NO_RESPONSE;
+
+ if (found_stale_pidfile)
+ {
+ write_stderr(_("\n%s: this data directory appears to be running a pre-existing postmaster\n"),
+ progname);
+ return PQPING_NO_RESPONSE;
+ }
}
+ /*
+ * If we've been able to identify the child postmaster's PID, check
+ * the process is still alive. This covers cases where the postmaster
+ * successfully created the pidfile but then crashed without removing
+ * it.
+ */
+ if (pm_pid > 0 && !postmaster_is_alive((pid_t) pm_pid))
+ return PQPING_NO_RESPONSE;
+
/* No response, or startup still in process; wait */
#if defined(WIN32)
if (do_checkpoint)
static void
do_start(void)
{
- pgpid_t pid;
pgpid_t old_pid = 0;
int exitcode;
exit(1);
}
- if (old_pid != 0)
- {
- pg_usleep(1000000);
- pid = get_pgpid();
- if (pid == old_pid)
- {
- write_stderr(_("%s: could not start server\n"
- "Examine the log output.\n"),
- progname);
- exit(1);
- }
- }
-
if (do_wait)
{
print_msg(_("waiting for server to start..."));