From c8196c87a76b86c514e1f245624a21ab068c9012 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 1 Oct 2004 18:30:25 +0000 Subject: [PATCH] Adjust postmaster to recognize that a lockfile containing its parent's PID must be stale. Tweak example startup scripts to not use pg_ctl but launch the postmaster directly, thereby ensuring that only the postmaster's direct parent shell will be a postgres-owned process. In combination these should fix the longstanding problem of the postmaster sometimes refusing to start during reboot because it thinks the old lockfile is not stale. --- contrib/start-scripts/PostgreSQL.darwin | 26 ++++++++++++-------- contrib/start-scripts/freebsd | 23 +++++++++++------- contrib/start-scripts/linux | 28 +++++++++++++--------- src/backend/utils/init/miscinit.c | 32 ++++++++++++++++++------- 4 files changed, 72 insertions(+), 37 deletions(-) diff --git a/contrib/start-scripts/PostgreSQL.darwin b/contrib/start-scripts/PostgreSQL.darwin index e953a3091b..3e4b86a7f3 100755 --- a/contrib/start-scripts/PostgreSQL.darwin +++ b/contrib/start-scripts/PostgreSQL.darwin @@ -48,7 +48,7 @@ prefix="/usr/local/pgsql" # Data directory PGDATA="/usr/local/pgsql/data" -# Who to run pg_ctl as, should be "postgres". +# Who to run the postmaster as, usually "postgres". (NOT "root") PGUSER="postgres" # the logfile path and name (NEEDS to be writeable by PGUSER) @@ -68,8 +68,13 @@ ROTATESEC="604800" # The path that is to be used for the script PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin -# What to use to start up the postmaster -DAEMON="$prefix/bin/pg_ctl" +# What to use to start up the postmaster (we do NOT use pg_ctl for this, +# as it adds no value and can cause the postmaster to misrecognize a stale +# lock file) +DAEMON="$prefix/bin/postmaster" + +# What to use to shut down the postmaster +PGCTL="$prefix/bin/pg_ctl" # The apache log rotation utility LOGUTIL="/usr/sbin/rotatelogs" @@ -80,27 +85,28 @@ StartService () { if [ "${POSTGRESQLSERVER:=-NO-}" = "-YES-" ]; then ConsoleMessage "Starting PostgreSQL database server" if [ "${ROTATELOGS}" = "1" ]; then - sudo -u $PGUSER sh -c "${DAEMON} start -D ${PGDATA} -s | ${LOGUTIL} ${PGLOG} ${ROTATESEC} &" + sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' | ${LOGUTIL} '${PGLOG}' ${ROTATESEC} &" else - sudo -u $PGUSER $DAEMON start -D "$PGDATA" -s -l $PGLOG + sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' &" >>$PGLOG 2>&1 fi fi } StopService () { ConsoleMessage "Stopping PostgreSQL database server" - sudo -u $PGUSER $DAEMON stop -D "$PGDATA" -s -m fast + sudo -u $PGUSER $PGCTL stop -D "$PGDATA" -s -m fast } RestartService () { if [ "${POSTGRESQLSERVER:=-NO-}" = "-YES-" ]; then ConsoleMessage "Restarting PostgreSQL database server" + # should match StopService: + sudo -u $PGUSER $PGCTL stop -D "$PGDATA" -s -m fast + # should match StartService: if [ "${ROTATELOGS}" = "1" ]; then -# StopService -# StartService - sudo -u $PGUSER sh -c "${DAEMON} restart -D ${PGDATA} -s -m fast | ${LOGUTIL} ${PGLOG} ${ROTATESEC} &" + sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' | ${LOGUTIL} '${PGLOG}' ${ROTATESEC} &" else - sudo -u $PGUSER $DAEMON restart -D "$PGDATA" -s -m fast + sudo -u $PGUSER sh -c "${DAEMON} -D '${PGDATA}' &" >>$PGLOG 2>&1 fi else StopService diff --git a/contrib/start-scripts/freebsd b/contrib/start-scripts/freebsd index cee56956ce..528fc776c7 100644 --- a/contrib/start-scripts/freebsd +++ b/contrib/start-scripts/freebsd @@ -6,7 +6,7 @@ # Created through merger of the Linux start script by Ryan Kirkpatrick # and the script in the FreeBSD ports collection. -# $PostgreSQL: pgsql/contrib/start-scripts/freebsd,v 1.3 2003/11/29 19:51:35 pgsql Exp $ +# $PostgreSQL: pgsql/contrib/start-scripts/freebsd,v 1.4 2004/10/01 18:30:21 tgl Exp $ ## EDIT FROM HERE @@ -16,7 +16,7 @@ prefix=/usr/local/pgsql # Data directory PGDATA="/usr/local/pgsql/data" -# Who to run pg_ctl as, should be "postgres". +# Who to run the postmaster as, usually "postgres". (NOT "root") PGUSER=postgres # Where to keep a log file @@ -27,24 +27,31 @@ PGLOG="$PGDATA/serverlog" # The path that is to be used for the script PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin -# What to use to start up the postmaster -DAEMON="$prefix/bin/pg_ctl" +# What to use to start up the postmaster (we do NOT use pg_ctl for this, +# as it adds no value and can cause the postmaster to misrecognize a stale +# lock file) +DAEMON="$prefix/bin/postmaster" +# What to use to shut down the postmaster +PGCTL="$prefix/bin/pg_ctl" + +# Only start if we can find the postmaster. test -x "$DAEMON" || exit 0 case $1 in start) - su -l $PGUSER -c "$DAEMON start -D '$PGDATA' -s -l $PGLOG" + su -l $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 echo -n ' postgresql' ;; stop) - su -l $PGUSER -c "$DAEMON stop -D '$PGDATA' -s -m fast" + su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast" ;; restart) - su -l $PGUSER -c "$DAEMON restart -D '$PGDATA' -s -m fast" + su -l $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w" + su -l $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 ;; status) - su -l $PGUSER -c "$DAEMON status -D '$PGDATA'" + su -l $PGUSER -c "$PGCTL status -D '$PGDATA'" ;; *) # Print help diff --git a/contrib/start-scripts/linux b/contrib/start-scripts/linux index 19042d0e99..e0174de571 100644 --- a/contrib/start-scripts/linux +++ b/contrib/start-scripts/linux @@ -24,7 +24,7 @@ # Original author: Ryan Kirkpatrick -# $PostgreSQL: pgsql/contrib/start-scripts/linux,v 1.6 2003/11/29 19:51:36 pgsql Exp $ +# $PostgreSQL: pgsql/contrib/start-scripts/linux,v 1.7 2004/10/01 18:30:21 tgl Exp $ ## EDIT FROM HERE @@ -34,7 +34,7 @@ prefix=/usr/local/pgsql # Data directory PGDATA="/usr/local/pgsql/data" -# Who to run pg_ctl as, should be "postgres". +# Who to run the postmaster as, usually "postgres". (NOT "root") PGUSER=postgres # Where to keep a log file @@ -54,38 +54,44 @@ fi # The path that is to be used for the script PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin -# What to use to start up the postmaster -DAEMON="$prefix/bin/pg_ctl" +# What to use to start up the postmaster (we do NOT use pg_ctl for this, +# as it adds no value and can cause the postmaster to misrecognize a stale +# lock file) +DAEMON="$prefix/bin/postmaster" + +# What to use to shut down the postmaster +PGCTL="$prefix/bin/pg_ctl" set -e -# Only start if we can find pg_ctl. -test -f $DAEMON || exit 0 +# Only start if we can find the postmaster. +test -x $DAEMON || exit 0 # Parse command line parameters. case $1 in start) $ECHO_N "Starting PostgreSQL: "$ECHO_C - su - $PGUSER -c "$DAEMON start -D '$PGDATA' -s -l $PGLOG" + su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 echo "ok" ;; stop) echo -n "Stopping PostgreSQL: " - su - $PGUSER -c "$DAEMON stop -D '$PGDATA' -s -m fast" + su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast" echo "ok" ;; restart) echo -n "Restarting PostgreSQL: " - su - $PGUSER -c "$DAEMON restart -D '$PGDATA' -s -m fast -l $PGLOG" + su - $PGUSER -c "$PGCTL stop -D '$PGDATA' -s -m fast -w" + su - $PGUSER -c "$DAEMON -D '$PGDATA' &" >>$PGLOG 2>&1 echo "ok" ;; reload) echo -n "Reload PostgreSQL: " - su - $PGUSER -c "$DAEMON reload -D '$PGDATA' -s" + su - $PGUSER -c "$PGCTL reload -D '$PGDATA' -s" echo "ok" ;; status) - su - $PGUSER -c "$DAEMON status -D '$PGDATA'" + su - $PGUSER -c "$PGCTL status -D '$PGDATA'" ;; *) # Print help diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 9bb72c4432..5136e39f44 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.132 2004/08/29 05:06:50 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.133 2004/10/01 18:30:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -526,10 +526,22 @@ CreateLockFile(const char *filename, bool amPostmaster, /* * Check to see if the other process still exists * + * If the PID in the lockfile is our own PID or our parent's PID, + * then the file must be stale (probably left over from a previous + * system boot cycle). We need this test because of the likelihood + * that a reboot will assign exactly the same PID as we had in the + * previous reboot. Also, if there is just one more process launch + * in this reboot than in the previous one, the lockfile might mention + * our parent's PID. We can reject that since we'd never be launched + * directly by a competing postmaster. We can't detect grandparent + * processes unfortunately, but if the init script is written carefully + * then all but the immediate parent shell will be root-owned processes + * and so the kill test will fail with EPERM. + * * Normally kill() will fail with ESRCH if the given PID doesn't * exist. BeOS returns EINVAL for some silly reason, however. */ - if (other_pid != my_pid) + if (other_pid != my_pid && other_pid != getppid()) { if (kill(other_pid, 0) == 0 || (errno != ESRCH @@ -544,12 +556,16 @@ CreateLockFile(const char *filename, bool amPostmaster, errmsg("lock file \"%s\" already exists", filename), isDDLock ? - errhint("Is another %s (PID %d) running in data directory \"%s\"?", - (encoded_pid < 0 ? "postgres" : "postmaster"), - (int) other_pid, refName) : - errhint("Is another %s (PID %d) using socket file \"%s\"?", - (encoded_pid < 0 ? "postgres" : "postmaster"), - (int) other_pid, refName))); + (encoded_pid < 0 ? + errhint("Is another postgres (PID %d) running in data directory \"%s\"?", + (int) other_pid, refName) : + errhint("Is another postmaster (PID %d) running in data directory \"%s\"?", + (int) other_pid, refName)) : + (encoded_pid < 0 ? + errhint("Is another postgres (PID %d) using socket file \"%s\"?", + (int) other_pid, refName) : + errhint("Is another postmaster (PID %d) using socket file \"%s\"?", + (int) other_pid, refName)))); } } -- 2.40.0