From 3971f64843b02e4a55d854156bd53e46a0588e45 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 8 Feb 2016 18:43:11 -0500 Subject: [PATCH] Temporarily make pg_ctl and server shutdown a whole lot chattier. This is a quick hack, due to be reverted when its purpose has been served, to try to gather information about why some of the buildfarm critters regularly fail with "postmaster does not shut down" complaints. Maybe they are just really overloaded, but maybe something else is going on. Hence, instrument pg_ctl to print the current time when it starts waiting for postmaster shutdown and when it gives up, and add a lot of logging of the current time in the server's checkpoint and shutdown code paths. No attempt has been made to make this pretty. I'm not even totally sure if it will build on Windows, but we'll soon find out. --- src/backend/access/transam/xlog.c | 43 +++++++++++++++++++++++++++-- src/backend/postmaster/autovacuum.c | 3 +- src/backend/postmaster/postmaster.c | 9 ++++-- src/backend/storage/buffer/bufmgr.c | 13 +++++++++ src/backend/utils/init/miscinit.c | 28 +++++++++++++++++++ src/bin/pg_ctl/pg_ctl.c | 36 ++++++++++++++++++++++-- src/include/miscadmin.h | 1 + 7 files changed, 125 insertions(+), 8 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a2846c41b5..6ea92b4102 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7927,7 +7927,7 @@ ShutdownXLOG(int code, Datum arg) { /* Don't be chatty in standalone mode */ ereport(IsPostmasterEnvironment ? LOG : NOTICE, - (errmsg("shutting down"))); + (errmsg("shutting down at %s", current_time_as_str()))); if (RecoveryInProgress()) CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); @@ -7943,15 +7943,28 @@ ShutdownXLOG(int code, Datum arg) RequestXLogSwitch(); CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); + + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "shutdown checkpoint complete at %s", + current_time_as_str()); } ShutdownCLOG(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "ShutdownCLOG() complete at %s", + current_time_as_str()); ShutdownCommitTs(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "ShutdownCommitTs() complete at %s", + current_time_as_str()); ShutdownSUBTRANS(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "ShutdownSUBTRANS() complete at %s", + current_time_as_str()); ShutdownMultiXact(); /* Don't be chatty in standalone mode */ ereport(IsPostmasterEnvironment ? LOG : NOTICE, - (errmsg("database system is shut down"))); + (errmsg("database system is shut down at %s", current_time_as_str()))); } /* @@ -8602,19 +8615,45 @@ CreateEndOfRecoveryRecord(void) static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags) { + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointGuts starting at %s", current_time_as_str()); CheckPointCLOG(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointCLOG() done at %s", current_time_as_str()); CheckPointCommitTs(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointCommitTs() done at %s", current_time_as_str()); CheckPointSUBTRANS(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointSUBTRANS() done at %s", current_time_as_str()); CheckPointMultiXact(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointMultiXact() done at %s", current_time_as_str()); CheckPointPredicate(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointPredicate() done at %s", current_time_as_str()); CheckPointRelationMap(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointRelationMap() done at %s", current_time_as_str()); CheckPointReplicationSlots(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointReplicationSlots() done at %s", current_time_as_str()); CheckPointSnapBuild(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointSnapBuild() done at %s", current_time_as_str()); CheckPointLogicalRewriteHeap(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointLogicalRewriteHeap() done at %s", current_time_as_str()); CheckPointBuffers(flags); /* performs all required fsyncs */ + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointBuffers() done at %s", current_time_as_str()); CheckPointReplicationOrigin(); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointReplicationOrigin() done at %s", current_time_as_str()); /* We deliberately delay 2PC checkpointing as long as possible */ CheckPointTwoPhase(checkPointRedo); + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "CheckPointGuts done at %s", current_time_as_str()); } /* diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index e2859df41d..0914476d33 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -772,7 +772,8 @@ AutoVacLauncherMain(int argc, char *argv[]) /* Normal exit from the autovac launcher is here */ shutdown: ereport(LOG, - (errmsg("autovacuum launcher shutting down"))); + (errmsg("autovacuum launcher shutting down at %s", + current_time_as_str()))); AutoVacuumShmem->av_launcherpid = 0; proc_exit(0); /* done */ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index b16fc28a27..1337eabb3a 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2538,7 +2538,8 @@ pmdie(SIGNAL_ARGS) break; Shutdown = SmartShutdown; ereport(LOG, - (errmsg("received smart shutdown request"))); + (errmsg("received smart shutdown request at %s", + current_time_as_str()))); #ifdef USE_SYSTEMD sd_notify(0, "STOPPING=1"); #endif @@ -2593,7 +2594,8 @@ pmdie(SIGNAL_ARGS) break; Shutdown = FastShutdown; ereport(LOG, - (errmsg("received fast shutdown request"))); + (errmsg("received fast shutdown request at %s", + current_time_as_str()))); #ifdef USE_SYSTEMD sd_notify(0, "STOPPING=1"); #endif @@ -2656,7 +2658,8 @@ pmdie(SIGNAL_ARGS) break; Shutdown = ImmediateShutdown; ereport(LOG, - (errmsg("received immediate shutdown request"))); + (errmsg("received immediate shutdown request at %s", + current_time_as_str()))); #ifdef USE_SYSTEMD sd_notify(0, "STOPPING=1"); #endif diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7141eb83c6..3d28946699 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1691,6 +1691,10 @@ BufferSync(int flags) UnlockBufHdr(bufHdr); } + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "BufferSync(%x) beginning to write %d buffers at %s", + flags, num_to_write, current_time_as_str()); + if (num_to_write == 0) return; /* nothing to do */ @@ -1745,6 +1749,11 @@ BufferSync(int flags) if (num_written >= num_to_write) break; + if ((num_written % 1024) == 0) + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "BufferSync(%x) wrote %d/%d buffers at %s", + flags, num_written, num_to_write, current_time_as_str()); + /* * Sleep to throttle our I/O rate. */ @@ -1756,6 +1765,10 @@ BufferSync(int flags) buf_id = 0; } + elog(IsPostmasterEnvironment ? LOG : NOTICE, + "BufferSync(%x) done, wrote %d/%d buffers at %s", + flags, num_written, num_to_write, current_time_as_str()); + /* * Update checkpoint statistics. As noted above, this doesn't include * buffers written by other backends or bgwriter scan. diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index b7bab56099..03cbb6e349 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -691,6 +691,31 @@ GetUserNameFromId(Oid roleid, bool noerr) return result; } +/* + * Quick hack. + */ +const char * +current_time_as_str(void) +{ + static char buf[128]; + struct timeval now_timeval; + pg_time_t now; + char msbuf[8]; + + gettimeofday(&now_timeval, NULL); + now = (pg_time_t) now_timeval.tv_sec; + + pg_strftime(buf, sizeof(buf), + /* leave room for milliseconds... */ + "%Y-%m-%d %H:%M:%S %Z", + pg_localtime(&now, log_timezone)); + + /* 'paste' milliseconds into place... */ + sprintf(msbuf, ".%03d", (int) (now_timeval.tv_usec / 1000)); + memcpy(buf + 19, msbuf, 4); + + return buf; +} /*------------------------------------------------------------------------- * Interlock-file support @@ -724,6 +749,9 @@ UnlinkLockFiles(int status, Datum arg) } /* Since we're about to exit, no need to reclaim storage */ lock_files = NIL; + + if (IsPostmasterEnvironment) + elog(LOG, "lock files all released at %s", current_time_as_str()); } /* diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index 9da38c4e6f..c970a73380 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -946,6 +946,32 @@ do_start(void) } +/* + * Quick hack. + */ +const char * +current_time_as_str(void) +{ + static char buf[128]; + struct timeval now_timeval; + time_t now; + char msbuf[8]; + + gettimeofday(&now_timeval, NULL); + now = (time_t) now_timeval.tv_sec; + + strftime(buf, sizeof(buf), + /* leave room for milliseconds... */ + "%Y-%m-%d %H:%M:%S %Z", + localtime(&now)); + + /* 'paste' milliseconds into place... */ + sprintf(msbuf, ".%03d", (int) (now_timeval.tv_usec / 1000)); + memcpy(buf + 19, msbuf, 4); + + return buf; +} + static void do_stop(void) { @@ -998,7 +1024,12 @@ do_stop(void) "Shutdown will not complete until pg_stop_backup() is called.\n\n")); } - print_msg(_("waiting for server to shut down...")); + if (!silent_mode) + { + fprintf(stdout, _("waiting for server to shut down at %s..."), + current_time_as_str()); + fflush(stdout); + } for (cnt = 0; cnt < wait_seconds; cnt++) { @@ -1015,7 +1046,8 @@ do_stop(void) { print_msg(_(" failed\n")); - write_stderr(_("%s: server does not shut down\n"), progname); + write_stderr(_("%s: server does not shut down at %s\n"), + progname, current_time_as_str()); if (shutdown_mode == SMART_MODE) write_stderr(_("HINT: The \"-m fast\" option immediately disconnects sessions rather than\n" "waiting for session-initiated disconnection.\n")); diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index cc7833e6cd..ec535a3bc4 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -448,6 +448,7 @@ extern char *local_preload_libraries_string; #define LOCK_FILE_LINE_LISTEN_ADDR 6 #define LOCK_FILE_LINE_SHMEM_KEY 7 +extern const char *current_time_as_str(void); extern void CreateDataDirLockFile(bool amPostmaster); extern void CreateSocketLockFile(const char *socketfile, bool amPostmaster, const char *socketDir); -- 2.40.0