From cb2f2873d6b81ad7f0a9733ba738bfac0746fb7b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 10 May 2012 17:26:08 -0400 Subject: [PATCH] Temporarily revert stats collector latch changes so we can ship beta1. This patch reverts commit 49340037ee3ab46cb24144a86705e35f272c24d5 and some follow-on tweaking in pgstat.c. While the basic scheme of latch-ifying the stats collector seems sound enough, it's failing on most Windows buildfarm members for unknown reasons, and there's no time left to debug that before 9.2beta1. Better to ship a beta version without this improvement. I hope to re-revert this once beta1 is out, though. --- src/backend/postmaster/pgstat.c | 185 ++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 68 deletions(-) diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 01273e1934..ac971abb18 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -28,6 +28,12 @@ #include #include #include +#ifdef HAVE_POLL_H +#include +#endif +#ifdef HAVE_SYS_POLL_H +#include +#endif #include "pgstat.h" @@ -49,8 +55,8 @@ #include "storage/backendid.h" #include "storage/fd.h" #include "storage/ipc.h" -#include "storage/latch.h" #include "storage/pg_shmem.h" +#include "storage/pmsignal.h" #include "storage/procsignal.h" #include "utils/ascii.h" #include "utils/guc.h" @@ -88,6 +94,9 @@ * failed statistics collector; in * seconds. */ +#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster + * death; in seconds. */ + #define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY) #define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY) @@ -130,8 +139,6 @@ PgStat_MsgBgWriter BgWriterStats; */ NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET; -static Latch pgStatLatch; - static struct sockaddr_storage pgStatAddr; static time_t last_pgstat_start_time; @@ -3002,7 +3009,15 @@ PgstatCollectorMain(int argc, char *argv[]) { int len; PgStat_Msg msg; - int wr; + +#ifndef WIN32 +#ifdef HAVE_POLL + struct pollfd input_fd; +#else + struct timeval sel_timeout; + fd_set rfds; +#endif +#endif IsUnderPostmaster = true; /* we are a postmaster subprocess now */ @@ -3021,13 +3036,9 @@ PgstatCollectorMain(int argc, char *argv[]) elog(FATAL, "setsid() failed: %m"); #endif - /* Initialize private latch for use by signal handlers */ - InitLatch(&pgStatLatch); - /* * Ignore all signals usually bound to some action in the postmaster, - * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to - * support latch operations, because pgStatLatch is local not shared. + * except SIGQUIT. */ pqsignal(SIGHUP, pgstat_sighup_handler); pqsignal(SIGINT, SIG_IGN); @@ -3062,24 +3073,26 @@ PgstatCollectorMain(int argc, char *argv[]) pgStatRunningInCollector = true; pgStatDBHash = pgstat_read_statsfile(InvalidOid, true); + /* + * Setup the descriptor set for select(2). Since only one bit in the set + * ever changes, we need not repeat FD_ZERO each time. + */ +#if !defined(HAVE_POLL) && !defined(WIN32) + FD_ZERO(&rfds); +#endif + /* * Loop to process messages until we get SIGQUIT or detect ungraceful * death of our parent postmaster. * - * For performance reasons, we don't want to do ResetLatch/WaitLatch after - * every message; instead, do that only after a recv() fails to obtain a - * message. (This effectively means that if backends are sending us stuff - * like mad, we won't notice postmaster death until things slack off a - * bit; which seems fine.) To do that, we have an inner loop that - * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside - * the inner loop, which means that such interrupts will get serviced but - * the latch won't get cleared until next time there is a break in the - * action. + * For performance reasons, we don't want to do a PostmasterIsAlive() test + * after every message; instead, do it only when select()/poll() is + * interrupted by timeout. In essence, we'll stay alive as long as + * backends keep sending us stuff often, even if the postmaster is gone. */ for (;;) { - /* Clear any already-pending wakeups */ - ResetLatch(&pgStatLatch); + int got_data; /* * Quit if we get SIGQUIT from the postmaster. @@ -3088,37 +3101,87 @@ PgstatCollectorMain(int argc, char *argv[]) break; /* - * Inner loop iterates as long as we keep getting messages, or until - * need_exit becomes set. + * Reload configuration if we got SIGHUP from the postmaster. */ - while (!need_exit) + if (got_SIGHUP) { - /* - * Reload configuration if we got SIGHUP from the postmaster. - */ - if (got_SIGHUP) - { - got_SIGHUP = false; - ProcessConfigFile(PGC_SIGHUP); - } + ProcessConfigFile(PGC_SIGHUP); + got_SIGHUP = false; + } - /* - * Write the stats file if a new request has arrived that is not - * satisfied by existing file. - */ - if (last_statwrite < last_statrequest) - pgstat_write_statsfile(false); + /* + * Write the stats file if a new request has arrived that is not + * satisfied by existing file. + */ + if (last_statwrite < last_statrequest) + pgstat_write_statsfile(false); - /* - * Try to receive and process a message. This will not block, - * since the socket is set to non-blocking mode. - */ + /* + * Wait for a message to arrive; but not for more than + * PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will + * shut down after an ungraceful postmaster termination; so it needn't + * be very fast. However, on some systems SIGQUIT won't interrupt the + * poll/select call, so this also limits speed of response to SIGQUIT, + * which is more important.) + * + * We use poll(2) if available, otherwise select(2). Win32 has its own + * implementation. + */ +#ifndef WIN32 +#ifdef HAVE_POLL + input_fd.fd = pgStatSock; + input_fd.events = POLLIN | POLLERR; + input_fd.revents = 0; + + if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0) + { + if (errno == EINTR) + continue; + ereport(ERROR, + (errcode_for_socket_access(), + errmsg("poll() failed in statistics collector: %m"))); + } + + got_data = (input_fd.revents != 0); +#else /* !HAVE_POLL */ + + FD_SET(pgStatSock, &rfds); + + /* + * timeout struct is modified by select() on some operating systems, + * so re-fill it each time. + */ + sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT; + sel_timeout.tv_usec = 0; + + if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0) + { + if (errno == EINTR) + continue; + ereport(ERROR, + (errcode_for_socket_access(), + errmsg("select() failed in statistics collector: %m"))); + } + + got_data = FD_ISSET(pgStatSock, &rfds); +#endif /* HAVE_POLL */ +#else /* WIN32 */ + got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ, + PGSTAT_SELECT_TIMEOUT * 1000); +#endif + + /* + * If there is a message on the socket, read it and check for + * validity. + */ + if (got_data) + { len = recv(pgStatSock, (char *) &msg, sizeof(PgStat_Msg), 0); if (len < 0) { - if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) - break; /* out of inner loop */ + if (errno == EINTR) + continue; ereport(ERROR, (errcode_for_socket_access(), errmsg("could not read statistics message: %m"))); @@ -3216,21 +3279,17 @@ PgstatCollectorMain(int argc, char *argv[]) default: break; } - } /* end of inner message-processing loop */ - - /* Sleep until there's something to do */ - wr = WaitLatchOrSocket(&pgStatLatch, - WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE, - pgStatSock, - -1L); - - /* - * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. - */ - if (wr & WL_POSTMASTER_DEATH) - break; - } /* end of outer loop */ + } + else + { + /* + * We can only get here if the select/poll timeout elapsed. Check + * for postmaster death. + */ + if (!PostmasterIsAlive()) + break; + } + } /* end of message-processing loop */ /* * Save the final stats to reuse at next startup. @@ -3245,24 +3304,14 @@ PgstatCollectorMain(int argc, char *argv[]) static void pgstat_exit(SIGNAL_ARGS) { - int save_errno = errno; - need_exit = true; - SetLatch(&pgStatLatch); - - errno = save_errno; } /* SIGHUP handler for collector process */ static void pgstat_sighup_handler(SIGNAL_ARGS) { - int save_errno = errno; - got_SIGHUP = true; - SetLatch(&pgStatLatch); - - errno = save_errno; } -- 2.40.0