From 61c21ddad03dda1f92a05a723cf449087d80ec08 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Sun, 23 Apr 2017 15:26:25 -0700
Subject: [PATCH] Remove select(2) backed latch implementation.

poll(2) is required by Single Unix Spec v2, the usual baseline for
postgres (leaving windows aside). There haven't been any buildfarm
animals without poll(2) for a long while, leaving the select(2)
implementation largely untested.

On windows, including mingw, poll() is not available, but we have a
special case implementation for windows anyway.

Author: Andres Freund
Discussion: https://postgr.es/m/20170420003611.7r2sdvehesdyiz2i@alap3.anarazel.de
---
 src/backend/storage/ipc/latch.c | 221 ++++----------------------------
 1 file changed, 28 insertions(+), 193 deletions(-)

diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 67162fdd4a..31aa5debc7 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -3,27 +3,24 @@
  * latch.c
  *	  Routines for inter-process latches
  *
- * The Unix implementation uses the so-called self-pipe trick to overcome
- * the race condition involved with select() and setting a global flag
- * in the signal handler. When a latch is set and the current process
- * is waiting for it, the signal handler wakes up the select() in
- * WaitLatch by writing a byte to a pipe. A signal by itself doesn't
- * interrupt select() on all platforms, and even on platforms where it
- * does, a signal that arrives just before the select() call does not
- * prevent the select() from entering sleep. An incoming byte on a pipe
- * however reliably interrupts the sleep, and causes select() to return
- * immediately even if the signal arrives before select() begins.
- *
- * (Actually, we prefer epoll_wait() over poll() over select() where
- * available, but the same comments apply.)
+ * The Unix implementation uses the so-called self-pipe trick to overcome the
+ * race condition involved with poll() (or epoll_wait() on linux) and setting
+ * a global flag in the signal handler. When a latch is set and the current
+ * process is waiting for it, the signal handler wakes up the poll() in
+ * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
+ * poll() on all platforms, and even on platforms where it does, a signal that
+ * arrives just before the poll() call does not prevent poll() from entering
+ * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
+ * and causes poll() to return immediately even if the signal arrives before
+ * poll() begins.
  *
  * When SetLatch is called from the same process that owns the latch,
  * SetLatch writes the byte directly to the pipe. If it's owned by another
  * process, SIGUSR1 is sent and the signal handler in the waiting process
  * writes the byte to the pipe on behalf of the signaling process.
  *
- * The Windows implementation uses Windows events that are inherited by
- * all postmaster child processes.
+ * The Windows implementation uses Windows events that are inherited by all
+ * postmaster child processes. There's no need for the self-pipe trick there.
  *
  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -39,7 +36,6 @@
 #include
 #include
 #include
-#include
 #ifdef HAVE_SYS_EPOLL_H
 #include <sys/epoll.h>
 #endif
@@ -49,9 +45,6 @@
 #ifdef HAVE_SYS_POLL_H
 #include <sys/poll.h>
 #endif
-#ifdef HAVE_SYS_SELECT_H
-#include <sys/select.h>
-#endif
 
 #include "miscadmin.h"
 #include "pgstat.h"
@@ -69,14 +62,12 @@
  * define somewhere before this block.
  */
 #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
-	defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32)
+	defined(WAIT_USE_WIN32)
 /* don't overwrite manual choice */
 #elif defined(HAVE_SYS_EPOLL_H)
 #define WAIT_USE_EPOLL
 #elif defined(HAVE_POLL)
 #define WAIT_USE_POLL
-#elif HAVE_SYS_SELECT_H
-#define WAIT_USE_SELECT
 #elif WIN32
 #define WAIT_USE_WIN32
 #else
@@ -162,8 +153,8 @@ InitializeLatchSupport(void)
 
 	/*
 	 * Set up the self-pipe that allows a signal handler to wake up the
-	 * select() in WaitLatch. Make the write-end non-blocking, so that
-	 * SetLatch won't block if the event has already been set many times
+	 * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
+	 * that SetLatch won't block if the event has already been set many times
 	 * filling the kernel buffer. Make the read-end non-blocking too, so that
 	 * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
 	 */
@@ -401,8 +392,9 @@ SetLatch(volatile Latch *latch)
 
 	/*
 	 * See if anyone's waiting for the latch. It can be the current process if
-	 * we're in a signal handler. We use the self-pipe to wake up the select()
-	 * in that case. If it's another process, send a signal.
+	 * we're in a signal handler. We use the self-pipe to wake up the
+	 * poll()/epoll_wait() in that case. If it's another process, send a
+	 * signal.
	 *
	 * Fetch owner_pid only once, in case the latch is concurrently getting
	 * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
@@ -666,8 +658,6 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
 	WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
 #elif defined(WAIT_USE_POLL)
 	WaitEventAdjustPoll(set, event);
-#elif defined(WAIT_USE_SELECT)
-	/* nothing to do */
 #elif defined(WAIT_USE_WIN32)
 	WaitEventAdjustWin32(set, event);
 #endif
@@ -724,8 +714,6 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
 	WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
 #elif defined(WAIT_USE_POLL)
 	WaitEventAdjustPoll(set, event);
-#elif defined(WAIT_USE_SELECT)
-	/* nothing to do */
 #elif defined(WAIT_USE_WIN32)
 	WaitEventAdjustWin32(set, event);
 #endif
@@ -1055,9 +1043,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
			 * because we don't expect the pipe to become readable or to have
			 * any errors either, treat those cases as postmaster death, too.
			 *
-			 * As explained in the WAIT_USE_SELECT implementation, select(2)
-			 * may spuriously return. Be paranoid about that here too, a
-			 * spurious WL_POSTMASTER_DEATH would be painful.
+			 * Be paranoid about a spurious event signalling the postmaster as
+			 * being dead. There have been reports about that happening with
+			 * older primitives (select(2) to be specific), and a spurious
+			 * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
+			 * cost much.
			 */
			if (!PostmasterIsAlive())
			{
@@ -1171,9 +1161,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
			 * we don't expect the pipe to become readable or to have any
			 * errors either, treat those cases as postmaster death, too.
			 *
-			 * As explained in the WAIT_USE_SELECT implementation, select(2)
-			 * may spuriously return. Be paranoid about that here too, a
-			 * spurious WL_POSTMASTER_DEATH would be painful.
+			 * Be paranoid about a spurious event signalling the postmaster as
+			 * being dead. There have been reports about that happening with
+			 * older primitives (select(2) to be specific), and a spurious
+			 * WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
+			 * cost much.
			 */
			if (!PostmasterIsAlive())
			{
@@ -1214,163 +1206,6 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
 	return returned_events;
 }
 
-#elif defined(WAIT_USE_SELECT)
-
-/*
- * Wait using select(2).
- *
- * XXX: On at least older linux kernels select(), in violation of POSIX,
- * doesn't reliably return a socket as writable if closed - but we rely on
- * that. So far all the known cases of this problem are on platforms that also
- * provide a poll() implementation without that bug. If we find one where
- * that's not the case, we'll need to add a workaround.
- */
-static inline int
-WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
-					  WaitEvent *occurred_events, int nevents)
-{
-	int			returned_events = 0;
-	int			rc;
-	WaitEvent  *cur_event;
-	fd_set		input_mask;
-	fd_set		output_mask;
-	int			hifd;
-	struct timeval tv;
-	struct timeval *tvp = NULL;
-
-	FD_ZERO(&input_mask);
-	FD_ZERO(&output_mask);
-
-	/*
-	 * Prepare input/output masks. We do so every loop iteration as there's no
-	 * entirely portable way to copy fd_sets.
-	 */
-	for (cur_event = set->events;
-		 cur_event < (set->events + set->nevents);
-		 cur_event++)
-	{
-		if (cur_event->events == WL_LATCH_SET)
-			FD_SET(cur_event->fd, &input_mask);
-		else if (cur_event->events == WL_POSTMASTER_DEATH)
-			FD_SET(cur_event->fd, &input_mask);
-		else
-		{
-			Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
-			if (cur_event->events == WL_SOCKET_READABLE)
-				FD_SET(cur_event->fd, &input_mask);
-			else if (cur_event->events == WL_SOCKET_WRITEABLE)
-				FD_SET(cur_event->fd, &output_mask);
-		}
-
-		if (cur_event->fd > hifd)
-			hifd = cur_event->fd;
-	}
-
-	/* Sleep */
-	if (cur_timeout >= 0)
-	{
-		tv.tv_sec = cur_timeout / 1000L;
-		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
-		tvp = &tv;
-	}
-	rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);
-
-	/* Check return code */
-	if (rc < 0)
-	{
-		/* EINTR is okay, otherwise complain */
-		if (errno != EINTR)
-		{
-			waiting = false;
-			ereport(ERROR,
-					(errcode_for_socket_access(),
-					 errmsg("select() failed: %m")));
-		}
-		return 0;				/* retry */
-	}
-	else if (rc == 0)
-	{
-		/* timeout exceeded */
-		return -1;
-	}
-
-	/*
-	 * To associate events with select's masks, we have to check the status of
-	 * the file descriptors associated with an event; by looping through all
-	 * events.
-	 */
-	for (cur_event = set->events;
-		 cur_event < (set->events + set->nevents)
-		 && returned_events < nevents;
-		 cur_event++)
-	{
-		occurred_events->pos = cur_event->pos;
-		occurred_events->user_data = cur_event->user_data;
-		occurred_events->events = 0;
-
-		if (cur_event->events == WL_LATCH_SET &&
-			FD_ISSET(cur_event->fd, &input_mask))
-		{
-			/* There's data in the self-pipe, clear it. */
-			drainSelfPipe();
-
-			if (set->latch->is_set)
-			{
-				occurred_events->fd = PGINVALID_SOCKET;
-				occurred_events->events = WL_LATCH_SET;
-				occurred_events++;
-				returned_events++;
-			}
-		}
-		else if (cur_event->events == WL_POSTMASTER_DEATH &&
-				 FD_ISSET(cur_event->fd, &input_mask))
-		{
-			/*
-			 * According to the select(2) man page on Linux, select(2) may
-			 * spuriously return and report a file descriptor as readable,
-			 * when it's not; and presumably so can poll(2). It's not clear
-			 * that the relevant cases would ever apply to the postmaster
-			 * pipe, but since the consequences of falsely returning
-			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
-			 * trouble to positively verify EOF with PostmasterIsAlive().
-			 */
-			if (!PostmasterIsAlive())
-			{
-				occurred_events->fd = PGINVALID_SOCKET;
-				occurred_events->events = WL_POSTMASTER_DEATH;
-				occurred_events++;
-				returned_events++;
-			}
-		}
-		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
-		{
-			Assert(cur_event->fd != PGINVALID_SOCKET);
-
-			if ((cur_event->events & WL_SOCKET_READABLE) &&
-				FD_ISSET(cur_event->fd, &input_mask))
-			{
-				/* data available in socket, or EOF */
-				occurred_events->events |= WL_SOCKET_READABLE;
-			}
-
-			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
-				FD_ISSET(cur_event->fd, &output_mask))
-			{
-				/* socket is writeable, or EOF */
-				occurred_events->events |= WL_SOCKET_WRITEABLE;
-			}
-
-			if (occurred_events->events != 0)
-			{
-				occurred_events->fd = cur_event->fd;
-				occurred_events++;
-				returned_events++;
-			}
-		}
-	}
-	return returned_events;
-}
-
 #elif defined(WAIT_USE_WIN32)
 
 /*
-- 
2.40.0
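
Since the rewritten header comment is the heart of this change, here is a minimal, standalone C sketch of the self-pipe trick it describes, kept separate from the patch itself. This is not PostgreSQL code: the names (latch_is_set, selfpipe_readfd, drain_self_pipe and so on) are illustrative stand-ins, error handling is pared down, and SIGUSR1 is delivered by a forked child rather than by another backend. It demonstrates the race the comment talks about: the handler both sets the flag and writes a byte into a non-blocking pipe, so a signal that arrives after the flag check but before poll() starts sleeping still wakes the wait immediately, because the byte is already readable.

/*
 * self_pipe_demo.c - standalone sketch of the self-pipe trick described in
 * the latch.c header comment above (illustrative names, not PostgreSQL code).
 */
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static int	selfpipe_readfd = -1;
static int	selfpipe_writefd = -1;
static volatile sig_atomic_t latch_is_set = 0;

/* Signal handler: set the flag, then wake any sleeping poll() via the pipe. */
static void
handle_sigusr1(int signo)
{
	int			save_errno = errno;
	char		dummy = 0;

	latch_is_set = 1;
	/* The write end is non-blocking, so this can't hang if the pipe is full. */
	(void) write(selfpipe_writefd, &dummy, 1);
	errno = save_errno;
}

/* Read the pipe until it's empty; both ends are non-blocking. */
static void
drain_self_pipe(void)
{
	char		buf[16];

	while (read(selfpipe_readfd, buf, sizeof(buf)) > 0)
		;
}

int
main(void)
{
	int			pipefd[2];
	struct sigaction sa;
	pid_t		child;

	if (pipe(pipefd) < 0 ||
		fcntl(pipefd[0], F_SETFL, O_NONBLOCK) < 0 ||
		fcntl(pipefd[1], F_SETFL, O_NONBLOCK) < 0)
	{
		perror("pipe setup");
		return 1;
	}
	selfpipe_readfd = pipefd[0];
	selfpipe_writefd = pipefd[1];

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = handle_sigusr1;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);

	/* Stand-in for "another process sets the latch": signal us in a second. */
	child = fork();
	if (child == 0)
	{
		sleep(1);
		kill(getppid(), SIGUSR1);
		_exit(0);
	}

	/* Wait loop: sleep in poll() on the self-pipe, then re-check the flag. */
	while (!latch_is_set)
	{
		struct pollfd pfd;
		int			rc;

		pfd.fd = selfpipe_readfd;
		pfd.events = POLLIN;
		rc = poll(&pfd, 1, 10 * 1000);
		if (rc < 0 && errno != EINTR)
		{
			perror("poll");
			return 1;
		}
		if (rc > 0 && (pfd.revents & POLLIN))
			drain_self_pipe();
		/* Even if the byte arrived before poll() began, POLLIN is reported. */
	}

	printf("latch was set; wait woken without losing the signal\n");
	waitpid(child, NULL, 0);
	return 0;
}

Draining the pipe before re-checking the flag mirrors what latch.c does with drainSelfPipe(): without the drain, bytes left sitting in the pipe would cause spurious wakeups on later waits.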