]> granicus.if.org Git - postgresql/commitdiff
Allow latches to wait for socket writability without waiting for readability.
authorAndres Freund <andres@anarazel.de>
Tue, 13 Jan 2015 11:58:43 +0000 (12:58 +0100)
committerAndres Freund <andres@anarazel.de>
Tue, 13 Jan 2015 11:58:43 +0000 (12:58 +0100)
So far WaitLatchOrSocket() required to pass in WL_SOCKET_READABLE as
that solely was used to indicate error conditions, like EOF. Waiting
for WL_SOCKET_WRITEABLE would have meant to busy wait upon socket
errors.

Adjust the API to signal errors by returning the socket as readable,
writable or both, depending on WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE
being specified.  It would arguably be nicer to return WL_SOCKET_ERROR
but that's not possible on platforms and would probably also result in
more complex callsites.

This previously had explicitly been forbidden in e42a21b9e6c9, as
there was no strong use case at that point. We now are looking into
making FE/BE communication use latches, so changing this makes sense.

There also are some portability concerns because there cases of older
platforms where select(2) is known to, in violation of POSIX, not
return a socket as writable after the peer has closed it.  So far the
platforms where that's the case provide a working poll(2). If we find
one where that's not the case, we'll need to add a workaround for that
platform.

Discussion: 20140927191243.GD5423@alap3.anarazel.de
Reviewed-By: Heikki Linnakangas, Noah Misch
src/backend/port/unix_latch.c
src/backend/port/win32_latch.c

index ecf01bd22b92c02fd13da7915cfa93cfa26802f6..92ae78015832b69f5f8250e3790c65c3ab50e90d 100644 (file)
@@ -200,9 +200,9 @@ WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
  * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
  * conditions.
  *
- * When waiting on a socket, WL_SOCKET_READABLE *must* be included in
- * 'wakeEvents'; WL_SOCKET_WRITEABLE is optional.  The reason for this is
- * that EOF and error conditions are reported only via WL_SOCKET_READABLE.
+ * When waiting on a socket, EOF and error conditions are reported by
+ * returning the socket as readable/writable or both, depending on
+ * WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified.
  */
 int
 WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
@@ -230,8 +230,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
 
        Assert(wakeEvents != 0);        /* must have at least one wake event */
-       /* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
-       Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);
 
        if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
                elog(ERROR, "cannot wait on a latch owned by another process");
@@ -291,7 +289,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                        break;
                }
 
-               /* Must wait ... we use poll(2) if available, otherwise select(2) */
+               /*
+                * Must wait ... we use poll(2) if available, otherwise select(2).
+                *
+                * On at least older linux kernels select(), in violation of POSIX,
+                * doesn't reliably return a socket as writable if closed - but we
+                * rely on that. So far all the known cases of this problem are on
+                * platforms that also provide a poll() implementation without that
+                * bug.  If we find one where that's not the case, we'll need to add a
+                * workaround.
+                */
 #ifdef HAVE_POLL
                nfds = 0;
                if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
@@ -346,7 +353,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                {
                        /* at least one event occurred, so check revents values */
                        if ((wakeEvents & WL_SOCKET_READABLE) &&
-                               (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+                               (pfds[0].revents & POLLIN))
                        {
                                /* data available in socket, or EOF/error condition */
                                result |= WL_SOCKET_READABLE;
@@ -354,8 +361,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                        if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
                                (pfds[0].revents & POLLOUT))
                        {
+                               /* socket is writable */
                                result |= WL_SOCKET_WRITEABLE;
                        }
+                       if (pfds[0].revents & (POLLHUP | POLLERR | POLLNVAL))
+                       {
+                               /* EOF/error condition */
+                               if (wakeEvents & WL_SOCKET_READABLE)
+                                       result |= WL_SOCKET_READABLE;
+                               if (wakeEvents & WL_SOCKET_WRITEABLE)
+                                       result |= WL_SOCKET_WRITEABLE;
+                       }
 
                        /*
                         * We expect a POLLHUP when the remote end is closed, but because
@@ -439,6 +455,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                        }
                        if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
                        {
+                               /* socket is writable, or EOF */
                                result |= WL_SOCKET_WRITEABLE;
                        }
                        if ((wakeEvents & WL_POSTMASTER_DEATH) &&
index 112e60ed5948da3ed60001789ceda5c3fecfbe39..da0657c915a01402ce3eaee1d4d61ca6097d0332 100644 (file)
@@ -117,8 +117,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
 
        Assert(wakeEvents != 0);        /* must have at least one wake event */
-       /* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
-       Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);
 
        if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
                elog(ERROR, "cannot wait on a latch owned by another process");
@@ -152,10 +150,10 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
        if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
        {
                /* Need an event object to represent events on the socket */
-               int                     flags = 0;
+               int                     flags = FD_CLOSE; /* always check for errors/EOF */
 
                if (wakeEvents & WL_SOCKET_READABLE)
-                       flags |= (FD_READ | FD_CLOSE);
+                       flags |= FD_READ;
                if (wakeEvents & WL_SOCKET_WRITEABLE)
                        flags |= FD_WRITE;
 
@@ -232,7 +230,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                                elog(ERROR, "failed to enumerate network events: error code %u",
                                         WSAGetLastError());
                        if ((wakeEvents & WL_SOCKET_READABLE) &&
-                               (resEvents.lNetworkEvents & (FD_READ | FD_CLOSE)))
+                               (resEvents.lNetworkEvents & FD_READ))
                        {
                                result |= WL_SOCKET_READABLE;
                        }
@@ -241,6 +239,13 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                        {
                                result |= WL_SOCKET_WRITEABLE;
                        }
+                       if (resEvents.lNetworkEvents & FD_CLOSE)
+                       {
+                               if (wakeEvents & WL_SOCKET_READABLE)
+                                       result |= WL_SOCKET_READABLE;
+                               if (wakeEvents & WL_SOCKET_WRITEABLE)
+                                       result |= WL_SOCKET_WRITEABLE;
+                       }
                }
                else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
                                 rc == WAIT_OBJECT_0 + pmdeath_eventno)