]> granicus.if.org Git - postgresql/commitdiff
Fix WaitLatch() to return promptly when the requested timeout expires.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)
If the sleep is interrupted by a signal, we must recompute the remaining
time to wait; otherwise, a steady stream of non-wait-terminating interrupts
could delay return from WaitLatch indefinitely.  This has been shown to be
a problem for the autovacuum launcher, and there may well be other places
now or in the future with similar issues.  So we'd better make the function
robust, even though this'll add at least one gettimeofday call per wait.

Back-patch to 9.2.  We might eventually need to fix 9.1 as well, but the
code is quite different there, and the usage of WaitLatch in 9.1 is so
limited that it's not clearly important to do so.

Reported and diagnosed by Jeff Janes, though I rewrote his patch rather
heavily.

src/backend/port/unix_latch.c
src/backend/port/win32_latch.c
src/include/storage/latch.h

index bbd1810ea530547c75255f02c5be44c559cc03de..29ef38226aaa16e575161604120568b5d964b270 100644 (file)
@@ -48,6 +48,7 @@
 #endif
 
 #include "miscadmin.h"
+#include "portability/instr_time.h"
 #include "postmaster/postmaster.h"
 #include "storage/latch.h"
 #include "storage/pmsignal.h"
@@ -176,12 +177,8 @@ DisownLatch(volatile Latch *latch)
  * function returns immediately.
  *
  * The 'timeout' is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
- * is given.  On some platforms, signals do not interrupt the wait, or even
- * cause the timeout to be restarted, so beware that the function can sleep
- * for several times longer than the requested timeout.  However, this
- * difficulty is not so great as it seems, because the signal handlers for any
- * signals that the caller should respond to ought to be programmed to end the
- * wait by calling SetLatch.  Ideally, the timeout parameter is vestigial.
+ * is given.  Note that some extra overhead is incurred when WL_TIMEOUT is
+ * given, so avoid using a timeout if possible.
  *
  * The latch must be owned by the current process, ie. it must be a
  * backend-local latch initialized with InitLatch, or a shared latch
@@ -211,13 +208,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
 {
        int                     result = 0;
        int                     rc;
+       instr_time      start_time,
+                               cur_time;
+       long            cur_timeout;
 
 #ifdef HAVE_POLL
        struct pollfd pfds[3];
        int                     nfds;
 #else
        struct timeval tv,
-                          *tvp = NULL;
+                          *tvp;
        fd_set          input_mask;
        fd_set          output_mask;
        int                     hifd;
@@ -234,21 +234,30 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
        if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
                elog(ERROR, "cannot wait on a latch owned by another process");
 
-       /* Initialize timeout */
+       /*
+        * Initialize timeout if requested.  We must record the current time so
+        * that we can determine the remaining timeout if the poll() or select()
+        * is interrupted.      (On some platforms, select() will update the contents
+        * of "tv" for us, but unfortunately we can't rely on that.)
+        */
        if (wakeEvents & WL_TIMEOUT)
        {
+               INSTR_TIME_SET_CURRENT(start_time);
                Assert(timeout >= 0);
+               cur_timeout = timeout;
+
 #ifndef HAVE_POLL
-               tv.tv_sec = timeout / 1000L;
-               tv.tv_usec = (timeout % 1000L) * 1000L;
+               tv.tv_sec = cur_timeout / 1000L;
+               tv.tv_usec = (cur_timeout % 1000L) * 1000L;
                tvp = &tv;
 #endif
        }
        else
        {
-#ifdef HAVE_POLL
-               /* make sure poll() agrees there is no timeout */
-               timeout = -1;
+               cur_timeout = -1;
+
+#ifndef HAVE_POLL
+               tvp = NULL;
 #endif
        }
 
@@ -311,54 +320,62 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                }
 
                /* Sleep */
-               rc = poll(pfds, nfds, (int) timeout);
+               rc = poll(pfds, nfds, (int) cur_timeout);
 
                /* Check return code */
                if (rc < 0)
                {
-                       if (errno == EINTR)
-                               continue;
-                       waiting = false;
-                       ereport(ERROR,
-                                       (errcode_for_socket_access(),
-                                        errmsg("poll() failed: %m")));
+                       /* EINTR is okay, otherwise complain */
+                       if (errno != EINTR)
+                       {
+                               waiting = false;
+                               ereport(ERROR,
+                                               (errcode_for_socket_access(),
+                                                errmsg("poll() failed: %m")));
+                       }
                }
-               if (rc == 0 && (wakeEvents & WL_TIMEOUT))
+               else if (rc == 0)
                {
                        /* timeout exceeded */
-                       result |= WL_TIMEOUT;
+                       if (wakeEvents & WL_TIMEOUT)
+                               result |= WL_TIMEOUT;
                }
-               if ((wakeEvents & WL_SOCKET_READABLE) &&
-                       (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+               else
                {
-                       /* data available in socket, or EOF/error condition */
-                       result |= WL_SOCKET_READABLE;
-               }
-               if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
-                       (pfds[0].revents & POLLOUT))
-               {
-                       result |= WL_SOCKET_WRITEABLE;
-               }
+                       /* at least one event occurred, so check revents values */
+                       if ((wakeEvents & WL_SOCKET_READABLE) &&
+                               (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+                       {
+                               /* data available in socket, or EOF/error condition */
+                               result |= WL_SOCKET_READABLE;
+                       }
+                       if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
+                               (pfds[0].revents & POLLOUT))
+                       {
+                               result |= WL_SOCKET_WRITEABLE;
+                       }
 
-               /*
-                * We expect a POLLHUP when the remote end is closed, but because we
-                * don't expect the pipe to become readable or to have any errors
-                * either, treat those as postmaster death, too.
-                */
-               if ((wakeEvents & WL_POSTMASTER_DEATH) &&
-                 (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
-               {
                        /*
-                        * According to the select(2) man page on Linux, select(2) may
-                        * spuriously return and report a file descriptor as readable,
-                        * when it's not; and presumably so can poll(2).  It's not clear
-                        * that the relevant cases would ever apply to the postmaster
-                        * pipe, but since the consequences of falsely returning
-                        * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
-                        * trouble to positively verify EOF with PostmasterIsAlive().
+                        * We expect a POLLHUP when the remote end is closed, but because
+                        * we don't expect the pipe to become readable or to have any
+                        * errors either, treat those cases as postmaster death, too.
                         */
-                       if (!PostmasterIsAlive())
-                               result |= WL_POSTMASTER_DEATH;
+                       if ((wakeEvents & WL_POSTMASTER_DEATH) &&
+                               (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
+                       {
+                               /*
+                                * According to the select(2) man page on Linux, select(2) may
+                                * spuriously return and report a file descriptor as readable,
+                                * when it's not; and presumably so can poll(2).  It's not
+                                * clear that the relevant cases would ever apply to the
+                                * postmaster pipe, but since the consequences of falsely
+                                * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
+                                * we take the trouble to positively verify EOF with
+                                * PostmasterIsAlive().
+                                */
+                               if (!PostmasterIsAlive())
+                                       result |= WL_POSTMASTER_DEATH;
+                       }
                }
 #else                                                  /* !HAVE_POLL */
 
@@ -395,43 +412,66 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                /* Check return code */
                if (rc < 0)
                {
-                       if (errno == EINTR)
-                               continue;
-                       waiting = false;
-                       ereport(ERROR,
-                                       (errcode_for_socket_access(),
-                                        errmsg("select() failed: %m")));
+                       /* EINTR is okay, otherwise complain */
+                       if (errno != EINTR)
+                       {
+                               waiting = false;
+                               ereport(ERROR,
+                                               (errcode_for_socket_access(),
+                                                errmsg("select() failed: %m")));
+                       }
                }
-               if (rc == 0 && (wakeEvents & WL_TIMEOUT))
+               else if (rc == 0)
                {
                        /* timeout exceeded */
-                       result |= WL_TIMEOUT;
-               }
-               if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
-               {
-                       /* data available in socket, or EOF */
-                       result |= WL_SOCKET_READABLE;
+                       if (wakeEvents & WL_TIMEOUT)
+                               result |= WL_TIMEOUT;
                }
-               if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
+               else
                {
-                       result |= WL_SOCKET_WRITEABLE;
-               }
-               if ((wakeEvents & WL_POSTMASTER_DEATH) &&
+                       /* at least one event occurred, so check masks */
+                       if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
+                       {
+                               /* data available in socket, or EOF */
+                               result |= WL_SOCKET_READABLE;
+                       }
+                       if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
+                       {
+                               result |= WL_SOCKET_WRITEABLE;
+                       }
+                       if ((wakeEvents & WL_POSTMASTER_DEATH) &&
                        FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
-               {
-                       /*
-                        * According to the select(2) man page on Linux, select(2) may
-                        * spuriously return and report a file descriptor as readable,
-                        * when it's not; and presumably so can poll(2).  It's not clear
-                        * that the relevant cases would ever apply to the postmaster
-                        * pipe, but since the consequences of falsely returning
-                        * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
-                        * trouble to positively verify EOF with PostmasterIsAlive().
-                        */
-                       if (!PostmasterIsAlive())
-                               result |= WL_POSTMASTER_DEATH;
+                       {
+                               /*
+                                * According to the select(2) man page on Linux, select(2) may
+                                * spuriously return and report a file descriptor as readable,
+                                * when it's not; and presumably so can poll(2).  It's not
+                                * clear that the relevant cases would ever apply to the
+                                * postmaster pipe, but since the consequences of falsely
+                                * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
+                                * we take the trouble to positively verify EOF with
+                                * PostmasterIsAlive().
+                                */
+                               if (!PostmasterIsAlive())
+                                       result |= WL_POSTMASTER_DEATH;
+                       }
                }
 #endif   /* HAVE_POLL */
+
+               /* If we're not done, update cur_timeout for next iteration */
+               if (result == 0 && cur_timeout >= 0)
+               {
+                       INSTR_TIME_SET_CURRENT(cur_time);
+                       INSTR_TIME_SUBTRACT(cur_time, start_time);
+                       cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
+                       if (cur_timeout < 0)
+                               cur_timeout = 0;
+
+#ifndef HAVE_POLL
+                       tv.tv_sec = cur_timeout / 1000L;
+                       tv.tv_usec = (cur_timeout % 1000L) * 1000L;
+#endif
+               }
        } while (result == 0);
        waiting = false;
 
index 0c089fc7ecc80b4c5fff806866ea6b8b46700ded..95370d9d58ddc49f56de54f7593db3f5c6354c19 100644 (file)
@@ -24,6 +24,7 @@
 #include <unistd.h>
 
 #include "miscadmin.h"
+#include "portability/instr_time.h"
 #include "postmaster/postmaster.h"
 #include "storage/latch.h"
 #include "storage/pmsignal.h"
@@ -100,6 +101,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                                  long timeout)
 {
        DWORD           rc;
+       instr_time      start_time,
+                               cur_time;
+       long            cur_timeout;
        HANDLE          events[4];
        HANDLE          latchevent;
        HANDLE          sockevent = WSA_INVALID_EVENT;
@@ -118,11 +122,19 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
        if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
                elog(ERROR, "cannot wait on a latch owned by another process");
 
-       /* Convert timeout to form used by WaitForMultipleObjects() */
+       /*
+        * Initialize timeout if requested.  We must record the current time so
+        * that we can determine the remaining timeout if WaitForMultipleObjects
+        * is interrupted.
+        */
        if (wakeEvents & WL_TIMEOUT)
+       {
+               INSTR_TIME_SET_CURRENT(start_time);
                Assert(timeout >= 0);
+               cur_timeout = timeout;
+       }
        else
-               timeout = INFINITE;
+               cur_timeout = INFINITE;
 
        /*
         * Construct an array of event handles for WaitforMultipleObjects().
@@ -187,7 +199,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                        break;
                }
 
-               rc = WaitForMultipleObjects(numevents, events, FALSE, timeout);
+               rc = WaitForMultipleObjects(numevents, events, FALSE, cur_timeout);
 
                if (rc == WAIT_FAILED)
                        elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
@@ -203,7 +215,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                }
                else if (rc == WAIT_OBJECT_0 + 1)
                {
-                       /* Latch is set, we'll handle that on next iteration of loop */
+                       /*
+                        * Latch is set.  We'll handle that on next iteration of loop, but
+                        * let's not waste the cycles to update cur_timeout below.
+                        */
+                       continue;
                }
                else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) &&
                                 rc == WAIT_OBJECT_0 + 2)               /* socket is at event slot 2 */
@@ -240,8 +256,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                }
                else
                        elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
-       }
-       while (result == 0);
+
+               /* If we're not done, update cur_timeout for next iteration */
+               if (result == 0 && cur_timeout != INFINITE)
+               {
+                       INSTR_TIME_SET_CURRENT(cur_time);
+                       INSTR_TIME_SUBTRACT(cur_time, start_time);
+                       cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
+                       if (cur_timeout < 0)
+                               cur_timeout = 0;
+               }
+       } while (result == 0);
 
        /* Clean up the event object we created for the socket */
        if (sockevent != WSA_INVALID_EVENT)
index f5c7fe1ca5ee9d0ee9706035f0b13f7a02bc1968..0fa572e50b01d5ab18fbe537117bf5308d13b179 100644 (file)
@@ -33,8 +33,8 @@
  * ResetLatch  - Clears the latch, allowing it to be set again
  * WaitLatch   - Waits for the latch to become set
  *
- * WaitLatch includes a provision for timeouts (which should hopefully not
- * be necessary once the code is fully latch-ified) and a provision for
+ * WaitLatch includes a provision for timeouts (which should be avoided
+ * when possible, as they incur extra overhead) and a provision for
  * postmaster child processes to wake up immediately on postmaster death.
  * See unix_latch.c for detailed specifications for the exported functions.
  *
  * will be lifted in future by inserting suitable memory barriers into
  * SetLatch and ResetLatch.
  *
+ * On some platforms, signals will not interrupt the latch wait primitive
+ * by themselves.  Therefore, it is critical that any signal handler that
+ * is meant to terminate a WaitLatch wait calls SetLatch.
+ *
  * Note that use of the process latch (PGPROC.procLatch) is generally better
  * than an ad-hoc shared latch for signaling auxiliary processes.  This is
  * because generic signal handlers will call SetLatch on the process latch
  * only, so using any latch other than the process latch effectively precludes
- * ever registering a generic handler. Since signals have the potential to
- * invalidate the latch timeout on some platforms, resulting in a
- * denial-of-service, it is important to verify that all signal handlers
- * within all WaitLatch-calling processes call SetLatch.
+ * use of any generic handler.
  *
  *
  * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group