Fix WaitLatch() to return promptly when the requested timeout expires.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)
diff --git a/src/backend/port/unix_latch.c b/src/backend/port/unix_latch.c

index bbd1810ea530547c75255f02c5be44c559cc03de..29ef38226aaa16e575161604120568b5d964b270 100644 (file)
--- a/src/backend/port/unix_latch.c
+++ b/src/backend/port/unix_latch.c
@@ -48,6 +48,7 @@
  #endif
  
  #include "miscadmin.h"
+#include "portability/instr_time.h"
  #include "postmaster/postmaster.h"
  #include "storage/latch.h"
  #include "storage/pmsignal.h"
@@ -176,12 +177,8 @@ DisownLatch(volatile Latch *latch)
   * function returns immediately.
   *
   * The 'timeout' is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
- * is given.  On some platforms, signals do not interrupt the wait, or even
- * cause the timeout to be restarted, so beware that the function can sleep
- * for several times longer than the requested timeout.  However, this
- * difficulty is not so great as it seems, because the signal handlers for any
- * signals that the caller should respond to ought to be programmed to end the
- * wait by calling SetLatch.  Ideally, the timeout parameter is vestigial.
+ * is given.  Note that some extra overhead is incurred when WL_TIMEOUT is
+ * given, so avoid using a timeout if possible.
   *
   * The latch must be owned by the current process, ie. it must be a
   * backend-local latch initialized with InitLatch, or a shared latch
@@ -211,13 +208,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
  {
         int                     result = 0;
         int                     rc;
+       instr_time      start_time,
+                               cur_time;
+       long            cur_timeout;
  
  #ifdef HAVE_POLL
         struct pollfd pfds[3];
         int                     nfds;
  #else
         struct timeval tv,
-                          *tvp = NULL;
+                          *tvp;
         fd_set          input_mask;
         fd_set          output_mask;
         int                     hifd;
@@ -234,21 +234,30 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
         if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
                 elog(ERROR, "cannot wait on a latch owned by another process");
  
-       /* Initialize timeout */
+       /*
+        * Initialize timeout if requested.  We must record the current time so
+        * that we can determine the remaining timeout if the poll() or select()
+        * is interrupted.      (On some platforms, select() will update the contents
+        * of "tv" for us, but unfortunately we can't rely on that.)
+        */
         if (wakeEvents & WL_TIMEOUT)
         {
+               INSTR_TIME_SET_CURRENT(start_time);
                 Assert(timeout >= 0);
+               cur_timeout = timeout;
+
  #ifndef HAVE_POLL
-               tv.tv_sec = timeout / 1000L;
-               tv.tv_usec = (timeout % 1000L) * 1000L;
+               tv.tv_sec = cur_timeout / 1000L;
+               tv.tv_usec = (cur_timeout % 1000L) * 1000L;
                 tvp = &tv;
  #endif
         }
         else
         {
-#ifdef HAVE_POLL
-               /* make sure poll() agrees there is no timeout */
-               timeout = -1;
+               cur_timeout = -1;
+
+#ifndef HAVE_POLL
+               tvp = NULL;
  #endif
         }
  
@@ -311,54 +320,62 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                 }
  
                 /* Sleep */
-               rc = poll(pfds, nfds, (int) timeout);
+               rc = poll(pfds, nfds, (int) cur_timeout);
  
                 /* Check return code */
                 if (rc < 0)
                 {
-                       if (errno == EINTR)
-                               continue;
-                       waiting = false;
-                       ereport(ERROR,
-                                       (errcode_for_socket_access(),
-                                        errmsg("poll() failed: %m")));
+                       /* EINTR is okay, otherwise complain */
+                       if (errno != EINTR)
+                       {
+                               waiting = false;
+                               ereport(ERROR,
+                                               (errcode_for_socket_access(),
+                                                errmsg("poll() failed: %m")));
+                       }
                 }
-               if (rc == 0 && (wakeEvents & WL_TIMEOUT))
+               else if (rc == 0)
                 {
                         /* timeout exceeded */
-                       result |= WL_TIMEOUT;
+                       if (wakeEvents & WL_TIMEOUT)
+                               result |= WL_TIMEOUT;
                 }
-               if ((wakeEvents & WL_SOCKET_READABLE) &&
-                       (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+               else
                 {
-                       /* data available in socket, or EOF/error condition */
-                       result |= WL_SOCKET_READABLE;
-               }
-               if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
-                       (pfds[0].revents & POLLOUT))
-               {
-                       result |= WL_SOCKET_WRITEABLE;
-               }
+                       /* at least one event occurred, so check revents values */
+                       if ((wakeEvents & WL_SOCKET_READABLE) &&
+                               (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
+                       {
+                               /* data available in socket, or EOF/error condition */
+                               result |= WL_SOCKET_READABLE;
+                       }
+                       if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
+                               (pfds[0].revents & POLLOUT))
+                       {
+                               result |= WL_SOCKET_WRITEABLE;
+                       }
  
-               /*
-                * We expect a POLLHUP when the remote end is closed, but because we
-                * don't expect the pipe to become readable or to have any errors
-                * either, treat those as postmaster death, too.
-                */
-               if ((wakeEvents & WL_POSTMASTER_DEATH) &&
-                 (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
-               {
                         /*
-                        * According to the select(2) man page on Linux, select(2) may
-                        * spuriously return and report a file descriptor as readable,
-                        * when it's not; and presumably so can poll(2).  It's not clear
-                        * that the relevant cases would ever apply to the postmaster
-                        * pipe, but since the consequences of falsely returning
-                        * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
-                        * trouble to positively verify EOF with PostmasterIsAlive().
+                        * We expect a POLLHUP when the remote end is closed, but because
+                        * we don't expect the pipe to become readable or to have any
+                        * errors either, treat those cases as postmaster death, too.
                          */
-                       if (!PostmasterIsAlive())
-                               result |= WL_POSTMASTER_DEATH;
+                       if ((wakeEvents & WL_POSTMASTER_DEATH) &&
+                               (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
+                       {
+                               /*
+                                * According to the select(2) man page on Linux, select(2) may
+                                * spuriously return and report a file descriptor as readable,
+                                * when it's not; and presumably so can poll(2).  It's not
+                                * clear that the relevant cases would ever apply to the
+                                * postmaster pipe, but since the consequences of falsely
+                                * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
+                                * we take the trouble to positively verify EOF with
+                                * PostmasterIsAlive().
+                                */
+                               if (!PostmasterIsAlive())
+                                       result |= WL_POSTMASTER_DEATH;
+                       }
                 }
  #else                                                  /* !HAVE_POLL */
  
@@ -395,43 +412,66 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                 /* Check return code */
                 if (rc < 0)
                 {
-                       if (errno == EINTR)
-                               continue;
-                       waiting = false;
-                       ereport(ERROR,
-                                       (errcode_for_socket_access(),
-                                        errmsg("select() failed: %m")));
+                       /* EINTR is okay, otherwise complain */
+                       if (errno != EINTR)
+                       {
+                               waiting = false;
+                               ereport(ERROR,
+                                               (errcode_for_socket_access(),
+                                                errmsg("select() failed: %m")));
+                       }
                 }
-               if (rc == 0 && (wakeEvents & WL_TIMEOUT))
+               else if (rc == 0)
                 {
                         /* timeout exceeded */
-                       result |= WL_TIMEOUT;
-               }
-               if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
-               {
-                       /* data available in socket, or EOF */
-                       result |= WL_SOCKET_READABLE;
+                       if (wakeEvents & WL_TIMEOUT)
+                               result |= WL_TIMEOUT;
                 }
-               if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
+               else
                 {
-                       result |= WL_SOCKET_WRITEABLE;
-               }
-               if ((wakeEvents & WL_POSTMASTER_DEATH) &&
+                       /* at least one event occurred, so check masks */
+                       if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
+                       {
+                               /* data available in socket, or EOF */
+                               result |= WL_SOCKET_READABLE;
+                       }
+                       if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
+                       {
+                               result |= WL_SOCKET_WRITEABLE;
+                       }
+                       if ((wakeEvents & WL_POSTMASTER_DEATH) &&
                         FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
-               {
-                       /*
-                        * According to the select(2) man page on Linux, select(2) may
-                        * spuriously return and report a file descriptor as readable,
-                        * when it's not; and presumably so can poll(2).  It's not clear
-                        * that the relevant cases would ever apply to the postmaster
-                        * pipe, but since the consequences of falsely returning
-                        * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
-                        * trouble to positively verify EOF with PostmasterIsAlive().
-                        */
-                       if (!PostmasterIsAlive())
-                               result |= WL_POSTMASTER_DEATH;
+                       {
+                               /*
+                                * According to the select(2) man page on Linux, select(2) may
+                                * spuriously return and report a file descriptor as readable,
+                                * when it's not; and presumably so can poll(2).  It's not
+                                * clear that the relevant cases would ever apply to the
+                                * postmaster pipe, but since the consequences of falsely
+                                * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
+                                * we take the trouble to positively verify EOF with
+                                * PostmasterIsAlive().
+                                */
+                               if (!PostmasterIsAlive())
+                                       result |= WL_POSTMASTER_DEATH;
+                       }
                 }
  #endif   /* HAVE_POLL */
+
+               /* If we're not done, update cur_timeout for next iteration */
+               if (result == 0 && cur_timeout >= 0)
+               {
+                       INSTR_TIME_SET_CURRENT(cur_time);
+                       INSTR_TIME_SUBTRACT(cur_time, start_time);
+                       cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
+                       if (cur_timeout < 0)
+                               cur_timeout = 0;
+
+#ifndef HAVE_POLL
+                       tv.tv_sec = cur_timeout / 1000L;
+                       tv.tv_usec = (cur_timeout % 1000L) * 1000L;
+#endif
+               }
         } while (result == 0);
         waiting = false;
  
diff --git a/src/backend/port/win32_latch.c b/src/backend/port/win32_latch.c

index 0c089fc7ecc80b4c5fff806866ea6b8b46700ded..95370d9d58ddc49f56de54f7593db3f5c6354c19 100644 (file)
--- a/src/backend/port/win32_latch.c
+++ b/src/backend/port/win32_latch.c
@@ -24,6 +24,7 @@
  #include <unistd.h>
  
  #include "miscadmin.h"
+#include "portability/instr_time.h"
  #include "postmaster/postmaster.h"
  #include "storage/latch.h"
  #include "storage/pmsignal.h"
@@ -100,6 +101,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                                   long timeout)
  {
         DWORD           rc;
+       instr_time      start_time,
+                               cur_time;
+       long            cur_timeout;
         HANDLE          events[4];
         HANDLE          latchevent;
         HANDLE          sockevent = WSA_INVALID_EVENT;
@@ -118,11 +122,19 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
         if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
                 elog(ERROR, "cannot wait on a latch owned by another process");
  
-       /* Convert timeout to form used by WaitForMultipleObjects() */
+       /*
+        * Initialize timeout if requested.  We must record the current time so
+        * that we can determine the remaining timeout if WaitForMultipleObjects
+        * is interrupted.
+        */
         if (wakeEvents & WL_TIMEOUT)
+       {
+               INSTR_TIME_SET_CURRENT(start_time);
                 Assert(timeout >= 0);
+               cur_timeout = timeout;
+       }
         else
-               timeout = INFINITE;
+               cur_timeout = INFINITE;
  
         /*
          * Construct an array of event handles for WaitforMultipleObjects().
@@ -187,7 +199,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                         break;
                 }
  
-               rc = WaitForMultipleObjects(numevents, events, FALSE, timeout);
+               rc = WaitForMultipleObjects(numevents, events, FALSE, cur_timeout);
  
                 if (rc == WAIT_FAILED)
                         elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
@@ -203,7 +215,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                 }
                 else if (rc == WAIT_OBJECT_0 + 1)
                 {
-                       /* Latch is set, we'll handle that on next iteration of loop */
+                       /*
+                        * Latch is set.  We'll handle that on next iteration of loop, but
+                        * let's not waste the cycles to update cur_timeout below.
+                        */
+                       continue;
                 }
                 else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) &&
                                  rc == WAIT_OBJECT_0 + 2)               /* socket is at event slot 2 */
@@ -240,8 +256,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
                 }
                 else
                         elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
-       }
-       while (result == 0);
+
+               /* If we're not done, update cur_timeout for next iteration */
+               if (result == 0 && cur_timeout != INFINITE)
+               {
+                       INSTR_TIME_SET_CURRENT(cur_time);
+                       INSTR_TIME_SUBTRACT(cur_time, start_time);
+                       cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
+                       if (cur_timeout < 0)
+                               cur_timeout = 0;
+               }
+       } while (result == 0);
  
         /* Clean up the event object we created for the socket */
         if (sockevent != WSA_INVALID_EVENT)
diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h

index f5c7fe1ca5ee9d0ee9706035f0b13f7a02bc1968..0fa572e50b01d5ab18fbe537117bf5308d13b179 100644 (file)
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@@ -33,8 +33,8 @@
   * ResetLatch  - Clears the latch, allowing it to be set again
   * WaitLatch   - Waits for the latch to become set
   *
- * WaitLatch includes a provision for timeouts (which should hopefully not
- * be necessary once the code is fully latch-ified) and a provision for
+ * WaitLatch includes a provision for timeouts (which should be avoided
+ * when possible, as they incur extra overhead) and a provision for
   * postmaster child processes to wake up immediately on postmaster death.
   * See unix_latch.c for detailed specifications for the exported functions.
   *
@@ -64,14 +64,15 @@
   * will be lifted in future by inserting suitable memory barriers into
   * SetLatch and ResetLatch.
   *
+ * On some platforms, signals will not interrupt the latch wait primitive
+ * by themselves.  Therefore, it is critical that any signal handler that
+ * is meant to terminate a WaitLatch wait calls SetLatch.
+ *
   * Note that use of the process latch (PGPROC.procLatch) is generally better
   * than an ad-hoc shared latch for signaling auxiliary processes.  This is
   * because generic signal handlers will call SetLatch on the process latch
   * only, so using any latch other than the process latch effectively precludes
- * ever registering a generic handler. Since signals have the potential to
- * invalidate the latch timeout on some platforms, resulting in a
- * denial-of-service, it is important to verify that all signal handlers
- * within all WaitLatch-calling processes call SetLatch.
+ * use of any generic handler.
   *
   *
   * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 9 Nov 2012 01:04:48 +0000 (20:04 -0500)
src/backend/port/unix_latch.c		patch \| blob \| history
src/backend/port/win32_latch.c		patch \| blob \| history
src/include/storage/latch.h		patch \| blob \| history