When PRECISE_TIMER is set with epoll, use timerfd for microsecond precision

author Nick Mathewson <nickm@torproject.org>

Thu, 26 Apr 2012 20:22:03 +0000 (16:22 -0400)

committer Nick Mathewson <nickm@torproject.org>

Thu, 26 Apr 2012 20:42:21 +0000 (16:42 -0400)
author Nick Mathewson <nickm@torproject.org>
Thu, 26 Apr 2012 20:22:03 +0000 (16:22 -0400)
committer Nick Mathewson <nickm@torproject.org>
Thu, 26 Apr 2012 20:42:21 +0000 (16:42 -0400)
diff --git a/configure.in b/configure.in

index 9bd2a2da9e525f9ecac7e677128ec08e7c4db3ca..1c649a235b0dc9ee94ac02730f073adba61f6320 100644 (file)
--- a/configure.in
+++ b/configure.in
@@ -221,6 +221,7 @@ AC_CHECK_HEADERS([ \
    sys/sendfile.h \
    sys/socket.h \
    sys/time.h \
+  sys/timerfd.h \
    sys/uio.h \
    sys/wait.h \
    unistd.h \
@@ -357,6 +358,7 @@ AC_CHECK_FUNCS([ \
    strtok_r \
    strtoll \
    sysctl \
+  timerfd_create \
    unsetenv \
    usleep \
    vasprintf \
diff --git a/epoll.c b/epoll.c

index a40939c479b27f13b3a36f304ebb006b423d3f11..edd4e18b480a4e3bb4ba499edea36f6763d6aa46 100644 (file)
--- a/epoll.c
+++ b/epoll.c
@@ -47,6 +47,9 @@
  #ifdef EVENT__HAVE_FCNTL_H
  #include <fcntl.h>
  #endif
+#ifdef EVENT__HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#endif
  
  #include "event-internal.h"
  #include "evsignal-internal.h"
@@ -57,10 +60,24 @@
  #include "changelist-internal.h"
  #include "time-internal.h"
  
+#if defined(EVENT__HAVE_SYS_TIMERFD_H) &&                        \
+       defined(EVENT__HAVE_TIMERFD_CREATE) &&                    \
+       defined(HAVE_POSIX_MONOTONIC) && defined(TFD_NONBLOCK) && \
+       defined(TFD_CLOEXEC)
+/* Note that we only use timerfd if TFD_NONBLOCK and TFD_CLOEXEC are available
+   and working.  This means that we can't support it on 2.6.25 (where timerfd
+   was introduced) or 2.6.26, since 2.6.27 introduced those flags.
+ */
+#define USING_TIMERFD
+#endif
+
  struct epollop {
         struct epoll_event *events;
         int nevents;
         int epfd;
+#ifdef USING_TIMERFD
+       int timerfd; /* timerfd added to epfd for dispatch timeouts; -1 when not in use */
+#endif
  };
  
  static void *epoll_init(struct event_base *);
@@ -147,8 +164,38 @@ epoll_init(struct event_base *base)
  
         if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
             ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
-               evutil_getenv_("EVENT_EPOLL_USE_CHANGELIST") != NULL))
+               evutil_getenv_("EVENT_EPOLL_USE_CHANGELIST") != NULL)) {
+
                 base->evsel = &epollops_changelist;
+       }
+
+#ifdef USING_TIMERFD
+       /*
+         The epoll interface ordinarily gives us one-millisecond precision,
+         so on Linux it makes perfect sense to use the CLOCK_MONOTONIC_COARSE
+         timer.  But when the user has set the new PRECISE_TIMER flag for an
+         event_base, we can try to use timerfd to give them finer granularity.
+       */
+       if ((base->flags & EVENT_BASE_FLAG_PRECISE_TIMER) &&
+           base->monotonic_timer.monotonic_clock == CLOCK_MONOTONIC) {
+               int fd;
+               fd = epollop->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+               if (epollop->timerfd >= 0) {
+                       struct epoll_event epev;
+                       epev.data.fd = epollop->timerfd;
+                       epev.events = EPOLLIN;
+                       if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, fd, &epev) < 0) {
+                               event_warn("epoll_ctl(timerfd)");
+                               close(fd);
+                               epollop->timerfd = -1;
+                       }
+               } else {
+                       event_warn("timerfd_create");
+               }
+       } else {
+               epollop->timerfd = -1;
+       }
+#endif
  
         evsig_init_(base);
  
@@ -509,6 +556,33 @@ epoll_dispatch(struct event_base *base, struct timeval *tv)
         int i, res;
         long timeout = -1;
  
+#ifdef USING_TIMERFD
+       if (epollop->timerfd >= 0) {
+               struct itimerspec is;
+               is.it_interval.tv_sec = 0;
+               is.it_interval.tv_nsec = 0;
+               if (tv == NULL) {
+                       /* No timeout; disarm the timer. */
+                       is.it_value.tv_sec = 0;
+                       is.it_value.tv_nsec = 0;
+               } else {
+                       if (tv->tv_sec == 0 && tv->tv_usec == 0) {
+                               /* we need to exit immediately; timerfd can't
+                                * do that. */
+                               timeout = 0;
+                       }
+                       is.it_value.tv_sec = tv->tv_sec;
+                       is.it_value.tv_nsec = tv->tv_usec * 1000;
+               }
+               /* TODO: we could avoid unnecessary syscalls here by only
+                  calling timerfd_settime when the top timeout changes, or
+                  when we're called with a different timeval.
+               */
+               if (timerfd_settime(epollop->timerfd, 0, &is, NULL) < 0) {
+                       event_warn("timerfd_settime");
+               }
+       } else
+#endif
         if (tv != NULL) {
                 timeout = evutil_tv_to_msec_(tv);
                 if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
@@ -542,6 +616,10 @@ epoll_dispatch(struct event_base *base, struct timeval *tv)
         for (i = 0; i < res; i++) {
                 int what = events[i].events;
                 short ev = 0;
+#ifdef USING_TIMERFD
+               if (events[i].data.fd == epollop->timerfd)
+                       continue;
+#endif
  
                 if (what & (EPOLLHUP|EPOLLERR)) {
                         ev = EV_READ | EV_WRITE;
@@ -586,6 +664,10 @@ epoll_dealloc(struct event_base *base)
                 mm_free(epollop->events);
         if (epollop->epfd >= 0)
                 close(epollop->epfd);
+#ifdef USING_TIMERFD
+       if (epollop->timerfd >= 0)
+               close(epollop->timerfd);
+#endif
  
         memset(epollop, 0, sizeof(struct epollop));
         mm_free(epollop);
diff --git a/test/test.sh b/test/test.sh

index 5a4efabaa0bc407b735a5f48043a6fb8f8ab1713..59748c00f6a8dc6da3b51c5f04f6587156cbfb90 100755 (executable)
--- a/test/test.sh
+++ b/test/test.sh
@@ -42,6 +42,7 @@ setup () {
                 eval "EVENT_NO$i=yes; export EVENT_NO$i"
         done
         unset EVENT_EPOLL_USE_CHANGELIST
+       unset EVENT_PRECISE_TIMER
  }
  
  announce () {
@@ -112,16 +113,24 @@ do_test() {
         unset EVENT_NO$1
         if test "$2" = "(changelist)" ; then
             EVENT_EPOLL_USE_CHANGELIST=yes; export EVENT_EPOLL_USE_CHANGELIST
+       elif test "$2" = "(timerfd)" ; then
+           EVENT_PRECISE_TIMER=1; export EVENT_PRECISE_TIMER
+       elif test "$2" = "(timerfd+changelist)" ; then
+           EVENT_EPOLL_USE_CHANGELIST=yes; export EVENT_EPOLL_USE_CHANGELIST
+           EVENT_PRECISE_TIMER=1; export EVENT_PRECISE_TIMER
          fi
+
         run_tests
  }
  
  announce "Running tests:"
  
+do_test EPOLL "(timerfd)"
+do_test EPOLL "(changelist)"
+do_test EPOLL "(timerfd+changelist)"
  for i in $BACKENDS; do
         do_test $i
  done
-do_test EPOLL "(changelist)"
  
  if test "$FAILED" = "yes"; then
         exit 1
author	Nick Mathewson <nickm@torproject.org>
	Thu, 26 Apr 2012 20:22:03 +0000 (16:22 -0400)
committer	Nick Mathewson <nickm@torproject.org>
	Thu, 26 Apr 2012 20:42:21 +0000 (16:42 -0400)
configure.in		patch \| blob \| history
epoll.c		patch \| blob \| history
test/test.sh		patch \| blob \| history