granicus.if.org Git - libevent/commitdiff
When PRECISE_TIMER is set with epoll, use timerfd for microsecond precision
author: Nick Mathewson <nickm@torproject.org>
Thu, 26 Apr 2012 20:22:03 +0000 (16:22 -0400)
committer: Nick Mathewson <nickm@torproject.org>
Thu, 26 Apr 2012 20:42:21 +0000 (16:42 -0400)
The epoll interface ordinarily gives us one-millisecond
precision, so on Linux it makes perfect sense to use the
CLOCK_MONOTONIC_COARSE timer.  But when the user has set the new
PRECISE_TIMER flag for an event_base (either by the
EVENT_BASE_FLAG_PRECISE_TIMER flag, or by the EVENT_PRECISE_TIMER
environment variable), they presumably want finer granularity.

On not-too-old Linuxes, we can achieve this using the timerfd
mechanism, which accepts nanosecond granularity and understands
POSIX clocks.  It's a little more expensive than just calling
epoll_wait(), so we won't do it by default.

configure.in
epoll.c
test/test.sh

index 9bd2a2da9e525f9ecac7e677128ec08e7c4db3ca..1c649a235b0dc9ee94ac02730f073adba61f6320 100644 (file)
@@ -221,6 +221,7 @@ AC_CHECK_HEADERS([ \
   sys/sendfile.h \
   sys/socket.h \
   sys/time.h \
+  sys/timerfd.h \
   sys/uio.h \
   sys/wait.h \
   unistd.h \
@@ -357,6 +358,7 @@ AC_CHECK_FUNCS([ \
   strtok_r \
   strtoll \
   sysctl \
+  timerfd_create \
   unsetenv \
   usleep \
   vasprintf \
diff --git a/epoll.c b/epoll.c
index a40939c479b27f13b3a36f304ebb006b423d3f11..edd4e18b480a4e3bb4ba499edea36f6763d6aa46 100644 (file)
--- a/epoll.c
+++ b/epoll.c
@@ -47,6 +47,9 @@
 #ifdef EVENT__HAVE_FCNTL_H
 #include <fcntl.h>
 #endif
+#ifdef EVENT__HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#endif
 
 #include "event-internal.h"
 #include "evsignal-internal.h"
 #include "changelist-internal.h"
 #include "time-internal.h"
 
+#if defined(EVENT__HAVE_SYS_TIMERFD_H) &&                        \
+       defined(EVENT__HAVE_TIMERFD_CREATE) &&                    \
+       defined(HAVE_POSIX_MONOTONIC) && defined(TFD_NONBLOCK) && \
+       defined(TFD_CLOEXEC)
+/* Note that we only use timerfd if TFD_NONBLOCK and TFD_CLOEXEC are available
+   and working.  This means that we can't support it on 2.6.25 (where timerfd
+   was introduced) or 2.6.26, since 2.6.27 introduced those flags.
+ */
+#define USING_TIMERFD
+#endif
+
 struct epollop {
        struct epoll_event *events;
        int nevents;
        int epfd;
+#ifdef USING_TIMERFD
+       int timerfd;
+#endif
 };
 
 static void *epoll_init(struct event_base *);
@@ -147,8 +164,38 @@ epoll_init(struct event_base *base)
 
        if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
            ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
-               evutil_getenv_("EVENT_EPOLL_USE_CHANGELIST") != NULL))
+               evutil_getenv_("EVENT_EPOLL_USE_CHANGELIST") != NULL)) {
+
                base->evsel = &epollops_changelist;
+       }
+
+#ifdef USING_TIMERFD
+       /*
+         The epoll interface ordinarily gives us one-millisecond precision,
+         so on Linux it makes perfect sense to use the CLOCK_MONOTONIC_COARSE
+         timer.  But when the user has set the new PRECISE_TIMER flag for an
+         event_base, we can try to use timerfd to give them finer granularity.
+       */
+       if ((base->flags & EVENT_BASE_FLAG_PRECISE_TIMER) &&
+           base->monotonic_timer.monotonic_clock == CLOCK_MONOTONIC) {
+               int fd;
+               fd = epollop->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+               if (epollop->timerfd >= 0) {
+                       struct epoll_event epev;
+                       epev.data.fd = epollop->timerfd;
+                       epev.events = EPOLLIN;
+                       if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, fd, &epev) < 0) {
+                               event_warn("epoll_ctl(timerfd)");
+                               close(fd);
+                               epollop->timerfd = -1;
+                       }
+               } else {
+                       event_warn("timerfd_create");
+               }
+       } else {
+               epollop->timerfd = -1;
+       }
+#endif
 
        evsig_init_(base);
 
@@ -509,6 +556,33 @@ epoll_dispatch(struct event_base *base, struct timeval *tv)
        int i, res;
        long timeout = -1;
 
+#ifdef USING_TIMERFD
+       if (epollop->timerfd >= 0) {
+               struct itimerspec is;
+               is.it_interval.tv_sec = 0;
+               is.it_interval.tv_nsec = 0;
+               if (tv == NULL) {
+                       /* No timeout; disarm the timer. */
+                       is.it_value.tv_sec = 0;
+                       is.it_value.tv_nsec = 0;
+               } else {
+                       if (tv->tv_sec == 0 && tv->tv_usec == 0) {
+                               /* we need to exit immediately; timerfd can't
+                                * do that. */
+                               timeout = 0;
+                       }
+                       is.it_value.tv_sec = tv->tv_sec;
+                       is.it_value.tv_nsec = tv->tv_usec * 1000;
+               }
+               /* TODO: we could avoid unnecessary syscalls here by only
+                  calling timerfd_settime when the top timeout changes, or
+                  when we're called with a different timeval.
+               */
+               if (timerfd_settime(epollop->timerfd, 0, &is, NULL) < 0) {
+                       event_warn("timerfd_settime");
+               }
+       } else
+#endif
        if (tv != NULL) {
                timeout = evutil_tv_to_msec_(tv);
                if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
@@ -542,6 +616,10 @@ epoll_dispatch(struct event_base *base, struct timeval *tv)
        for (i = 0; i < res; i++) {
                int what = events[i].events;
                short ev = 0;
+#ifdef USING_TIMERFD
+               if (events[i].data.fd == epollop->timerfd)
+                       continue;
+#endif
 
                if (what & (EPOLLHUP|EPOLLERR)) {
                        ev = EV_READ | EV_WRITE;
@@ -586,6 +664,10 @@ epoll_dealloc(struct event_base *base)
                mm_free(epollop->events);
        if (epollop->epfd >= 0)
                close(epollop->epfd);
+#ifdef USING_TIMERFD
+       if (epollop->timerfd >= 0)
+               close(epollop->timerfd);
+#endif
 
        memset(epollop, 0, sizeof(struct epollop));
        mm_free(epollop);
index 5a4efabaa0bc407b735a5f48043a6fb8f8ab1713..59748c00f6a8dc6da3b51c5f04f6587156cbfb90 100755 (executable)
@@ -42,6 +42,7 @@ setup () {
                eval "EVENT_NO$i=yes; export EVENT_NO$i"
        done
        unset EVENT_EPOLL_USE_CHANGELIST
+       unset EVENT_PRECISE_TIMER
 }
 
 announce () {
@@ -112,16 +113,24 @@ do_test() {
        unset EVENT_NO$1
        if test "$2" = "(changelist)" ; then
            EVENT_EPOLL_USE_CHANGELIST=yes; export EVENT_EPOLL_USE_CHANGELIST
+       elif test "$2" = "(timerfd)" ; then
+           EVENT_PRECISE_TIMER=1; export EVENT_PRECISE_TIMER
+       elif test "$2" = "(timerfd+changelist)" ; then
+           EVENT_EPOLL_USE_CHANGELIST=yes; export EVENT_EPOLL_USE_CHANGELIST
+           EVENT_PRECISE_TIMER=1; export EVENT_PRECISE_TIMER
         fi
+
        run_tests
 }
 
 announce "Running tests:"
 
+do_test EPOLL "(timerfd)"
+do_test EPOLL "(changelist)"
+do_test EPOLL "(timerfd+changelist)"
 for i in $BACKENDS; do
        do_test $i
 done
-do_test EPOLL "(changelist)"
 
 if test "$FAILED" = "yes"; then
        exit 1