Fix a nasty bug related to use of dup() with epoll on Linux

author Nick Mathewson <nickm@torproject.org>

Sun, 24 Oct 2010 15:38:29 +0000 (11:38 -0400)

committer Nick Mathewson <nickm@torproject.org>

Sun, 24 Oct 2010 15:38:29 +0000 (11:38 -0400)
author Nick Mathewson <nickm@torproject.org>
Sun, 24 Oct 2010 15:38:29 +0000 (11:38 -0400)
committer Nick Mathewson <nickm@torproject.org>
Sun, 24 Oct 2010 15:38:29 +0000 (11:38 -0400)
diff --git a/epoll.c b/epoll.c

index b574bf4d592fe39b3321c92d29798b2e5e244ada..9c8f0e99e711229e4f7106ad70cd1f878a9f6923 100644 (file)
--- a/epoll.c
+++ b/epoll.c
@@ -155,7 +155,6 @@ epoll_apply_changes(struct event_base *base)
         int op, events;
  
         for (i = 0; i < changelist->n_changes; ++i) {
-               int precautionary_add = 0;
                 ch = &changelist->changes[i];
                 events = 0;
  
@@ -203,13 +202,12 @@ epoll_apply_changes(struct event_base *base)
                                   on the fd, we need to try the ADD anyway, in
                                   case the fd was closed at some in the
                                   middle.  If it wasn't, the ADD operation
-                                 will fail with; that's okay.
-                               */
-                               precautionary_add = 1;
+                                 will fail with EEXIST; and we retry it as a
+                                 MOD. */
+                               op = EPOLL_CTL_ADD;
                         } else if (ch->old_events) {
                                 op = EPOLL_CTL_MOD;
                         }
-
                 } else if ((ch->read_change & EV_CHANGE_DEL) ||
                     (ch->write_change & EV_CHANGE_DEL)) {
                         /* If we're deleting anything, we'll want to do a MOD
@@ -255,14 +253,22 @@ epoll_apply_changes(struct event_base *base)
                                                 (int)epev.events,
                                                 ch->fd));
                                 }
-                       } else if (op == EPOLL_CTL_ADD && errno == EEXIST &&
-                           precautionary_add) {
-                               /* If a precautionary ADD operation fails with
-                                  EEXIST, that's fine too.
-                                */
-                               event_debug(("Epoll ADD(%d) on fd %d gave %s: ADD was redundant",
-                                       (int)epev.events,
-                                       ch->fd, strerror(errno)));
+                       } else if (op == EPOLL_CTL_ADD && errno == EEXIST) {
+                               /* If an ADD operation fails with EEXIST,
+                                * either the operation was redundant (as with a
+                                * precautionary add), or we ran into a fun
+                                * kernel bug where using dup*() to duplicate the
+                                * same file into the same fd gives you the same epitem
+                                * rather than a fresh one.  For the second case,
+                                * we must retry with MOD. */
+                               if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
+                                       event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
+                                           (int)epev.events, ch->fd);
+                               } else {
+                                       event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
+                                               (int)epev.events,
+                                               ch->fd));
+                               }
                         } else if (op == EPOLL_CTL_DEL &&
                             (errno == ENOENT || errno == EBADF ||
                                 errno == EPERM)) {
diff --git a/test/regress.c b/test/regress.c

index 21c921b6e8f1ea4841c4743357113ad121cb9051..0cc40170251d0bbf9521d15ad2b04692673c6bf3 100644 (file)
--- a/test/regress.c
+++ b/test/regress.c
@@ -2061,6 +2061,76 @@ end:
         }
  }
  
+#ifndef WIN32
+/* You can't do this test on windows, since dup2 doesn't work on sockets */
+
+static void
+dfd_cb(evutil_socket_t fd, short e, void *data)
+{
+       *(int*)data = (int)e;
+}
+
+/* Regression test for our workaround for a fun epoll/linux related bug
+ * where fd2 = dup(fd1); add(fd2); close(fd2); dup2(fd1,fd2); add(fd2)
+ * will get you an EEXIST */
+static void
+test_dup_fd(void *arg)
+{
+       struct basic_test_data *data = arg;
+       struct event_base *base = data->base;
+       struct event *ev1=NULL, *ev2=NULL;
+       int fd, dfd=-1;
+       int ev1_got, ev2_got;
+
+       tt_int_op(write(data->pair[0], "Hello world",
+               strlen("Hello world")), >, 0);
+       fd = data->pair[1];
+
+       dfd = dup(fd);
+       tt_int_op(dfd, >=, 0);
+
+       ev1 = event_new(base, fd, EV_READ|EV_PERSIST, dfd_cb, &ev1_got);
+       ev2 = event_new(base, dfd, EV_READ|EV_PERSIST, dfd_cb, &ev2_got);
+       ev1_got = ev2_got = 0;
+       event_add(ev1, NULL);
+       event_add(ev2, NULL);
+       event_base_loop(base, EVLOOP_ONCE);
+       tt_int_op(ev1_got, ==, EV_READ);
+       tt_int_op(ev2_got, ==, EV_READ);
+
+       /* Now close and delete dfd then dispatch.  We need to do the
+        * dispatch here so that when we add it later, we think there
+        * was an intermediate delete. */
+       close(dfd);
+       event_del(ev2);
+       ev1_got = ev2_got = 0;
+       event_base_loop(base, EVLOOP_ONCE);
+       tt_want_int_op(ev1_got, ==, EV_READ);
+       tt_int_op(ev2_got, ==, 0);
+
+       /* Re-duplicate the fd.  We need to get the same duplicated
+        * value that we closed to provoke the epoll quirk.  Also, we
+        * need to change the events to write, or else the old lingering
+        * read event will make the test pass whether the change was
+        * successful or not. */
+       tt_int_op(dup2(fd, dfd), ==, dfd);
+       event_free(ev2);
+       ev2 = event_new(base, dfd, EV_WRITE|EV_PERSIST, dfd_cb, &ev2_got);
+       event_add(ev2, NULL);
+       ev1_got = ev2_got = 0;
+       event_base_loop(base, EVLOOP_ONCE);
+       tt_want_int_op(ev1_got, ==, EV_READ);
+       tt_int_op(ev2_got, ==, EV_WRITE);
+
+end:
+       if (ev1)
+               event_free(ev1);
+       if (ev2)
+               event_free(ev2);
+       close(dfd);
+}
+#endif
+
  #ifdef _EVENT_DISABLE_MM_REPLACEMENT
  static void
  test_mm_functions(void *arg)
@@ -2229,6 +2299,9 @@ struct testcase_t main_testcases[] = {
         { "event_once", test_event_once, TT_ISOLATED, &basic_setup, NULL },
         { "event_pending", test_event_pending, TT_ISOLATED, &basic_setup,
           NULL },
+#ifndef WIN32
+       { "dup_fd", test_dup_fd, TT_ISOLATED, &basic_setup, NULL },
+#endif
         { "mm_functions", test_mm_functions, TT_FORK, NULL, NULL },
         BASIC(many_events, TT_ISOLATED),
author	Nick Mathewson <nickm@torproject.org>
	Sun, 24 Oct 2010 15:38:29 +0000 (11:38 -0400)
committer	Nick Mathewson <nickm@torproject.org>
	Sun, 24 Oct 2010 15:38:29 +0000 (11:38 -0400)
epoll.c		patch \| blob \| history
test/regress.c		patch \| blob \| history