From cdaca02c2909452de23244738630f408b8eee8e1 Mon Sep 17 00:00:00 2001
From: Nick Mathewson <nickm@torproject.org>
Date: Wed, 27 May 2009 15:35:00 +0000
Subject: [PATCH] Activate fd events in a pseudorandom order on older backends.

New backends like poll and kqueue and so on add fds to the queue in
the order that they are triggered.  But the select backend currently
activates low-numbered fds first, whereas the poll and win32 backends
currently favor whatever fds have been on for the longest.  This is no
good for fairness.

svn:r1318
---
 ChangeLog          |  1 +
 WIN32-Code/win32.c | 37 ++++++++++++++++++++++++++-----------
 poll.c             | 12 ++++++++----
 select.c           |  7 +++++--
 4 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index fe5077fa..ea9e2f27 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -33,6 +33,7 @@ Changes in 2.0.2-alpha:
  o Fix a deadlock when suspending reads in a bufferevent due to a full buffer. (Spotted by Joachim Bauch.)
  o Fix a memory error when freeing a thread-enabled event base with registered events. (Spotted by Joachim Bauch.)
  o Try to contain degree of failure when running on a win32 version so heavily firewalled that we can't fake a socketpair.
+ o Activate fd events in a pseudorandom order with O(N) backends, so that we don't systematically favor low fds (select) or earlier-added fds (poll, win32).
 
 
 Changes in 2.0.1-alpha:
diff --git a/WIN32-Code/win32.c b/WIN32-Code/win32.c
index aa73192f..4c316c9a 100644
--- a/WIN32-Code/win32.c
+++ b/WIN32-Code/win32.c
@@ -282,8 +282,9 @@ win32_dispatch(struct event_base *base, struct timeval *tv)
 {
 	struct win32op *win32op = base->evbase;
 	int res = 0;
-	unsigned i;
+	unsigned j, i;
 	int fd_count;
+	SOCKET s;
 
 	fd_set_copy(win32op->readset_out, win32op->readset_in);
 	fd_set_copy(win32op->exset_out, win32op->readset_in);
@@ -314,19 +315,33 @@ win32_dispatch(struct event_base *base, struct timeval *tv)
 		evsig_process(base);
 	}
 
-	for (i=0; i<win32op->readset_out->fd_count; ++i) {
-		SOCKET s = win32op->readset_out->fd_array[i];
-		evmap_io_active(base, s, EV_READ);
+	if (win32op->readset_out->fd_count) {
+		i = rand() % win32op->readset_out->fd_count;
+		for (j=0; j<win32op->readset_out->fd_count; ++j) {
+			if (++i >= win32op->readset_out->fd_count)
+				i = 0;
+			s = win32op->readset_out->fd_array[i];
+			evmap_io_active(base, s, EV_READ);
+		}
 	}
-	for (i=0; i<win32op->exset_out->fd_count; ++i) {
-		SOCKET s = win32op->exset_out->fd_array[i];
-		evmap_io_active(base, s, EV_READ);
+	if (win32op->exset_out->fd_count) {
+		i = rand() % win32op->exset_out->fd_count;
+		for (j=0; j<win32op->exset_out->fd_count; ++j) {
+			if (++i >= win32op-exset_out->fd_count)
+				i = 0;
+			s = win32op->exset_out->fd_array[i];
+			evmap_io_active(base, s, EV_READ);
+		}
 	}
-	for (i=0; i<win32op->writeset_out->fd_count; ++i) {
-		SOCKET s = win32op->writeset_out->fd_array[i];
-		evmap_io_active(base, s, EV_WRITE);
+	if (win32op->writeset_out->fd_count) {
+		i = rand() % win32op->writeset_out->fd_count;
+		for (j=0; j<win32op->writeset_out->fd_count; ++j) {
+			if (++i >= win32op-exset_out->fd_count)
+				i = 0;
+			SOCKET s = win32op->writeset_out->fd_array[i];
+			evmap_io_active(base, s, EV_WRITE);
+		}
 	}
-
 	return (0);
 }
 
diff --git a/poll.c b/poll.c
index 222b849e..08e148cb 100644
--- a/poll.c
+++ b/poll.c
@@ -117,7 +117,7 @@ poll_check_ok(struct pollop *pop)
 static int
 poll_dispatch(struct event_base *base, struct timeval *tv)
 {
-	int res, i, msec = -1, nfds;
+	int res, i, j, msec = -1, nfds;
 	struct pollop *pop = base->evbase;
 
 	poll_check_ok(pop);
@@ -142,11 +142,15 @@ poll_dispatch(struct event_base *base, struct timeval *tv)
 
 	event_debug(("%s: poll reports %d", __func__, res));
 
-	if (res == 0)
+	if (res == 0 || nfds == 0)
 		return (0);
 
-	for (i = 0; i < nfds; i++) {
-		int what = pop->event_set[i].revents;
+	i = random() % nfds;
+	for (j = 0; j < nfds; j++) {
+		int what;
+		if (++i == nfds)
+			i = 0;
+		what = pop->event_set[i].revents;
 		if (!what)
 			continue;
 
diff --git a/select.c b/select.c
index f34b905e..98d5749f 100644
--- a/select.c
+++ b/select.c
@@ -114,7 +114,7 @@ check_selectop(struct selectop *sop)
 static int
 select_dispatch(struct event_base *base, struct timeval *tv)
 {
-	int res, i;
+	int res, i, j;
 	struct selectop *sop = base->evbase;
 
 	check_selectop(sop);
@@ -144,7 +144,10 @@ select_dispatch(struct event_base *base, struct timeval *tv)
 	event_debug(("%s: select reports %d", __func__, res));
 
 	check_selectop(sop);
-	for (i = 0; i <= sop->event_fds; ++i) {
+	i = random() % (sop->event_fds+1);
+	for (j = 0; j <= sop->event_fds; ++j) {
+		if (++i >= sop->event_fds+1)
+			i = 0;
 		res = 0;
 		if (FD_ISSET(i, sop->event_readset_out))
 			res |= EV_READ;
-- 
2.40.0