From: Remi Gacogne Date: Wed, 1 Mar 2017 09:36:33 +0000 (+0100) Subject: rec: Use one listening socket per thread when reuseport is enabled X-Git-Tag: rec-4.1.0-alpha1~211^2~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=810ff70506fab6f074afbef854ae6042bad5ac0f;p=pdns rec: Use one listening socket per thread when reuseport is enabled Except if `pdns-distributes-queries` is true, of course. We used to share the same listening socket between all threads when `pdns-distributes-queries` is set to false, even with `reuseport` set to true and `SO_REUSEPORT` support available. After this commit: * if `pdns-distributes-queries` is true, the distributor thread is still the only one listening to incoming queries * if `pdns-distributes-queries` is false and either `reuseport` is false or `SO_REUSEPORT` support is not available, all threads share the same listening socket as it was before * if `pdns-distributes-queries` is false, `SO_REUSEPORT` support is available and `reuseport` is true, we open a separate listening socket per thread to let the kernel distribute the incoming queries for us, avoiding any thundering herd issue as well as the distributor thread being a bottleneck. --- diff --git a/docs/markdown/recursor/settings.md b/docs/markdown/recursor/settings.md index cb71da62a..09bc266a2 100644 --- a/docs/markdown/recursor/settings.md +++ b/docs/markdown/recursor/settings.md @@ -748,6 +748,16 @@ which also disables outgoing IPv6 support. Don't log queries. +## `reuseport` +* Boolean +* Default: no + +If `SO_REUSEPORT` support is available, allows multiple processes to open a +listening socket on the same port. Since 4.1.0, when `pdns-distributes-queries` is set to +false and `reuseport` is enabled, every thread will open a separate listening socket to let +the kernel distribute the incoming queries, avoiding any thundering herd issue as well as +the distributor thread being a bottleneck, thus leading to much higher performance on multi-core boxes. 
+ ## `root-nx-trust` * Boolean * Default: no (<= 4.0.0), yes diff --git a/pdns/pdns_recursor.cc b/pdns/pdns_recursor.cc index 5323043f4..b05b94021 100644 --- a/pdns/pdns_recursor.cc +++ b/pdns/pdns_recursor.cc @@ -122,6 +122,7 @@ struct ThreadPipeSet int writeFromThread; int readFromThread; }; + typedef vector tcpListenSockets_t; typedef map listenSocketsAddresses_t; // is shared across all threads right now typedef vector > > deferredAdd_t; @@ -130,7 +131,7 @@ static const ComboAddress g_local4("0.0.0.0"), g_local6("::"); static vector g_pipes; // effectively readonly after startup static tcpListenSockets_t g_tcpListenSockets; // shared across threads, but this is fine, never written to from a thread. All threads listen on all sockets static listenSocketsAddresses_t g_listenSocketsAddresses; // is shared across all threads right now -static deferredAdd_t deferredAdd; +static std::unordered_map deferredAdds; static set g_fromtosockets; // listen sockets that use 'sendfromto()' mechanism static vector g_localQueryAddresses4, g_localQueryAddresses6; static AtomicCounter counter; @@ -151,6 +152,8 @@ static bool g_logCommonErrors; static bool g_anyToTcp; static bool g_lowercaseOutgoing; static bool g_weDistributeQueries; // if true, only 1 thread listens on the incoming query sockets +static bool g_reusePort{false}; +static bool g_useOneSocketPerThread; std::unordered_set g_delegationOnly; RecursorControlChannel s_rcc; // only active in thread 0 @@ -1787,7 +1790,7 @@ static void handleNewUDPQuestion(int fd, FDMultiplexer::funcparam_t& var) } } -static void makeTCPServerSockets() +static void makeTCPServerSockets(unsigned int threadId) { int fd; vectorlocals; @@ -1818,7 +1821,7 @@ static void makeTCPServerSockets() setCloseOnExec(fd); int tmp=1; - if(setsockopt(fd,SOL_SOCKET,SO_REUSEADDR,(char*)&tmp,sizeof tmp)<0) { + if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof tmp)<0) { L<= 0) { + if(setsockopt(fd, SOL_TCP, TCP_DEFER_ACCEPT, &tmp, sizeof tmp) >= 0) { 
if(i==locals.begin()) L<locals; @@ -1917,7 +1919,7 @@ static void makeUDPServerSockets() #ifdef SO_REUSEPORT - if(::arg().mustDo("reuseport")) { + if(g_reusePort) { if(setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0) throw PDNSException("SO_REUSEPORT: "+stringerror()); } @@ -1928,7 +1930,7 @@ static void makeUDPServerSockets() setNonBlocking(fd); - deferredAdd.push_back(make_pair(fd, handleNewUDPQuestion)); + deferredAdds[threadId].push_back(make_pair(fd, handleNewUDPQuestion)); g_listenSocketsAddresses[fd]=sin; // this is written to only from the startup thread, not from the workers if(sin.sin4.sin_family == AF_INET) L<addReadFD(g_pipes[t_id].readToThread, handlePipeRequest); - if(!g_weDistributeQueries || !t_id) // if we distribute queries, only t_id = 0 listens - for(deferredAdd_t::const_iterator i=deferredAdd.begin(); i!=deferredAdd.end(); ++i) - t_fdm->addReadFD(i->first, i->second); + if(g_useOneSocketPerThread) { + for (unsigned int threadId = 0; threadId < g_numWorkerThreads; threadId++) { + for(deferredAdd_t::const_iterator i = deferredAdds[threadId].begin(); i != deferredAdds[threadId].end(); ++i) { + t_fdm->addReadFD(i->first, i->second); + } + } + } + else { + if(!g_weDistributeQueries || !t_id) { // if we distribute queries, only t_id = 0 listens + for(deferredAdd_t::const_iterator i = deferredAdds[0].begin(); i != deferredAdds[0].end(); ++i) { + t_fdm->addReadFD(i->first, i->second); + } + } + } if(!t_id) { t_fdm->addReadFD(s_rcc.d_fd, handleRCC); // control channel