When running with several threads, you can either ask PowerDNS to start a dedicated thread that dispatches incoming queries to the workers by setting `pdns-distributes-queries` to true, or let the worker threads handle incoming queries themselves. The distributor thread enabled by `pdns-distributes-queries` tries to send identical queries to the same worker thread to maximize the cache-hit ratio, but it might become a bottleneck if the rate of incoming queries is too high to be handled by a single thread.
If `pdns-distributes-queries` is set to false and either `SO_REUSEPORT` support is not available or the `reuseport` directive is set to false, all worker threads share the same listening sockets. This prevents a single thread from having to handle every incoming query, but can lead to thundering-herd issues where all threads are woken up at once when a query arrives. If `SO_REUSEPORT` support is available and `reuseport` is set to true, a separate listening socket is opened for each worker thread and query distribution is handled by the kernel, avoiding any thundering-herd issue as well as preventing the distributor thread from becoming a bottleneck.
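To illustrate the kernel-level distribution just described, here is a minimal sketch, not taken from the Recursor's own socket-setup code, of how a worker thread could open its own UDP listening socket with `SO_REUSEPORT`; when several threads bind such sockets to the same address and port, the kernel spreads incoming queries across them. The function name `openReusePortSocket` is used only for this example.

```cpp
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>

// Open a UDP socket bound to address:port with SO_REUSEPORT set, so that
// several worker threads can each own a socket on the same address and port
// and let the kernel distribute incoming datagrams between them.
int openReusePortSocket(const char* address, uint16_t port)
{
  int fd = socket(AF_INET, SOCK_DGRAM, 0);
  if (fd < 0) {
    throw std::runtime_error("creating UDP socket: " + std::string(strerror(errno)));
  }

  int one = 1;
#ifdef SO_REUSEPORT
  setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
#endif

  struct sockaddr_in sin;
  memset(&sin, 0, sizeof(sin));
  sin.sin_family = AF_INET;
  sin.sin_port = htons(port);
  if (inet_pton(AF_INET, address, &sin.sin_addr) != 1 ||
      bind(fd, reinterpret_cast<struct sockaddr*>(&sin), sizeof(sin)) != 0) {
    int err = errno;
    close(fd);
    throw std::runtime_error("binding UDP socket: " + std::string(strerror(err)));
  }
  return fd;
}
```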
+ From 4.1 onwards, the `cpu-map` parameter can be used to pin worker threads to specific CPUs, in order to keep caches as warm as possible and optimize memory access on NUMA systems.
+
- For best PowerDNS Recursor performance, use a recent version of your operating system, since this generally offers the best event multiplexer implementation available (`kqueue`, `epoll`, `ports` or `/dev/poll`).
- Compile using `g++ 4.1` or later. This compiler really does a good job on PowerDNS, much better than 3.4 or 4.0.
- On AMD/Intel hardware, wherever possible, run a 64-bit binary. This delivers a nearly twofold performance increase. On UltraSPARC, there is no need to run with 64 bits.
When running multiple recursors on the same server, have each instance read its settings from
"recursor-name.conf"; this will also rename the binary image.
+## `cpu-map`
+* String
+* Default: unset
+* Available since: 4.1.0
+
+Set the CPU affinity for worker threads, asking the scheduler to run each of those threads on a single CPU or on a set of CPUs.
+This parameter accepts a space-separated list of `thread-id=cpu-id` or `thread-id=cpu-id-1,cpu-id-2,...,cpu-id-N` entries.
+For example, to make worker thread 0 run on CPU 0 and worker thread 1 on CPUs 1 and 2:
+
+`cpu-map=0=0 1=1,2`
+
+The number of worker threads is determined by the [`threads`](#threads) setting.
+If [`pdns-distributes-queries`](#pdns-distributes-queries) is set, an additional thread is started and assigned the id 0;
+it is the only one listening on client sockets and accepting queries, and it distributes them to the other worker threads.
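+For instance, following the description above, with [`pdns-distributes-queries`](#pdns-distributes-queries) enabled and [`threads`](#threads) set to 2, pinning the distributor thread (id 0) and both worker threads each to their own CPU could look as follows (the CPU ids are purely illustrative):
+
+`cpu-map=0=0 1=1 2=2`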
+
+This parameter is only available on operating systems that provide the `pthread_setaffinity_np()` function.
+
## `daemon`
* Boolean
* Default: no (since 4.0.0, 'yes' before 4.0.0)
--- /dev/null
+AC_DEFUN([PDNS_CHECK_PTHREAD_NP],[
+ AC_SEARCH_LIBS([pthread_setaffinity_np], [pthread], [AC_DEFINE(HAVE_PTHREAD_SETAFFINITY_NP, [1], [Define to 1 if you have pthread_setaffinity_np])])
+])
return static_cast<unsigned int>(result);
}
+bool isSettingThreadCPUAffinitySupported()
+{
+#ifdef HAVE_PTHREAD_SETAFFINITY_NP
+ return true;
+#else
+ return false;
+#endif
+}
+
+/* Pin the given thread to the supplied set of CPU ids, returning 0 on success
+   or an errno value (ENOSYS if thread affinity is not supported). */
+int mapThreadToCPUList(pthread_t tid, const std::set<int>& cpus)
+{
+#ifdef HAVE_PTHREAD_SETAFFINITY_NP
+  cpu_set_t cpuset;
+  CPU_ZERO(&cpuset);
+  for (const auto cpuID : cpus) {
+    CPU_SET(cpuID, &cpuset);
+  }
+
+  return pthread_setaffinity_np(tid,
+                                sizeof(cpuset),
+                                &cpuset);
+#else
+  (void) tid;
+  (void) cpus;
+  return ENOSYS;
+#endif /* HAVE_PTHREAD_SETAFFINITY_NP */
+}
unsigned int pdns_stou(const std::string& str, size_t * idx = 0, int base = 10);
+bool isSettingThreadCPUAffinitySupported();
+int mapThreadToCPUList(pthread_t tid, const std::set<int>& cpus);
}
}
+/* Parse the cpu-map setting into a map of thread id -> set of CPU ids. */
+static std::map<unsigned int, std::set<int> > parseCPUMap()
+{
+ std::map<unsigned int, std::set<int> > result;
+
+ const std::string value = ::arg()["cpu-map"];
+
+ if (!value.empty() && !isSettingThreadCPUAffinitySupported()) {
+ L<<Logger::Warning<<"CPU mapping requested but not supported, skipping"<<endl;
+ return result;
+ }
+
+ std::vector<std::string> parts;
+
+ stringtok(parts, value, " \t");
+
+ for(const auto& part : parts) {
+ if (part.find('=') == string::npos)
+ continue;
+
+ try {
+ auto headers = splitField(part, '=');
+ trim(headers.first);
+ trim(headers.second);
+
+ unsigned int threadId = pdns_stou(headers.first);
+ std::vector<std::string> cpus;
+
+ stringtok(cpus, headers.second, ",");
+
+ for(const auto& cpu : cpus) {
+ int cpuId = std::stoi(cpu);
+
+ result[threadId].insert(cpuId);
+ }
+ }
+ catch(const std::exception& e) {
+ L<<Logger::Error<<"Error parsing cpu-map entry '"<<part<<"': "<<e.what()<<endl;
+ }
+ }
+
+ return result;
+}
+
+/* Apply the configured CPU affinity, if any, to worker thread number n. */
+static void setCPUMap(const std::map<unsigned int, std::set<int> >& cpusMap, unsigned int n, pthread_t tid)
+{
+  const auto cpuMapping = cpusMap.find(n);
+  if (cpuMapping != cpusMap.cend()) {
+    int rc = mapThreadToCPUList(tid, cpuMapping->second);
+    if (rc == 0) {
+      L<<Logger::Info<<"CPU affinity for worker "<<n<<" has been set to CPU map:";
+      for (const auto cpu : cpuMapping->second) {
+        L<<Logger::Info<<" "<<cpu;
+      }
+      L<<Logger::Info<<endl;
+    }
+    else {
+      L<<Logger::Warning<<"Error setting CPU affinity for worker "<<n<<" to CPU map:";
+      for (const auto cpu : cpuMapping->second) {
+        L<<Logger::Warning<<" "<<cpu;
+      }
+      L<<Logger::Warning<<" "<<strerror(rc)<<endl;
+    }
+  }
+}
+
static int serviceMain(int argc, char*argv[])
{
L.setName(s_programname);
g_snmpAgent->run();
}
+ const auto cpusMap = parseCPUMap();
if(g_numThreads == 1) {
L<<Logger::Warning<<"Operating unthreaded"<<endl;
#ifdef HAVE_SYSTEMD
sd_notify(0, "READY=1");
#endif
+ setCPUMap(cpusMap, 0, pthread_self());
recursorThread(0);
}
else {
L<<Logger::Warning<<"Launching "<< g_numThreads <<" threads"<<endl;
for(unsigned int n=0; n < g_numThreads; ++n) {
pthread_create(&tid, 0, recursorThread, (void*)(long)n);
+
+ setCPUMap(cpusMap, n, tid);
}
void* res;
#ifdef HAVE_SYSTEMD
::arg().set("tcp-fast-open", "Enable TCP Fast Open support on the listening sockets, using the supplied numerical value as the queue size")="0";
::arg().set("nsec3-max-iterations", "Maximum number of iterations allowed for an NSEC3 record")="2500";
+ ::arg().set("cpu-map", "Thread to CPU mapping, space separated thread-id=cpu1,cpu2..cpuN pairs")="";
+
::arg().setCmd("help","Provide a helpful message");
::arg().setCmd("version","Print version string");
::arg().setCmd("config","Output blank configuration");
AC_PROG_CXX
AC_LANG([C++])
-AC_DEFINE([_GNU_SOURCE], [1],
- [Define _GNU_SOURCE so that we get all necessary prototypes]
-)
+AC_GNU_SOURCE
AC_DEFINE([RECURSOR], [1],
[This is the PowerDNS Recursor]
AC_CHECK_FUNCS([strcasestr])
+PDNS_CHECK_PTHREAD_NP
+
AC_SUBST([socketdir])
socketdir="/var/run"
AC_ARG_WITH([socketdir],
--- /dev/null
+../../../m4/pdns_check_pthread_np.m4
\ No newline at end of file