From a94897232c59ba855d79a2dc398c8a6cb1fd8ad5 Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Tue, 30 Jul 2019 19:43:34 +0200 Subject: [PATCH] dnsdist: Use a counter to mark IDState usage instead of the FD --- pdns/dnsdist.cc | 41 +++++++++++++++++++---------------------- pdns/dnsdist.hh | 28 +++++++++++++++++++++------- pdns/dnsdistdist/doh.cc | 12 +++++++----- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/pdns/dnsdist.cc b/pdns/dnsdist.cc index bb35f6804..bbef60979 100644 --- a/pdns/dnsdist.cc +++ b/pdns/dnsdist.cc @@ -544,11 +544,11 @@ try { } IDState* ids = &dss->idStates[queryId]; - int origFD = ids->origFD; + int64_t usageIndicator = ids->usageIndicator; - if(origFD < 0) { + if(usageIndicator < 0) { /* the corresponding state is marked as not in use, meaning that: - - it was already reused by another thread and the state is gone ; + - it was already cleaned up by another thread and the state is gone ; - we already got a response for this query and this one is a duplicate. Either way, we don't touch it. */ @@ -556,14 +556,13 @@ try { } /* read the potential DOHUnit state as soon as possible, but don't use it - until we have confirmed that we own this state by updating origFD */ + until we have confirmed that we own this state by updating usageIndicator */ auto du = ids->du; /* setting age to 0 to prevent the maintainer thread from cleaning this IDS while we process the response. - We have already a copy of the origFD, so it would - mostly mess up the outstanding counter. */ ids->age = 0; + int origFD = ids->origFD; unsigned int consumed = 0; if (!responseContentMatches(response, responseLen, ids->qname, ids->qtype, ids->qclass, dss->remote, consumed)) { @@ -571,9 +570,9 @@ try { } bool isDoH = du != nullptr; - /* atomically mark the state as available */ - int oldFD = ids->origFD.exchange(-1); - if (oldFD == origFD) { + /* atomically mark the state as available, but only if it has not been altered + in the meantime */ + if (ids->usageIndicator.compare_exchange_strong(usageIndicator, -1)) { /* clear the potential DOHUnit asap, it's ours now and since we just marked the state as unused, someone could overwrite it. */ @@ -1592,13 +1591,14 @@ static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct /* that means that the state was in use, possibly with an allocated DOHUnit that we will need to handle, but we can't touch it before confirming that we now own this state */ - if (ids->origFD != -1) { + if (ids->usageIndicator != -1) { du = ids->du; } - /* we atomically replace the value with 0, we now own this state */ - int oldFD = ids->origFD.exchange(cs.udpFD); - if(oldFD < 0) { + /* we atomically replace the value, we now own this state */ + auto generation = ids->generation++; + int64_t oldUsage = ids->usageIndicator.exchange(generation); + if(oldUsage < 0) { /* the value was -1, meaning that the state was not in use. we reset 'du' because it might have still been in use when we read it. */ du = nullptr; @@ -1607,13 +1607,14 @@ static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct else { /* we are reusing a state, no change in outstanding but if there was an existing DOHUnit we need to handle it because it's about to be overwritten. */ + ids->du = nullptr; ++ss->reuseds; ++g_stats.downstreamTimeouts; - ids->du = nullptr; handleDOHTimeout(du); } ids->cs = &cs; + ids->origFD = cs.udpFD; ids->origID = dh->id; setIDStateFromDNSQuestion(*ids, dq, std::move(qname)); @@ -2091,19 +2092,15 @@ static void healthChecksThread() dss->prev.reuseds.store(dss->reuseds.load()); for(IDState& ids : dss->idStates) { // timeouts - int origFD = ids.origFD; - if(origFD >=0 && ids.age++ > g_udpTimeout) { - /* We set origFD to -1 as soon as possible + int64_t usageIndicator = ids.usageIndicator; + if(usageIndicator >=0 && ids.age++ > g_udpTimeout) { + /* We set usageIndicator to -1 as soon as possible to limit the risk of racing with the responder thread. - The UDP client thread only checks origFD to - know whether outstanding has to be incremented, - so the sooner the better any way since we _will_ - decrement it. */ auto oldDU = ids.du; - if (ids.origFD.exchange(-1) != origFD) { + if (!ids.usageIndicator.compare_exchange_strong(usageIndicator, -1)) { /* this state has been altered in the meantime, don't go anywhere near it */ continue; diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index fff2234e9..72ec1a143 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -541,23 +541,23 @@ struct ClientState; struct IDState { - IDState() : origFD(-1), sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;} + IDState(): sentTime(true), delayMsec(0), tempFailureTTL(boost::none) { origDest.sin4.sin_family = 0;} IDState(const IDState& orig): origRemote(orig.origRemote), origDest(orig.origDest), age(orig.age) { - origFD.store(orig.origFD.load()); + usageIndicator.store(orig.usageIndicator.load()); + origFD = orig.origFD; origID = orig.origID; delayMsec = orig.delayMsec; tempFailureTTL = orig.tempFailureTTL; } - /* We use this value to detect whether this state is in use, in addition to - its use to send the response over UDP. + /* We use this value to detect whether this state is in use. For performance reasons we don't want to use a lock here, but that means we need to be very careful when modifying this value. Modifications happen from: - one of the UDP or DoH 'client' threads receiving a query, selecting a backend then picking one of the states associated to this backend (via the idOffset). - Most of the time this state should not be in use and origFD is -1, but we + Most of the time this state should not be in use and usageIndicator is -1, but we might not yet have received a response for the query previously associated to this state, meaning that we will 'reuse' this state and erase the existing state. If we ever receive a response for this state, it will be discarded. This is @@ -570,9 +570,22 @@ struct IDState the corresponding state and sending the response to the client ; - the 'healthcheck' thread scanning the states to actively discover timeouts, mostly to keep some counters like the 'outstanding' one sane. + We previously based that logic on the origFD (FD on which the query was received, + and therefore from where the response should be sent) but this suffered from an + ABA problem since it was quite likely that a UDP 'client thread' would reset it to the + same value since we only have so much incoming sockets: + - 1/ 'client' thread gets a query and set origFD to its FD, say 5 ; + - 2/ 'receiver' thread gets a response, read the value of origFD to 5, check that the qname, + qtype and qclass match + - 3/ during that time the 'client' thread reuses the state, setting again origFD to 5 ; + - 4/ the 'receiver' thread uses compare_exchange_strong() to only replace the value if it's still + 5, except it's not the same 5 anymore and it overrides a fresh state. + We now use a 32-bit unsigned counter instead, which is incremented every time the state is set, + wrapping around if necessary, and we set an atomic signed 64-bit value, so that we still have -1 + when the state is unused and the value of our counter otherwise. */ - std::atomic origFD; // set to <0 to indicate this state is empty // 4 - + std::atomic usageIndicator{-1}; // set to <0 to indicate this state is empty // 4 + std::atomic generation{0}; // increased every time a state is used, to be able to detect an ABA issue ComboAddress origRemote; // 28 ComboAddress origDest; // 28 StopWatch sentTime; // 16 @@ -593,6 +606,7 @@ struct IDState uint16_t qclass; // 2 uint16_t origID; // 2 uint16_t origFlags; // 2 + int origFD{-1}; int delayMsec; boost::optional tempFailureTTL; bool ednsAdded{false}; diff --git a/pdns/dnsdistdist/doh.cc b/pdns/dnsdistdist/doh.cc index 55ed64059..29b5665b1 100644 --- a/pdns/dnsdistdist/doh.cc +++ b/pdns/dnsdistdist/doh.cc @@ -238,16 +238,17 @@ static int processDOHQuery(DOHUnit* du) IDState* ids = &ss->idStates[idOffset]; ids->age = 0; DOHUnit* oldDU = nullptr; - if (ids->origFD != -1) { + if (ids->usageIndicator != -1) { /* that means that the state was in use, possibly with an allocated DOHUnit that we will need to handle, but we can't touch it before confirming that we now own this state */ oldDU = ids->du; } - /* we atomically replace the value with 0, we now own this state */ - int oldFD = ids->origFD.exchange(0); - if (oldFD < 0) { + /* we atomically replace the value, we now own this state */ + int64_t generation = ids->generation++; + int64_t oldUsage = ids->usageIndicator.exchange(generation); + if (oldUsage < 0) { /* the value was -1, meaning that the state was not in use. we reset 'oldDU' because it might have still been in use when we read it. */ oldDU = nullptr; @@ -262,6 +263,7 @@ static int processDOHQuery(DOHUnit* du) handleDOHTimeout(oldDU); } + ids->origFD = 0; ids->du = du; ids->cs = &cs; @@ -293,7 +295,7 @@ static int processDOHQuery(DOHUnit* du) /* we are about to handle the error, make sure that this pointer is not accessed when the state is cleaned, but first check that it still belongs to us */ - if (ids->origFD == 0 && ids->origFD.exchange(-1) == 0) { + if (ids->usageIndicator.compare_exchange_strong(generation, -1)) { ids->du = nullptr; --ss->outstanding; } -- 2.40.0