From 9bd1a88233e0836036cbbd68b919db6a554fc498 Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Tue, 16 Jul 2019 15:37:25 +0200 Subject: [PATCH] dnsdist: Add comments about the use of IDStates and origFD --- pdns/dnsdist.cc | 32 ++++++++++++++++++++++++++------ pdns/dnsdist.hh | 21 +++++++++++++++++++++ pdns/dnsdistdist/doh.cc | 13 +++++++++---- 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/pdns/dnsdist.cc b/pdns/dnsdist.cc index 086642541..3e6c53284 100644 --- a/pdns/dnsdist.cc +++ b/pdns/dnsdist.cc @@ -546,9 +546,17 @@ try { IDState* ids = &dss->idStates[queryId]; int origFD = ids->origFD; - if(origFD < 0) // duplicate + if(origFD < 0) { + /* the corresponding state is marked as not in use, meaning that: + - it was already reused by another thread and the state is gone ; + - we already got a response for this query and this one is a duplicate. + Either way, we don't touch it. + */ continue; + } + /* read the potential DOHUnit state as soon as possible, but don't use it + until we have confirmed that we own this state by updating origFD */ auto du = ids->du; /* setting age to 0 to prevent the maintainer thread from cleaning this IDS while we process the response. @@ -562,8 +570,12 @@ try { continue; } + /* atomically mark the state as available */ int oldFD = ids->origFD.exchange(-1); if (oldFD == origFD) { + /* clear the potential DOHUnit asap, it's ours now + and since we just marked the state as unused, + someone could overwrite it. */ ids->du = nullptr; /* we only decrement the outstanding counter if the value was not altered in the meantime, which would mean that the state has been actively reused @@ -571,6 +583,7 @@ try { want it to be decremented twice. */ --dss->outstanding; // you'd think an attacker could game this, but we're using connected socket } else { + /* someone updated the state in the meantime, we can't touch the existing pointer */ du = nullptr; } @@ -597,6 +610,8 @@ try { // DoH query du->response = std::string(response, responseLen); if (send(du->rsock, &du, sizeof(du), 0) != sizeof(du)) { + /* at this point we have the only remaining pointer on this + DOHUnit object since we did set ids->du to nullptr earlier */ delete du; } #endif /* HAVE_DNS_OVER_HTTPS */ @@ -1570,19 +1585,24 @@ static void processUDPQuery(ClientState& cs, LocalHolders& holders, const struct ids->age = 0; DOHUnit* du = nullptr; + /* that means that the state was in use, possibly with an allocated + DOHUnit that we will need to handle, but we can't touch it before + confirming that we now own this state */ if (ids->origFD != -1) { du = ids->du; } + /* we atomically replace the value with 0, we now own this state */ int oldFD = ids->origFD.exchange(cs.udpFD); if(oldFD < 0) { - ++ss->outstanding; - /* either it was already -1 so no DOH unit to handle, - or someone handled it before us */ + /* the value was -1, meaning that the state was not in use. + we reset 'du' because it might have still been in use when we read it. */ du = nullptr; + ++ss->outstanding; } else { - // if we are reusing, no change in outstanding + /* we are reusing a state, no change in outstanding but if there was an existing DOHUnit we need + to handle it because it's about to be overwritten. */ ++ss->reuseds; ++g_stats.downstreamTimeouts; ids->du = nullptr; @@ -2084,8 +2104,8 @@ static void healthChecksThread() don't go anywhere near it */ continue; } - handleDOHTimeout(oldDU); ids.du = nullptr; + handleDOHTimeout(oldDU); ids.age = 0; dss->reuseds++; --dss->outstanding; diff --git a/pdns/dnsdist.hh b/pdns/dnsdist.hh index a96a87b43..9c18f8d02 100644 --- a/pdns/dnsdist.hh +++ b/pdns/dnsdist.hh @@ -550,6 +550,27 @@ struct IDState tempFailureTTL = orig.tempFailureTTL; } + /* We use this value to detect whether this state is in use, in addition to + its use to send the response over UDP. + For performance reasons we don't want to use a lock here, but that means + we need to be very careful when modifying this value. Modifications happen + from: + - one of the UDP or DoH 'client' threads receiving a query, selecting a backend + then picking one of the states associated to this backend (via the idOffset). + Most of the time this state should not be in use and origFD is -1, but we + might not yet have received a response for the query previously associated to this + state, meaning that we will 'reuse' this state and erase the existing state. + If we ever receive a response for this state, it will be discarded. This is + mostly fine for UDP except that we still need to be careful in order to miss + the 'outstanding' counters, which should only be increased when we are picking + an empty state, and not when reusing ; + For DoH, though, we have dynamically allocated a DOHUnit object that needs to + be freed, as well as internal objects internals to libh2o. + - one of the UDP receiver threads receiving a response from a backend, picking + the corresponding state and sending the response to the client ; + - the 'healthcheck' thread scanning the states to actively discover timeouts, + mostly to keep some counters like the 'outstanding' one sane. + */ std::atomic origFD; // set to <0 to indicate this state is empty // 4 ComboAddress origRemote; // 28 diff --git a/pdns/dnsdistdist/doh.cc b/pdns/dnsdistdist/doh.cc index 021578a15..ac8a14be9 100644 --- a/pdns/dnsdistdist/doh.cc +++ b/pdns/dnsdistdist/doh.cc @@ -239,20 +239,25 @@ static int processDOHQuery(DOHUnit* du) ids->age = 0; DOHUnit* oldDU = nullptr; if (ids->origFD != -1) { + /* that means that the state was in use, possibly with an allocated + DOHUnit that we will need to handle, but we can't touch it before + confirming that we now own this state */ oldDU = ids->du; } + /* we atomically replace the value with 0, we now own this state */ int oldFD = ids->origFD.exchange(0); if (oldFD < 0) { - // if we are reusing, no change in outstanding - ++ss->outstanding; - /* either it was already -1 so no DOH unit to handle, - or someone handled it before us */ + /* the value was -1, meaning that the state was not in use. + we reset 'oldDU' because it might have still been in use when we read it. */ oldDU = nullptr; + ++ss->outstanding; } else { ++ss->reuseds; ++g_stats.downstreamTimeouts; + /* we are reusing a state, if there was an existing DOHUnit we need + to handle it because it's about to be overwritten. */ ids->du = nullptr; handleDOHTimeout(oldDU); } -- 2.40.0