This feature saves valuable resources by avoiding queries to servers from which we don't receive any answer. It's disabled by default.
SyncRes::s_maxcachettl=::arg().asNum("max-cache-ttl");
SyncRes::s_packetcachettl=::arg().asNum("packetcache-ttl");
SyncRes::s_packetcacheservfailttl=::arg().asNum("packetcache-servfail-ttl");
+ SyncRes::s_serverdownmaxfails=::arg().asNum("server-down-max-fails");
+ SyncRes::s_serverdownthrottletime=::arg().asNum("server-down-throttle-time");
SyncRes::s_serverID=::arg()["server-id"];
if(SyncRes::s_serverID.empty()) {
char tmp[128];
::arg().set("client-tcp-timeout","Timeout in seconds when talking to TCP clients")="2";
::arg().set("max-mthreads", "Maximum number of simultaneous Mtasker threads")="2048";
::arg().set("max-tcp-clients","Maximum number of simultaneous TCP clients")="128";
+ // Server-down detection: once a server has failed "server-down-max-fails" times in a row, all queries to it are throttled for "server-down-throttle-time" seconds
+ ::arg().set("server-down-max-fails","Maximum number of consecutive timeouts (and unreachables) to mark a server as down ( 0 => disabled )")="0";
+ ::arg().set("server-down-throttle-time","Number of seconds to throttle all queries to a server after being marked as down")="60";
::arg().set("hint-file", "If set, load root hints from this file")="";
::arg().set("max-cache-entries", "If set, maximum number of entries in the main cache")="1000000";
::arg().set("max-negative-ttl", "maximum number of seconds to keep a negative cached entry in memory")="3600";
unsigned int SyncRes::s_maxcachettl;
unsigned int SyncRes::s_packetcachettl;
unsigned int SyncRes::s_packetcacheservfailttl;
+unsigned int SyncRes::s_serverdownmaxfails; // loaded from the "server-down-max-fails" setting; 0 disables server-down detection
+unsigned int SyncRes::s_serverdownthrottletime; // loaded from the "server-down-throttle-time" setting; seconds a server marked as down stays throttled
unsigned int SyncRes::s_queries;
unsigned int SyncRes::s_outgoingtimeouts;
unsigned int SyncRes::s_outqueries;
LOG(prefix<<qname<<": Trying IP "<< remoteIP->toStringWithPort() <<", asking '"<<qname<<"|"<<qtype.getName()<<"'"<<endl);
extern NetmaskGroup* g_dontQuery;
- if(t_sstorage->throttle.shouldThrottle(d_now.tv_sec, make_tuple(*remoteIP, qname, qtype.getCode()))) {
+ if(t_sstorage->throttle.shouldThrottle(d_now.tv_sec, make_tuple(*remoteIP, "", 0))) {
+ LOG(prefix<<qname<<": server throttled "<<endl);
+ s_throttledqueries++; d_throttledqueries++;
+ continue;
+ }
+ else if(t_sstorage->throttle.shouldThrottle(d_now.tv_sec, make_tuple(*remoteIP, qname, qtype.getCode()))) {
LOG(prefix<<qname<<": query throttled "<<endl);
s_throttledqueries++; d_throttledqueries++;
continue;
t_sstorage->nsSpeeds[*tns].submit(*remoteIP, 1000000, &d_now); // 1 sec
}
- if(resolveret==-1)
+ // Count this failure against the server; once the configured limit is hit, throttle every query to it (empty qname, qtype 0) instead of just this question
+ if (s_serverdownmaxfails > 0 && t_sstorage->fails.incr(*remoteIP) >= s_serverdownmaxfails) {
+ LOG(prefix<<qname<<": Max fails reached resolving on "<< remoteIP->toString() <<". Going full throttle for "<< s_serverdownthrottletime <<" seconds" <<endl);
+ t_sstorage->throttle.throttle(d_now.tv_sec, make_tuple(*remoteIP, "", 0), s_serverdownthrottletime, 10000); // mark server as down
+ } else if(resolveret==-1)
t_sstorage->throttle.throttle(d_now.tv_sec, make_tuple(*remoteIP, qname, qtype.getCode()), 60, 100); // unreachable, 1 minute or 100 queries
else
t_sstorage->throttle.throttle(d_now.tv_sec, make_tuple(*remoteIP, qname, qtype.getCode()), 10, 5); // timeout
continue;
}
+ if(s_serverdownmaxfails > 0)
+ t_sstorage->fails.clear(*remoteIP);
+
break; // this IP address worked!
wasLame:; // well, it didn't
LOG(prefix<<qname<<": status=NS "<<*tns<<" ("<< remoteIP->toString() <<") is lame for '"<<auth<<"', trying sibling IP or NS"<<endl);
bool d_needinit;
};
+/** Keeps an unsigned long tally per key of type Thing (Thing must be usable as a map key).
+    Keys that were never counted, were cleared, or were decremented back to zero
+    have no entry and read as 0. */
+template<class Thing> class Counters : public boost::noncopyable
+{
+public:
+  Counters()
+  {
+  }
+
+  //! Returns the current tally for t, or 0 when t has no entry. Never creates an entry.
+  unsigned long value(const Thing& t) const
+  {
+    typename cont_t::const_iterator i=d_cont.find(t);
+
+    if(i==d_cont.end()) {
+      return 0;
+    }
+    return i->second;
+  }
+
+  //! Adds one to the tally for t (creating it at 1 if absent) and returns the new tally.
+  unsigned long incr(const Thing& t)
+  {
+    typename cont_t::iterator i=d_cont.find(t);
+
+    if(i==d_cont.end()) {
+      d_cont[t]=1;
+      return 1;
+    }
+
+    // saturate at the maximum instead of wrapping around to 0
+    if (i->second < std::numeric_limits<unsigned long>::max())
+      i->second++;
+    return i->second;
+  }
+
+  //! Subtracts one from the tally for t and returns the new tally; the entry is dropped when it reaches 0.
+  //! Returns 0 without touching the container when t has no entry.
+  unsigned long decr(const Thing& t)
+  {
+    typename cont_t::iterator i=d_cont.find(t);
+
+    if(i==d_cont.end()) {
+      return 0; // unknown key: nothing to decrement, and end() must not be dereferenced
+    }
+
+    // stored tallies are always >= 1, so the pre-decrement cannot underflow
+    if(--i->second == 0) {
+      d_cont.erase(i);
+      return 0;
+    }
+    return i->second;
+  }
+
+  //! Forgets t entirely, so its tally reads as 0 again. No-op when t has no entry.
+  void clear(const Thing& t)
+  {
+    typename cont_t::iterator i=d_cont.find(t);
+
+    if(i!=d_cont.end()) {
+      d_cont.erase(i);
+    }
+  }
+
+private:
+  typedef map<Thing,unsigned long> cont_t;
+  cont_t d_cont; //!< only holds keys whose tally is non-zero
+};
+
class SyncRes : public boost::noncopyable
{
typedef Throttle<tuple<ComboAddress,string,uint16_t> > throttle_t;
+
+ typedef Counters<ComboAddress> fails_t;
struct timeval d_now;
static unsigned int s_maxnegttl;
static unsigned int s_maxcachettl;
static unsigned int s_packetcachettl;
static unsigned int s_packetcacheservfailttl;
+ static unsigned int s_serverdownmaxfails;
+ static unsigned int s_serverdownthrottletime;
static bool s_nopacketcache;
static string s_serverID;
nsspeeds_t nsSpeeds;
ednsstatus_t ednsstatus;
throttle_t throttle;
+ fails_t fails;
domainmap_t* domainmap;
};