From: Pieter Lexis Date: Fri, 27 Jan 2017 15:57:41 +0000 (+0100) Subject: Auth: Incremental backoff for failed slave checks X-Git-Tag: rec-4.1.0-alpha1~260^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b239dfbac44fd35053b5760e64686445e8210569;p=pdns Auth: Incremental backoff for failed slave checks Closes #349 Closes #602 --- diff --git a/docs/markdown/authoritative/modes-of-operation.md b/docs/markdown/authoritative/modes-of-operation.md index f48e37afa..5a9a88136 100644 --- a/docs/markdown/authoritative/modes-of-operation.md +++ b/docs/markdown/authoritative/modes-of-operation.md @@ -65,6 +65,10 @@ is higher, the domain is retrieved and inserted into the database. In any case, after the check the domain is declared 'fresh', and will only be checked again after '**refresh**' seconds have passed. +When the freshness of a domain cannot be checked, e.g. because the master is offline, PowerDNS will retry the domain after [`slave-cycle-interval`](settings.md#slave-cycle-interval) seconds. +Every time the domain fails its freshness check, PowerDNS will hold back on checking the domain for `amount of failures * slave-cycle-interval` seconds, with a maximum of [`soa-retry-default`](settings.md#soa-retry-default) seconds between checks. +With default settings, this means that PowerDNS will back off for 1, then 2, then 3 etc. minutes, to a maximum of 60 minutes between checks. + **Warning**: Slave support is OFF by default, turn it on by adding [`slave`](settings.md#slave) to the configuration. **Note**: When running PowerDNS via the provided systemd service file, [`ProtectSystem`](http://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectSystem=) is set to `full`, this means PowerDNS is unable to write to e.g. `/etc` and `/home`, possibly being unable to write AXFR's zones. 
diff --git a/pdns/communicator.hh b/pdns/communicator.hh index 8171ec4f6..d57c2ada4 100644 --- a/pdns/communicator.hh +++ b/pdns/communicator.hh @@ -217,6 +217,11 @@ private: bool d_masterschanged, d_slaveschanged; bool d_preventSelfNotification; + // Used to keep some state on domains that failed their freshness checks. + // uint64_t == counter of the number of failures (increased by 1 every consecutive slave-cycle-interval that the domain fails) + // time_t == wait at least until this time before attempting a new check + map > d_failedSlaveRefresh; + struct RemoveSentinel { explicit RemoveSentinel(const DNSName& dn, CommunicatorClass* cc) : d_dn(dn), d_cc(cc) diff --git a/pdns/slavecommunicator.cc b/pdns/slavecommunicator.cc index 0df99b075..b95b2c063 100644 --- a/pdns/slavecommunicator.cc +++ b/pdns/slavecommunicator.cc @@ -748,8 +748,13 @@ void CommunicatorClass::slaveRefresh(PacketHandler *P) { Lock l(&d_lock); domains_by_name_t& nameindex=boost::multi_index::get(d_suckdomains); + time_t now = time(0); for(DomainInfo& di : rdomains) { + const auto failed = d_failedSlaveRefresh.find(di.zone); + if (failed != d_failedSlaveRefresh.end() && now < failed->second.second ) + // If the domain has failed before and the time before the next check has not expired, skip this domain + continue; std::vector localaddr; SuckRequest sr; sr.domain=di.zone; @@ -828,6 +833,7 @@ void CommunicatorClass::slaveRefresh(PacketHandler *P) L<