granicus.if.org Git - pdns/commitdiff
Gracefully handle a reused downstream TCP connection dying on us
author: Remi Gacogne <rgacogne-github@coredump.fr>
Tue, 1 Dec 2015 13:24:26 +0000 (14:24 +0100)
committer: Remi Gacogne <rgacogne-github@coredump.fr>
Tue, 1 Dec 2015 13:30:30 +0000 (14:30 +0100)
In dnsdist, we try to reuse TCP connections to downstream servers
as much as possible. However, when sending the size of a new
query, we didn't properly handle the connection having been closed
by the downstream server.

Turns out, writing tests actually helps find bugs. Who
would have thought?

pdns/dnsdist-tcp.cc

index 0508deaa74d9a4853b6f399b6f9a2e64ab580287..739ad5200cbae08939e694990d7b0258cf9e38d4 100644 (file)
@@ -268,9 +268,18 @@ void* tcpClientThread(int pipefd)
           downstream_failures++;
           goto retry;
         }
-      
-        writen2WithTimeout(dsock, query, qlen, ds->tcpSendTimeout);
-      
+
+        try {
+          writen2WithTimeout(dsock, query, qlen, ds->tcpSendTimeout);
+        }
+        catch(const runtime_error& e) {
+          vinfolog("Downstream connection to %s died on us, getting a new one!", ds->getName());
+          close(dsock);
+          sockets[ds->remote]=dsock=setupTCPDownstream(ds->remote);
+          downstream_failures++;
+          goto retry;
+        }
+
         if(!getNonBlockingMsgLen(dsock, &rlen, ds->tcpRecvTimeout)) {
          vinfolog("Downstream connection to %s died on us phase 2, getting a new one!", ds->getName());
           close(dsock);