From 8b92100f1f563aed5e6567b5257b5520bcb955a8 Mon Sep 17 00:00:00 2001 From: Remi Gacogne Date: Tue, 1 Dec 2015 14:24:26 +0100 Subject: [PATCH] Gracefully handle a reused downstream TCP connection dying on us In dnsdist, we try to reuse TCP connections to downstream servers as much as possible. However, when sending the size of a new query, we didn't properly handle a connection being closed by the downstream server. Turns out, writing tests actually helps find bugs. Who would have thought? --- pdns/dnsdist-tcp.cc | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pdns/dnsdist-tcp.cc b/pdns/dnsdist-tcp.cc index 0508deaa7..739ad5200 100644 --- a/pdns/dnsdist-tcp.cc +++ b/pdns/dnsdist-tcp.cc @@ -268,9 +268,18 @@ void* tcpClientThread(int pipefd) downstream_failures++; goto retry; } - - writen2WithTimeout(dsock, query, qlen, ds->tcpSendTimeout); - + + try { + writen2WithTimeout(dsock, query, qlen, ds->tcpSendTimeout); + } + catch(const runtime_error& e) { + vinfolog("Downstream connection to %s died on us, getting a new one!", ds->getName()); + close(dsock); + sockets[ds->remote]=dsock=setupTCPDownstream(ds->remote); + downstream_failures++; + goto retry; + } + if(!getNonBlockingMsgLen(dsock, &rlen, ds->tcpRecvTimeout)) { vinfolog("Downstream connection to %s died on us phase 2, getting a new one!", ds->getName()); close(dsock); -- 2.40.0