From 8b92100f1f563aed5e6567b5257b5520bcb955a8 Mon Sep 17 00:00:00 2001
From: Remi Gacogne <rgacogne-github@coredump.fr>
Date: Tue, 1 Dec 2015 14:24:26 +0100
Subject: [PATCH] Gracefully handle a reused downstream TCP connection dying on
 us

In dnsdist, we try to reuse TCP connections to downstream servers
as much as possible. However, when sending the size of a new
query, we didn't properly handle a connection being closed by the
downstream server.

Turns out, writing tests actually helps find bugs. Who
would have thought?
---
 pdns/dnsdist-tcp.cc | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/pdns/dnsdist-tcp.cc b/pdns/dnsdist-tcp.cc
index 0508deaa7..739ad5200 100644
--- a/pdns/dnsdist-tcp.cc
+++ b/pdns/dnsdist-tcp.cc
@@ -268,9 +268,18 @@ void* tcpClientThread(int pipefd)
           downstream_failures++;
           goto retry;
         }
-      
-        writen2WithTimeout(dsock, query, qlen, ds->tcpSendTimeout);
-      
+
+        try {
+          writen2WithTimeout(dsock, query, qlen, ds->tcpSendTimeout);
+        }
+        catch(const runtime_error& e) {
+          vinfolog("Downstream connection to %s died on us, getting a new one!", ds->getName());
+          close(dsock);
+          sockets[ds->remote]=dsock=setupTCPDownstream(ds->remote);
+          downstream_failures++;
+          goto retry;
+        }
+
         if(!getNonBlockingMsgLen(dsock, &rlen, ds->tcpRecvTimeout)) {
 	  vinfolog("Downstream connection to %s died on us phase 2, getting a new one!", ds->getName());
           close(dsock);
-- 
2.40.0