]> granicus.if.org Git - libevent/commitdiff
evdns: Implement dns requests via tcp
author ayuseleznev <ayuseleznev@iponweb.net>
Thu, 21 May 2020 09:46:20 +0000 (12:46 +0300)
committer ayuseleznev <ayuseleznev@iponweb.net>
Thu, 21 May 2020 09:46:20 +0000 (12:46 +0300)
evdns.c
include/event2/dns.h
test/regress_dns.c
test/regress_http.c
test/regress_testutils.c
test/regress_testutils.h

diff --git a/evdns.c b/evdns.c
index a5b31a3c0017ae381a019e13b05e9f7eed2b6590..c30c518864ad9b0e50102243d098a760c17d6b92 100644 (file)
--- a/evdns.c
+++ b/evdns.c
 #include <shlobj.h>
 #endif
 
+#include "event2/buffer.h"
+#include "event2/bufferevent.h"
 #include "event2/dns.h"
 #include "event2/dns_struct.h"
 #include "event2/dns_compat.h"
 #include "event2/util.h"
 #include "event2/event.h"
 #include "event2/event_struct.h"
+#include "event2/listener.h"
 #include "event2/thread.h"
 
 #include "defer-internal.h"
 #include <stdio.h>
 
 #undef MIN
+#undef MAX
 #define MIN(a,b) ((a)<(b)?(a):(b))
+#define MAX(a,b) ((a)>(b)?(a):(b))
 
 #define ASSERT_VALID_REQUEST(req) \
        EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))
 
 #define CLASS_INET     EVDNS_CLASS_INET
 
+/* Timeout in seconds for idle TCP connections that server keeps alive. */
+#define SERVER_IDLE_CONN_TIMEOUT 10
+/* Timeout in seconds for idle TCP connections that client keeps alive. */
+#define CLIENT_IDLE_CONN_TIMEOUT 5
+/* Default maximum number of simultaneous TCP client connections that DNS server can hold. */
+#define MAX_CLIENT_CONNECTIONS 10
+
 /* Persistent handle.  We keep this separate from 'struct request' since we
  * need some object to last for as long as an evdns_request is outstanding so
  * that it can be canceled, whereas a search request can lead to multiple
@@ -167,10 +179,12 @@ struct evdns_request {
        struct search_state *search_state;
        char *search_origname;  /* needs to be free()ed */
        int search_flags;
+       u16 tcp_flags;
 };
 
 struct request {
        u8 *request;  /* the dns packet data */
+       u16 request_size; /* size of memory block stored in request field */
        u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
        unsigned int request_len;
        int reissue_count;
@@ -200,23 +214,38 @@ struct request {
 struct reply {
        unsigned int type;
        unsigned int have_answer : 1;
+       u32 rr_count;
        union {
-               struct {
-                       u32 addrcount;
-                       u32 addresses[MAX_V4_ADDRS];
-               } a;
-               struct {
-                       u32 addrcount;
-                       struct in6_addr addresses[MAX_V6_ADDRS];
-               } aaaa;
-               struct {
-                       char name[HOST_NAME_MAX];
-               } ptr;
+               u32 *a;
+               struct in6_addr *aaaa;
+               char *ptr_name;
+               void *raw;
        } data;
 };
 
+/* Lifecycle state stored in struct tcp_connection. */
+enum tcp_state {
+       /* Initial state; also restored by evdns_tcp_disconnect(). */
+       TS_DISCONNECTED,
+       /* NOTE(review): not assigned in the visible part of this change;
+        * presumably used while an outgoing connect is in flight - confirm. */
+       TS_CONNECTING,
+       /* Set on accepted server-side connections; required by tcp_read_message(). */
+       TS_CONNECTED
+};
+
+/* State of a single DNS-over-TCP stream. */
+struct tcp_connection {
+       /* Bufferevent carrying the stream; freed and reset to NULL by
+        * evdns_tcp_disconnect(). */
+       struct bufferevent *bev;
+       enum tcp_state state;
+       /* Length (host byte order) of the message currently being received,
+        * taken from the 2-byte prefix; 0 means no prefix has been read yet. */
+       u16 awaiting_packet_size;
+};
+
+struct evdns_server_port;
+
+/* A TCP client accepted by a DNS server port. */
+struct client_tcp_connection {
+       /* Link in the owning port's client_connections list. */
+       LIST_ENTRY(client_tcp_connection) next;
+       struct tcp_connection connection;
+       /* Port that accepted this client; set by evdns_add_tcp_client(). */
+       struct evdns_server_port *port;
+};
+
 struct nameserver {
        evutil_socket_t socket;  /* a connected UDP socket */
+       struct tcp_connection *connection; /* intended for TCP support */
        struct sockaddr_storage address;
        ev_socklen_t addrlen;
        int failed_times;  /* number of times which we have given this server a chance */
@@ -240,8 +269,7 @@ struct nameserver {
 };
 
 
-/* Represents a local port where we're listening for DNS requests. Right now, */
-/* only UDP is supported. */
+/* Represents a local port where we're listening for DNS requests. */
 struct evdns_server_port {
        evutil_socket_t socket; /* socket we use to read queries and write replies. */
        int refcnt; /* reference count. */
@@ -254,6 +282,13 @@ struct evdns_server_port {
        struct server_request *pending_replies;
        struct event_base *event_base;
 
+       /* Structures for tcp support */
+       struct evconnlistener *listener;
+       LIST_HEAD(client_list, client_tcp_connection) client_connections;
+       unsigned client_connections_count;
+       unsigned max_client_connections;
+       struct timeval tcp_idle_timeout;
+
 #ifndef EVENT__DISABLE_THREAD_SUPPORT
        void *lock;
 #endif
@@ -282,7 +317,8 @@ struct server_request {
 
        u16 trans_id; /* Transaction id. */
        struct evdns_server_port *port; /* Which port received this request on? */
-       struct sockaddr_storage addr; /* Where to send the response */
+       struct client_tcp_connection *client; /* Equal to NULL in case of UDP connection. */
+       struct sockaddr_storage addr; /* Where to send the response in case of UDP. Equal to NULL in case of TCP connection.*/
        ev_socklen_t addrlen; /* length of addr */
 
        int n_answer; /* how many answer RRs have been set? */
@@ -340,6 +376,12 @@ struct evdns_base {
         * probing to see if it has returned?  */
        struct timeval global_nameserver_probe_initial_timeout;
 
+       /* Combination of DNS_QUERY_USEVC, DNS_QUERY_IGNTC flags
+        * to control requests via TCP. */
+       u16 global_tcp_flags;
+       /* Idle timeout for outgoing TCP connections. */
+       struct timeval global_tcp_idle_timeout;
+
        /** Port to bind to for outgoing DNS packets. */
        struct sockaddr_storage global_outgoing_address;
        /** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
@@ -406,6 +448,7 @@ static struct request *search_request_new(struct evdns_base *base, struct evdns_
 static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
 static u16 transaction_id_pick(struct evdns_base *base);
 static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
+static struct request *request_clone(struct evdns_base *base, struct request* current);
 static void request_submit(struct request *const req);
 
 static int server_request_free(struct server_request *req);
@@ -417,6 +460,9 @@ static int evdns_base_set_option_impl(struct evdns_base *base,
     const char *option, const char *val, int flags);
 static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);
 static void evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg);
+static int evdns_server_request_format_response(struct server_request *req, int err);
+static void incoming_conn_cb(struct evconnlistener *listener, evutil_socket_t fd,
+    struct sockaddr *address, int socklen, void *arg);
 
 static int strtoint(const char *const str);
 
@@ -466,6 +512,107 @@ evdns_log_(int severity, const char *fmt, ...)
 
 #define log evdns_log_
 
+/* Reset *conn to a clean, disconnected state and attach bev to it.
+ * No length prefix is pending after this call. */
+static void
+init_tcp_connection(struct tcp_connection *conn, struct bufferevent *bev)
+{
+       /* Wipe the whole structure first so no field is left stale. */
+       memset(conn, 0, sizeof(struct tcp_connection));
+       conn->awaiting_packet_size = 0;
+       conn->bev = bev;
+       conn->state = TS_DISCONNECTED;
+}
+
+/* Put the connection back into the disconnected state and release its
+ * bufferevent, if it has one. A NULL conn is silently ignored; the
+ * tcp_connection structure itself is not freed. */
+static void
+evdns_tcp_disconnect(struct tcp_connection *conn)
+{
+       struct bufferevent *bev;
+
+       if (conn == NULL)
+               return;
+
+       bev = conn->bev;
+       conn->awaiting_packet_size = 0;
+       conn->state = TS_DISCONNECTED;
+       if (bev == NULL)
+               return;
+
+       /* Drop the callbacks before releasing the bufferevent. */
+       bufferevent_setcb(bev, NULL, NULL, NULL, NULL);
+       bufferevent_free(bev);
+       conn->bev = NULL;
+}
+
+/* Register a freshly accepted TCP client with the DNS server port.
+ *
+ * Returns the new list entry, or NULL when the port already holds
+ * max_client_connections clients or the allocation fails. On failure
+ * bev is left untouched, so the caller still owns it. On success the
+ * port's client counter and reference count are both incremented. */
+static struct client_tcp_connection*
+evdns_add_tcp_client(struct evdns_server_port *port, struct bufferevent *bev)
+{
+       struct client_tcp_connection *entry;
+
+       EVUTIL_ASSERT(port && bev);
+
+       /* Connection limit reached: refuse the client. */
+       if (port->client_connections_count == port->max_client_connections)
+               return NULL;
+
+       entry = mm_calloc(1, sizeof(*entry));
+       if (entry == NULL)
+               return NULL;
+
+       init_tcp_connection(&entry->connection, bev);
+       entry->port = port;
+       LIST_INSERT_HEAD(&port->client_connections, entry, next);
+
+       port->client_connections_count += 1;
+       /* The port must stay alive for as long as any client is attached. */
+       port->refcnt += 1;
+       return entry;
+}
+
+/* Disconnect one TCP client, unlink it from the port's client list and
+ * free it. The port's client counter and reference count are decremented.
+ *
+ * Returns 0 on success, -1 if either argument is NULL. The port itself is
+ * never freed here, even if its reference count drops to zero. */
+static int
+evdns_remove_tcp_client(struct evdns_server_port *port, struct client_tcp_connection *client)
+{
+       if (port == NULL || client == NULL)
+               return -1;
+
+       evdns_tcp_disconnect(&client->connection);
+       LIST_REMOVE(client, next);
+       mm_free(client);
+
+       port->client_connections_count -= 1;
+       port->refcnt -= 1;
+       return 0;
+}
+
+/* Drop every TCP client attached to the port by removing the list head
+ * until the list is empty. */
+static void
+evdns_remove_all_tcp_clients(struct evdns_server_port *port)
+{
+       for (;;) {
+               struct client_tcp_connection *head = LIST_FIRST(&port->client_connections);
+               if (head == NULL)
+                       break;
+               evdns_remove_tcp_client(port, head);
+       }
+}
+
+/* Allocate a tcp_connection for the DNS client side and bind bev to it.
+ * Returns NULL if bev is NULL or the allocation fails. */
+static struct tcp_connection *
+new_tcp_connecton(struct bufferevent *bev)
+{
+       struct tcp_connection *conn = NULL;
+
+       if (bev != NULL)
+               conn = mm_calloc(1, sizeof(struct tcp_connection));
+       if (conn != NULL)
+               init_tcp_connection(conn, bev);
+       return conn;
+}
+
+/* Tear down a DNS-client TCP connection: release its bufferevent and then
+ * free the structure itself. Passing NULL is a no-op. */
+static void
+disconnect_and_free_connection(struct tcp_connection *conn)
+{
+       if (conn != NULL) {
+               evdns_tcp_disconnect(conn);
+               mm_free(conn);
+       }
+}
+
 /* This walks the list of inflight requests to find the */
 /* one with a matching transaction id. Returns NULL on */
 /* failure */
@@ -585,6 +732,9 @@ nameserver_failed(struct nameserver *const ns, const char *msg) {
        ns->state = 0;
        ns->failed_times = 1;
 
+       disconnect_and_free_connection(ns->connection);
+       ns->connection = NULL;
+
        if (evtimer_add(&ns->timeout_event,
                &base->global_nameserver_probe_initial_timeout) < 0) {
                log(EVDNS_LOG_WARN,
@@ -795,15 +945,15 @@ reply_run_callback(struct event_callback *d, void *user_pointer)
        case TYPE_A:
                if (cb->have_reply)
                        cb->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
-                           cb->reply.data.a.addrcount, cb->ttl,
-                           cb->reply.data.a.addresses,
+                           cb->reply.rr_count, cb->ttl,
+                           cb->reply.data.a,
                            user_pointer);
                else
                        cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
                break;
        case TYPE_PTR:
                if (cb->have_reply) {
-                       char *name = cb->reply.data.ptr.name;
+                       char *name = cb->reply.data.ptr_name;
                        cb->user_callback(DNS_ERR_NONE, DNS_PTR, 1, cb->ttl,
                            &name, user_pointer);
                } else {
@@ -813,8 +963,8 @@ reply_run_callback(struct event_callback *d, void *user_pointer)
        case TYPE_AAAA:
                if (cb->have_reply)
                        cb->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
-                           cb->reply.data.aaaa.addrcount, cb->ttl,
-                           cb->reply.data.aaaa.addresses,
+                           cb->reply.rr_count, cb->ttl,
+                           cb->reply.data.aaaa,
                            user_pointer);
                else
                        cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
@@ -827,6 +977,10 @@ reply_run_callback(struct event_callback *d, void *user_pointer)
                mm_free(cb->handle);
        }
 
+       if (cb->reply.data.raw) {
+               mm_free(cb->reply.data.raw);
+       }
+
        mm_free(cb);
 }
 
@@ -850,6 +1004,8 @@ reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct repl
        if (reply) {
                d->have_reply = 1;
                memcpy(&d->reply, reply, sizeof(struct reply));
+               /* We've taken ownership of the data. */
+               reply->data.raw = NULL;
        }
 
        if (req->handle) {
@@ -867,6 +1023,21 @@ reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct repl
                &d->deferred);
 }
 
+/* Replace the handle's current request with a clone of it and submit the
+ * clone. reply_handle() calls this after setting DNS_QUERY_USEVC on the
+ * handle when a truncated reply arrives.
+ *
+ * Returns 0 on success, 1 if the request could not be cloned (the original
+ * request is then left in place). */
+static int
+client_retransmit_through_tcp(struct evdns_request *handle)
+{
+       struct request *old_req = handle->current_req;
+       struct evdns_base *base = old_req->base;
+       struct request *clone = request_clone(base, old_req);
+       ASSERT_LOCKED(base);
+       if (clone == NULL)
+               return 1;
+       /* Retire the original request first, then point the handle at the clone. */
+       request_finished(old_req, &REQ_HEAD(old_req->base, old_req->trans_id), 0);
+       handle->current_req = clone;
+       clone->handle = handle;
+       request_submit(clone);
+       return 0;
+}
 
 #define _QR_MASK    0x8000U
 #define _OP_MASK    0x7800U
@@ -885,6 +1056,7 @@ static void
 reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
        int error;
        char addrbuf[128];
+       int retransmit_via_tcp = 0;
        static const int error_codes[] = {
                DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
                DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
@@ -897,6 +1069,7 @@ reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply)
                /* there was an error */
                if (flags & _TC_MASK) {
                        error = DNS_ERR_TRUNCATED;
+                       retransmit_via_tcp = (req->handle->tcp_flags & (DNS_QUERY_IGNTC | DNS_QUERY_USEVC)) == 0;
                } else if (flags & _RCODE_MASK) {
                        u16 error_code = (flags & _RCODE_MASK) - 1;
                        if (error_code > 4) {
@@ -946,6 +1119,14 @@ reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply)
                        nameserver_up(req->ns);
                }
 
+               if (retransmit_via_tcp) {
+                       log(EVDNS_LOG_DEBUG, "Recieved truncated reply(flags 0x%x, transanc ID: %d). Retransmiting via TCP.",
+                               req->handle->tcp_flags, req->trans_id);
+                       req->handle->tcp_flags |= DNS_QUERY_USEVC;
+                       client_retransmit_through_tcp(req->handle);
+                       return;
+               }
+
                if (req->handle->search_state &&
                    req->request_type != TYPE_PTR) {
                        /* if we have a list of domains to search in,
@@ -1031,7 +1212,8 @@ name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
 
 /* parses a raw request from a nameserver */
 static int
-reply_parse(struct evdns_base *base, u8 *packet, int length) {
+reply_parse(struct evdns_base *base, u8 *packet, int length)
+{
        int j = 0, k = 0;  /* index into packet */
        u16 t_;  /* used by the macros */
        u32 t32_;  /* used by the macros */
@@ -1043,7 +1225,7 @@ reply_parse(struct evdns_base *base, u8 *packet, int length) {
        u32 ttl, ttl_r = 0xffffffff;
        struct reply reply;
        struct request *req = NULL;
-       unsigned int i;
+       unsigned int i, buf_size;
 
        ASSERT_LOCKED(base);
 
@@ -1109,6 +1291,13 @@ reply_parse(struct evdns_base *base, u8 *packet, int length) {
        if (!name_matches)
                goto err;
 
+       /* We can allocate less for the reply data, but to do it we'll have
+        * to parse the response. To simplify things let's just allocate
+        * a little bit more to avoid complex evaluations.
+        */
+       buf_size = MAX(length - j, HOST_NAME_MAX);
+       reply.data.raw = mm_malloc(buf_size);
+
        /* now we have the answer section which looks like
         * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
         */
@@ -1123,30 +1312,28 @@ reply_parse(struct evdns_base *base, u8 *packet, int length) {
                GET16(datalength);
 
                if (type == TYPE_A && class == CLASS_INET) {
-                       int addrcount, addrtocopy;
+                       int addrcount;
                        if (req->request_type != TYPE_A) {
                                j += datalength; continue;
                        }
                        if ((datalength & 3) != 0) /* not an even number of As. */
                            goto err;
                        addrcount = datalength >> 2;
-                       addrtocopy = MIN(MAX_V4_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
 
                        ttl_r = MIN(ttl_r, ttl);
                        /* we only bother with the first four addresses. */
-                       if (j + 4*addrtocopy > length) goto err;
-                       memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
-                                  packet + j, 4*addrtocopy);
-                       j += 4*addrtocopy;
-                       reply.data.a.addrcount += addrtocopy;
+                       if (j + 4*addrcount > length) goto err;
+                       memcpy(&reply.data.a[reply.rr_count],
+                                  packet + j, 4*addrcount);
+                       j += 4*addrcount;
+                       reply.rr_count += addrcount;
                        reply.have_answer = 1;
-                       if (reply.data.a.addrcount == MAX_V4_ADDRS) break;
                } else if (type == TYPE_PTR && class == CLASS_INET) {
                        if (req->request_type != TYPE_PTR) {
                                j += datalength; continue;
                        }
-                       if (name_parse(packet, length, &j, reply.data.ptr.name,
-                                                  sizeof(reply.data.ptr.name))<0)
+                       if (name_parse(packet, length, &j, reply.data.ptr_name,
+                                                  buf_size)<0)
                                goto err;
                        ttl_r = MIN(ttl_r, ttl);
                        reply.have_answer = 1;
@@ -1161,24 +1348,22 @@ reply_parse(struct evdns_base *base, u8 *packet, int length) {
                                goto err;
                        *req->put_cname_in_ptr = mm_strdup(cname);
                } else if (type == TYPE_AAAA && class == CLASS_INET) {
-                       int addrcount, addrtocopy;
+                       int addrcount;
                        if (req->request_type != TYPE_AAAA) {
                                j += datalength; continue;
                        }
                        if ((datalength & 15) != 0) /* not an even number of AAAAs. */
                                goto err;
                        addrcount = datalength >> 4;  /* each address is 16 bytes long */
-                       addrtocopy = MIN(MAX_V6_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
                        ttl_r = MIN(ttl_r, ttl);
 
                        /* we only bother with the first four addresses. */
-                       if (j + 16*addrtocopy > length) goto err;
-                       memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
-                                  packet + j, 16*addrtocopy);
-                       reply.data.aaaa.addrcount += addrtocopy;
-                       j += 16*addrtocopy;
+                       if (j + 16*addrcount > length) goto err;
+                       memcpy(&reply.data.aaaa[reply.rr_count],
+                                  packet + j, 16*addrcount);
+                       reply.rr_count += addrcount;
+                       j += 16*addrcount;
                        reply.have_answer = 1;
-                       if (reply.data.aaaa.addrcount == MAX_V6_ADDRS) break;
                } else {
                        /* skip over any other type of resource */
                        j += datalength;
@@ -1219,10 +1404,14 @@ reply_parse(struct evdns_base *base, u8 *packet, int length) {
                ttl_r = 0;
 
        reply_handle(req, flags, ttl_r, &reply);
+       if (reply.data.raw)
+               mm_free(reply.data.raw);
        return 0;
  err:
        if (req)
                reply_handle(req, flags, 0, NULL);
+       if (reply.data.raw)
+               mm_free(reply.data.raw);
        return -1;
 }
 
@@ -1230,7 +1419,8 @@ reply_parse(struct evdns_base *base, u8 *packet, int length) {
 /* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
 /* callback. */
 static int
-request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, ev_socklen_t addrlen)
+request_parse(u8 *packet, int length, struct evdns_server_port *port,
+                               struct sockaddr *addr, ev_socklen_t addrlen, struct client_tcp_connection *client)
 {
        int j = 0;      /* index into packet */
        u16 t_;  /* used by the macros */
@@ -1261,9 +1451,12 @@ request_parse(u8 *packet, int length, struct evdns_server_port *port, struct soc
        memset(server_req, 0, sizeof(struct server_request));
 
        server_req->trans_id = trans_id;
-       memcpy(&server_req->addr, addr, addrlen);
-       server_req->addrlen = addrlen;
+       if (addr) {
+               memcpy(&server_req->addr, addr, addrlen);
+               server_req->addrlen = addrlen;
+       }
 
+       server_req->client = client;
        server_req->base.flags = flags;
        server_req->base.nquestions = 0;
        server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
@@ -1420,7 +1613,7 @@ nameserver_read(struct nameserver *ns) {
 /* Read a packet from a DNS client on a server port s, parse it, and */
 /* act accordingly. */
 static void
-server_port_read(struct evdns_server_port *s) {
+server_udp_port_read(struct evdns_server_port *s) {
        u8 packet[1500];
        struct sockaddr_storage addr;
        ev_socklen_t addrlen;
@@ -1440,8 +1633,36 @@ server_port_read(struct evdns_server_port *s) {
                            evutil_socket_error_to_string(err), err);
                        return;
                }
-               request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
+               request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen, NULL);
+       }
+}
+
+/* Send the already formatted response of req back to whoever asked.
+ *
+ * If req->client is set the reply goes over that client's bufferevent,
+ * preceded by the response length as a 2-byte value in network byte order.
+ * Otherwise it is sent with sendto() on port->socket to req->addr.
+ *
+ * Returns the response length on a successful TCP write, the result of
+ * sendto() for UDP, and -1 if a bufferevent write failed; in that last case
+ * the TCP client is removed from the port. */
+static int
+server_send_response(struct evdns_server_port *port, struct server_request *req)
+{
+       u16 packet_size = 0;
+       struct bufferevent *bev = NULL;
+       if (req->client) {
+               bev = req->client->connection.bev;
+               EVUTIL_ASSERT(bev);
+               /* The length prefix is only 16 bits wide. */
+               EVUTIL_ASSERT(req->response_len <= 65535);
+               packet_size = htons((u16)req->response_len);
+               if (bufferevent_write(bev, &packet_size, sizeof(packet_size)))
+                       goto beferevent_error;
+               if (bufferevent_write(bev, (void*)req->response, req->response_len))
+                       goto beferevent_error;
+               return (int)req->response_len;
+       } else {
+               int r = sendto(port->socket, req->response, (int)req->response_len, 0,
+                                       (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
+               return r;
        }
+
+beferevent_error:
+       log(EVDNS_LOG_WARN, "Failed to send reply to request %p for client %p", req, req->client);
+       /* disconnect if we got bufferevent error */
+       /* NOTE(review): evdns_remove_tcp_client() frees the client, but
+        * req->client is not cleared here and keeps pointing at it - confirm
+        * that no caller dereferences req->client after a -1 return. */
+       evdns_remove_tcp_client(port, req->client);
+       return -1;
 }
 
 /* Try to write all pending replies on a given DNS server port. */
@@ -1451,8 +1672,7 @@ server_port_flush(struct evdns_server_port *port)
        struct server_request *req = port->pending_replies;
        ASSERT_LOCKED(port);
        while (req) {
-               int r = sendto(port->socket, req->response, (int)req->response_len, 0,
-                          (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
+               int r = server_send_response(port, req);
                if (r < 0) {
                        int err = evutil_socket_geterror(port->socket);
                        if (EVUTIL_ERR_RW_RETRIABLE(err))
@@ -1536,7 +1756,7 @@ server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
                server_port_flush(port);
        }
        if (events & EV_READ) {
-               server_port_read(port);
+               server_udp_port_read(port);
        }
        EVDNS_UNLOCK(port);
 }
@@ -1738,7 +1958,11 @@ evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket,
        port->user_data = user_data;
        port->pending_replies = NULL;
        port->event_base = base;
-
+       port->max_client_connections = MAX_CLIENT_CONNECTIONS;
+       port->tcp_idle_timeout.tv_sec = SERVER_IDLE_CONN_TIMEOUT;
+       port->tcp_idle_timeout.tv_usec = 0;
+       port->client_connections_count = 0;
+       LIST_INIT(&port->client_connections);
        event_assign(&port->event, port->event_base,
                                 port->socket, EV_READ | EV_PERSIST,
                                 server_port_ready_callback, port);
@@ -1750,6 +1974,176 @@ evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket,
        return port;
 }
 
+/* exported function */
+/* Create a DNS server port that serves queries arriving over TCP
+ * connections accepted by an existing listener.
+ *
+ * base       event base used for the bufferevents of accepted clients
+ * listener   connection listener; its callback is replaced with
+ *            incoming_conn_cb and its argument with the new port
+ * flags      must be 0, no flags are implemented yet
+ * cb         user callback stored in the port
+ * user_data  opaque pointer stored alongside cb
+ *
+ * Returns the new port (reference count 1, no UDP socket), or NULL if
+ * listener is NULL, flags is non-zero or the allocation fails. */
+struct evdns_server_port *
+evdns_add_server_port_with_listener(struct event_base *base, struct evconnlistener *listener, int flags, evdns_request_callback_fn_type cb, void *user_data)
+{
+       struct evdns_server_port *port;
+       if (!listener)
+               return NULL;
+       if (flags)
+               return NULL; /* flags not yet implemented */
+
+       if (!(port = mm_calloc(1, sizeof(struct evdns_server_port))))
+               return NULL;
+       port->socket = -1; /* TCP-only port: there is no UDP socket */
+       port->refcnt = 1;
+       port->choked = 0;
+       port->closing = 0;
+       port->user_callback = cb;
+       port->user_data = user_data;
+       port->pending_replies = NULL;
+       port->event_base = base;
+       port->max_client_connections = MAX_CLIENT_CONNECTIONS;
+       /* incoming_conn_cb() hands tcp_idle_timeout to bufferevent_set_timeouts()
+        * for every accepted client. Leaving it calloc-zeroed would mean that
+        * SERVER_IDLE_CONN_TIMEOUT is never applied to TCP clients. */
+       port->tcp_idle_timeout.tv_sec = SERVER_IDLE_CONN_TIMEOUT;
+       port->tcp_idle_timeout.tv_usec = 0;
+       port->client_connections_count = 0;
+       LIST_INIT(&port->client_connections);
+       port->listener = listener;
+       evconnlistener_set_cb(port->listener, incoming_conn_cb, port);
+
+       EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
+       return port;
+}
+
+static void
+server_tcp_event_cb(struct bufferevent *bev, short events, void *ctx);
+
+/* Try to extract one complete length-prefixed DNS message from the input
+ * buffer of conn.
+ *
+ * Each message is preceded by its length as a 2-byte value in network byte
+ * order. Once read, the length is kept in conn->awaiting_packet_size so
+ * that a later call can resume when more data has arrived; this function
+ * never resets it, the caller does so after consuming the message.
+ *
+ * Returns 0 when nothing went wrong. In that case *msg is either left
+ * untouched (message not complete yet) or set to a buffer allocated with
+ * mm_malloc() that the caller must mm_free(), with *msg_len set to its
+ * size. Returns 1 on a zero length prefix, an allocation failure or a
+ * short read. */
+static int
+tcp_read_message(struct tcp_connection *conn, u8 **msg, int *msg_len)
+{
+       struct bufferevent *bev;
+       struct evbuffer *input;
+       u8 *packet = NULL;
+       int r = 0;
+
+       /* Check conn before any of its fields is read. */
+       EVUTIL_ASSERT(conn);
+       EVUTIL_ASSERT(conn->state == TS_CONNECTED);
+       bev = conn->bev;
+       input = bufferevent_get_input(bev);
+
+       /* reading new packet size */
+       if (!conn->awaiting_packet_size) {
+               if (evbuffer_get_length(input) < sizeof(ev_uint16_t))
+                       goto awaiting_next;
+
+               bufferevent_read(bev, (void*)&conn->awaiting_packet_size,
+                       sizeof(conn->awaiting_packet_size));
+               conn->awaiting_packet_size = ntohs(conn->awaiting_packet_size);
+               /* An empty message is invalid; the field is unsigned, so zero
+                * is the only value that has to be rejected. */
+               if (conn->awaiting_packet_size == 0)
+                       goto fail;
+       }
+
+       /* reading new packet content */
+       if (evbuffer_get_length(input) < conn->awaiting_packet_size)
+               goto awaiting_next;
+
+       packet = mm_malloc(conn->awaiting_packet_size);
+       if (!packet)
+               goto fail;
+
+       r = (int)bufferevent_read(bev, (void*)packet, conn->awaiting_packet_size);
+       if (r != conn->awaiting_packet_size) {
+               mm_free (packet);
+               packet = NULL;
+               goto fail;
+       }
+
+       *msg = packet;
+       *msg_len = r;
+awaiting_next:
+       return 0;
+fail:
+       return 1;
+}
+
+/* Bufferevent read callback for an accepted TCP client (ctx is its
+ * client_tcp_connection). Drains every complete message currently buffered
+ * and passes each one to request_parse(); on a read error the client is
+ * removed, and the port is freed if that dropped its last reference. */
+static void
+server_tcp_read_packet_cb(struct bufferevent *bev, void *ctx)
+{
+       u8 *msg = NULL;
+       int msg_len = 0;
+       int rc;
+       struct client_tcp_connection *client = (struct client_tcp_connection *)ctx;
+       struct evdns_server_port *port = client->port;
+       struct tcp_connection *conn = &client->connection;
+       EVUTIL_ASSERT(port && bev);
+       EVDNS_LOCK(port);
+
+       while (1) {
+               if (tcp_read_message(conn, &msg, &msg_len)) {
+                       log(EVDNS_LOG_MSG, "Closing client connection %p due to error", bev);
+                       evdns_remove_tcp_client(port, client);
+                       /* Sample the refcount under the lock; free the port only after unlocking. */
+                       rc = port->refcnt;
+                       EVDNS_UNLOCK(port);
+                       if (!rc)
+                               server_port_free(port);
+                       return;
+               }
+
+               /* Only part of the message was received. */
+               if (!msg)
+                       break;
+
+               request_parse(msg, msg_len, port, NULL, 0, client);
+               mm_free(msg);
+               msg = NULL;
+               /* Message consumed: the next read starts with a new length prefix. */
+               conn->awaiting_packet_size = 0;
+       }
+
+       /* Ask to be called again only once the rest of the pending message, or
+        * else a full 2-byte length prefix, is available. */
+       bufferevent_setwatermark(bev, EV_READ,
+                       conn->awaiting_packet_size ? conn->awaiting_packet_size : sizeof(ev_uint16_t), 0);
+       bufferevent_setcb(bev, server_tcp_read_packet_cb, NULL, server_tcp_event_cb, ctx);
+       EVDNS_UNLOCK(port);
+}
+
+/* Bufferevent event callback for an accepted TCP client (ctx is its
+ * client_tcp_connection). EOF, error and timeout events remove the client.
+ * In every case the port is freed afterwards if its reference count is
+ * zero. */
+static void
+server_tcp_event_cb(struct bufferevent *bev, short events, void *ctx)
+{
+       struct client_tcp_connection *client = ctx;
+       struct evdns_server_port *port = client->port;
+       const short closing_events = BEV_EVENT_EOF | BEV_EVENT_ERROR | BEV_EVENT_TIMEOUT;
+       int refs_left;
+
+       EVUTIL_ASSERT(port && bev);
+       EVDNS_LOCK(port);
+       if ((events & closing_events) != 0) {
+               log(EVDNS_LOG_DEBUG, "Closing connection %p", bev);
+               evdns_remove_tcp_client(port, client);
+       }
+       /* Read the refcount while locked; the port may only be freed unlocked. */
+       refs_left = port->refcnt;
+       EVDNS_UNLOCK(port);
+       if (refs_left == 0)
+               server_port_free(port);
+}
+
+/* Listener callback: wrap a newly accepted socket in a bufferevent and
+ * register it as a TCP client of the DNS server port passed in arg.
+ *
+ * listener, address and socklen are not used. If the bufferevent cannot be
+ * created the socket is closed here; if the port refuses the client (limit
+ * reached or out of memory) the bufferevent is freed, which also closes the
+ * socket because of BEV_OPT_CLOSE_ON_FREE. */
+static void
+incoming_conn_cb(struct evconnlistener *listener, evutil_socket_t fd,
+                                 struct sockaddr *address, int socklen, void *arg)
+{
+       struct evdns_server_port *port = (struct evdns_server_port*)arg;
+       struct bufferevent *bev = bufferevent_socket_new(port->event_base, fd, BEV_OPT_CLOSE_ON_FREE);
+       struct client_tcp_connection *client = NULL;
+       struct tcp_connection *cd = NULL;
+
+       if (!bev) {
+               /* Nothing took ownership of fd, so it has to be closed here or
+                * the accepted socket would leak. */
+               evutil_closesocket(fd);
+               return;
+       }
+       log(EVDNS_LOG_DEBUG, "New incoming client connection %p", (void *)bev);
+
+       bufferevent_set_timeouts(bev, &port->tcp_idle_timeout, &port->tcp_idle_timeout);
+
+       client = evdns_add_tcp_client(port, bev);
+       if (!client) {
+               /* The bufferevent owns fd, so freeing it closes the socket. */
+               bufferevent_setcb(bev, NULL, NULL, NULL, NULL);
+               bufferevent_free(bev);
+               return;
+       }
+       cd = &client->connection;
+
+       cd->state = TS_CONNECTED;
+       /* Do not wake the read callback before a full length prefix is buffered. */
+       bufferevent_setwatermark(bev, EV_READ, sizeof(ev_uint16_t), 0);
+       bufferevent_setcb(bev, server_tcp_read_packet_cb, NULL, server_tcp_event_cb, (void *)client);
+       bufferevent_enable(bev, EV_READ);
+}
+
 struct evdns_server_port *
 evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
 {
@@ -1761,6 +2155,7 @@ void
 evdns_close_server_port(struct evdns_server_port *port)
 {
        EVDNS_LOCK(port);
+       evdns_remove_all_tcp_clients(port);
        if (--port->refcnt == 0) {
                EVDNS_UNLOCK(port);
                server_port_free(port);
@@ -1906,7 +2301,7 @@ evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
 static int
 evdns_server_request_format_response(struct server_request *req, int err)
 {
-       unsigned char buf[1500];
+       unsigned char buf[1024 * 64];
        size_t buf_len = sizeof(buf);
        off_t j = 0, r;
        u16 t_;
@@ -1981,7 +2376,7 @@ evdns_server_request_format_response(struct server_request *req, int err)
                }
        }
 
-       if (j > 512) {
+       if (j > 512 && !req->client) {
 overflow:
                j = 512;
                buf[2] |= 0x02; /* set the truncated bit. */
@@ -2014,9 +2409,8 @@ evdns_server_request_respond(struct evdns_server_request *req_, int err)
                        goto done;
        }
 
-       r = sendto(port->socket, req->response, (int)req->response_len, 0,
-                          (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
-       if (r<0) {
+       r = server_send_response(port, req);
+       if (r < 0 && req->client) {
                int sock_err = evutil_socket_geterror(port->socket);
                if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
                        goto done;
@@ -2075,8 +2469,11 @@ server_request_free_answers(struct server_request *req)
                while (victim) {
                        next = victim->next;
                        mm_free(victim->name);
-                       if (victim->data)
+                       victim->name = NULL;
+                       if (victim->data) {
                                mm_free(victim->data);
+                               victim->data = NULL;
+                       }
                        mm_free(victim);
                        victim = next;
                }
@@ -2091,9 +2488,12 @@ server_request_free(struct server_request *req)
 {
        int i, rc=1, lock=0;
        if (req->base.questions) {
-               for (i = 0; i < req->base.nquestions; ++i)
+               for (i = 0; i < req->base.nquestions; ++i) {
                        mm_free(req->base.questions[i]);
+                       req->base.questions[i] = NULL;
+               }
                mm_free(req->base.questions);
+               req->base.questions = NULL;
        }
 
        if (req->port) {
@@ -2110,6 +2510,7 @@ server_request_free(struct server_request *req)
 
        if (req->response) {
                mm_free(req->response);
+               req->response = NULL;
        }
 
        server_request_free_answers(req);
@@ -2142,8 +2543,15 @@ server_port_free(struct evdns_server_port *port)
                evutil_closesocket(port->socket);
                port->socket = -1;
        }
-       (void) event_del(&port->event);
-       event_debug_unassign(&port->event);
+
+       /* if tcp server */
+       if (port->listener) {
+               evconnlistener_free(port->listener);
+       } else {
+               (void) event_del(&port->event);
+               event_debug_unassign(&port->event);
+       }
+
        EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
        mm_free(port);
 }
@@ -2168,8 +2576,32 @@ evdns_server_request_get_requesting_addr(struct evdns_server_request *req_, stru
        return req->addrlen;
 }
 
-#undef APPEND16
-#undef APPEND32
+/* Walk every list in server->base->req_heads and handle each request that is */
+/* assigned to 'server' and whose handle has DNS_QUERY_USEVC set: */
+/*   - if tx_count has reached global_max_retransmits, schedule the user */
+/*     callback with DNS_ERR_TIMEOUT and finish the request; */
+/*   - otherwise delete its timeout timer and transmit it again. */
+/* Each list is traversed as a ring: the walk stops when it is back at the */
+/* element it started from. */
+/* */
+/* NOTE(review): request_finished() is called on the node being visited and */
+/* req->next is read afterwards. If request_finished() unlinks and/or frees the */
+/* request (TODO confirm), that read and the 'req != started_at' termination */
+/* test are unsafe when the finished request is the current or the first node. */
+static void
+retransmit_all_tcp_requests_for(struct nameserver *server)
+{
+       int i = 0;
+       for (i = 0; i < server->base->n_req_heads; ++i) {
+               struct request *started_at = server->base->req_heads[i];
+               struct request *req = started_at;
+               if (!req)
+                       continue;
+
+               do {
+                       if (req->ns == server && (req->handle->tcp_flags & DNS_QUERY_USEVC)) {
+                               if (req->tx_count >= req->base->global_max_retransmits) {
+                                       log(EVDNS_LOG_DEBUG, "Giving up on request %p; tx_count==%d",
+                                               req, req->tx_count);
+                                       reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
+                                       request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
+                               } else {
+                                       (void) evtimer_del(&req->timeout_event);
+                                       evdns_request_transmit(req);
+                               }
+                       }
+                       req = req->next;
+               } while (req != started_at);
+       }
+}
 
 /* this is a libevent callback function which is called when a request */
 /* has timed out. */
@@ -2194,17 +2626,25 @@ evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
                request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
                nameserver_failed(ns, "request timed out.");
        } else {
-               /* retransmit it */
-               log(EVDNS_LOG_DEBUG, "Retransmitting request %p; tx_count==%d",
-                   arg, req->tx_count);
-               (void) evtimer_del(&req->timeout_event);
-               request_swap_ns(req, nameserver_pick(base));
-               evdns_request_transmit(req);
+               /* The request uses a TCP connection, so tear that connection down. */
+               if (req->handle->tcp_flags & DNS_QUERY_USEVC) {
+                       disconnect_and_free_connection(req->ns->connection);
+                       req->ns->connection = NULL;
 
-               req->ns->timedout++;
-               if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
-                       req->ns->timedout = 0;
-                       nameserver_failed(req->ns, "request timed out.");
+                       /* client can have the only connection to DNS server */
+                       retransmit_all_tcp_requests_for (req->ns);
+               } else {
+                       /* retransmit it */
+                       log(EVDNS_LOG_DEBUG, "Retransmitting request %p; tx_count==%d by udp", arg, req->tx_count);
+                       (void) evtimer_del(&req->timeout_event);
+                       request_swap_ns(req, nameserver_pick(base));
+                       evdns_request_transmit(req);
+
+                       req->ns->timedout++;
+                       if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
+                               req->ns->timedout = 0;
+                               nameserver_failed(req->ns, "request timed out.");
+                       }
                }
        }
 
@@ -2244,6 +2684,141 @@ evdns_request_transmit_to(struct request *req, struct nameserver *server) {
        }
 }
 
+/* Make sure 'server' has a TCP connection that is connected or connecting; */
+/* if it has none, replace any stale one and start a new connect. */
+/* */
+/* return: */
+/*   0 ok (a live connection already exists, or a connect was started) */
+/*   1 temporary failure (setting timeouts or starting the connect failed) */
+/*   2 other failure (no connection object could be created) */
+static int
+evdns_tcp_connect_if_disconnected(struct nameserver *server)
+{
+       struct tcp_connection *conn = server->connection;
+       struct timeval *timeout = &server->base->global_tcp_idle_timeout;
+       if (conn && conn->state != TS_DISCONNECTED && conn->bev != NULL)
+               return 0;
+
+       /* Forget the old connection as soon as it is released, like every other */
+       /* caller of disconnect_and_free_connection() does; otherwise */
+       /* server->connection would keep pointing at it when the allocation */
+       /* below fails and we return early. */
+       disconnect_and_free_connection(conn);
+       server->connection = NULL;
+
+       conn = new_tcp_connecton(bufferevent_socket_new(server->base->event_base, -1, BEV_OPT_CLOSE_ON_FREE));
+       if (!conn)
+               return 2;
+       server->connection = conn;
+
+       /* On the failures below the new connection stays in TS_DISCONNECTED, */
+       /* so the next call replaces it again. */
+       if (bufferevent_set_timeouts(conn->bev, timeout, timeout))
+               return 1;
+
+       EVUTIL_ASSERT(conn->state == TS_DISCONNECTED);
+       if (bufferevent_socket_connect(conn->bev, (struct sockaddr *)&server->address, server->addrlen))
+               return 1;
+
+       conn->state = TS_CONNECTING;
+       log(EVDNS_LOG_DEBUG, "New tcp connection %p created", conn);
+       return 0;
+}
+
+static void
+client_tcp_event_cb(struct bufferevent *bev, short events, void *ctx);
+
+
+/* bufferevent read callback for the client side of a TCP DNS connection. */
+/* Drains every complete message currently buffered, passes each one to */
+/* reply_parse(), and then re-arms the read watermark for the next message. */
+/* */
+/*   bev: the bufferevent of the connection */
+/*   ctx: the struct nameserver that owns the connection */
+static void
+client_tcp_read_packet_cb(struct bufferevent *bev, void *ctx)
+{
+       u8 *msg = NULL;
+       int msg_len = 0;
+       struct nameserver *server = (struct nameserver*)ctx;
+       struct tcp_connection *conn = NULL;
+
+       /* Validate the arguments before touching them, and only look at */
+       /* server->connection while holding the base lock. */
+       EVUTIL_ASSERT(server && bev);
+       EVDNS_LOCK(server->base);
+       conn = server->connection;
+
+       while (1) {
+               /* A non-zero return is treated as fatal for this connection. */
+               if (tcp_read_message(conn, &msg, &msg_len)) {
+                       disconnect_and_free_connection(server->connection);
+                       server->connection = NULL;
+                       EVDNS_UNLOCK(server->base);
+                       return;
+               }
+
+               /* Only part of the message was received. */
+               if (!msg)
+                       break;
+
+               reply_parse(server->base, msg, msg_len);
+               mm_free(msg);
+               msg = NULL;
+               conn->awaiting_packet_size = 0;
+       }
+
+       /* Wait for the rest of a partially received message if its size is */
+       /* already known, otherwise for the next 2-byte length prefix. */
+       bufferevent_setwatermark(bev, EV_READ,
+               conn->awaiting_packet_size ? conn->awaiting_packet_size : sizeof(ev_uint16_t), 0);
+       bufferevent_setcb(bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, ctx);
+       EVDNS_UNLOCK(server->base);
+}
+
+/* bufferevent event callback for the client side of a TCP DNS connection. */
+/* Timeout, EOF and error all tear the connection down; a completed connect */
+/* moves it to TS_CONNECTED and installs the read callback. */
+/* */
+/*   bev:    the bufferevent of the connection */
+/*   events: BEV_EVENT_* flags reported by the bufferevent */
+/*   ctx:    the struct nameserver that owns the connection */
+static void
+client_tcp_event_cb(struct bufferevent *bev, short events, void *ctx) {
+       struct nameserver *server = (struct nameserver*)ctx;
+       struct tcp_connection *conn = NULL;
+
+       /* Check 'server' before dereferencing it, and read the connection */
+       /* pointer only once the base lock is held. */
+       EVUTIL_ASSERT(server);
+       EVDNS_LOCK(server->base);
+       conn = server->connection;
+       EVUTIL_ASSERT(conn && conn->bev == bev && bev);
+
+       log(EVDNS_LOG_DEBUG, "Event %d on connection %p", events, conn);
+
+       if (events & (BEV_EVENT_TIMEOUT | BEV_EVENT_EOF | BEV_EVENT_ERROR)) {
+               disconnect_and_free_connection(server->connection);
+               server->connection = NULL;
+       } else if (events & BEV_EVENT_CONNECTED) {
+               EVUTIL_ASSERT (conn->state == TS_CONNECTING);
+               conn->state = TS_CONNECTED;
+               evutil_make_socket_nonblocking (bufferevent_getfd (bev));
+               bufferevent_setcb (bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, server);
+               /* Do not wake the reader before the 2-byte length prefix is in. */
+               bufferevent_setwatermark (bev, EV_READ, sizeof(ev_uint16_t), 0);
+       }
+       EVDNS_UNLOCK(server->base);
+}
+
+/* Queue a request on the TCP connection of the given server, connecting */
+/* first when there is no usable connection. The DNS packet is preceded by */
+/* its length as a 16-bit value in network byte order. */
+/* */
+/* return: */
+/*   0 ok */
+/*   1 temporary failure */
+/*   2 other failure */
+static int
+evdns_request_transmit_through_tcp(struct request *req, struct nameserver *server) {
+       struct tcp_connection *conn = NULL;
+       uint16_t len_prefix;
+       int rc;
+       ASSERT_LOCKED(req->base);
+       ASSERT_VALID_REQUEST(req);
+
+       rc = evdns_tcp_connect_if_disconnected(server);
+       if (rc != 0)
+               return rc;
+
+       conn = server->connection;
+       bufferevent_setcb(conn->bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, server);
+
+       log(EVDNS_LOG_DEBUG, "Sending request %p via tcp connection %p", req, conn);
+       len_prefix = htons(req->request_len);
+
+       /* Length prefix, payload, enable reading, arm the request timer; */
+       /* the first step that fails aborts the rest. */
+       if (bufferevent_write(conn->bev, &len_prefix, sizeof(len_prefix)) ||
+           bufferevent_write(conn->bev, (void*)req->request, req->request_len) ||
+           bufferevent_enable(conn->bev, EV_READ) ||
+           evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
+               log(EVDNS_LOG_WARN, "Failed to send request %p via tcp connection %p", req, conn);
+               disconnect_and_free_connection(server->connection);
+               server->connection = NULL;
+               return 2;
+       }
+
+       return 0;
+}
+
 /* try to send a request, updating the fields of the request */
 /* as needed */
 /* */
@@ -2273,7 +2848,18 @@ evdns_request_transmit(struct request *req) {
                return 1;
        }
 
-       r = evdns_request_transmit_to(req, req->ns);
+       if (req->handle->tcp_flags & DNS_QUERY_USEVC) {
+               r = evdns_request_transmit_through_tcp (req, req->ns);
+               /*
+               If the connection could not be initiated now, report a temporary problem.
+               We don't mark the name server as choked, because UDP packets may still be
+               transmitted without problems. We will simply retry later. */
+               if (r == 1) {
+                       return r;
+               }
+       } else {
+               r = evdns_request_transmit_to(req, req->ns);
+       }
        switch (r) {
        case 1:
                /* temp failure */
@@ -2424,6 +3010,8 @@ evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
        }
        while (1) {
                struct nameserver *next = server->next;
+               disconnect_and_free_connection(server->connection);
+               server->connection = NULL;
                (void) event_del(&server->event);
                if (evtimer_initialized(&server->timeout_event))
                        (void) evtimer_del(&server->timeout_event);
@@ -2562,6 +3150,7 @@ evdns_nameserver_add_impl_(struct evdns_base *base, const struct sockaddr *addre
        memcpy(&ns->address, address, addrlen);
        ns->addrlen = addrlen;
        ns->state = 1;
+       ns->connection = NULL;
        event_assign(&ns->event, ns->base->event_base, ns->socket,
                                 EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
        if (!base->disable_when_inactive && event_add(&ns->event, NULL) < 0) {
@@ -2805,6 +3394,7 @@ request_new(struct evdns_base *base, struct evdns_request *handle, int type,
        }
 
        memset(req, 0, sizeof(struct request));
+       req->request_size = (u16)(sizeof(struct request) + request_max_len);
        req->base = base;
 
        evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
@@ -2854,6 +3444,37 @@ err1:
        return NULL;
 }
 
+/* Create a copy of 'current' (header and packet data, which share a single */
+/* allocation of current->request_size bytes) that can be issued again. */
+/* The copy gets its own timeout event and a new transaction id, its */
+/* tx_count reset to 0, no list links and no handle. When the inflight limit */
+/* of 'base' is already reached, the copy gets transaction id 0xffff and no */
+/* nameserver. */
+/* */
+/* The caller must hold the lock of 'base'. Returns NULL if allocation fails. */
+static struct request *
+request_clone(struct evdns_base *base, struct request* current)
+{
+       char issuing_now;
+       u16 trans_id;
+       struct request *req;
+
+       /* Validate the arguments before anything dereferences them. */
+       EVUTIL_ASSERT(current && base);
+       ASSERT_LOCKED(base);
+
+       issuing_now =
+           (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
+       trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
+
+       /* the request data is alloced in a single block with the header */
+       req = mm_malloc(current->request_size);
+       if (!req)
+               return NULL;
+       memcpy(req, current, current->request_size);
+
+       /* The copied timeout event belongs to 'current'; set up our own. */
+       evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
+
+       /* request data lives just after the header */
+       req->request = ((u8 *) req) + sizeof(struct request);
+       /* We need to replace transact id */
+       request_trans_id_set(req, trans_id);
+
+       req->tx_count = 0;
+       req->ns = issuing_now ? nameserver_pick(base) : NULL;
+       req->next = req->prev = NULL;
+       req->handle = NULL;
+       log(EVDNS_LOG_DEBUG, "Clone new request TID %d from TID %d", req->trans_id, current->trans_id);
+
+       return req;
+}
+
 static void
 request_submit(struct request *const req) {
        struct evdns_base *base = req->base;
@@ -2921,6 +3542,8 @@ evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
        if (handle == NULL)
                return NULL;
        EVDNS_LOCK(base);
+       handle->tcp_flags = base->global_tcp_flags;
+       handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
        if (flags & DNS_QUERY_NO_SEARCH) {
                req =
                        request_new(base, handle, TYPE_A, name, flags,
@@ -2960,6 +3583,8 @@ evdns_base_resolve_ipv6(struct evdns_base *base,
        if (handle == NULL)
                return NULL;
        EVDNS_LOCK(base);
+       handle->tcp_flags = base->global_tcp_flags;
+       handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
        if (flags & DNS_QUERY_NO_SEARCH) {
                req = request_new(base, handle, TYPE_AAAA, name, flags,
                                  callback, ptr);
@@ -3001,6 +3626,8 @@ evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, in
                return NULL;
        log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
        EVDNS_LOCK(base);
+       handle->tcp_flags = base->global_tcp_flags;
+       handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
        req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
        if (req)
                request_submit(req);
@@ -3041,6 +3668,8 @@ evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *
                return NULL;
        log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
        EVDNS_LOCK(base);
+       handle->tcp_flags = base->global_tcp_flags;
+       handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
        req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
        if (req)
                request_submit(req);
@@ -3481,6 +4110,44 @@ str_matches_option(const char *s1, const char *optionname)
                return 0;
 }
 
+/* exported function */
+/* Set one evdns_server_option on a server port while holding the port lock. */
+/* Both options are accepted only on ports that have a listener (TCP). */
+/* Returns 0 on success, -1 for a rejected or unknown option. */
+int
+evdns_server_port_set_option(struct evdns_server_port *port,
+       enum evdns_server_option option, size_t value)
+{
+       int rc = -1;
+       EVDNS_LOCK(port);
+       switch (option) {
+       case EVDNS_SOPT_TCP_MAX_CLIENTS:
+               if (port->listener) {
+                       port->max_client_connections = value;
+                       log(EVDNS_LOG_DEBUG, "Setting EVDNS_SOPT_TCP_MAX_CLIENTS to %u", port->max_client_connections);
+                       rc = 0;
+               } else {
+                       log(EVDNS_LOG_WARN, "EVDNS_SOPT_TCP_MAX_CLIENTS option can be set only on TCP server");
+               }
+               break;
+       case EVDNS_SOPT_TCP_IDLE_TIMEOUT:
+               if (port->listener) {
+                       /* value is a whole number of seconds */
+                       port->tcp_idle_timeout.tv_sec = value;
+                       port->tcp_idle_timeout.tv_usec = 0;
+                       log(EVDNS_LOG_DEBUG, "Setting EVDNS_SOPT_TCP_IDLE_TIMEOUT to %u seconds",
+                               (unsigned)port->tcp_idle_timeout.tv_sec);
+                       rc = 0;
+               } else {
+                       log(EVDNS_LOG_WARN, "EVDNS_SOPT_TCP_IDLE_TIMEOUT option can be set only on TCP server");
+               }
+               break;
+       default:
+               log(EVDNS_LOG_WARN, "Invalid DNS server option %d", (int)option);
+               break;
+       }
+       EVDNS_UNLOCK(port);
+       return rc;
+}
+
 static int
 evdns_base_set_option_impl(struct evdns_base *base,
     const char *option, const char *val, int flags)
@@ -3565,6 +4232,22 @@ evdns_base_set_option_impl(struct evdns_base *base,
                if (!(flags & DNS_OPTION_MISC)) return 0;
                log(EVDNS_LOG_DEBUG, "Setting SO_SNDBUF to %s", val);
                base->so_sndbuf = buf;
+       } else if (str_matches_option(option, "tcp-idle-timeout:")) {
+               struct timeval tv;
+               if (evdns_strtotimeval(val, &tv) == -1) return -1;
+               if (!(flags & DNS_OPTION_MISC)) return 0;
+               log(EVDNS_LOG_DEBUG, "Setting tcp idle timeout to %s", val);
+               memcpy(&base->global_tcp_idle_timeout, &tv, sizeof(tv));
+       } else if (str_matches_option(option, "use-vc:")) {
+               if (!(flags & DNS_OPTION_MISC)) return 0;
+               if (val && strlen(val)) return -1;
+               log(EVDNS_LOG_DEBUG, "Setting use-vc option");
+               base->global_tcp_flags |= DNS_QUERY_USEVC;
+       } else if (str_matches_option(option, "ignore-tc:")) {
+               if (!(flags & DNS_OPTION_MISC)) return 0;
+               if (val && strlen(val)) return -1;
+               log(EVDNS_LOG_DEBUG, "Setting ignore-tc option");
+               base->global_tcp_flags |= DNS_QUERY_IGNTC;
        }
        return 0;
 }
@@ -4006,6 +4689,7 @@ evdns_base_new(struct event_base *event_base, int flags)
        base->global_getaddrinfo_allow_skew.tv_usec = 0;
        base->global_nameserver_probe_initial_timeout.tv_sec = 10;
        base->global_nameserver_probe_initial_timeout.tv_usec = 0;
+       base->global_tcp_idle_timeout.tv_sec = CLIENT_IDLE_CONN_TIMEOUT;
 
        TAILQ_INIT(&base->hostsdb);
 
@@ -4095,6 +4779,7 @@ evdns_nameserver_free(struct nameserver *server)
                server->probe_request = NULL;
        }
        event_debug_unassign(&server->timeout_event);
+       disconnect_and_free_connection(server->connection);
        mm_free(server);
 }
 
index d63dfff7391810d4de001342bf62e4f255257502..14c09d108d1c8b6384e9f934b02442be0987be09 100644 (file)
@@ -183,7 +183,12 @@ extern "C" {
 #define DNS_PTR 2
 #define DNS_IPv6_AAAA 3
 
-#define DNS_QUERY_NO_SEARCH 1
+/** Disable searching for the query. */
+#define DNS_QUERY_NO_SEARCH 0x01
+/** Use TCP connections ("virtual circuits") for queries rather than UDP datagrams. */
+#define DNS_QUERY_USEVC 0x02
+/** Ignore the truncation flag in responses (don't fall back to TCP connections). */
+#define DNS_QUERY_IGNTC 0x04
 
 /* Allow searching */
 #define DNS_OPTION_SEARCH 1
@@ -197,6 +202,9 @@ extern "C" {
  * - attempts:
  * - randomize-case:
  * - initial-probe-timeout:
+ * - tcp-idle-timeout:
+ * - use-vc
+ * - ignore-tc
  */
 #define DNS_OPTION_MISC 4
 /* Load hosts file (i.e. "/etc/hosts") */
@@ -390,7 +398,7 @@ struct evdns_request;
 
   @param base the evdns_base to which to apply this operation
   @param name a DNS hostname
-  @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+  @param flags either 0, or combination of DNS_QUERY_* flags.
   @param callback a callback function to invoke when the request is completed
   @param ptr an argument to pass to the callback function
   @return an evdns_request object if successful, or NULL if an error occurred.
@@ -404,7 +412,7 @@ struct evdns_request *evdns_base_resolve_ipv4(struct evdns_base *base, const cha
 
   @param base the evdns_base to which to apply this operation
   @param name a DNS hostname
-  @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+  @param flags either 0, or combination of DNS_QUERY_* flags.
   @param callback a callback function to invoke when the request is completed
   @param ptr an argument to pass to the callback function
   @return an evdns_request object if successful, or NULL if an error occurred.
@@ -421,7 +429,7 @@ struct in6_addr;
 
   @param base the evdns_base to which to apply this operation
   @param in an IPv4 address
-  @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+  @param flags either 0, or combination of DNS_QUERY_* flags.
   @param callback a callback function to invoke when the request is completed
   @param ptr an argument to pass to the callback function
   @return an evdns_request object if successful, or NULL if an error occurred.
@@ -436,7 +444,7 @@ struct evdns_request *evdns_base_resolve_reverse(struct evdns_base *base, const
 
   @param base the evdns_base to which to apply this operation
   @param in an IPv6 address
-  @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+  @param flags either 0, or combination of DNS_QUERY_* flags.
   @param callback a callback function to invoke when the request is completed
   @param ptr an argument to pass to the callback function
   @return an evdns_request object if successful, or NULL if an error occurred.
@@ -462,11 +470,14 @@ void evdns_cancel_request(struct evdns_base *base, struct evdns_request *req);
 
     ndots, timeout, max-timeouts, max-inflight, attempts, randomize-case,
     bind-to, initial-probe-timeout, getaddrinfo-allow-skew,
-    so-rcvbuf, so-sndbuf.
+    so-rcvbuf, so-sndbuf, tcp-idle-timeout, use-vc, ignore-tc.
 
   In versions before Libevent 2.0.3-alpha, the option name needed to end with
   a colon.
 
+  For options that take no value (use-vc, ignore-tc), val should be an empty
+  string or NULL.
+
   @param base the evdns_base to which to apply this operation
   @param option the name of the configuration option to be modified
   @param val the value to be set
@@ -646,7 +657,7 @@ typedef void (*evdns_request_callback_fn_type)(struct evdns_server_request *, vo
 #define EVDNS_FLAGS_AA 0x400
 #define EVDNS_FLAGS_RD 0x080
 
-/** Create a new DNS server port.
+/** Create a new UDP DNS server port.
 
     @param base The event base to handle events for the server port.
     @param socket A UDP socket to accept DNS requests.
@@ -659,10 +670,60 @@ typedef void (*evdns_request_callback_fn_type)(struct evdns_server_request *, vo
  */
 EVENT2_EXPORT_SYMBOL
 struct evdns_server_port *evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type callback, void *user_data);
+
+struct evconnlistener;
+
+/** Create a new TCP DNS server port.
+
+    @param base The event base to handle events for the server port.
+    @param listener A TCP listener to accept DNS requests.
+    @param flags Always 0 for now.
+    @param callback A function to invoke whenever we get a DNS request
+      on the socket.
+    @param user_data Data to pass to the callback.
+    @return an evdns_server_port structure for this server port or NULL if
+      an error occurred.
+ */
+EVENT2_EXPORT_SYMBOL
+struct evdns_server_port *evdns_add_server_port_with_listener(
+    struct event_base *base, struct evconnlistener *listener, int flags,
+    evdns_request_callback_fn_type callback, void *user_data);
+
 /** Close down a DNS server port, and free associated structures. */
 EVENT2_EXPORT_SYMBOL
 void evdns_close_server_port(struct evdns_server_port *port);
 
+/**
+ * Options that can be configured on an evdns_server_port.
+ *
+ * @see evdns_server_port_set_option()
+ */
+enum evdns_server_option {
+       /**
+        * Upper bound on the number of TCP client connections the server
+        * holds at the same time. Only accepted on TCP DNS servers.
+        */
+       EVDNS_SOPT_TCP_MAX_CLIENTS = 0,
+       /**
+        * Idle timeout, in seconds, for incoming TCP connections: a
+        * connection on which the client sends no request for this long
+        * is closed by the server. Only accepted on TCP DNS servers.
+        */
+       EVDNS_SOPT_TCP_IDLE_TIMEOUT = 1,
+};
+
+/**
+   Configure DNS server.
+
+   @param port the evdns_server_port to which to apply this operation
+   @param option @see evdns_server_option for the list of possible options
+   @param value value of the option
+   @return 0 if successful, or -1 if an error occurred
+ */
+EVENT2_EXPORT_SYMBOL
+int evdns_server_port_set_option(struct evdns_server_port *port, enum evdns_server_option option, size_t value);
+
 /** Sets some flags in a reply we're building.
     Allows setting of the AA or RD flags
  */
index a3446ed62e0235964e26fcefd8f8fbdabf6d18c1..e1ebf5082ab08554113747d74a37a5309bdf8cc2 100644 (file)
 
 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
 
+/* REPEAT_N(address): string literal holding N comma-separated copies of the
+ * string literal 'address'; each macro doubles the previous one. */
+#define REPEAT_2(address) address "," address
+#define REPEAT_4(address) REPEAT_2(address) "," REPEAT_2(address)
+#define REPEAT_8(address) REPEAT_4(address) "," REPEAT_4(address)
+#define REPEAT_16(address) REPEAT_8(address) "," REPEAT_8(address)
+#define REPEAT_32(address) REPEAT_16(address) "," REPEAT_16(address)
+#define REPEAT_64(address) REPEAT_32(address) "," REPEAT_32(address)
+#define REPEAT_128(address) REPEAT_64(address) "," REPEAT_64(address)
+#define REPEAT_256(address) REPEAT_128(address) "," REPEAT_128(address)
+
 static int dns_ok = 0;
 static int dns_got_cancel = 0;
 static int dns_err = 0;
@@ -477,7 +494,7 @@ struct generic_dns_callback_result {
        int ttl;
        size_t addrs_len;
        void *addrs;
-       char addrs_buf[256];
+       char addrs_buf[4096];
 };
 
 static void
@@ -503,8 +520,8 @@ generic_dns_callback(int result, char type, int count, int ttl, void *addresses,
        }
        if (len) {
                res->addrs_len = len;
-               if (len > 256)
-                       len = 256;
+               if (len > ARRAY_SIZE(res->addrs_buf))
+                       len = ARRAY_SIZE(res->addrs_buf);
                memcpy(res->addrs_buf, addresses, len);
                res->addrs = res->addrs_buf;
        }
@@ -520,6 +537,11 @@ generic_dns_callback(int result, char type, int count, int ttl, void *addresses,
 }
 
 static struct regress_dns_server_table search_table[] = {
+       { "small.a.example.com", "A", REPEAT_64("11.22.33.45"), 0, 0},
+       { "medium.b.example.com", "A", REPEAT_64("11.22.33.45") "," REPEAT_64("12.22.33.45"), 0, 0},
+       { "large.c.example.com", "A",
+               REPEAT_256("11.22.33.45") "," REPEAT_256("12.22.33.45") "," REPEAT_256("13.22.33.45") "," REPEAT_256("14.22.33.45"), 0, 0},
+       { "lost.request.com", "err", "67", 0, 0},
        { "host.a.example.com", "err", "3", 0, 0 },
        { "host.b.example.com", "err", "3", 0, 0 },
        { "host.c.example.com", "A", "11.22.33.44", 0, 0 },
@@ -529,12 +551,31 @@ static struct regress_dns_server_table search_table[] = {
        { "hostn.a.example.com", "errsoa", "0", 0, 0 },
        { "hostn.b.example.com", "errsoa", "3", 0, 0 },
        { "hostn.c.example.com", "err", "0", 0, 0 },
-
        { "host", "err", "3", 0, 0 },
        { "host2", "err", "3", 0, 0 },
        { "*", "err", "3", 0, 0 },
        { NULL, NULL, NULL, 0, 0 }
 };
+
+/* Answer table for the TCP DNS tests; it repeats the first four entries of
+ * search_table (64, 128 and 1024 A records, plus an "err" entry) and ends
+ * with an all-NULL sentinel row.
+ * NOTE(review): presumably the columns are query name, record type and
+ * answer data -- confirm against struct regress_dns_server_table. */
+static struct regress_dns_server_table tcp_search_table[] = {
+       { "small.a.example.com", "A", REPEAT_64("11.22.33.45"), 0, 0},
+       { "medium.b.example.com", "A", REPEAT_64("11.22.33.45") "," REPEAT_64("12.22.33.45"), 0, 0},
+       { "large.c.example.com", "A",
+               REPEAT_256("11.22.33.45") "," REPEAT_256("12.22.33.45") "," REPEAT_256("13.22.33.45") "," REPEAT_256("14.22.33.45"), 0, 0},
+       { "lost.request.com", "err", "67", 0, 0},
+       { NULL, NULL, NULL, 0, 0 }
+};
+
+/* Check that the callback result 'r' has result code 'exp_result', type
+ * DNS_IPv4_A, and exactly the IPv4 addresses listed (comma separated) in
+ * 'exp_addresses', in the same order.
+ * The expansion uses two variables from the calling scope: an integer 'k_'
+ * and an array 'addrs' with an s_addr member per element.
+ * Arguments are parenthesized so that any expression can be passed. */
+#define assert_request_results(r, exp_result, exp_addresses) \
+       do { \
+               k_ = parse_csv_address_list((exp_addresses), AF_INET, addrs, ARRAY_SIZE(addrs)); \
+               tt_assert((r).result == (exp_result)); \
+               tt_assert((r).type == DNS_IPv4_A); \
+               tt_assert((r).count == k_); \
+               for (k_ = 0; k_ < (r).count; ++k_) \
+                       tt_int_op(((ev_uint32_t *)(r).addrs)[k_], ==, addrs[k_].s_addr); \
+       } while (0)
+
 static void
 dns_search_test_impl(void *arg, int lower)
 {
@@ -553,7 +594,7 @@ dns_search_test_impl(void *arg, int lower)
                table[i].lower = lower;
        }
 
-       tt_assert(regress_dnsserver(base, &portnum, table));
+       tt_assert(regress_dnsserver(base, &portnum, table, NULL));
        evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
 
        dns = evdns_base_new(base, 0);
@@ -659,7 +700,7 @@ dns_search_cancel_test(void *arg)
        struct generic_dns_callback_result r1;
        char buf[64];
 
-       port = regress_get_dnsserver(base, &portnum, NULL,
+       port = regress_get_udp_dnsserver(base, &portnum, NULL,
            search_cancel_server_cb, NULL);
        tt_assert(port);
        evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
@@ -736,7 +777,7 @@ dns_retry_test_impl(void *arg, int flags)
 
        struct generic_dns_callback_result r1;
 
-       port = regress_get_dnsserver(base, &portnum, NULL,
+       port = regress_get_udp_dnsserver(base, &portnum, NULL,
            fail_server_cb, &drop_count);
        tt_assert(port);
        evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
@@ -833,10 +874,10 @@ dns_reissue_test_impl(void *arg, int flags)
        ev_uint16_t portnum1 = 0, portnum2=0;
        char buf1[64], buf2[64];
 
-       port1 = regress_get_dnsserver(base, &portnum1, NULL,
+       port1 = regress_get_udp_dnsserver(base, &portnum1, NULL,
            regress_dns_server_cb, internal_error_table);
        tt_assert(port1);
-       port2 = regress_get_dnsserver(base, &portnum2, NULL,
+       port2 = regress_get_udp_dnsserver(base, &portnum2, NULL,
            regress_dns_server_cb, reissue_table);
        tt_assert(port2);
        evutil_snprintf(buf1, sizeof(buf1), "127.0.0.1:%d", (int)portnum1);
@@ -912,7 +953,7 @@ dns_inflight_test_impl(void *arg, int flags)
        struct generic_dns_callback_result r[20];
        int i;
 
-       dns_port = regress_get_dnsserver(base, &portnum, NULL,
+       dns_port = regress_get_udp_dnsserver(base, &portnum, NULL,
                regress_dns_server_cb, reissue_table);
        tt_assert(dns_port);
        if (disable_when_inactive) {
@@ -977,7 +1018,7 @@ dns_disable_when_inactive_no_ns_test(void *arg)
        tt_assert(inactive_base);
 
        /** Create dns server with inactive base, to avoid replying to clients */
-       tt_assert(regress_dnsserver(inactive_base, &portnum, search_table));
+       tt_assert(regress_dnsserver(inactive_base, &portnum, search_table, NULL));
        evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
 
        dns = evdns_base_new(base, EVDNS_BASE_DISABLE_WHEN_INACTIVE);
@@ -1301,7 +1342,7 @@ test_bufferevent_connect_hostname(void *arg)
        listener_port = regress_get_socket_port(
                evconnlistener_get_fd(listener));
 
-       port = regress_get_dnsserver(data->base, &dns_port, NULL,
+       port = regress_get_udp_dnsserver(data->base, &dns_port, NULL,
            be_getaddrinfo_server_cb, &n_dns);
        tt_assert(port);
        tt_int_op(dns_port, >=, 0);
@@ -1612,7 +1653,7 @@ test_getaddrinfo_async(void *arg)
 
        /* 2. Okay, now we can actually test the asynchronous resolver. */
        /* Start a dummy local dns server... */
-       port = regress_get_dnsserver(data->base, &dns_port, NULL,
+       port = regress_get_udp_dnsserver(data->base, &dns_port, NULL,
            be_getaddrinfo_server_cb, &n_dns_questions);
        tt_assert(port);
        tt_int_op(dns_port, >=, 0);
@@ -2136,7 +2177,7 @@ dns_client_fail_requests_test(void *arg)
        struct generic_dns_callback_result r[20];
        unsigned i;
 
-       dns_port = regress_get_dnsserver(base, &portnum, NULL,
+       dns_port = regress_get_udp_dnsserver(base, &portnum, NULL,
                regress_dns_server_cb, reissue_table);
        tt_assert(dns_port);
 
@@ -2184,7 +2225,7 @@ dns_client_fail_requests_getaddrinfo_test(void *arg)
        struct generic_dns_callback_result r[20];
        int i;
 
-       dns_port = regress_get_dnsserver(base, &portnum, NULL,
+       dns_port = regress_get_udp_dnsserver(base, &portnum, NULL,
                regress_dns_server_cb, reissue_table);
        tt_assert(dns_port);
 
@@ -2286,7 +2327,7 @@ getaddrinfo_race_gotresolve_test(void *arg)
        if (evthread_make_base_notifiable(rp.base) < 0)
                tt_abort_msg("Couldn't make base notifiable!");
 
-       dns_port = regress_get_dnsserver(rp.base, &portnum, NULL,
+       dns_port = regress_get_udp_dnsserver(rp.base, &portnum, NULL,
                                                                         regress_dns_server_cb, reissue_table);
        tt_assert(dns_port);
 
@@ -2359,6 +2400,213 @@ end:
 }
 #endif
 
+/* Resolve names with answers of increasing size against a local server that
+ * serves search_table over UDP and tcp_search_table over TCP, and use the
+ * per-entry `seen` counters to check which transport handled each query. */
+static void
+test_tcp_resolve(void *arg)
+{
+       struct basic_test_data *data = arg;
+       struct event_base *base = data->base;
+       struct evdns_base *dns = NULL;
+       ev_uint16_t portnum = 0;
+       struct evdns_request *req = NULL;
+       struct generic_dns_callback_result r;
+       struct in_addr addrs[2048];
+       char buf[64];
+       int k_;
+       exit_base = base;
+
+       tt_assert(base);
+
+       /* Create the resolver only after the base is known to be valid, and
+        * fail the test cleanly if it could not be allocated. */
+       dns = evdns_base_new(base, 0);
+       tt_assert(dns);
+
+       tt_assert(regress_dnsserver(base, &portnum, search_table, tcp_search_table));
+       evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
+
+       tt_assert(!evdns_base_nameserver_ip_add(dns, buf));
+
+       /* Small answer, default flags: expected to be served from the UDP
+        * table only. */
+       req = evdns_base_resolve_ipv4(
+                       dns, "small.a.example.com", 0, generic_dns_callback, &r);
+       tt_assert(req);
+       n_replies_left = 1;
+       event_base_dispatch(base);
+       assert_request_results(r, DNS_ERR_NONE, REPEAT_64("11.22.33.45"));
+       tt_assert(search_table[0].seen == 1);
+       tt_assert(tcp_search_table[0].seen == 0);
+
+       /* Medium answer with DNS_QUERY_IGNTC: a DNS_ERR_TRUNCATED result is
+        * expected and the TCP table must stay untouched. */
+       req = evdns_base_resolve_ipv4(
+               dns, "medium.b.example.com", DNS_QUERY_IGNTC, generic_dns_callback, &r);
+       tt_assert(req);
+       n_replies_left = 1;
+       event_base_dispatch(base);
+       tt_assert(r.type != DNS_IPv4_A);
+       tt_assert(r.result == DNS_ERR_TRUNCATED);
+       tt_assert(search_table[1].seen == 1);
+       tt_assert(tcp_search_table[1].seen == 0);
+
+       /* Medium answer with DNS_QUERY_USEVC: only the TCP counter is
+        * expected to advance. */
+       req = evdns_base_resolve_ipv4(
+               dns, "medium.b.example.com", DNS_QUERY_USEVC, generic_dns_callback, &r);
+       tt_assert(req);
+       n_replies_left = 1;
+       event_base_dispatch(base);
+       assert_request_results(r, DNS_ERR_NONE, REPEAT_64("11.22.33.45") "," REPEAT_64("12.22.33.45"));
+       tt_assert(search_table[1].seen == 1);
+       tt_assert(tcp_search_table[1].seen == 1);
+
+       /* Large answer with DNS_QUERY_IGNTC: truncated result, no TCP query. */
+       req = evdns_base_resolve_ipv4(
+               dns, "large.c.example.com", DNS_QUERY_IGNTC, generic_dns_callback, &r);
+       tt_assert(req);
+       n_replies_left = 1;
+       event_base_dispatch(base);
+       tt_assert(r.type != DNS_IPv4_A);
+       tt_assert(r.result == DNS_ERR_TRUNCATED);
+       tt_assert(search_table[2].seen == 1);
+       tt_assert(tcp_search_table[2].seen == 0);
+
+       /* Large answer, default flags: both the UDP and the TCP counters are
+        * expected to advance by one. */
+       req = evdns_base_resolve_ipv4(
+               dns, "large.c.example.com", 0, generic_dns_callback, &r);
+       tt_assert(req);
+       n_replies_left = 1;
+       event_base_dispatch(base);
+       assert_request_results(r, DNS_ERR_NONE,
+               REPEAT_256("11.22.33.45") "," REPEAT_256("12.22.33.45") "," REPEAT_256("13.22.33.45") "," REPEAT_256("14.22.33.45"));
+       tt_assert(search_table[2].seen == 2);
+       tt_assert(tcp_search_table[2].seen == 1);
+
+       /* Large answer with DNS_QUERY_USEVC: only the TCP counter advances. */
+       req = evdns_base_resolve_ipv4(
+               dns, "large.c.example.com", DNS_QUERY_USEVC, generic_dns_callback, &r);
+       tt_assert(req);
+       n_replies_left = 1;
+       event_base_dispatch(base);
+       assert_request_results(r, DNS_ERR_NONE,
+               REPEAT_256("11.22.33.45") "," REPEAT_256("12.22.33.45") "," REPEAT_256("13.22.33.45") "," REPEAT_256("14.22.33.45"));
+       tt_assert(search_table[2].seen == 2);
+       tt_assert(tcp_search_table[2].seen == 2);
+
+end:
+       if (dns)
+               evdns_base_free(dns, 0);
+
+       regress_clean_dnsserver();
+}
+
+/* Issue three queries for a large answer at once on a resolver with the
+ * "use-vc" option set, and check that all three are answered from the TCP
+ * table while the UDP table is never consulted. */
+static void
+test_tcp_resolve_pipeline(void *arg)
+{
+       struct basic_test_data *data = arg;
+       struct event_base *base = data->base;
+       struct evdns_base *dns = NULL;
+       ev_uint16_t portnum = 0;
+       struct evdns_request *reqs[3] = {NULL, NULL, NULL};
+       struct generic_dns_callback_result results[3];
+       char buf[64];
+       struct in_addr addrs[2048];
+       int i, k_;
+       exit_base = base;
+
+       tt_assert(base);
+       /* Create the resolver only after the base is known to be valid, and
+        * fail the test cleanly if it could not be allocated. */
+       dns = evdns_base_new(base, 0);
+       tt_assert(dns);
+       tt_assert(regress_dnsserver(base, &portnum, search_table, tcp_search_table));
+       evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
+       tt_assert(!evdns_base_nameserver_ip_add(dns, buf));
+       tt_assert(!evdns_base_set_option(dns, "use-vc", NULL));
+
+       /* Queue all requests before running the loop so that they are
+        * outstanding at the same time. */
+       for (i = 0; i < 3; ++i) {
+               reqs[i] = evdns_base_resolve_ipv4(
+                       dns, "large.c.example.com", 0, generic_dns_callback, &results[i]);
+               tt_assert(reqs[i]);
+       }
+
+       n_replies_left = 3;
+       event_base_dispatch(base);
+       for (i = 0; i < 3; ++i) {
+               assert_request_results(results[i], DNS_ERR_NONE,
+                       REPEAT_256("11.22.33.45") "," REPEAT_256("12.22.33.45") "," REPEAT_256("13.22.33.45") "," REPEAT_256("14.22.33.45"));
+       }
+       tt_assert(search_table[2].seen == 0);
+       tt_assert(tcp_search_table[2].seen == 3);
+
+end:
+       if (dns)
+               evdns_base_free(dns, 0);
+       regress_clean_dnsserver();
+}
+
+/* Resolve the same small name with DNS_QUERY_USEVC from three independent
+ * evdns bases and check that the server answered all three from the TCP
+ * table while the UDP table was never consulted. */
+static void
+test_tcp_resolve_many_clients(void *arg)
+{
+       struct basic_test_data *data = arg;
+       struct event_base *base = data->base;
+       struct evdns_base *dns[3] = {NULL, NULL, NULL};
+       struct evdns_request *req[3] = {NULL, NULL, NULL};
+       struct generic_dns_callback_result r[3];
+       int k_, i;
+       ev_uint16_t portnum = 0;
+       char buf[64];
+       struct in_addr addrs[2048];
+       exit_base = base;
+       tt_assert(base);
+
+       /* Create the resolvers only after the base is known to be valid, and
+        * fail the test cleanly if any of them could not be allocated. */
+       for (i = 0; i < 3; ++i) {
+               dns[i] = evdns_base_new(base, 0);
+               tt_assert(dns[i]);
+       }
+
+       tt_assert(regress_dnsserver(base, &portnum, search_table, tcp_search_table));
+       evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
+       for (i = 0; i < 3; ++i) {
+               tt_assert(!evdns_base_nameserver_ip_add(dns[i], buf));
+               req[i] = evdns_base_resolve_ipv4(
+                               dns[i], "small.a.example.com", DNS_QUERY_USEVC, generic_dns_callback, &r[i]);
+               tt_assert(req[i]);
+       }
+
+       n_replies_left = 3;
+       event_base_dispatch(base);
+       for (i = 0; i < 3; ++i) {
+               assert_request_results(r[i], DNS_ERR_NONE, REPEAT_64("11.22.33.45"));
+       }
+       tt_assert(search_table[0].seen == 0);
+       tt_assert(tcp_search_table[0].seen == 3);
+
+end:
+       for (i = 0; i < 3; ++i) {
+               if (dns[i])
+                       evdns_base_free(dns[i], 0);
+       }
+       regress_clean_dnsserver();
+}
+
+/* Send a DNS_QUERY_USEVC query for "lost.request.com" with the resolver's
+ * "timeout:" option set to "1" and check that the request completes with
+ * DNS_ERR_TIMEOUT. */
+static void
+test_tcp_timeout(void *arg)
+{
+       struct generic_dns_callback_result r;
+       struct basic_test_data *data = arg;
+       struct event_base *base = data->base;
+       struct evdns_base *dns = NULL;
+       ev_uint16_t portnum = 0;
+       struct evdns_request *req = NULL;
+       char buf[64];
+
+       exit_base = base;
+
+       tt_assert(base);
+
+       /* Create the resolver only after the base is known to be valid, and
+        * fail the test cleanly if it could not be allocated. */
+       dns = evdns_base_new(base, 0);
+       tt_assert(dns);
+
+       tt_assert(!evdns_base_set_option(dns, "timeout:", "1"));
+       tt_assert(regress_dnsserver(base, &portnum, search_table, tcp_search_table));
+       evutil_snprintf(buf, sizeof(buf), "127.0.0.1:%d", (int)portnum);
+
+       tt_assert(!evdns_base_nameserver_ip_add(dns, buf));
+
+       req = evdns_base_resolve_ipv4(
+               dns, "lost.request.com", DNS_QUERY_USEVC, generic_dns_callback, &r);
+       tt_assert(req);
+
+       n_replies_left = 1;
+       event_base_dispatch(base);
+
+       tt_assert(DNS_ERR_TIMEOUT == r.result);
+
+end:
+       if (dns)
+               evdns_base_free(dns, 0);
+
+       regress_clean_dnsserver();
+}
+
 static void
 test_set_so_rcvbuf_so_sndbuf(void *arg)
 {
@@ -2407,6 +2655,10 @@ test_set_option(void *arg)
        const char *addr_port_options[] = {
                "bind-to", "bind-to:",
        };
+       const char *options_without_values[] = {
+               "use-vc", "use-vc:",
+               "ignore-tc", "ignore-tc:",
+       };
 
        dns_base = evdns_base_new(data->base, 0);
        tt_assert(dns_base);
@@ -2437,6 +2689,13 @@ test_set_option(void *arg)
                tt_assert(FAIL == evdns_base_set_option(dns_base, addr_port_options[i], "foo"));
        }
 
+       for (i = 0; i < ARRAY_SIZE(options_without_values); ++i) {
+               tt_assert(SUCCESS == evdns_base_set_option(dns_base, options_without_values[i], NULL));
+               tt_assert(SUCCESS == evdns_base_set_option(dns_base, options_without_values[i], ""));
+               tt_assert(FAIL == evdns_base_set_option(dns_base, options_without_values[i], "1"));
+               tt_assert(FAIL == evdns_base_set_option(dns_base, options_without_values[i], "foo"));
+       }
+
 #undef SUCCESS
 #undef FAIL
 end:
@@ -2444,6 +2703,48 @@ end:
                evdns_base_free(dns_base, 0);
 }
 
+/* Check evdns_server_port_set_option() for the TCP-specific options
+ * EVDNS_SOPT_TCP_MAX_CLIENTS and EVDNS_SOPT_TCP_IDLE_TIMEOUT: the test
+ * expects the values 0, 1 and 100 to be accepted on a TCP server port and
+ * the same options to be rejected on a UDP server port. */
+static void
+test_set_server_option(void *arg)
+{
+#define SUCCESS 0
+#define FAIL -1
+       struct basic_test_data *data = arg;
+       struct evdns_server_port *tcp_port = NULL;
+       struct evdns_server_port *udp_port = NULL;
+       evutil_socket_t udp_sock = -1;
+       evutil_socket_t tcp_sock = -1;
+       ev_uint16_t portnum;
+       size_t i;
+       enum evdns_server_option tcp_options[] = {EVDNS_SOPT_TCP_MAX_CLIENTS, EVDNS_SOPT_TCP_IDLE_TIMEOUT};
+
+       /* portnum is reset to 0 before each call; the helpers are given no
+        * request callback. */
+       portnum = 0;
+       tcp_port = regress_get_tcp_dnsserver(data->base, &portnum, &tcp_sock, NULL, NULL);
+       tt_assert(tcp_port);
+       portnum = 0;
+       udp_port = regress_get_udp_dnsserver(data->base, &portnum, &udp_sock, NULL, NULL);
+       tt_assert(udp_port);
+
+       for (i = 0; i < ARRAY_SIZE(tcp_options); ++i) {
+               tt_assert(SUCCESS == evdns_server_port_set_option(tcp_port, tcp_options[i], 0));
+               tt_assert(SUCCESS == evdns_server_port_set_option(tcp_port, tcp_options[i], 1));
+               tt_assert(SUCCESS == evdns_server_port_set_option(tcp_port, tcp_options[i], 100));
+               tt_assert(FAIL == evdns_server_port_set_option(udp_port, tcp_options[i], 0));
+               tt_assert(FAIL == evdns_server_port_set_option(udp_port, tcp_options[i], 100));
+       }
+
+#undef SUCCESS
+#undef FAIL
+end:
+       /* Each server port is closed before the socket it was created on. */
+       if (tcp_port)
+               evdns_close_server_port(tcp_port);
+       if (tcp_sock >= 0)
+               evutil_closesocket(tcp_sock);
+       if (udp_port)
+               evdns_close_server_port(udp_port);
+       if (udp_sock >= 0)
+               evutil_closesocket(udp_sock);
+}
+
 #define DNS_LEGACY(name, flags)                                               \
        { #name, run_legacy_test_fn, flags|TT_LEGACY, &legacy_setup,   \
                    dns_##name }
@@ -2516,11 +2817,21 @@ struct testcase_t dns_testcases[] = {
          getaddrinfo_race_gotresolve_test,
          TT_FORK|TT_OFF_BY_DEFAULT, NULL, NULL },
 #endif
+       { "tcp_resolve", test_tcp_resolve,
+         TT_FORK | TT_NEED_BASE, &basic_setup, NULL },
+       { "tcp_resolve_pipeline", test_tcp_resolve_pipeline,
+         TT_FORK | TT_NEED_BASE, &basic_setup, NULL },
+       { "tcp_resolve_many_clients", test_tcp_resolve_many_clients,
+         TT_FORK | TT_NEED_BASE, &basic_setup, NULL },
+       { "tcp_timeout", test_tcp_timeout,
+         TT_FORK | TT_NEED_BASE, &basic_setup, NULL },
 
        { "set_SO_RCVBUF_SO_SNDBUF", test_set_so_rcvbuf_so_sndbuf,
          TT_FORK|TT_NEED_BASE, &basic_setup, NULL },
        { "set_options", test_set_option,
          TT_FORK|TT_NEED_BASE, &basic_setup, NULL },
+       { "set_server_options", test_set_server_option,
+         TT_FORK|TT_NEED_BASE, &basic_setup, NULL },
 
        END_OF_TESTCASES
 };
index b7656cf8341c2963fb83b7558670e75e1cf18b51..bca7d9573d857d911376788f69438170ceb3f8e5 100644 (file)
@@ -1418,7 +1418,7 @@ http_connection_async_test(void *arg)
        struct evhttp *http = http_setup(&port, data->base, 0);
 
        exit_base = data->base;
-       tt_assert(regress_dnsserver(data->base, &portnum, search_table));
+       tt_assert(regress_dnsserver(data->base, &portnum, search_table, NULL));
 
        dns_base = evdns_base_new(data->base, 0/* init name servers */);
        tt_assert(dns_base);
@@ -1699,7 +1699,7 @@ http_cancel_test(void *arg)
        if (type & BY_HOST) {
                const char *timeout = (type & NS_TIMEOUT) ? "6" : "3";
 
-               tt_assert(regress_dnsserver(data->base, &portnum, search_table));
+               tt_assert(regress_dnsserver(data->base, &portnum, search_table, NULL));
 
                dns_base = evdns_base_new(data->base, 0/* init name servers */);
                tt_assert(dns_base);
@@ -4132,7 +4132,7 @@ http_connection_retry_conn_address_test_impl(void *arg, int ssl)
        struct evdns_base *dns_base = NULL;
        char address[64];
 
-       tt_assert(regress_dnsserver(data->base, &portnum, search_table));
+       tt_assert(regress_dnsserver(data->base, &portnum, search_table, NULL));
        dns_base = evdns_base_new(data->base, 0/* init name servers */);
        tt_assert(dns_base);
 
@@ -4669,7 +4669,7 @@ http_ipv6_for_domain_test_impl(void *arg, int family)
        ev_uint16_t portnum = 0;
        char address[64];
 
-       tt_assert(regress_dnsserver(data->base, &portnum, ipv6_search_table));
+       tt_assert(regress_dnsserver(data->base, &portnum, ipv6_search_table, NULL));
 
        dns_base = evdns_base_new(data->base, 0/* init name servers */);
        tt_assert(dns_base);
index 959347ea71602d9736ee2dda3f7ba756a6c15cb7..ca9c6d25287ec5f88f7d3e66bf04864ee088adb1 100644 (file)
 #include "regress.h"
 #include "regress_testutils.h"
 
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
 /* globals */
-static struct evdns_server_port *dns_port;
-evutil_socket_t dns_sock = -1;
+static struct evdns_server_port *udp_dns_port;
+evutil_socket_t udp_dns_sock = -1;
+static struct evdns_server_port *tcp_dns_port;
+evutil_socket_t tcp_dns_sock = -1;
 
 /* Helper: return the port that a socket is bound on, in host order. */
 int
@@ -90,7 +94,7 @@ regress_get_socket_port(evutil_socket_t fd)
 }
 
 struct evdns_server_port *
-regress_get_dnsserver(struct event_base *base,
+regress_get_udp_dnsserver(struct event_base *base,
     ev_uint16_t *portnum,
     evutil_socket_t *psock,
     evdns_request_callback_fn_type cb,
@@ -126,16 +130,64 @@ end:
        return NULL;
 }
 
+/* Create a DNS server port that accepts queries over TCP on 127.0.0.1.
+ *
+ * A listener is bound to *portnum and handed to
+ * evdns_add_server_port_with_listener() together with `cb` and `arg`.
+ * If *portnum is 0 on entry, it is overwritten with the port number the
+ * listener's socket was actually bound to. If `psock` is non-NULL, the
+ * listener's socket is stored in *psock.
+ *
+ * Returns the new server port, or NULL on failure (in which case the
+ * listener, if one was created, has been freed). */
+struct evdns_server_port *
+regress_get_tcp_dnsserver(struct event_base *base,
+       ev_uint16_t *portnum,
+       evutil_socket_t *psock,
+       evdns_request_callback_fn_type cb,
+       void *arg)
+{
+       struct sockaddr_in bind_addr;
+       struct evconnlistener *lev;
+       struct evdns_server_port *server;
+       evutil_socket_t fd;
+
+       /* Loopback address, port as requested by the caller. */
+       memset(&bind_addr, 0, sizeof(bind_addr));
+       bind_addr.sin_family = AF_INET;
+       bind_addr.sin_addr.s_addr = htonl(0x7f000001UL);
+       bind_addr.sin_port = htons(*portnum);
+
+       lev = evconnlistener_new_bind(base, NULL, NULL,
+                       LEV_OPT_CLOSE_ON_FREE | LEV_OPT_REUSEABLE, 128,
+                       (struct sockaddr*)&bind_addr, sizeof(bind_addr));
+       if (lev == NULL)
+               return NULL;
+
+       server = evdns_add_server_port_with_listener(base, lev, 0, cb, arg);
+       if (server == NULL) {
+               /* The listener was not adopted by a server port; drop it. */
+               evconnlistener_free(lev);
+               return NULL;
+       }
+
+       fd = evconnlistener_get_fd(lev);
+       if (*portnum == 0)
+               *portnum = regress_get_socket_port(fd);
+       if (psock != NULL)
+               *psock = fd;
+
+       return server;
+}
+
+/* Close the global UDP and TCP DNS server ports and their sockets, if they
+ * are set, and reset the globals to NULL / -1 afterwards. */
 void
 regress_clean_dnsserver(void)
 {
-       if (dns_port) {
-               evdns_close_server_port(dns_port);
-               dns_port = NULL;
+       if (udp_dns_port) {
+               evdns_close_server_port(udp_dns_port);
+               udp_dns_port = NULL;
+       }
+       if (udp_dns_sock >= 0) {
+               evutil_closesocket(udp_dns_sock);
+               udp_dns_sock = -1;
+       }
+
+       if (tcp_dns_port) {
+               evdns_close_server_port(tcp_dns_port);
+               tcp_dns_port = NULL;
        }
-       if (dns_sock >= 0) {
-               evutil_closesocket(dns_sock);
-               dns_sock = -1;
+       if (tcp_dns_sock >= 0) {
+               evutil_closesocket(tcp_dns_sock);
+               tcp_dns_sock = -1;
        }
 }
 
@@ -171,7 +223,11 @@ regress_dns_server_cb(struct evdns_server_request *req, void *data)
 
        if (!strcmp(tab->anstype, "err")) {
                int err = atoi(tab->ans);
-               tt_assert(! evdns_server_request_respond(req, err));
+               if (DNS_ERR_TIMEOUT == err) {
+                       tt_assert(! evdns_server_request_drop(req));
+               } else {
+                       tt_assert(! evdns_server_request_respond(req, err));
+               }
                return;
        } else if (!strcmp(tab->anstype, "errsoa")) {
                int err = atoi(tab->ans);
@@ -191,12 +247,9 @@ regress_dns_server_cb(struct evdns_server_request *req, void *data)
                tt_assert(! evdns_server_request_respond(req, err));
                return;
        } else if (!strcmp(tab->anstype, "A")) {
-               struct in_addr in;
-               if (!evutil_inet_pton(AF_INET, tab->ans, &in)) {
-                       TT_DIE(("Bad A value %s in table", tab->ans));
-               }
-               evdns_server_request_add_a_reply(req, question, 1, &in.s_addr,
-                   100);
+               struct in_addr in[2048];
+               int count = parse_csv_address_list(tab->ans, AF_INET, in, ARRAY_SIZE(in));
+               evdns_server_request_add_a_reply(req, question, count, in, 100);
        } else if (!strcmp(tab->anstype, "AAAA")) {
                struct in6_addr in6;
                if (!evutil_inet_pton(AF_INET6, tab->ans, &in6)) {
@@ -215,11 +268,30 @@ end:
 
+/* Start the global test DNS server(s), answering via regress_dns_server_cb
+ * from the given tables. A TCP port is created when tcp_seach_table is
+ * non-NULL and a UDP port when udp_seach_table is non-NULL; at least one
+ * table is required.
+ *
+ * Returns 1 on success. On failure, everything created so far is released
+ * with regress_clean_dnsserver() and 0 is returned. */
 int
 regress_dnsserver(struct event_base *base, ev_uint16_t *port,
-    struct regress_dns_server_table *search_table)
+       struct regress_dns_server_table *udp_seach_table,
+       struct regress_dns_server_table *tcp_seach_table)
 {
-       dns_port = regress_get_dnsserver(base, port, &dns_sock,
-           regress_dns_server_cb, search_table);
-       return dns_port != NULL;
+       if (!udp_seach_table && !tcp_seach_table)
+               goto error;
+
+       /* TCP goes first: regress_get_tcp_dnsserver() stores the bound port
+        * number in *port when it was 0, and the UDP port created next is
+        * then requested with that same number. */
+       if (tcp_seach_table) {
+               tcp_dns_port = regress_get_tcp_dnsserver(base, port, &tcp_dns_sock,
+                       regress_dns_server_cb, tcp_seach_table);
+               if (!tcp_dns_port)
+                       goto error;
+       }
+
+       if (udp_seach_table) {
+               udp_dns_port = regress_get_udp_dnsserver(base, port, &udp_dns_sock,
+                       regress_dns_server_cb, udp_seach_table);
+               if (!udp_dns_port)
+                       goto error;
+       }
+       return 1;
+
+error:
+       regress_clean_dnsserver();
+       return 0;
 }
 
 int
@@ -231,3 +303,29 @@ regress_get_listener_addr(struct evconnlistener *lev,
                return -1;
        return getsockname(s, sa, socklen);
 }
+
+/* Parse `s`, a comma-separated list of textual IP addresses, into `addrs`.
+ *
+ * `family` must be AF_INET or AF_INET6. `addrs` must point to an array of
+ * `struct in_addr` (AF_INET) or `struct in6_addr` (AF_INET6) with room for
+ * `addrs_size` elements. `s` must be shorter than the 16384-byte internal
+ * buffer.
+ *
+ * Returns the number of addresses stored in `addrs`. A list without any
+ * tokens yields 0. If one of the checks fails, the number of addresses
+ * parsed up to that point is returned. */
+int
+parse_csv_address_list(const char *s, int family, void *addrs, size_t addrs_size)
+{
+       int i = 0;
+       char *token;
+       char buf[16384];
+       void *next_addr;
+       size_t len;
+
+       tt_assert(family == AF_INET || family == AF_INET6);
+       len = strlen(s);
+       tt_assert(len < ARRAY_SIZE(buf));
+       /* strtok() modifies its input, so tokenize a private copy; the
+        * length was checked above, so copying len + 1 bytes is safe. */
+       memcpy(buf, s, len + 1);
+       /* Test the token before using it: strtok() returns NULL right away
+        * for an input that contains no tokens (e.g. an empty string). */
+       for (token = strtok(buf, ","); token; token = strtok(NULL, ",")) {
+               tt_assert((unsigned)i < addrs_size);
+               next_addr = (family == AF_INET) ? (void *)((struct in_addr*)addrs + i)
+                       : (void *)((struct in6_addr*)addrs + i);
+               /* Parse with the requested family (not a fixed AF_INET) so
+                * that AF_INET6 lists are handled as well. */
+               if (evutil_inet_pton(family, token, next_addr) != 1) {
+                       TT_DIE(("Bad %s value %s in table", (family == AF_INET) ? "A" :"AAAA", token));
+               }
+               ++i;
+       }
+end:
+       return i;
+}
index 040516a5858ef2bef29f6d34e3bcd217041f4ce7..562bc454f8e2ee41a658ad6597e9c103d124d15e 100644 (file)
 struct regress_dns_server_table {
        const char *q;
        const char *anstype;
-       const char *ans;
+       const char *ans; /* Comma-separated list of IP numbers (e.g. "1.2.3.4", "1.2.3.4,5.6.7.8") */
        int seen;
        int lower;
 };
 
 struct evdns_server_port *
-regress_get_dnsserver(struct event_base *base,
+regress_get_udp_dnsserver(struct event_base *base,
+    ev_uint16_t *portnum,
+    evutil_socket_t *psock,
+    evdns_request_callback_fn_type cb,
+    void *arg);
+
+struct evdns_server_port *
+regress_get_tcp_dnsserver(struct event_base *base,
     ev_uint16_t *portnum,
     evutil_socket_t *psock,
     evdns_request_callback_fn_type cb,
@@ -51,9 +58,12 @@ int regress_get_socket_port(evutil_socket_t fd);
 void regress_dns_server_cb(
        struct evdns_server_request *req, void *data);
 
-/* globally allocates a dns server that serves from a search table */
+/* Globally allocates a dns server that serves from a search table.
+   TCP and UDP listeners are created on the same port number. If one of the
+   input search tables is NULL, the corresponding listener is not created. */
 int regress_dnsserver(struct event_base *base, ev_uint16_t *port,
-    struct regress_dns_server_table *seach_table);
+    struct regress_dns_server_table *udp_seach_table,
+    struct regress_dns_server_table *tcp_seach_table);
 
 /* clean up the global dns server resources */
 void regress_clean_dnsserver(void);
@@ -63,5 +73,9 @@ struct sockaddr;
 int regress_get_listener_addr(struct evconnlistener *lev,
     struct sockaddr *sa, ev_socklen_t *socklen);
 
+/* Parse comma-separated list of IP addresses. */
+int parse_csv_address_list(const char *s, int family,
+    void *addrs, size_t addrs_size);
+
 #endif /* REGRESS_TESTUTILS_H_INCLUDED_ */