]> granicus.if.org Git - libevent/blob - evdns.c
buffer: use pread() for evbuffer_file_segment_materialize()
[libevent] / evdns.c
1 /* Copyright 2006-2007 Niels Provos
2  * Copyright 2007-2012 Nick Mathewson and Niels Provos
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. The name of the author may not be used to endorse or promote products
13  *    derived from this software without specific prior written permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /* Based on software by Adam Langley. Adam's original message:
28  *
29  * Async DNS Library
30  * Adam Langley <agl@imperialviolet.org>
31  * Public Domain code
32  *
33  * This software is Public Domain. To view a copy of the public domain dedication,
34  * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
35  * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
36  *
37  * I ask and expect, but do not require, that all derivative works contain an
38  * attribution similar to:
39  *      Parts developed by Adam Langley <agl@imperialviolet.org>
40  *
41  * You may wish to replace the word "Parts" with something else depending on
42  * the amount of original code.
43  *
44  * (Derivative works does not include programs which link against, run or include
45  * the source verbatim in their source distributions)
46  *
47  * Version: 0.1b
48  */
49
50 #include "event2/event-config.h"
51 #include "evconfig-private.h"
52
53 #include <sys/types.h>
54
55 #ifndef _FORTIFY_SOURCE
56 #define _FORTIFY_SOURCE 3
57 #endif
58
59 #include <string.h>
60 #include <fcntl.h>
61 #ifdef EVENT__HAVE_SYS_TIME_H
62 #include <sys/time.h>
63 #endif
64 #ifdef EVENT__HAVE_STDINT_H
65 #include <stdint.h>
66 #endif
67 #include <stdlib.h>
68 #include <string.h>
69 #include <errno.h>
70 #ifdef EVENT__HAVE_UNISTD_H
71 #include <unistd.h>
72 #endif
73 #include <limits.h>
74 #include <sys/stat.h>
75 #include <stdio.h>
76 #include <stdarg.h>
77 #ifdef _WIN32
78 #include <winsock2.h>
79 #include <winerror.h>
80 #include <ws2tcpip.h>
81 #ifndef _WIN32_IE
82 #define _WIN32_IE 0x400
83 #endif
84 #include <shlobj.h>
85 #endif
86
87 #include "event2/buffer.h"
88 #include "event2/bufferevent.h"
89 #include "event2/dns.h"
90 #include "event2/dns_struct.h"
91 #include "event2/dns_compat.h"
92 #include "event2/util.h"
93 #include "event2/event.h"
94 #include "event2/event_struct.h"
95 #include "event2/listener.h"
96 #include "event2/thread.h"
97
98 #include "defer-internal.h"
99 #include "log-internal.h"
100 #include "mm-internal.h"
101 #include "strlcpy-internal.h"
102 #include "ipv6-internal.h"
103 #include "util-internal.h"
104 #include "evthread-internal.h"
105 #ifdef _WIN32
106 #include <ctype.h>
107 #include <winsock2.h>
108 #include <windows.h>
109 #include <iphlpapi.h>
110 #include <io.h>
111 #else
112 #include <sys/socket.h>
113 #include <netinet/in.h>
114 #include <arpa/inet.h>
115 #endif
116
117 #ifdef EVENT__HAVE_NETINET_IN6_H
118 #include <netinet/in6.h>
119 #endif
120
121 #define EVDNS_LOG_DEBUG EVENT_LOG_DEBUG
122 #define EVDNS_LOG_WARN EVENT_LOG_WARN
123 #define EVDNS_LOG_MSG EVENT_LOG_MSG
124
125 #ifndef EVDNS_NAME_MAX
126 #define EVDNS_NAME_MAX 255
127 #endif
128
129 #include <stdio.h>
130
131 #undef MIN
132 #undef MAX
133 #define MIN(a,b) ((a)<(b)?(a):(b))
134 #define MAX(a,b) ((a)>(b)?(a):(b))
135
136 #define ASSERT_VALID_REQUEST(req) \
137         EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))
138
139 #define u64 ev_uint64_t
140 #define u32 ev_uint32_t
141 #define u16 ev_uint16_t
142 #define u8  ev_uint8_t
143
144 /* maximum number of addresses from a single packet */
145 /* that we bother recording */
146 #define MAX_V4_ADDRS 32
147 #define MAX_V6_ADDRS 32
148
149 /* Maximum allowable size of a DNS message over UDP without EDNS.*/
150 #define DNS_MAX_UDP_SIZE 512
151 /* Maximum allowable size of a DNS message over UDP with EDNS.*/
152 #define EDNS_MAX_UDP_SIZE 65535
153
154 #define EDNS_ENABLED(base) \
155         (((base)->global_max_udp_size) > DNS_MAX_UDP_SIZE)
156
157 #define TYPE_A         EVDNS_TYPE_A
158 #define TYPE_CNAME     5
159 #define TYPE_PTR       EVDNS_TYPE_PTR
160 #define TYPE_SOA       EVDNS_TYPE_SOA
161 #define TYPE_AAAA      EVDNS_TYPE_AAAA
162 #define TYPE_OPT       41
163
164 #define CLASS_INET     EVDNS_CLASS_INET
165
166 /* Timeout in seconds for idle TCP connections that server keeps alive. */
167 #define SERVER_IDLE_CONN_TIMEOUT 10
168 /* Timeout in seconds for idle TCP connections that client keeps alive. */
169 #define CLIENT_IDLE_CONN_TIMEOUT 5
170 /* Default maximum number of simultaneous TCP client connections that DNS server can hold. */
171 #define MAX_CLIENT_CONNECTIONS 10
172
/* Answer data attached to an evdns_request (see evdns_request.reply).
 * NOTE(review): the code that fills this in is not visible in this part of
 * the file.  The member names suggest 'a' is for A records, 'aaaa' for AAAA
 * records and 'ptr_name' for PTR records -- confirm against the reply
 * parsing code. */
struct reply {
        unsigned int type; /* presumably one of the TYPE_* values -- confirm */
        unsigned int have_answer : 1; /* single-bit flag */
        u32 rr_count; /* presumably the number of records in 'data' -- confirm */
        union {
                u32 *a;
                struct in6_addr *aaaa;
                char *ptr_name;
                void *raw; /* untyped alias for whichever pointer above is in use */
        } data;
        char *cname;
};
185
186
/* Persistent handle.  We keep this separate from 'struct request' since we
 * need some object to last for as long as an evdns_request is outstanding so
 * that it can be canceled, whereas a search request can lead to multiple
 * 'struct request' instances being created over its lifetime. */
struct evdns_request {
        /* The 'struct request' currently acting for this handle.  That
         * request's 'handle' field points back here; ASSERT_VALID_REQUEST
         * checks exactly this pairing. */
        struct request *current_req;
        struct evdns_base *base;

        int pending_cb; /* Waiting for its callback to be invoked; not
                         * owned by event base any more. */

        /* data used when fulfilling the callback */
        struct event_callback deferred;
        evdns_callback_type user_callback;
        void *user_pointer;
        u8 request_type;
        u8 have_reply;
        u32 ttl;
        u32 err;
        struct reply reply;

        /* elements used by the searching code */
        int search_index;
        struct search_state *search_state;
        char *search_origname;  /* needs to be free()ed */
        int search_flags;
        /* NOTE(review): presumably the per-request counterpart of
         * evdns_base.global_tcp_flags (DNS_QUERY_USEVC / DNS_QUERY_IGNTC)
         * -- confirm against the code that sets it. */
        u16 tcp_flags;
};
215
/* One DNS query packet together with the bookkeeping needed to transmit,
 * retransmit and match it to a reply. */
struct request {
        u8 *request;  /* the dns packet data */
        u16 request_size; /* size of memory block stored in request field */
        u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
        unsigned int request_len;
        int reissue_count;
        int tx_count;  /* the number of times that this packet has been sent */
        struct nameserver *ns;  /* the server which we last sent it */

        /* these objects are kept in a circular list */
        /* XXX We could turn this into a CIRCLEQ. */
        struct request *next, *prev;

        struct event timeout_event;

        u16 trans_id;  /* the transaction id */
        unsigned request_appended :1;   /* true if the request pointer is data which follows this struct */
        unsigned transmit_me :1;  /* needs to be transmitted */
        unsigned need_cname :1;   /* make a separate callback for CNAME */

        /* XXXX This is a horrible hack. */
        char **put_cname_in_ptr; /* store the cname here if we get one. */

        struct evdns_base *base;

        /* Owning handle; handle->current_req is expected to point back at
         * this request (see ASSERT_VALID_REQUEST). */
        struct evdns_request *handle;
};
243
/* Connection state stored in tcp_connection.state.  TS_DISCONNECTED is the
 * first enumerator (value 0), so a zero-filled tcp_connection starts out
 * disconnected. */
enum tcp_state {
        TS_DISCONNECTED,
        TS_CONNECTING,
        TS_CONNECTED
};
249
/* A TCP connection, used both for server-side clients (embedded in
 * client_tcp_connection) and for nameservers (nameserver.connection). */
struct tcp_connection {
        struct bufferevent *bev; /* freed and set to NULL by evdns_tcp_disconnect() */
        enum tcp_state state;
        /* Reset to 0 on init and on disconnect.
         * NOTE(review): presumably the length of the DNS message currently
         * expected on the stream -- confirm against the read callbacks. */
        u16 awaiting_packet_size;
};
255
256 struct evdns_server_port;
257
/* One TCP client of a DNS server port.  Created by evdns_add_tcp_client()
 * and destroyed by evdns_remove_tcp_client(). */
struct client_tcp_connection {
        LIST_ENTRY(client_tcp_connection) next; /* link in port->client_connections */
        struct tcp_connection connection;
        struct evdns_server_port *port; /* the port this client belongs to */
};
263
/* State kept for one nameserver that queries can be sent to. */
struct nameserver {
        evutil_socket_t socket;  /* a connected UDP socket */
        /* intended for TCP support; nameserver_failed() frees this and
         * resets it to NULL */
        struct tcp_connection *connection;
        struct sockaddr_storage address;
        ev_socklen_t addrlen;
        /* number of times which we have given this server a chance.
         * Set to 1 by nameserver_failed(), incremented by
         * nameserver_probe_failed() (where it controls how many times the
         * probe delay is multiplied), and reset to 0 by nameserver_up(). */
        int failed_times;
        int timedout;  /* number of times in a row a request has timed out */
        struct event event;
        /* these objects are kept in a circular list */
        struct nameserver *next, *prev;
        struct event timeout_event;  /* used to keep the timeout for */
                                     /* when we next probe this server. */
                                     /* Valid if state == 0 */
        /* Outstanding probe request for this nameserver, if any */
        struct evdns_request *probe_request;
        char state;  /* zero if we think that this server is down */
        char choked;  /* true if we have an EAGAIN from this server's socket */
        char write_waiting;  /* true if we are waiting for EV_WRITE events */
        struct evdns_base *base;

        /* Number of currently inflight requests: used
         * to track when we should add/del the event. */
        int requests_inflight;
};
288
289
/* Represents a local port where we're listening for DNS requests. */
struct evdns_server_port {
        evutil_socket_t socket; /* socket we use to read queries and write replies. */
        /* reference count.  Every TCP client added by evdns_add_tcp_client()
         * holds one reference, dropped again by evdns_remove_tcp_client(). */
        int refcnt;
        char choked; /* Are we currently blocked from writing? */
        char closing; /* Are we trying to close this port, pending writes? */
        evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
        void *user_data; /* Opaque pointer passed to user_callback */
        struct event event; /* Read/write event */
        /* circular list of replies that we want to write. */
        struct server_request *pending_replies;
        struct event_base *event_base;

        /* Structures for tcp support */
        struct evconnlistener *listener;
        /* List of currently connected TCP clients. */
        LIST_HEAD(client_list, client_tcp_connection) client_connections;
        /* Number of entries in client_connections. */
        unsigned client_connections_count;
        /* evdns_add_tcp_client() refuses new clients once the count reaches
         * this value. */
        unsigned max_client_connections;
        /* NOTE(review): presumably how long an idle client connection is
         * kept (cf. SERVER_IDLE_CONN_TIMEOUT) -- confirm where it is set. */
        struct timeval tcp_idle_timeout;

#ifndef EVENT__DISABLE_THREAD_SUPPORT
        void *lock;
#endif
};
314
/* Represents part of a reply being built.  (That is, a single RR.) */
struct server_reply_item {
        struct server_reply_item *next; /* next item in sequence. */
        char *name; /* name part of the RR */
        u16 type; /* The RR type */
        u16 class; /* The RR class (usually CLASS_INET) */
        u32 ttl; /* The RR TTL */
        char is_name; /* True iff data is a label */
        /* Length of data.
         * NOTE(review): the original comment said "-1 if data is a label",
         * but this field is an unsigned 16-bit integer, so -1 can only be
         * stored as 0xffff; is_name is the explicit label flag.  Confirm
         * which of the two the reply-formatting code actually tests. */
        u16 datalen;
        void *data; /* The contents of the RR */
};
326
/* Represents a request that we've received as a DNS server, and holds */
/* the components of the reply as we're constructing it. */
struct server_request {
        /* Pointers to the next and previous entries on the list of replies */
        /* that we're waiting to write.  Only set if we have tried to respond */
        /* and gotten EAGAIN. */
        struct server_request *next_pending;
        struct server_request *prev_pending;

        u16 trans_id; /* Transaction id. */
        struct evdns_server_port *port; /* Which port received this request on? */
        struct client_tcp_connection *client; /* Equal to NULL in case of UDP connection. */
        /* Where to send the response in case of UDP.
         * NOTE(review): the original comment said this is "equal to NULL"
         * for TCP, but addr is an embedded struct, not a pointer, so it
         * cannot be NULL; presumably it is simply unused (or zeroed) for
         * TCP requests -- confirm. */
        struct sockaddr_storage addr;
        ev_socklen_t addrlen; /* length of addr */
        u16 max_udp_reply_size; /* Maximum size of udp reply that client can handle. */

        int n_answer; /* how many answer RRs have been set? */
        int n_authority; /* how many authority RRs have been set? */
        int n_additional; /* how many additional RRs have been set? */

        struct server_reply_item *answer; /* linked list of answer RRs */
        struct server_reply_item *authority; /* linked list of authority RRs */
        struct server_reply_item *additional; /* linked list of additional RRs */

        /* Constructed response.  Only set once we're ready to send a reply. */
        /* Once this is set, the RR fields are cleared, and no more should be set. */
        char *response;
        size_t response_len;

        /* Caller-visible fields: flags, questions. */
        /* TO_SERVER_REQUEST() recovers the enclosing server_request from a
         * pointer to this member. */
        struct evdns_server_request base;
};
359
/* Resolver state: the nameserver list, the inflight and waiting request
 * queues, and the tunable settings that apply to them. */
struct evdns_base {
        /* An array of n_req_heads circular lists for inflight requests.
         * Each inflight request req is in req_heads[req->trans_id % n_req_heads].
         */
        struct request **req_heads;
        /* A circular list of requests that we're waiting to send, but haven't
         * sent yet because there are too many requests inflight */
        struct request *req_waiting_head;
        /* A circular list of nameservers. */
        struct nameserver *server_head;
        /* Number of buckets in req_heads (the modulus used by REQ_HEAD). */
        int n_req_heads;

        struct event_base *event_base;

        /* The number of good nameservers that we have */
        int global_good_nameservers;

        /* inflight requests are contained in the req_heads lists */
        /* and are actually going out across the network */
        int global_requests_inflight;
        /* requests which aren't inflight are in the waiting list */
        /* and are counted here */
        int global_requests_waiting;

        int global_max_requests_inflight;

        struct timeval global_timeout;  /* 5 seconds by default */
        int global_max_reissues;  /* a reissue occurs when we get some errors from the server */
        int global_max_retransmits;  /* number of times we'll retransmit a request which timed out */
        /* number of timeouts in a row before we consider this server to be down */
        int global_max_nameserver_timeout;
        /* true iff we will use the 0x20 hack to prevent poisoning attacks. */
        int global_randomize_case;
        /* Maximum size of a UDP DNS packet.  EDNS_ENABLED() is true when
         * this is larger than DNS_MAX_UDP_SIZE. */
        u16 global_max_udp_size;

        /* The first time that a nameserver fails, how long do we wait before
         * probing to see if it has returned?  */
        struct timeval global_nameserver_probe_initial_timeout;

        /* Combination of DNS_QUERY_USEVC, DNS_QUERY_IGNTC flags
         * to control requests via TCP. */
        u16 global_tcp_flags;
        /* Idle timeout for outgoing TCP connections. */
        struct timeval global_tcp_idle_timeout;

        /** Port to bind to for outgoing DNS packets. */
        struct sockaddr_storage global_outgoing_address;
        /** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
        ev_socklen_t global_outgoing_addrlen;

        struct timeval global_getaddrinfo_allow_skew;

        int so_rcvbuf;
        int so_sndbuf;

        int getaddrinfo_ipv4_timeouts;
        int getaddrinfo_ipv6_timeouts;
        int getaddrinfo_ipv4_answered;
        int getaddrinfo_ipv6_answered;

        struct search_state *global_search_state;

        TAILQ_HEAD(hosts_list, hosts_entry) hostsdb;

#ifndef EVENT__DISABLE_THREAD_SUPPORT
        /* Lock taken by EVDNS_LOCK() / released by EVDNS_UNLOCK(). */
        void *lock;
#endif

        /* When set, nameserver_failed() does not re-add the nameserver's
         * socket event after recreating the socket. */
        int disable_when_inactive;

        /* Maximum timeout between two probe packets
         * will change `global_nameserver_probe_initial_timeout`
         * when this value is smaller.
         * Compared against timeval.tv_sec in nameserver_probe_failed(), so
         * the unit is seconds. */
        int ns_max_probe_timeout;
        /* Backoff factor of probe timeout: the probe delay is multiplied by
         * this value in nameserver_probe_failed(). */
        int ns_timeout_backoff_factor;
};
438
/* One element of evdns_base.hostsdb: an address paired with a hostname. */
struct hosts_entry {
        TAILQ_ENTRY(hosts_entry) next;
        union {
                struct sockaddr sa;
                struct sockaddr_in sin;
                struct sockaddr_in6 sin6;
        } addr;
        int addrlen;
        /* NOTE(review): declared with one element; presumably entries are
         * allocated with extra trailing space so the full name fits here
         * (the pre-C99 "struct hack") -- confirm at the allocation site
         * before changing this declaration. */
        char hostname[1];
};
449
/* The base handed out by evdns_get_global_base().  Starts out NULL; the
 * code that assigns it is not in this part of the file. */
static struct evdns_base *current_base = NULL;

/* Return the current global evdns_base, which may be NULL. */
struct evdns_base *
evdns_get_global_base(void)
{
        return current_base;
}
457
458 /* Given a pointer to an evdns_server_request, get the corresponding */
459 /* server_request. */
460 #define TO_SERVER_REQUEST(base_ptr)                                     \
461         ((struct server_request*)                                       \
462           (((char*)(base_ptr) - evutil_offsetof(struct server_request, base))))
463
/* Head of the inflight-request list (bucket) that holds requests whose
 * transaction id is 'id'.  Both arguments are parenthesized so that an
 * expression argument such as REQ_HEAD(b, x + 1) is reduced modulo
 * n_req_heads as a whole instead of only its last operand. */
#define REQ_HEAD(base, id) ((base)->req_heads[(id) % (base)->n_req_heads])
465
466 static struct nameserver *nameserver_pick(struct evdns_base *base);
467 static void evdns_request_insert(struct request *req, struct request **head);
468 static void evdns_request_remove(struct request *req, struct request **head);
469 static void nameserver_ready_callback(evutil_socket_t fd, short events, void *arg);
470 static int evdns_transmit(struct evdns_base *base);
471 static int evdns_request_transmit(struct request *req);
472 static void nameserver_send_probe(struct nameserver *const ns);
473 static void search_request_finished(struct evdns_request *const);
474 static int search_try_next(struct evdns_request *const req);
475 static struct request *search_request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *const name, int flags);
476 static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
477 static u16 transaction_id_pick(struct evdns_base *base);
478 static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags);
479 static struct request *request_clone(struct evdns_base *base, struct request* current);
480 static void request_submit(struct request *const req);
481
482 static int server_request_free(struct server_request *req);
483 static void server_request_free_answers(struct server_request *req);
484 static void server_port_free(struct evdns_server_port *port);
485 static void server_port_ready_callback(evutil_socket_t fd, short events, void *arg);
486 static int evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename);
487 static int evdns_base_set_option_impl(struct evdns_base *base,
488     const char *option, const char *val, int flags);
489 static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);
490 static void evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg);
491 static int evdns_server_request_format_response(struct server_request *req, int err);
492 static void incoming_conn_cb(struct evconnlistener *listener, evutil_socket_t fd,
493     struct sockaddr *address, int socklen, void *arg);
494
495 static int strtoint(const char *const str);
496
497 #ifdef EVENT__DISABLE_THREAD_SUPPORT
498 #define EVDNS_LOCK(base)  EVUTIL_NIL_STMT_
499 #define EVDNS_UNLOCK(base) EVUTIL_NIL_STMT_
500 #define ASSERT_LOCKED(base) EVUTIL_NIL_STMT_
501 #else
502 #define EVDNS_LOCK(base)                        \
503         EVLOCK_LOCK((base)->lock, 0)
504 #define EVDNS_UNLOCK(base)                      \
505         EVLOCK_UNLOCK((base)->lock, 0)
506 #define ASSERT_LOCKED(base)                     \
507         EVLOCK_ASSERT_LOCKED((base)->lock)
508 #endif
509
/* Optional user-supplied log sink.  When non-NULL, evdns_log_() formats
 * each message and passes it here instead of to event_logv_(). */
static evdns_debug_log_fn_type evdns_log_fn = NULL;

/* Install 'fn' as the log sink for this file's log messages.  Passing NULL
 * restores the default path through event_logv_(). */
void
evdns_set_log_fn(evdns_debug_log_fn_type fn)
{
        evdns_log_fn = fn;
}
517
518 #ifdef __GNUC__
519 #define EVDNS_LOG_CHECK  __attribute__ ((format(printf, 2, 3)))
520 #else
521 #define EVDNS_LOG_CHECK
522 #endif
523
524 static void evdns_log_(int severity, const char *fmt, ...) EVDNS_LOG_CHECK;
525 static void
526 evdns_log_(int severity, const char *fmt, ...)
527 {
528         va_list args;
529         va_start(args,fmt);
530         if (evdns_log_fn) {
531                 char buf[512];
532                 int is_warn = (severity == EVDNS_LOG_WARN);
533                 evutil_vsnprintf(buf, sizeof(buf), fmt, args);
534                 evdns_log_fn(is_warn, buf);
535         } else {
536                 event_logv_(severity, NULL, fmt, args);
537         }
538         va_end(args);
539 }
540
541 #define log evdns_log_
542
543 /* Initialize tcp_connection structure. */
544 static void
545 init_tcp_connection(struct tcp_connection *conn, struct bufferevent *bev)
546 {
547         memset(conn, 0, sizeof(*conn));
548         conn->state = TS_DISCONNECTED;
549         conn->bev = bev;
550         conn->awaiting_packet_size = 0;
551 }
552
553 /* Disconnect tcp connection. */
554 static void
555 evdns_tcp_disconnect(struct tcp_connection *conn)
556 {
557         if (!conn)
558                 return;
559         conn->state = TS_DISCONNECTED;
560         conn->awaiting_packet_size = 0;
561         if (conn->bev) {
562                 bufferevent_free(conn->bev);
563                 conn->bev = NULL;
564         }
565 }
566
567 /* Add new tcp client to the list of TCP clients in the TCP DNS server. */
568 static struct client_tcp_connection*
569 evdns_add_tcp_client(struct evdns_server_port *port, struct bufferevent *bev)
570 {
571         struct client_tcp_connection *client;
572         EVUTIL_ASSERT(port && bev);
573         if (port->max_client_connections == port->client_connections_count)
574                 goto error;
575
576         client = mm_calloc(1, sizeof(*client));
577         if (!client)
578                 goto error;
579         init_tcp_connection(&client->connection, bev);
580         client->port = port;
581         LIST_INSERT_HEAD(&port->client_connections, client, next);
582
583         ++port->client_connections_count;
584         /* we need to hold evdns_server_port as long as one connection at least stays alive */
585         ++port->refcnt;
586         return client;
587 error:
588         return NULL;
589 }
590
591 /* Remove tcp client and free all associated data from the TCP DNS server. */
592 static int
593 evdns_remove_tcp_client(struct evdns_server_port *port, struct client_tcp_connection *client)
594 {
595         if (!port || !client)
596                 goto error;
597
598         evdns_tcp_disconnect(&client->connection);
599         LIST_REMOVE(client, next);
600         mm_free(client);
601         --port->client_connections_count;
602         --port->refcnt;
603         return 0;
604 error:
605         return -1;
606 }
607
608 /* Remove all tcp clients and free all associated data from the TCP DNS server. */
609 static void
610 evdns_remove_all_tcp_clients(struct evdns_server_port *port)
611 {
612         struct client_tcp_connection *client;
613         while ((client = LIST_FIRST(&port->client_connections))) {
614                 evdns_remove_tcp_client(port, client);
615         }
616 }
617
618 /* Create new tcp connection structure for DNS client. */
619 static struct tcp_connection *
620 new_tcp_connecton(struct bufferevent *bev)
621 {
622         struct tcp_connection *conn;
623         if (!bev)
624                 return NULL;
625
626         conn = mm_calloc(1, sizeof(*conn));
627         if (!conn)
628                 return NULL;
629         init_tcp_connection(conn, bev);
630         return conn;
631 }
632
633 /* Disconnect and free all associated data for the tcp connection in DNS client. */
634 static void
635 disconnect_and_free_connection(struct tcp_connection *conn)
636 {
637         if (!conn)
638                 return;
639         evdns_tcp_disconnect(conn);
640         mm_free(conn);
641 }
642
643 /* This walks the list of inflight requests to find the */
644 /* one with a matching transaction id. Returns NULL on */
645 /* failure */
646 static struct request *
647 request_find_from_trans_id(struct evdns_base *base, u16 trans_id) {
648         struct request *req = REQ_HEAD(base, trans_id);
649         struct request *const started_at = req;
650
651         ASSERT_LOCKED(base);
652
653         if (req) {
654                 do {
655                         if (req->trans_id == trans_id) return req;
656                         req = req->next;
657                 } while (req != started_at);
658         }
659
660         return NULL;
661 }
662
663 /* a libevent callback function which is called when a nameserver */
664 /* has gone down and we want to test if it has came back to life yet */
665 static void
666 nameserver_prod_callback(evutil_socket_t fd, short events, void *arg) {
667         struct nameserver *const ns = (struct nameserver *) arg;
668         (void)fd;
669         (void)events;
670
671         EVDNS_LOCK(ns->base);
672         nameserver_send_probe(ns);
673         EVDNS_UNLOCK(ns->base);
674 }
675
676 /* a libevent callback which is called when a nameserver probe (to see if */
677 /* it has come back to life) times out. We increment the count of failed_times */
678 /* and wait longer to send the next probe packet. */
679 static void
680 nameserver_probe_failed(struct nameserver *const ns) {
681         struct timeval timeout;
682         int i;
683
684         ASSERT_LOCKED(ns->base);
685         (void) evtimer_del(&ns->timeout_event);
686         if (ns->state == 1) {
687                 /* This can happen if the nameserver acts in a way which makes us mark */
688                 /* it as bad and then starts sending good replies. */
689                 return;
690         }
691
692         memcpy(&timeout, &ns->base->global_nameserver_probe_initial_timeout,
693             sizeof(struct timeval));
694         for (i = ns->failed_times; i > 0 && timeout.tv_sec < ns->base->ns_max_probe_timeout; --i) {
695                 timeout.tv_sec *= ns->base->ns_timeout_backoff_factor;
696                 timeout.tv_usec *= ns->base->ns_timeout_backoff_factor;
697                 if (timeout.tv_usec > 1000000) {
698                         timeout.tv_sec += timeout.tv_usec / 1000000;
699                         timeout.tv_usec %= 1000000;
700                 }
701         }
702         if (timeout.tv_sec > ns->base->ns_max_probe_timeout) {
703                 timeout.tv_sec = ns->base->ns_max_probe_timeout;
704                 timeout.tv_usec = 0;
705         }
706
707         ns->failed_times++;
708
709         if (evtimer_add(&ns->timeout_event, &timeout) < 0) {
710                 char addrbuf[128];
711                 log(EVDNS_LOG_WARN,
712                     "Error from libevent when adding timer event for %s",
713                     evutil_format_sockaddr_port_(
714                             (struct sockaddr *)&ns->address,
715                             addrbuf, sizeof(addrbuf)));
716         }
717 }
718
719 static void
720 request_swap_ns(struct request *req, struct nameserver *ns) {
721         if (ns && req->ns != ns) {
722                 EVUTIL_ASSERT(req->ns->requests_inflight > 0);
723                 req->ns->requests_inflight--;
724                 ns->requests_inflight++;
725
726                 req->ns = ns;
727         }
728 }
729
730 /* called when a nameserver has been deemed to have failed. For example, too */
731 /* many packets have timed out etc */
732 static void
733 nameserver_failed(struct nameserver *const ns, const char *msg, int err) {
734         struct request *req, *started_at;
735         struct evdns_base *base = ns->base;
736         int i;
737         char addrbuf[128];
738
739         ASSERT_LOCKED(base);
740         /* if this nameserver has already been marked as failed */
741         /* then don't do anything */
742         if (!ns->state) return;
743
744         log(EVDNS_LOG_MSG, "Nameserver %s has failed: %s",
745             evutil_format_sockaddr_port_(
746                     (struct sockaddr *)&ns->address,
747                     addrbuf, sizeof(addrbuf)),
748             msg);
749
750         base->global_good_nameservers--;
751         EVUTIL_ASSERT(base->global_good_nameservers >= 0);
752         if (base->global_good_nameservers == 0) {
753                 log(EVDNS_LOG_MSG, "All nameservers have failed");
754         }
755
756         ns->state = 0;
757         ns->failed_times = 1;
758
759         if (ns->connection) {
760                 disconnect_and_free_connection(ns->connection);
761                 ns->connection = NULL;
762         } else if (err == ENOTCONN) {
763                 /* XXX: If recvfrom results in ENOTCONN, the socket remains readable
764                  * which triggers another recvfrom. The observed behavior is 100% CPU use.
765                  * This occurs on iOS (kqueue) after the process has been backgrounded
766                  * for a long time (~300 seconds) and then resumed.
767                  * All sockets, TCP and UDP, seem to get ENOTCONN and must be closed.
768                  * https://github.com/libevent/libevent/issues/265 */
769                 const struct sockaddr *address = (const struct sockaddr *)&ns->address;
770                 evutil_closesocket(ns->socket);
771                 ns->socket = evutil_socket_(address->sa_family,
772                         SOCK_DGRAM | EVUTIL_SOCK_NONBLOCK | EVUTIL_SOCK_CLOEXEC, 0);
773
774                 if (base->global_outgoing_addrlen &&
775                         !evutil_sockaddr_is_loopback_(address)) {
776                         if (bind(ns->socket,
777                                         (struct sockaddr *)&base->global_outgoing_address,
778                                         base->global_outgoing_addrlen) < 0) {
779                                 log(EVDNS_LOG_WARN, "Couldn't bind to outgoing address");
780                         }
781                 }
782
783                 event_del(&ns->event);
784                 event_assign(&ns->event, ns->base->event_base, ns->socket,
785                         EV_READ | (ns->write_waiting ? EV_WRITE : 0) | EV_PERSIST,
786                         nameserver_ready_callback, ns);
787                 if (!base->disable_when_inactive && event_add(&ns->event, NULL) < 0) {
788                         log(EVDNS_LOG_WARN, "Couldn't add %s event",
789                                 ns->write_waiting ? "rw": "read");
790                 }
791         }
792         if (evtimer_add(&ns->timeout_event,
793                 &base->global_nameserver_probe_initial_timeout) < 0) {
794                 log(EVDNS_LOG_WARN,
795                     "Error from libevent when adding timer event for %s",
796                     evutil_format_sockaddr_port_(
797                             (struct sockaddr *)&ns->address,
798                             addrbuf, sizeof(addrbuf)));
799                 /* ???? Do more? */
800         }
801
802         /* walk the list of inflight requests to see if any can be reassigned to */
803         /* a different server. Requests in the waiting queue don't have a */
804         /* nameserver assigned yet */
805
806         /* if we don't have *any* good nameservers then there's no point */
807         /* trying to reassign requests to one */
808         if (!base->global_good_nameservers) return;
809
810         for (i = 0; i < base->n_req_heads; ++i) {
811                 req = started_at = base->req_heads[i];
812                 if (req) {
813                         do {
814                                 if (req->tx_count == 0 && req->ns == ns) {
815                                         /* still waiting to go out, can be moved */
816                                         /* to another server */
817                                         request_swap_ns(req, nameserver_pick(base));
818                                 }
819                                 req = req->next;
820                         } while (req != started_at);
821                 }
822         }
823 }
824
825 static void
826 nameserver_up(struct nameserver *const ns)
827 {
828         char addrbuf[128];
829         ASSERT_LOCKED(ns->base);
830         if (ns->state) return;
831         log(EVDNS_LOG_MSG, "Nameserver %s is back up",
832             evutil_format_sockaddr_port_(
833                     (struct sockaddr *)&ns->address,
834                     addrbuf, sizeof(addrbuf)));
835         evtimer_del(&ns->timeout_event);
836         if (ns->probe_request) {
837                 evdns_cancel_request(ns->base, ns->probe_request);
838                 ns->probe_request = NULL;
839         }
840         ns->state = 1;
841         ns->failed_times = 0;
842         ns->timedout = 0;
843         ns->base->global_good_nameservers++;
844 }
845
846 static void
847 request_trans_id_set(struct request *const req, const u16 trans_id) {
848         req->trans_id = trans_id;
849         *((u16 *) req->request) = htons(trans_id);
850 }
851
852 /* Called to remove a request from a list and dealloc it. */
853 /* head is a pointer to the head of the list it should be */
854 /* removed from or NULL if the request isn't in a list. */
855 /* when free_handle is one, free the handle as well. */
856 static void
857 request_finished(struct request *const req, struct request **head, int free_handle) {
858         struct evdns_base *base = req->base;
859         int was_inflight = (head != &base->req_waiting_head);
860         EVDNS_LOCK(base);
861         ASSERT_VALID_REQUEST(req);
862
863         if (head)
864                 evdns_request_remove(req, head);
865
866         log(EVDNS_LOG_DEBUG, "Removing timeout for request %p", (void *)req);
867         if (was_inflight) {
868                 evtimer_del(&req->timeout_event);
869                 base->global_requests_inflight--;
870                 req->ns->requests_inflight--;
871         } else {
872                 base->global_requests_waiting--;
873         }
874         /* it was initialized during request_new / evtimer_assign */
875         event_debug_unassign(&req->timeout_event);
876
877         if (req->ns &&
878             req->ns->requests_inflight == 0 &&
879             req->base->disable_when_inactive) {
880                 event_del(&req->ns->event);
881                 evtimer_del(&req->ns->timeout_event);
882         }
883
884         if (!req->request_appended) {
885                 /* need to free the request data on it's own */
886                 mm_free(req->request);
887         } else {
888                 /* the request data is appended onto the header */
889                 /* so everything gets free()ed when we: */
890         }
891
892         if (req->handle) {
893                 EVUTIL_ASSERT(req->handle->current_req == req);
894
895                 if (free_handle) {
896                         search_request_finished(req->handle);
897                         req->handle->current_req = NULL;
898                         if (! req->handle->pending_cb) {
899                                 /* If we're planning to run the callback,
900                                  * don't free the handle until later. */
901                                 mm_free(req->handle);
902                         }
903                         req->handle = NULL; /* If we have a bug, let's crash
904                                              * early */
905                 } else {
906                         req->handle->current_req = NULL;
907                 }
908         }
909
910         mm_free(req);
911
912         evdns_requests_pump_waiting_queue(base);
913         EVDNS_UNLOCK(base);
914 }
915
916 /* This is called when a server returns a funny error code. */
917 /* We try the request again with another server. */
918 /* */
919 /* return: */
920 /*   0 ok */
921 /*   1 failed/reissue is pointless */
922 static int
923 request_reissue(struct request *req) {
924         const struct nameserver *const last_ns = req->ns;
925         ASSERT_LOCKED(req->base);
926         ASSERT_VALID_REQUEST(req);
927         /* the last nameserver should have been marked as failing */
928         /* by the caller of this function, therefore pick will try */
929         /* not to return it */
930         request_swap_ns(req, nameserver_pick(req->base));
931         if (req->ns == last_ns) {
932                 /* ... but pick did return it */
933                 /* not a lot of point in trying again with the */
934                 /* same server */
935                 return 1;
936         }
937
938         req->reissue_count++;
939         req->tx_count = 0;
940         req->transmit_me = 1;
941
942         return 0;
943 }
944
945 /* this function looks for space on the inflight queue and promotes */
946 /* requests from the waiting queue if it can. */
947 /* */
948 /* TODO: */
949 /* add return code, see at nameserver_pick() and other functions. */
950 static void
951 evdns_requests_pump_waiting_queue(struct evdns_base *base) {
952         ASSERT_LOCKED(base);
953         while (base->global_requests_inflight < base->global_max_requests_inflight &&
954                    base->global_requests_waiting) {
955                 struct request *req;
956
957                 EVUTIL_ASSERT(base->req_waiting_head);
958                 req = base->req_waiting_head;
959
960                 req->ns = nameserver_pick(base);
961                 if (!req->ns)
962                         return;
963
964                 /* move a request from the waiting queue to the inflight queue */
965                 req->ns->requests_inflight++;
966
967                 evdns_request_remove(req, &base->req_waiting_head);
968
969                 base->global_requests_waiting--;
970                 base->global_requests_inflight++;
971
972                 request_trans_id_set(req, transaction_id_pick(base));
973
974                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
975                 evdns_request_transmit(req);
976                 evdns_transmit(base);
977         }
978 }
979
980 static void
981 reply_run_callback(struct event_callback *d, void *user_pointer)
982 {
983         struct evdns_request *handle =
984             EVUTIL_UPCAST(d, struct evdns_request, deferred);
985
986         switch (handle->request_type) {
987         case TYPE_A:
988                 if (handle->have_reply) {
989                         handle->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
990                             handle->reply.rr_count, handle->ttl,
991                             handle->reply.data.a,
992                             user_pointer);
993                         if (handle->reply.cname)
994                                 handle->user_callback(DNS_ERR_NONE, DNS_CNAME, 1,
995                                     handle->ttl, handle->reply.cname, user_pointer);
996                 } else
997                         handle->user_callback(handle->err, 0, 0, handle->ttl, NULL, user_pointer);
998                 break;
999         case TYPE_PTR:
1000                 if (handle->have_reply) {
1001                         char *name = handle->reply.data.ptr_name;
1002                         handle->user_callback(DNS_ERR_NONE, DNS_PTR, 1, handle->ttl,
1003                             &name, user_pointer);
1004                 } else {
1005                         handle->user_callback(handle->err, 0, 0, handle->ttl, NULL, user_pointer);
1006                 }
1007                 break;
1008         case TYPE_AAAA:
1009                 if (handle->have_reply) {
1010                         handle->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
1011                             handle->reply.rr_count, handle->ttl,
1012                             handle->reply.data.aaaa,
1013                             user_pointer);
1014                         if (handle->reply.cname)
1015                                 handle->user_callback(DNS_ERR_NONE, DNS_CNAME, 1,
1016                                     handle->ttl, handle->reply.cname, user_pointer);
1017                 } else
1018                         handle->user_callback(handle->err, 0, 0, handle->ttl, NULL, user_pointer);
1019                 break;
1020         default:
1021                 EVUTIL_ASSERT(0);
1022         }
1023
1024         if (handle->reply.data.raw) {
1025                 mm_free(handle->reply.data.raw);
1026         }
1027
1028         if (handle->reply.cname) {
1029                 mm_free(handle->reply.cname);
1030         }
1031
1032         mm_free(handle);
1033 }
1034
1035 static void
1036 reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply)
1037 {
1038         struct evdns_request* handle = req->handle;
1039
1040         ASSERT_LOCKED(req->base);
1041
1042         handle->request_type = req->request_type;
1043         handle->ttl = ttl;
1044         handle->err = err;
1045         if (reply) {
1046                 handle->have_reply = 1;
1047                 memcpy(&handle->reply, reply, sizeof(struct reply));
1048                 /* We've taken ownership of the data. */
1049                 reply->data.raw = NULL;
1050         }
1051
1052         handle->pending_cb = 1;
1053
1054         event_deferred_cb_init_(
1055             &handle->deferred,
1056             event_get_priority(&req->timeout_event),
1057             reply_run_callback,
1058             handle->user_pointer);
1059         event_deferred_cb_schedule_(
1060                 req->base->event_base,
1061                 &handle->deferred);
1062 }
1063
1064 static int
1065 client_retransmit_through_tcp(struct evdns_request *handle)
1066 {
1067         struct request *req = handle->current_req;
1068         struct evdns_base *base = req->base;
1069         struct request *newreq = request_clone(base, req);
1070         ASSERT_LOCKED(base);
1071         if (!newreq)
1072                 return 1;
1073         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
1074         handle->current_req = newreq;
1075         newreq->handle = handle;
1076         request_submit(newreq);
1077         return 0;
1078 }
1079
/* Bit masks for the 16-bit flags word of a DNS message header. */
#define _QR_MASK    (1U << 15)		/* 0x8000: set in responses */
#define _OP_MASK    (0xfU << 11)	/* 0x7800: opcode */
#define _AA_MASK    (1U << 10)		/* 0x0400: authoritative answer */
#define _TC_MASK    (1U << 9)		/* 0x0200: truncated */
#define _RD_MASK    (1U << 8)		/* 0x0100: recursion desired */
#define _RA_MASK    (1U << 7)		/* 0x0080: recursion available */
#define _Z_MASK     (1U << 6)		/* 0x0040: reserved bit */
#define _AD_MASK    (1U << 5)		/* 0x0020: authentic data */
#define _CD_MASK    (1U << 4)		/* 0x0010: checking disabled */
#define _RCODE_MASK 0xfU		/* 0x000f: response code */
/* 0x0070: the bits covered by _Z_MASK, _AD_MASK and _CD_MASK together */
#define _Z_MASK_DEPRECATED (0x7U << 4)
1091
1092 /* this processes a parsed reply packet */
1093 static void
1094 reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
1095         int error;
1096         char addrbuf[128];
1097         int retransmit_via_tcp = 0;
1098         static const int error_codes[] = {
1099                 DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
1100                 DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
1101         };
1102
1103         ASSERT_LOCKED(req->base);
1104         ASSERT_VALID_REQUEST(req);
1105
1106         if (flags & (_RCODE_MASK | _TC_MASK) || !reply || !reply->have_answer) {
1107                 /* there was an error */
1108                 if (flags & _TC_MASK) {
1109                         error = DNS_ERR_TRUNCATED;
1110                         retransmit_via_tcp = (req->handle->tcp_flags & (DNS_QUERY_IGNTC | DNS_QUERY_USEVC)) == 0;
1111                 } else if (flags & _RCODE_MASK) {
1112                         u16 error_code = (flags & _RCODE_MASK) - 1;
1113                         if (error_code > 4) {
1114                                 error = DNS_ERR_UNKNOWN;
1115                         } else {
1116                                 error = error_codes[error_code];
1117                         }
1118                 } else if (reply && !reply->have_answer) {
1119                         error = DNS_ERR_NODATA;
1120                 } else {
1121                         error = DNS_ERR_UNKNOWN;
1122                 }
1123
1124                 switch (error) {
1125                 case DNS_ERR_NOTIMPL:
1126                 case DNS_ERR_REFUSED:
1127                         /* we regard these errors as marking a bad nameserver */
1128                         if (req->reissue_count < req->base->global_max_reissues) {
1129                                 char msg[64];
1130                                 evutil_snprintf(msg, sizeof(msg), "Bad response %d (%s)",
1131                                          error, evdns_err_to_string(error));
1132                                 nameserver_failed(req->ns, msg, 0);
1133                                 if (!request_reissue(req)) return;
1134                         }
1135                         break;
1136                 case DNS_ERR_SERVERFAILED:
1137                         /* rcode 2 (servfailed) sometimes means "we
1138                          * are broken" and sometimes (with some binds)
1139                          * means "that request was very confusing."
1140                          * Treat this as a timeout, not a failure.
1141                          */
1142                         log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver"
1143                                 "at %s; will allow the request to time out.",
1144                             evutil_format_sockaddr_port_(
1145                                     (struct sockaddr *)&req->ns->address,
1146                                     addrbuf, sizeof(addrbuf)));
1147                         /* Call the timeout function */
1148                         evdns_request_timeout_callback(0, 0, req);
1149                         return;
1150                 default:
1151                         /* we got a good reply from the nameserver: it is up. */
1152                         if (req->handle == req->ns->probe_request) {
1153                                 /* Avoid double-free */
1154                                 req->ns->probe_request = NULL;
1155                         }
1156
1157                         nameserver_up(req->ns);
1158                 }
1159
1160                 if (retransmit_via_tcp) {
1161                         log(EVDNS_LOG_DEBUG, "Recieved truncated reply(flags 0x%x, transanc ID: %d). Retransmiting via TCP.",
1162                                 req->handle->tcp_flags, req->trans_id);
1163                         req->handle->tcp_flags |= DNS_QUERY_USEVC;
1164                         client_retransmit_through_tcp(req->handle);
1165                         return;
1166                 }
1167
1168                 if (req->handle->search_state &&
1169                     req->request_type != TYPE_PTR) {
1170                         /* if we have a list of domains to search in,
1171                          * try the next one */
1172                         if (!search_try_next(req->handle)) {
1173                                 /* a new request was issued so this
1174                                  * request is finished and */
1175                                 /* the user callback will be made when
1176                                  * that request (or a */
1177                                 /* child of it) finishes. */
1178                                 return;
1179                         }
1180                 }
1181
1182                 /* all else failed. Pass the failure up */
1183                 reply_schedule_callback(req, ttl, error, NULL);
1184                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
1185         } else {
1186                 /* all ok, tell the user */
1187                 reply_schedule_callback(req, ttl, 0, reply);
1188                 if (req->handle == req->ns->probe_request)
1189                         req->ns->probe_request = NULL; /* Avoid double-free */
1190                 nameserver_up(req->ns);
1191                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
1192         }
1193 }
1194
1195 static int
1196 name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
1197         int name_end = -1;
1198         int j = *idx;
1199         int ptr_count = 0;
1200 #define GET32(x) do { if (j + 4 > length) goto err; memcpy(&t32_, packet + j, 4); j += 4; x = ntohl(t32_); } while (0)
1201 #define GET16(x) do { if (j + 2 > length) goto err; memcpy(&t_, packet + j, 2); j += 2; x = ntohs(t_); } while (0)
1202 #define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while (0)
1203
1204         char *cp = name_out;
1205         const char *const end = name_out + name_out_len;
1206
1207         /* Normally, names are a series of length prefixed strings terminated */
1208         /* with a length of 0 (the lengths are u8's < 63). */
1209         /* However, the length can start with a pair of 1 bits and that */
1210         /* means that the next 14 bits are a pointer within the current */
1211         /* packet. */
1212
1213         for (;;) {
1214                 u8 label_len;
1215                 GET8(label_len);
1216                 if (!label_len) break;
1217                 if (label_len & 0xc0) {
1218                         u8 ptr_low;
1219                         GET8(ptr_low);
1220                         if (name_end < 0) name_end = j;
1221                         j = (((int)label_len & 0x3f) << 8) + ptr_low;
1222                         /* Make sure that the target offset is in-bounds. */
1223                         if (j < 0 || j >= length) return -1;
1224                         /* If we've jumped more times than there are characters in the
1225                          * message, we must have a loop. */
1226                         if (++ptr_count > length) return -1;
1227                         continue;
1228                 }
1229                 if (label_len > 63) return -1;
1230                 if (cp != name_out) {
1231                         if (cp + 1 >= end) return -1;
1232                         *cp++ = '.';
1233                 }
1234                 if (cp + label_len >= end) return -1;
1235                 if (j + label_len > length) return -1;
1236                 memcpy(cp, packet + j, label_len);
1237                 cp += label_len;
1238                 j += label_len;
1239         }
1240         if (cp >= end) return -1;
1241         *cp = '\0';
1242         if (name_end < 0)
1243                 *idx = j;
1244         else
1245                 *idx = name_end;
1246         return 0;
1247  err:
1248         return -1;
1249 }
1250
1251 /* parses a raw request from a nameserver */
1252 static int
1253 reply_parse(struct evdns_base *base, u8 *packet, int length)
1254 {
1255         int j = 0, k = 0;  /* index into packet */
1256         u16 t_;  /* used by the macros */
1257         u32 t32_;  /* used by the macros */
1258         char tmp_name[256], cmp_name[256]; /* used by the macros */
1259         int name_matches = 0;
1260
1261         u16 trans_id, questions, answers, authority, additional, datalength;
1262         u16 flags = 0;
1263         u32 ttl, ttl_r = 0xffffffff;
1264         struct reply reply;
1265         struct request *req = NULL;
1266         unsigned int i, buf_size;
1267
1268         ASSERT_LOCKED(base);
1269
1270         GET16(trans_id);
1271         GET16(flags);
1272         GET16(questions);
1273         GET16(answers);
1274         GET16(authority);
1275         GET16(additional);
1276         (void) authority; /* suppress "unused variable" warnings. */
1277         (void) additional; /* suppress "unused variable" warnings. */
1278
1279         req = request_find_from_trans_id(base, trans_id);
1280         if (!req) return -1;
1281         EVUTIL_ASSERT(req->base == base);
1282
1283         memset(&reply, 0, sizeof(reply));
1284
1285         /* If it's not an answer, it doesn't correspond to any request. */
1286         if (!(flags & _QR_MASK)) return -1;  /* must be an answer */
1287         if ((flags & (_RCODE_MASK|_TC_MASK)) && (flags & (_RCODE_MASK|_TC_MASK)) != DNS_ERR_NOTEXIST) {
1288                 /* there was an error and it's not NXDOMAIN */
1289                 goto err;
1290         }
1291         /* if (!answers) return; */  /* must have an answer of some form */
1292
1293         /* This macro skips a name in the DNS reply. */
1294 #define SKIP_NAME                                               \
1295         do { tmp_name[0] = '\0';                                \
1296                 if (name_parse(packet, length, &j, tmp_name,    \
1297                         sizeof(tmp_name))<0)                    \
1298                         goto err;                               \
1299         } while (0)
1300
1301         reply.type = req->request_type;
1302
1303         /* skip over each question in the reply */
1304         for (i = 0; i < questions; ++i) {
1305                 /* the question looks like
1306                  *   <label:name><u16:type><u16:class>
1307                  */
1308                 tmp_name[0] = '\0';
1309                 cmp_name[0] = '\0';
1310                 k = j;
1311                 if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name)) < 0)
1312                         goto err;
1313                 if (name_parse(req->request, req->request_len, &k,
1314                         cmp_name, sizeof(cmp_name))<0)
1315                         goto err;
1316                 if (!base->global_randomize_case) {
1317                         if (strcmp(tmp_name, cmp_name) == 0)
1318                                 name_matches = 1;
1319                 } else {
1320                         if (evutil_ascii_strcasecmp(tmp_name, cmp_name) == 0)
1321                                 name_matches = 1;
1322                 }
1323
1324                 j += 4;
1325                 if (j > length)
1326                         goto err;
1327         }
1328
1329         if (!name_matches)
1330                 goto err;
1331
1332         /* We can allocate less for the reply data, but to do it we'll have
1333          * to parse the response. To simplify things let's just allocate
1334          * a little bit more to avoid complex evaluations.
1335          */
1336         buf_size = MAX(length - j, EVDNS_NAME_MAX);
1337         reply.data.raw = mm_malloc(buf_size);
1338
1339         /* now we have the answer section which looks like
1340          * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
1341          */
1342
1343         for (i = 0; i < answers; ++i) {
1344                 u16 type, class;
1345
1346                 SKIP_NAME;
1347                 GET16(type);
1348                 GET16(class);
1349                 GET32(ttl);
1350                 GET16(datalength);
1351
1352                 if (type == TYPE_A && class == CLASS_INET) {
1353                         int addrcount;
1354                         if (req->request_type != TYPE_A) {
1355                                 j += datalength; continue;
1356                         }
1357                         if ((datalength & 3) != 0) /* not an even number of As. */
1358                             goto err;
1359                         addrcount = datalength >> 2;
1360
1361                         ttl_r = MIN(ttl_r, ttl);
1362                         /* we only bother with the first four addresses. */
1363                         if (j + 4*addrcount > length) goto err;
1364                         memcpy(&reply.data.a[reply.rr_count],
1365                                    packet + j, 4*addrcount);
1366                         j += 4*addrcount;
1367                         reply.rr_count += addrcount;
1368                         reply.have_answer = 1;
1369                 } else if (type == TYPE_PTR && class == CLASS_INET) {
1370                         if (req->request_type != TYPE_PTR) {
1371                                 j += datalength; continue;
1372                         }
1373                         if (name_parse(packet, length, &j, reply.data.ptr_name,
1374                                                    buf_size)<0)
1375                                 goto err;
1376                         ttl_r = MIN(ttl_r, ttl);
1377                         reply.have_answer = 1;
1378                         break;
1379                 } else if (type == TYPE_CNAME) {
1380                         char cname[EVDNS_NAME_MAX];
1381                         if (name_parse(packet, length, &j, cname,
1382                                 sizeof(cname))<0)
1383                                 goto err;
1384                         if (req->need_cname)
1385                                 reply.cname = mm_strdup(cname);
1386                         if (req->put_cname_in_ptr && !*req->put_cname_in_ptr)
1387                                 *req->put_cname_in_ptr = mm_strdup(cname);
1388                 } else if (type == TYPE_AAAA && class == CLASS_INET) {
1389                         int addrcount;
1390                         if (req->request_type != TYPE_AAAA) {
1391                                 j += datalength; continue;
1392                         }
1393                         if ((datalength & 15) != 0) /* not an even number of AAAAs. */
1394                                 goto err;
1395                         addrcount = datalength >> 4;  /* each address is 16 bytes long */
1396                         ttl_r = MIN(ttl_r, ttl);
1397
1398                         /* we only bother with the first four addresses. */
1399                         if (j + 16*addrcount > length) goto err;
1400                         memcpy(&reply.data.aaaa[reply.rr_count],
1401                                    packet + j, 16*addrcount);
1402                         reply.rr_count += addrcount;
1403                         j += 16*addrcount;
1404                         reply.have_answer = 1;
1405                 } else {
1406                         /* skip over any other type of resource */
1407                         j += datalength;
1408                 }
1409         }
1410
1411         if (!reply.have_answer) {
1412                 for (i = 0; i < authority; ++i) {
1413                         u16 type, class;
1414                         SKIP_NAME;
1415                         GET16(type);
1416                         GET16(class);
1417                         GET32(ttl);
1418                         GET16(datalength);
1419                         if (type == TYPE_SOA && class == CLASS_INET) {
1420                                 u32 serial, refresh, retry, expire, minimum;
1421                                 SKIP_NAME;
1422                                 SKIP_NAME;
1423                                 GET32(serial);
1424                                 GET32(refresh);
1425                                 GET32(retry);
1426                                 GET32(expire);
1427                                 GET32(minimum);
1428                                 (void)expire;
1429                                 (void)retry;
1430                                 (void)refresh;
1431                                 (void)serial;
1432                                 ttl_r = MIN(ttl_r, ttl);
1433                                 ttl_r = MIN(ttl_r, minimum);
1434                         } else {
1435                                 /* skip over any other type of resource */
1436                                 j += datalength;
1437                         }
1438                 }
1439         }
1440
1441         if (ttl_r == 0xffffffff)
1442                 ttl_r = 0;
1443
1444         reply_handle(req, flags, ttl_r, &reply);
1445         if (reply.data.raw)
1446                 mm_free(reply.data.raw);
1447         return 0;
1448  err:
1449         if (req)
1450                 reply_handle(req, flags, 0, NULL);
1451         if (reply.data.raw)
1452                 mm_free(reply.data.raw);
1453         return -1;
1454 }
1455
1456 /* Parse a raw request (packet,length) sent to a nameserver port (port) from */
1457 /* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
1458 /* callback. */
1459 static int
1460 request_parse(u8 *packet, int length, struct evdns_server_port *port,
1461                                 struct sockaddr *addr, ev_socklen_t addrlen, struct client_tcp_connection *client)
1462 {
1463         int j = 0;      /* index into packet */
1464         u16 t_;  /* used by the macros */
1465         u32 t32_;  /* used by the macros */
1466         char tmp_name[256]; /* used by the macros */
1467
1468         int i;
1469         u16 trans_id, flags, questions, answers, authority, additional;
1470         struct server_request *server_req = NULL;
1471         u32 ttl;
1472         u16 type, class, rdlen;
1473
1474         ASSERT_LOCKED(port);
1475
1476         /* Get the header fields */
1477         GET16(trans_id);
1478         GET16(flags);
1479         GET16(questions);
1480         GET16(answers);
1481         GET16(authority);
1482         GET16(additional);
1483
1484         if (flags & _QR_MASK) return -1; /* Must not be an answer. */
1485         flags &= (_RD_MASK|_CD_MASK); /* Only RD and CD get preserved. */
1486
1487         server_req = mm_malloc(sizeof(struct server_request));
1488         if (server_req == NULL) return -1;
1489         memset(server_req, 0, sizeof(struct server_request));
1490
1491         server_req->trans_id = trans_id;
1492         if (addr) {
1493                 memcpy(&server_req->addr, addr, addrlen);
1494                 server_req->addrlen = addrlen;
1495         }
1496
1497         server_req->port = port;
1498         server_req->client = client;
1499         server_req->base.flags = flags;
1500         server_req->base.nquestions = 0;
1501         server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
1502         if (server_req->base.questions == NULL)
1503                 goto err;
1504
1505         for (i = 0; i < questions; ++i) {
1506                 u16 type, class;
1507                 struct evdns_server_question *q;
1508                 int namelen;
1509                 if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
1510                         goto err;
1511                 GET16(type);
1512                 GET16(class);
1513                 namelen = (int)strlen(tmp_name);
1514                 q = mm_malloc(sizeof(struct evdns_server_question) + namelen);
1515                 if (!q)
1516                         goto err;
1517                 q->type = type;
1518                 q->dns_question_class = class;
1519                 memcpy(q->name, tmp_name, namelen+1);
1520                 server_req->base.questions[server_req->base.nquestions++] = q;
1521         }
1522
1523 #define SKIP_RR \
1524         do { \
1525                 SKIP_NAME; \
1526                 j += 2 /* type */ + 2 /* class */ + 4 /* ttl */; \
1527                 GET16(rdlen); \
1528                 j += rdlen; \
1529         } while (0)
1530
1531         for (i = 0; i < answers; ++i) {
1532                 SKIP_RR;
1533         }
1534
1535         for (i = 0; i < authority; ++i) {
1536                 SKIP_RR;
1537         }
1538
1539         server_req->max_udp_reply_size = DNS_MAX_UDP_SIZE;
1540         for (i = 0; i < additional; ++i) {
1541                 SKIP_NAME;
1542                 GET16(type);
1543                 GET16(class);
1544                 GET32(ttl);
1545                 GET16(rdlen);
1546                 (void)ttl;
1547                 j += rdlen;
1548                 if (type == TYPE_OPT) {
1549                         /* In case of OPT pseudo-RR `class` field is treated
1550                          * as a requestor's UDP payload size. */
1551                         server_req->max_udp_reply_size = MAX(class, DNS_MAX_UDP_SIZE);
1552                         evdns_server_request_add_reply(&(server_req->base),
1553                                 EVDNS_ADDITIONAL_SECTION,
1554                                 "", /* name */
1555                                 TYPE_OPT, /* type */
1556                                 DNS_MAX_UDP_SIZE, /* class */
1557                                 0, /* ttl */
1558                                 0, /* datalen */
1559                                 0, /* is_name */
1560                                 NULL /* data */
1561                         );
1562                         break;
1563                 }
1564         }
1565
1566         port->refcnt++;
1567
1568         /* Only standard queries are supported. */
1569         if (flags & _OP_MASK) {
1570                 evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
1571                 return -1;
1572         }
1573
1574         port->user_callback(&(server_req->base), port->user_data);
1575
1576         return 0;
1577 err:
1578         if (server_req) {
1579                 if (server_req->base.questions) {
1580                         for (i = 0; i < server_req->base.nquestions; ++i)
1581                                 mm_free(server_req->base.questions[i]);
1582                         mm_free(server_req->base.questions);
1583                 }
1584                 mm_free(server_req);
1585         }
1586         return -1;
1587
1588 #undef SKIP_RR
1589 #undef SKIP_NAME
1590 #undef GET32
1591 #undef GET16
1592 #undef GET8
1593 }
1594
1595 /* Try to choose a strong transaction id which isn't already in flight */
1596 static u16
1597 transaction_id_pick(struct evdns_base *base) {
1598         ASSERT_LOCKED(base);
1599         for (;;) {
1600                 u16 trans_id;
1601                 evutil_secure_rng_get_bytes(&trans_id, sizeof(trans_id));
1602
1603                 if (trans_id == 0xffff) continue;
1604                 /* now check to see if that id is already inflight */
1605                 if (request_find_from_trans_id(base, trans_id) == NULL)
1606                         return trans_id;
1607         }
1608 }
1609
1610 /* choose a namesever to use. This function will try to ignore */
1611 /* nameservers which we think are down and load balance across the rest */
1612 /* by updating the server_head global each time. */
1613 static struct nameserver *
1614 nameserver_pick(struct evdns_base *base) {
1615         struct nameserver *started_at = base->server_head, *picked;
1616         ASSERT_LOCKED(base);
1617         if (!base->server_head) return NULL;
1618
1619         /* if we don't have any good nameservers then there's no */
1620         /* point in trying to find one. */
1621         if (!base->global_good_nameservers) {
1622                 base->server_head = base->server_head->next;
1623                 return base->server_head;
1624         }
1625
1626         /* remember that nameservers are in a circular list */
1627         for (;;) {
1628                 if (base->server_head->state) {
1629                         /* we think this server is currently good */
1630                         picked = base->server_head;
1631                         base->server_head = base->server_head->next;
1632                         return picked;
1633                 }
1634
1635                 base->server_head = base->server_head->next;
1636                 if (base->server_head == started_at) {
1637                         /* all the nameservers seem to be down */
1638                         /* so we just return this one and hope for the */
1639                         /* best */
1640                         EVUTIL_ASSERT(base->global_good_nameservers == 0);
1641                         picked = base->server_head;
1642                         base->server_head = base->server_head->next;
1643                         return picked;
1644                 }
1645         }
1646 }
1647
1648 /* this is called when a namesever socket is ready for reading */
1649 static void
1650 nameserver_read(struct nameserver *ns) {
1651         struct sockaddr_storage ss;
1652         ev_socklen_t addrlen = sizeof(ss);
1653         char addrbuf[128];
1654         const size_t max_packet_size = ns->base->global_max_udp_size;
1655         u8 *packet = mm_malloc(max_packet_size);
1656         ASSERT_LOCKED(ns->base);
1657
1658         if (!packet) {
1659                 nameserver_failed(ns, "not enough memory", 0);
1660                 return;
1661         }
1662
1663         for (;;) {
1664                 const int r = recvfrom(ns->socket, (void*)packet,
1665                     max_packet_size, 0,
1666                     (struct sockaddr*)&ss, &addrlen);
1667                 if (r < 0) {
1668                         int err = evutil_socket_geterror(ns->socket);
1669                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1670                                 goto done;
1671                         nameserver_failed(ns,
1672                             evutil_socket_error_to_string(err), err);
1673                         goto done;
1674                 }
1675                 if (evutil_sockaddr_cmp((struct sockaddr*)&ss,
1676                         (struct sockaddr*)&ns->address, 0)) {
1677                         log(EVDNS_LOG_WARN, "Address mismatch on received "
1678                             "DNS packet.  Apparent source was %s",
1679                             evutil_format_sockaddr_port_(
1680                                     (struct sockaddr *)&ss,
1681                                     addrbuf, sizeof(addrbuf)));
1682                         goto done;
1683                 }
1684
1685                 ns->timedout = 0;
1686                 reply_parse(ns->base, packet, r);
1687         }
1688 done:
1689         mm_free(packet);
1690 }
1691
1692 /* Read a packet from a DNS client on a server port s, parse it, and */
1693 /* act accordingly. */
1694 static void
1695 server_udp_port_read(struct evdns_server_port *s) {
1696         u8 packet[1500];
1697         struct sockaddr_storage addr;
1698         ev_socklen_t addrlen;
1699         int r;
1700         ASSERT_LOCKED(s);
1701
1702         for (;;) {
1703                 addrlen = sizeof(struct sockaddr_storage);
1704                 r = recvfrom(s->socket, (void*)packet, sizeof(packet), 0,
1705                                          (struct sockaddr*) &addr, &addrlen);
1706                 if (r < 0) {
1707                         int err = evutil_socket_geterror(s->socket);
1708                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1709                                 return;
1710                         log(EVDNS_LOG_WARN,
1711                             "Error %s (%d) while reading request.",
1712                             evutil_socket_error_to_string(err), err);
1713                         return;
1714                 }
1715                 request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen, NULL);
1716         }
1717 }
1718
1719 static int
1720 server_send_response(struct evdns_server_port *port, struct server_request *req)
1721 {
1722         u16 packet_size = 0;
1723         struct bufferevent *bev = NULL;
1724         if (req->client) {
1725                 bev = req->client->connection.bev;
1726                 EVUTIL_ASSERT(bev);
1727                 EVUTIL_ASSERT(req->response_len <= 65535);
1728                 packet_size = htons((u16)req->response_len);
1729                 if (bufferevent_write(bev, &packet_size, sizeof(packet_size)))
1730                         goto beferevent_error;
1731                 if (bufferevent_write(bev, (void*)req->response, req->response_len))
1732                         goto beferevent_error;
1733                 return (int)req->response_len;
1734         } else {
1735                 int r = sendto(port->socket, req->response, (int)req->response_len, 0,
1736                                         (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1737                 return r;
1738         }
1739
1740 beferevent_error:
1741         log(EVDNS_LOG_WARN, "Failed to send reply to request %p for client %p", (void *)req, (void *)req->client);
1742         /* disconnect if we got bufferevent error */
1743         evdns_remove_tcp_client(port, req->client);
1744         return -1;
1745 }
1746
1747 /* Try to write all pending replies on a given DNS server port. */
1748 static void
1749 server_port_flush(struct evdns_server_port *port)
1750 {
1751         struct server_request *req = port->pending_replies;
1752         ASSERT_LOCKED(port);
1753         while (req) {
1754                 int r = server_send_response(port, req);
1755                 if (r < 0) {
1756                         int err = evutil_socket_geterror(port->socket);
1757                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1758                                 return;
1759                         log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", evutil_socket_error_to_string(err), err);
1760                 }
1761                 if (server_request_free(req)) {
1762                         /* we released the last reference to req->port. */
1763                         return;
1764                 } else {
1765                         EVUTIL_ASSERT(req != port->pending_replies);
1766                         req = port->pending_replies;
1767                 }
1768         }
1769
1770         /* We have no more pending requests; stop listening for 'writeable' events. */
1771         (void) event_del(&port->event);
1772         event_assign(&port->event, port->event_base,
1773                                  port->socket, EV_READ | EV_PERSIST,
1774                                  server_port_ready_callback, port);
1775
1776         if (event_add(&port->event, NULL) < 0) {
1777                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
1778                 /* ???? Do more? */
1779         }
1780 }
1781
1782 /* set if we are waiting for the ability to write to this server. */
1783 /* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
1784 /* we stop these events. */
1785 static void
1786 nameserver_write_waiting(struct nameserver *ns, char waiting) {
1787         ASSERT_LOCKED(ns->base);
1788         if (ns->write_waiting == waiting) return;
1789
1790         ns->write_waiting = waiting;
1791         (void) event_del(&ns->event);
1792         event_assign(&ns->event, ns->base->event_base,
1793             ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
1794             nameserver_ready_callback, ns);
1795         if (event_add(&ns->event, NULL) < 0) {
1796                 char addrbuf[128];
1797                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
1798                     evutil_format_sockaddr_port_(
1799                             (struct sockaddr *)&ns->address,
1800                             addrbuf, sizeof(addrbuf)));
1801                 /* ???? Do more? */
1802         }
1803 }
1804
1805 /* a callback function. Called by libevent when the kernel says that */
1806 /* a nameserver socket is ready for writing or reading */
1807 static void
1808 nameserver_ready_callback(evutil_socket_t fd, short events, void *arg) {
1809         struct nameserver *ns = (struct nameserver *) arg;
1810         (void)fd;
1811
1812         EVDNS_LOCK(ns->base);
1813         if (events & EV_WRITE) {
1814                 ns->choked = 0;
1815                 if (!evdns_transmit(ns->base)) {
1816                         nameserver_write_waiting(ns, 0);
1817                 }
1818         }
1819         if (events & EV_READ) {
1820                 nameserver_read(ns);
1821         }
1822         EVDNS_UNLOCK(ns->base);
1823 }
1824
1825 /* a callback function. Called by libevent when the kernel says that */
1826 /* a server socket is ready for writing or reading. */
1827 static void
1828 server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
1829         struct evdns_server_port *port = (struct evdns_server_port *) arg;
1830         (void) fd;
1831
1832         EVDNS_LOCK(port);
1833         if (events & EV_WRITE) {
1834                 port->choked = 0;
1835                 server_port_flush(port);
1836         }
1837         if (events & EV_READ) {
1838                 server_udp_port_read(port);
1839         }
1840         EVDNS_UNLOCK(port);
1841 }
1842
/* Structures used to implement name compression.
 *
 * This is an inefficient representation; only use it via the
 * dnslabel_table_* functions, so that it can be safely replaced with
 * something smarter later. */
#define MAX_LABELS 128

/* One remembered name and the message offset at which it was written. */
struct dnslabel_entry {
	char *v;
	off_t pos;
};

/* Fixed-capacity map from name to position in message. */
struct dnslabel_table {
	int n_labels; /* number of current entries */
	struct dnslabel_entry labels[MAX_LABELS];
};
1853
1854 /* Initialize dnslabel_table. */
1855 static void
1856 dnslabel_table_init(struct dnslabel_table *table)
1857 {
1858         table->n_labels = 0;
1859 }
1860
1861 /* Free all storage held by table, but not the table itself. */
1862 static void
1863 dnslabel_clear(struct dnslabel_table *table)
1864 {
1865         int i;
1866         for (i = 0; i < table->n_labels; ++i)
1867                 mm_free(table->labels[i].v);
1868         table->n_labels = 0;
1869 }
1870
1871 /* return the position of the label in the current message, or -1 if the label */
1872 /* hasn't been used yet. */
1873 static int
1874 dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
1875 {
1876         int i;
1877         for (i = 0; i < table->n_labels; ++i) {
1878                 if (!strcmp(label, table->labels[i].v))
1879                         return table->labels[i].pos;
1880         }
1881         return -1;
1882 }
1883
1884 /* remember that we've used the label at position pos */
1885 static int
1886 dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
1887 {
1888         char *v;
1889         int p;
1890         if (table->n_labels == MAX_LABELS)
1891                 return (-1);
1892         v = mm_strdup(label);
1893         if (v == NULL)
1894                 return (-1);
1895         p = table->n_labels++;
1896         table->labels[p].v = v;
1897         table->labels[p].pos = pos;
1898
1899         return (0);
1900 }
1901
1902 /* Converts a string to a length-prefixed set of DNS labels, starting */
1903 /* at buf[j]. name and buf must not overlap. name_len should be the length */
1904 /* of name.      table is optional, and is used for compression. */
1905 /* */
1906 /* Input: abc.def */
1907 /* Output: <3>abc<3>def<0> */
1908 /* */
1909 /* Returns the first index after the encoded name, or negative on error. */
1910 /*       -1      label was > 63 bytes */
1911 /*       -2      name too long to fit in buffer. */
1912 /* */
1913 static off_t
1914 dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
1915                                   const char *name, const size_t name_len,
1916                                   struct dnslabel_table *table) {
1917         const char *end = name + name_len;
1918         int ref = 0;
1919         u16 t_;
1920
1921 #define APPEND16(x) do {                                                \
1922                 if (j + 2 > (off_t)buf_len)                             \
1923                         goto overflow;                                  \
1924                 t_ = htons(x);                                          \
1925                 memcpy(buf + j, &t_, 2);                                \
1926                 j += 2;                                                 \
1927         } while (0)
1928 #define APPEND32(x) do {                                                \
1929                 if (j + 4 > (off_t)buf_len)                             \
1930                         goto overflow;                                  \
1931                 t32_ = htonl(x);                                        \
1932                 memcpy(buf + j, &t32_, 4);                              \
1933                 j += 4;                                                 \
1934         } while (0)
1935
1936         if (name_len > 255) return -2;
1937
1938         for (;;) {
1939                 const char *const start = name;
1940                 if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
1941                         APPEND16(ref | 0xc000);
1942                         return j;
1943                 }
1944                 name = strchr(name, '.');
1945                 if (!name) {
1946                         const size_t label_len = end - start;
1947                         if (label_len > 63) return -1;
1948                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1949                         if (table) dnslabel_table_add(table, start, j);
1950                         buf[j++] = (ev_uint8_t)label_len;
1951
1952                         memcpy(buf + j, start, label_len);
1953                         j += (int) label_len;
1954                         break;
1955                 } else {
1956                         /* append length of the label. */
1957                         const size_t label_len = name - start;
1958                         if (label_len > 63) return -1;
1959                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1960                         if (table) dnslabel_table_add(table, start, j);
1961                         buf[j++] = (ev_uint8_t)label_len;
1962
1963                         memcpy(buf + j, start, label_len);
1964                         j += (int) label_len;
1965                         /* hop over the '.' */
1966                         name++;
1967                 }
1968         }
1969
1970         /* the labels must be terminated by a 0. */
1971         /* It's possible that the name ended in a . */
1972         /* in which case the zero is already there */
1973         if (!j || buf[j-1]) buf[j++] = 0;
1974         return j;
1975  overflow:
1976         return (-2);
1977 }
1978
/* Returns an upper bound on the size of a DNS request for a name of the */
/* given length.  The actual request may be smaller than the value */
/* returned here. */
static size_t
evdns_request_len(const struct evdns_base *base, const size_t name_len)
{
	/* 96 is the allowance made for the DNS standard header.  It is */
	/* generous: evdns_request_data_build() writes six 16-bit header */
	/* fields. */
	size_t total = 96 +
		name_len + 2 +
		4 /* space for the resource type */;

	if (EDNS_ENABLED(base)) {
		/* room for the OPT pseudo-RR in the additional section */
		total += 1 + /* length of domain name string, always 0 */
			2 + /* space for resource type */
			2 + /* space for UDP payload size */
			4 + /* space for extended RCODE flags */
			2;  /* space for length of RDATA, always 0 */
	}
	return total;
}
1998
1999 /* build a dns request packet into buf. buf should be at least as long */
2000 /* as evdns_request_len told you it should be. */
2001 /* */
2002 /* Returns the amount of space used. Negative on error. */
2003 static int
2004 evdns_request_data_build(const struct evdns_base *base,
2005         const char *const name, const size_t name_len,
2006         const u16 trans_id, const u16 type, const u16 class, u8 *const buf,
2007         size_t buf_len)
2008 {
2009         off_t j = 0;  /* current offset into buf */
2010         u16 t_;  /* used by the macros */
2011         u32 t32_;  /* used by the macros */
2012
2013         APPEND16(trans_id);
2014         APPEND16(0x0100);  /* standard query, recusion needed */
2015         APPEND16(1);  /* one question */
2016         APPEND16(0);  /* no answers */
2017         APPEND16(0);  /* no authority */
2018         APPEND16(EDNS_ENABLED(base) ? 1 : 0); /* additional */
2019
2020         j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
2021         if (j < 0) {
2022                 return (int)j;
2023         }
2024
2025         APPEND16(type);
2026         APPEND16(class);
2027
2028         if (EDNS_ENABLED(base)) {
2029                 /* The OPT pseudo-RR format 
2030                  * (https://tools.ietf.org/html/rfc6891#section-6.1.2)
2031                  * +------------+--------------+------------------------------+
2032                  * | Field Name | Field Type   | Description                  |
2033                  * +------------+--------------+------------------------------+
2034                  * | NAME       | domain name  | MUST be 0 (root domain)      |
2035                  * | TYPE       | u_int16_t    | OPT (41)                     |
2036                  * | CLASS      | u_int16_t    | requestor's UDP payload size |
2037                  * | TTL        | u_int32_t    | extended RCODE and flags     |
2038                  * | RDLEN      | u_int16_t    | length of all RDATA          |
2039                  * | RDATA      | octet stream | {attribute,value} pairs      |
2040                  * +------------+--------------+------------------------------+ */
2041                 buf[j++] = 0;  /* NAME, always 0 */
2042                 APPEND16(TYPE_OPT);  /* OPT type */
2043                 APPEND16(base->global_max_udp_size);  /* max UDP payload size */
2044                 APPEND32(0);  /* No extended RCODE flags set */
2045                 APPEND16(0);  /* length of RDATA is 0 */
2046         }
2047
2048         return (int)j;
2049  overflow:
2050         return (-1);
2051 }
2052
2053 /* exported function */
2054 struct evdns_server_port *
2055 evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
2056 {
2057         struct evdns_server_port *port;
2058         if (flags)
2059                 return NULL; /* flags not yet implemented */
2060         if (!(port = mm_malloc(sizeof(struct evdns_server_port))))
2061                 return NULL;
2062         memset(port, 0, sizeof(struct evdns_server_port));
2063
2064
2065         port->socket = socket;
2066         port->refcnt = 1;
2067         port->choked = 0;
2068         port->closing = 0;
2069         port->user_callback = cb;
2070         port->user_data = user_data;
2071         port->pending_replies = NULL;
2072         port->event_base = base;
2073         port->max_client_connections = MAX_CLIENT_CONNECTIONS;
2074         port->tcp_idle_timeout.tv_sec = SERVER_IDLE_CONN_TIMEOUT;
2075         port->tcp_idle_timeout.tv_usec = 0;
2076         port->client_connections_count = 0;
2077         LIST_INIT(&port->client_connections);
2078         event_assign(&port->event, port->event_base,
2079                                  port->socket, EV_READ | EV_PERSIST,
2080                                  server_port_ready_callback, port);
2081         if (event_add(&port->event, NULL) < 0) {
2082                 mm_free(port);
2083                 return NULL;
2084         }
2085         EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2086         return port;
2087 }
2088
2089 /* exported function */
2090 struct evdns_server_port *
2091 evdns_add_server_port_with_listener(struct event_base *base, struct evconnlistener *listener, int flags, evdns_request_callback_fn_type cb, void *user_data)
2092 {
2093         struct evdns_server_port *port;
2094         if (!listener)
2095                 return NULL;
2096         if (flags)
2097                 return NULL; /* flags not yet implemented */
2098
2099         if (!(port = mm_calloc(1, sizeof(struct evdns_server_port))))
2100                 return NULL;
2101         port->socket = -1;
2102         port->refcnt = 1;
2103         port->choked = 0;
2104         port->closing = 0;
2105         port->user_callback = cb;
2106         port->user_data = user_data;
2107         port->pending_replies = NULL;
2108         port->event_base = base;
2109         port->max_client_connections = MAX_CLIENT_CONNECTIONS;
2110         port->client_connections_count = 0;
2111         LIST_INIT(&port->client_connections);
2112         port->listener = listener;
2113         evconnlistener_set_cb(port->listener, incoming_conn_cb, port);
2114
2115         EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2116         return port;
2117 }
2118
2119 static void
2120 server_tcp_event_cb(struct bufferevent *bev, short events, void *ctx);
2121
2122 static int
2123 tcp_read_message(struct tcp_connection *conn, u8 **msg, int *msg_len)
2124 {
2125         struct bufferevent *bev = conn->bev;
2126         struct evbuffer *input = bufferevent_get_input(bev);
2127         u8 *packet = NULL;
2128         int r = 0;
2129
2130         EVUTIL_ASSERT(conn);
2131         EVUTIL_ASSERT(conn->state == TS_CONNECTED);
2132
2133         /* reading new packet size */
2134         if (!conn->awaiting_packet_size) {
2135                 if (evbuffer_get_length(input) < sizeof(ev_uint16_t))
2136                         goto awaiting_next;
2137
2138                 bufferevent_read(bev, (void*)&conn->awaiting_packet_size,
2139                         sizeof(conn->awaiting_packet_size));
2140                 conn->awaiting_packet_size = ntohs(conn->awaiting_packet_size);
2141                 if (conn->awaiting_packet_size <= 0)
2142                         goto fail;
2143         }
2144
2145         /* reading new packet content */
2146         if (evbuffer_get_length(input) < conn->awaiting_packet_size)
2147                 goto awaiting_next;
2148
2149         packet = mm_malloc(conn->awaiting_packet_size);
2150         if (!packet)
2151                 goto fail;
2152
2153         r = (int)bufferevent_read(bev, (void*)packet, conn->awaiting_packet_size);
2154         if (r != conn->awaiting_packet_size) {
2155                 mm_free(packet);
2156                 packet = NULL;
2157                 goto fail;
2158         }
2159
2160         *msg = packet;
2161         *msg_len = r;
2162 awaiting_next:
2163         return 0;
2164 fail:
2165         return 1;
2166 }
2167
2168 static void
2169 server_tcp_read_packet_cb(struct bufferevent *bev, void *ctx)
2170 {
2171         u8 *msg = NULL;
2172         int msg_len = 0;
2173         int rc;
2174         struct client_tcp_connection *client = (struct client_tcp_connection *)ctx;
2175         struct evdns_server_port *port = client->port;
2176         struct tcp_connection *conn = &client->connection;
2177         EVUTIL_ASSERT(port && bev);
2178         EVDNS_LOCK(port);
2179
2180         while (1) {
2181                 if (tcp_read_message(conn, &msg, &msg_len)) {
2182                         log(EVDNS_LOG_MSG, "Closing client connection %p due to error", (void *)bev);
2183                         evdns_remove_tcp_client(port, client);
2184                         rc = port->refcnt;
2185                         EVDNS_UNLOCK(port);
2186                         if (!rc)
2187                                 server_port_free(port);
2188                         return;
2189                 }
2190
2191                 /* Only part of the message was recieved. */
2192                 if (!msg)
2193                         break;
2194
2195                 request_parse(msg, msg_len, port, NULL, 0, client);
2196                 mm_free(msg);
2197                 msg = NULL;
2198                 conn->awaiting_packet_size = 0;
2199         }
2200
2201         bufferevent_setwatermark(bev, EV_READ,
2202                         conn->awaiting_packet_size ? conn->awaiting_packet_size : sizeof(ev_uint16_t), 0);
2203         bufferevent_setcb(bev, server_tcp_read_packet_cb, NULL, server_tcp_event_cb, ctx);
2204         EVDNS_UNLOCK(port);
2205 }
2206
2207 static void
2208 server_tcp_event_cb(struct bufferevent *bev, short events, void *ctx)
2209 {
2210         struct client_tcp_connection *client = (struct client_tcp_connection *)ctx;
2211         struct evdns_server_port *port = client->port;
2212         int rc;
2213         EVUTIL_ASSERT(port && bev);
2214         EVDNS_LOCK(port);
2215         if (events & (BEV_EVENT_EOF | BEV_EVENT_ERROR | BEV_EVENT_TIMEOUT)) {
2216                 log(EVDNS_LOG_DEBUG, "Closing connection %p", (void *)bev);
2217                 evdns_remove_tcp_client(port, client);
2218         }
2219         rc = port->refcnt;
2220         EVDNS_UNLOCK(port);
2221         if (!rc)
2222                 server_port_free(port);
2223 }
2224
2225 static void
2226 incoming_conn_cb(struct evconnlistener *listener, evutil_socket_t fd,
2227                                   struct sockaddr *address, int socklen, void *arg)
2228 {
2229         struct evdns_server_port *port = (struct evdns_server_port*)arg;
2230         struct bufferevent *bev = bufferevent_socket_new(port->event_base, fd, BEV_OPT_CLOSE_ON_FREE);
2231         struct client_tcp_connection *client = NULL;
2232         struct tcp_connection *cd = NULL;
2233
2234         if (!bev)
2235                 goto error;
2236         log(EVDNS_LOG_DEBUG, "New incoming client connection %p", (void *)bev);
2237
2238         bufferevent_set_timeouts(bev, &port->tcp_idle_timeout, &port->tcp_idle_timeout);
2239
2240         client = evdns_add_tcp_client(port, bev);
2241         if (!client)
2242                 goto error;
2243         cd = &client->connection;
2244
2245         cd->state = TS_CONNECTED;
2246         bufferevent_setwatermark(bev, EV_READ, sizeof(ev_uint16_t), 0);
2247         bufferevent_setcb(bev, server_tcp_read_packet_cb, NULL, server_tcp_event_cb, (void *)client);
2248         bufferevent_enable(bev, EV_READ);
2249
2250         return;
2251 error:
2252         if (bev)
2253                 bufferevent_free(bev);
2254         return;
2255 }
2256
2257 struct evdns_server_port *
2258 evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
2259 {
2260         return evdns_add_server_port_with_base(NULL, socket, flags, cb, user_data);
2261 }
2262
2263 /* exported function */
2264 void
2265 evdns_close_server_port(struct evdns_server_port *port)
2266 {
2267         EVDNS_LOCK(port);
2268         evdns_remove_all_tcp_clients(port);
2269         if (--port->refcnt == 0) {
2270                 EVDNS_UNLOCK(port);
2271                 server_port_free(port);
2272         } else {
2273                 port->closing = 1;
2274                 EVDNS_UNLOCK(port);
2275         }
2276 }
2277
2278 /* exported function */
2279 int
2280 evdns_server_request_add_reply(struct evdns_server_request *req_, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
2281 {
2282         struct server_request *req = TO_SERVER_REQUEST(req_);
2283         struct server_reply_item **itemp, *item;
2284         int *countp;
2285         int result = -1;
2286
2287         EVDNS_LOCK(req->port);
2288         if (req->response) /* have we already answered? */
2289                 goto done;
2290
2291         switch (section) {
2292         case EVDNS_ANSWER_SECTION:
2293                 itemp = &req->answer;
2294                 countp = &req->n_answer;
2295                 break;
2296         case EVDNS_AUTHORITY_SECTION:
2297                 itemp = &req->authority;
2298                 countp = &req->n_authority;
2299                 break;
2300         case EVDNS_ADDITIONAL_SECTION:
2301                 itemp = &req->additional;
2302                 countp = &req->n_additional;
2303                 break;
2304         default:
2305                 goto done;
2306         }
2307         while (*itemp) {
2308                 itemp = &((*itemp)->next);
2309         }
2310         item = mm_malloc(sizeof(struct server_reply_item));
2311         if (!item)
2312                 goto done;
2313         item->next = NULL;
2314         if (!(item->name = mm_strdup(name))) {
2315                 mm_free(item);
2316                 goto done;
2317         }
2318         item->type = type;
2319         item->dns_question_class = class;
2320         item->ttl = ttl;
2321         item->is_name = is_name != 0;
2322         item->datalen = 0;
2323         item->data = NULL;
2324         if (data) {
2325                 if (item->is_name) {
2326                         if (!(item->data = mm_strdup(data))) {
2327                                 mm_free(item->name);
2328                                 mm_free(item);
2329                                 goto done;
2330                         }
2331                         item->datalen = (u16)-1;
2332                 } else {
2333                         if (!(item->data = mm_malloc(datalen))) {
2334                                 mm_free(item->name);
2335                                 mm_free(item);
2336                                 goto done;
2337                         }
2338                         item->datalen = datalen;
2339                         memcpy(item->data, data, datalen);
2340                 }
2341         }
2342
2343         *itemp = item;
2344         ++(*countp);
2345         result = 0;
2346 done:
2347         EVDNS_UNLOCK(req->port);
2348         return result;
2349 }
2350
2351 /* exported function */
2352 int
2353 evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
2354 {
2355         return evdns_server_request_add_reply(
2356                   req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
2357                   ttl, n*4, 0, addrs);
2358 }
2359
2360 /* exported function */
2361 int
2362 evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
2363 {
2364         return evdns_server_request_add_reply(
2365                   req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
2366                   ttl, n*16, 0, addrs);
2367 }
2368
2369 /* exported function */
2370 int
2371 evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
2372 {
2373         u32 a;
2374         char buf[32];
2375         if (in && inaddr_name)
2376                 return -1;
2377         else if (!in && !inaddr_name)
2378                 return -1;
2379         if (in) {
2380                 a = ntohl(in->s_addr);
2381                 evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
2382                                 (int)(u8)((a    )&0xff),
2383                                 (int)(u8)((a>>8 )&0xff),
2384                                 (int)(u8)((a>>16)&0xff),
2385                                 (int)(u8)((a>>24)&0xff));
2386                 inaddr_name = buf;
2387         }
2388         return evdns_server_request_add_reply(
2389                   req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
2390                   ttl, -1, 1, hostname);
2391 }
2392
2393 /* exported function */
2394 int
2395 evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
2396 {
2397         return evdns_server_request_add_reply(
2398                   req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
2399                   ttl, -1, 1, cname);
2400 }
2401
2402 /* exported function */
2403 void
2404 evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
2405 {
2406         struct server_request *req = TO_SERVER_REQUEST(exreq);
2407         req->base.flags &= ~(EVDNS_FLAGS_AA|EVDNS_FLAGS_RD);
2408         req->base.flags |= flags;
2409 }
2410
2411 static int
2412 evdns_server_request_format_response(struct server_request *req, int err)
2413 {
2414         unsigned char buf[1024 * 64];
2415         size_t buf_len = sizeof(buf);
2416         off_t j = 0, r;
2417         u16 t_;
2418         u32 t32_;
2419         int i;
2420         u16 flags;
2421         struct dnslabel_table table;
2422
2423         if (err < 0 || err > 15) return -1;
2424
2425         /* Set response bit and error code; copy OPCODE and RD fields from
2426          * question; copy RA and AA if set by caller. */
2427         flags = req->base.flags;
2428         flags |= (_QR_MASK | err);
2429
2430         dnslabel_table_init(&table);
2431         APPEND16(req->trans_id);
2432         APPEND16(flags);
2433         APPEND16(req->base.nquestions);
2434         APPEND16(req->n_answer);
2435         APPEND16(req->n_authority);
2436         APPEND16(req->n_additional);
2437
2438         /* Add questions. */
2439         for (i=0; i < req->base.nquestions; ++i) {
2440                 const char *s = req->base.questions[i]->name;
2441                 j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
2442                 if (j < 0) {
2443                         dnslabel_clear(&table);
2444                         return (int) j;
2445                 }
2446                 APPEND16(req->base.questions[i]->type);
2447                 APPEND16(req->base.questions[i]->dns_question_class);
2448         }
2449
2450         /* Add answer, authority, and additional sections. */
2451         for (i=0; i<3; ++i) {
2452                 struct server_reply_item *item;
2453                 if (i==0)
2454                         item = req->answer;
2455                 else if (i==1)
2456                         item = req->authority;
2457                 else
2458                         item = req->additional;
2459                 while (item) {
2460                         r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
2461                         if (r < 0)
2462                                 goto overflow;
2463                         j = r;
2464
2465                         APPEND16(item->type);
2466                         APPEND16(item->dns_question_class);
2467                         APPEND32(item->ttl);
2468                         if (item->is_name) {
2469                                 off_t len_idx = j, name_start;
2470                                 j += 2;
2471                                 name_start = j;
2472                                 r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
2473                                 if (r < 0)
2474                                         goto overflow;
2475                                 j = r;
2476                                 t_ = htons( (short) (j-name_start) );
2477                                 memcpy(buf+len_idx, &t_, 2);
2478                         } else {
2479                                 APPEND16(item->datalen);
2480                                 if (j+item->datalen > (off_t)buf_len)
2481                                         goto overflow;
2482                                 if (item->data) {
2483                                         memcpy(buf+j, item->data, item->datalen);
2484                                         j += item->datalen;
2485                                 } else {
2486                                         EVUTIL_ASSERT(item->datalen == 0);
2487                                 }
2488                         }
2489                         item = item->next;
2490                 }
2491         }
2492
2493         if (j > req->max_udp_reply_size && !req->client) {
2494 overflow:
2495                 j = req->max_udp_reply_size;
2496                 buf[2] |= 0x02; /* set the truncated bit. */
2497         }
2498
2499         req->response_len = j;
2500
2501         if (!(req->response = mm_malloc(req->response_len))) {
2502                 server_request_free_answers(req);
2503                 dnslabel_clear(&table);
2504                 return (-1);
2505         }
2506         memcpy(req->response, buf, req->response_len);
2507         server_request_free_answers(req);
2508         dnslabel_clear(&table);
2509         return (0);
2510 }
2511
2512 /* exported function */
2513 int
2514 evdns_server_request_respond(struct evdns_server_request *req_, int err)
2515 {
2516         struct server_request *req = TO_SERVER_REQUEST(req_);
2517         struct evdns_server_port *port = req->port;
2518         int r = -1;
2519
2520         EVDNS_LOCK(port);
2521         if (!req->response) {
2522                 if ((r = evdns_server_request_format_response(req, err))<0)
2523                         goto done;
2524         }
2525
2526         r = server_send_response(port, req);
2527         if (r < 0 && req->client) {
2528                 int sock_err = evutil_socket_geterror(port->socket);
2529                 if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
2530                         goto done;
2531
2532                 if (port->pending_replies) {
2533                         req->prev_pending = port->pending_replies->prev_pending;
2534                         req->next_pending = port->pending_replies;
2535                         req->prev_pending->next_pending =
2536                                 req->next_pending->prev_pending = req;
2537                 } else {
2538                         req->prev_pending = req->next_pending = req;
2539                         port->pending_replies = req;
2540                         port->choked = 1;
2541
2542                         (void) event_del(&port->event);
2543                         event_assign(&port->event, port->event_base, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);
2544
2545                         if (event_add(&port->event, NULL) < 0) {
2546                                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
2547                         }
2548
2549                 }
2550
2551                 r = 1;
2552                 goto done;
2553         }
2554         if (server_request_free(req)) {
2555                 r = 0;
2556                 goto done;
2557         }
2558
2559         if (port->pending_replies)
2560                 server_port_flush(port);
2561
2562         r = 0;
2563 done:
2564         EVDNS_UNLOCK(port);
2565         return r;
2566 }
2567
2568 /* Free all storage held by RRs in req. */
2569 static void
2570 server_request_free_answers(struct server_request *req)
2571 {
2572         struct server_reply_item *victim, *next, **list;
2573         int i;
2574         for (i = 0; i < 3; ++i) {
2575                 if (i==0)
2576                         list = &req->answer;
2577                 else if (i==1)
2578                         list = &req->authority;
2579                 else
2580                         list = &req->additional;
2581
2582                 victim = *list;
2583                 while (victim) {
2584                         next = victim->next;
2585                         mm_free(victim->name);
2586                         victim->name = NULL;
2587                         if (victim->data) {
2588                                 mm_free(victim->data);
2589                                 victim->data = NULL;
2590                         }
2591                         mm_free(victim);
2592                         victim = next;
2593                 }
2594                 *list = NULL;
2595         }
2596 }
2597
2598 /* Free all storage held by req, and remove links to it. */
2599 /* return true iff we just wound up freeing the server_port. */
2600 static int
2601 server_request_free(struct server_request *req)
2602 {
2603         int i, rc=1, lock=0;
2604         if (req->base.questions) {
2605                 for (i = 0; i < req->base.nquestions; ++i) {
2606                         mm_free(req->base.questions[i]);
2607                         req->base.questions[i] = NULL;
2608                 }
2609                 mm_free(req->base.questions);
2610                 req->base.questions = NULL;
2611         }
2612
2613         if (req->port) {
2614                 EVDNS_LOCK(req->port);
2615                 lock=1;
2616                 if (req->port->pending_replies == req) {
2617                         if (req->next_pending && req->next_pending != req)
2618                                 req->port->pending_replies = req->next_pending;
2619                         else
2620                                 req->port->pending_replies = NULL;
2621                 }
2622                 rc = --req->port->refcnt;
2623         }
2624
2625         if (req->response) {
2626                 mm_free(req->response);
2627                 req->response = NULL;
2628         }
2629
2630         server_request_free_answers(req);
2631
2632         if (req->next_pending && req->next_pending != req) {
2633                 req->next_pending->prev_pending = req->prev_pending;
2634                 req->prev_pending->next_pending = req->next_pending;
2635         }
2636
2637         if (rc == 0) {
2638                 EVDNS_UNLOCK(req->port); /* ????? nickm */
2639                 server_port_free(req->port);
2640                 mm_free(req);
2641                 return (1);
2642         }
2643         if (lock)
2644                 EVDNS_UNLOCK(req->port);
2645         mm_free(req);
2646         return (0);
2647 }
2648
2649 /* Free all storage held by an evdns_server_port.  Only called when  */
2650 static void
2651 server_port_free(struct evdns_server_port *port)
2652 {
2653         EVUTIL_ASSERT(port);
2654         EVUTIL_ASSERT(!port->refcnt);
2655         EVUTIL_ASSERT(!port->pending_replies);
2656         if (port->socket > 0) {
2657                 evutil_closesocket(port->socket);
2658                 port->socket = -1;
2659         }
2660
2661         /* if tcp server */
2662         if (port->listener) {
2663                 evconnlistener_free(port->listener);
2664         } else {
2665                 (void) event_del(&port->event);
2666                 event_debug_unassign(&port->event);
2667         }
2668
2669         EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2670         mm_free(port);
2671 }
2672
/* exported function */
/* Discard a server request without replying to it: the request and
 * everything it owns are released via server_request_free().  Always
 * returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *req_)
{
	(void) server_request_free(TO_SERVER_REQUEST(req_));
	return 0;
}
2681
2682 /* exported function */
2683 int
2684 evdns_server_request_get_requesting_addr(struct evdns_server_request *req_, struct sockaddr *sa, int addr_len)
2685 {
2686         struct server_request *req = TO_SERVER_REQUEST(req_);
2687         if (addr_len < (int)req->addrlen)
2688                 return -1;
2689         memcpy(sa, &(req->addr), req->addrlen);
2690         return req->addrlen;
2691 }
2692
2693 static void
2694 retransmit_all_tcp_requests_for(struct nameserver *server)
2695 {
2696         int i = 0;
2697         for (i = 0; i < server->base->n_req_heads; ++i) {
2698                 struct request *started_at = server->base->req_heads[i];
2699                 struct request *req = started_at;
2700                 if (!req)
2701                         continue;
2702
2703                 do {
2704                         if (req->ns == server && (req->handle->tcp_flags & DNS_QUERY_USEVC)) {
2705                                 if (req->tx_count >= req->base->global_max_retransmits) {
2706                                         log(EVDNS_LOG_DEBUG, "Giving up on request %p; tx_count==%d",
2707                                                 (void *)req, req->tx_count);
2708                                         reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2709                                         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2710                                 } else {
2711                                         (void) evtimer_del(&req->timeout_event);
2712                                         evdns_request_transmit(req);
2713                                 }
2714                         }
2715                         req = req->next;
2716                 } while (req != started_at);
2717         }
2718 }
2719
2720 /* this is a libevent callback function which is called when a request */
2721 /* has timed out. */
2722 static void
2723 evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
2724         struct request *const req = (struct request *) arg;
2725         struct evdns_base *base = req->base;
2726
2727         (void) fd;
2728         (void) events;
2729
2730         log(EVDNS_LOG_DEBUG, "Request %p timed out", arg);
2731         EVDNS_LOCK(base);
2732
2733         if (req->tx_count >= req->base->global_max_retransmits) {
2734                 struct nameserver *ns = req->ns;
2735                 /* this request has failed */
2736                 log(EVDNS_LOG_DEBUG, "Giving up on request %p; tx_count==%d",
2737                     arg, req->tx_count);
2738                 reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2739
2740                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2741                 nameserver_failed(ns, "request timed out.", 0);
2742         } else {
2743                 /* if request is using tcp connection, so tear connection */
2744                 if (req->handle->tcp_flags & DNS_QUERY_USEVC) {
2745                         disconnect_and_free_connection(req->ns->connection);
2746                         req->ns->connection = NULL;
2747
2748                         /* client can have the only connection to DNS server */
2749                         retransmit_all_tcp_requests_for(req->ns);
2750                 } else {
2751                         /* retransmit it */
2752                         log(EVDNS_LOG_DEBUG, "Retransmitting request %p; tx_count==%d by udp", arg, req->tx_count);
2753                         (void) evtimer_del(&req->timeout_event);
2754                         request_swap_ns(req, nameserver_pick(base));
2755                         evdns_request_transmit(req);
2756
2757                         req->ns->timedout++;
2758                         if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
2759                                 req->ns->timedout = 0;
2760                                 nameserver_failed(req->ns, "request timed out.", 0);
2761                         }
2762                 }
2763         }
2764
2765         EVDNS_UNLOCK(base);
2766 }
2767
2768 /* try to send a request to a given server. */
2769 /* */
2770 /* return: */
2771 /*   0 ok */
2772 /*   1 temporary failure */
2773 /*   2 other failure */
2774 static int
2775 evdns_request_transmit_to(struct request *req, struct nameserver *server) {
2776         int r;
2777         ASSERT_LOCKED(req->base);
2778         ASSERT_VALID_REQUEST(req);
2779
2780         if (server->requests_inflight == 1 &&
2781                 req->base->disable_when_inactive &&
2782                 event_add(&server->event, NULL) < 0) {
2783                 return 1;
2784         }
2785
2786         r = sendto(server->socket, (void*)req->request, req->request_len, 0,
2787             (struct sockaddr *)&server->address, server->addrlen);
2788         if (r < 0) {
2789                 int err = evutil_socket_geterror(server->socket);
2790                 if (EVUTIL_ERR_RW_RETRIABLE(err))
2791                         return 1;
2792                 nameserver_failed(req->ns, evutil_socket_error_to_string(err), err);
2793                 return 2;
2794         } else if (r != (int)req->request_len) {
2795                 return 1;  /* short write */
2796         } else {
2797                 return 0;
2798         }
2799 }
2800
2801 /* try to connect to a given server. */
2802 /* */
2803 /* return: */
2804 /*   0 ok */
2805 /*   1 temporary failure */
2806 /*   2 other failure */
2807 static int
2808 evdns_tcp_connect_if_disconnected(struct nameserver *server)
2809 {
2810         struct tcp_connection *conn = server->connection;
2811         struct timeval *timeout = &server->base->global_tcp_idle_timeout;
2812         if (conn && conn->state != TS_DISCONNECTED && conn->bev != NULL)
2813                 return 0;
2814
2815         disconnect_and_free_connection(conn);
2816         conn = new_tcp_connecton(bufferevent_socket_new(server->base->event_base, -1, BEV_OPT_CLOSE_ON_FREE));
2817         if (!conn)
2818                 return 2;
2819         server->connection = conn;
2820
2821         if (bufferevent_set_timeouts(conn->bev, timeout, timeout))
2822                 return 1;
2823
2824         EVUTIL_ASSERT(conn->state == TS_DISCONNECTED);
2825         if (bufferevent_socket_connect(conn->bev, (struct sockaddr *)&server->address, server->addrlen))
2826                 return 1;
2827
2828         conn->state = TS_CONNECTING;
2829         log(EVDNS_LOG_DEBUG, "New tcp connection %p created", (void *)conn);
2830         return 0;
2831 }
2832
2833 static void
2834 client_tcp_event_cb(struct bufferevent *bev, short events, void *ctx);
2835
2836
2837 static void
2838 client_tcp_read_packet_cb(struct bufferevent *bev, void *ctx)
2839 {
2840         u8 *msg = NULL;
2841         int msg_len = 0;
2842         struct nameserver *server = (struct nameserver*)ctx;
2843         struct tcp_connection *conn = server->connection;
2844         EVUTIL_ASSERT(server && bev);
2845         EVDNS_LOCK(server->base);
2846
2847         while (1) {
2848                 if (tcp_read_message(conn, &msg, &msg_len)) {
2849                         disconnect_and_free_connection(server->connection);
2850                         server->connection = NULL;
2851                         EVDNS_UNLOCK(server->base);
2852                         return;
2853                 }
2854
2855                 /* Only part of the message was recieved. */
2856                 if (!msg)
2857                         break;
2858
2859                 reply_parse(server->base, msg, msg_len);
2860                 mm_free(msg);
2861                 msg = NULL;
2862                 conn->awaiting_packet_size = 0;
2863         }
2864
2865         bufferevent_setwatermark(bev, EV_READ,
2866                 conn->awaiting_packet_size ? conn->awaiting_packet_size : sizeof(ev_uint16_t), 0);
2867         bufferevent_setcb(bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, ctx);
2868         EVDNS_UNLOCK(server->base);
2869 }
2870
2871 static void
2872 client_tcp_event_cb(struct bufferevent *bev, short events, void *ctx) {
2873         struct nameserver *server = (struct nameserver*)ctx;
2874         struct tcp_connection *conn = server->connection;
2875         EVUTIL_ASSERT(server);
2876         EVDNS_LOCK(server->base);
2877         EVUTIL_ASSERT(conn && conn->bev == bev && bev);
2878
2879         log(EVDNS_LOG_DEBUG, "Event %d on connection %p", events, (void *)conn);
2880
2881         if (events & (BEV_EVENT_TIMEOUT)) {
2882                 disconnect_and_free_connection(server->connection);
2883                 server->connection = NULL;
2884         } else if (events & (BEV_EVENT_EOF | BEV_EVENT_ERROR)) {
2885                 disconnect_and_free_connection(server->connection);
2886                 server->connection = NULL;
2887         } else if (events & BEV_EVENT_CONNECTED) {
2888                 EVUTIL_ASSERT (conn->state == TS_CONNECTING);
2889                 conn->state = TS_CONNECTED;
2890                 evutil_make_socket_nonblocking(bufferevent_getfd(bev));
2891                 bufferevent_setcb(bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, server);
2892                 bufferevent_setwatermark(bev, EV_READ, sizeof(ev_uint16_t), 0);
2893         }
2894         EVDNS_UNLOCK(server->base);
2895 }
2896
2897 /* try to send a request to a given server. */
2898 /* */
2899 /* return: */
2900 /*   0 ok */
2901 /*   1 temporary failure */
2902 /*   2 other failure */
2903 static int
2904 evdns_request_transmit_through_tcp(struct request *req, struct nameserver *server) {
2905         uint16_t packet_size;
2906         struct tcp_connection *conn = NULL;
2907         int r;
2908         ASSERT_LOCKED(req->base);
2909         ASSERT_VALID_REQUEST(req);
2910
2911         if ((r = evdns_tcp_connect_if_disconnected(server)))
2912                 return r;
2913
2914         conn = server->connection;
2915         bufferevent_setcb(conn->bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, server);
2916
2917         log(EVDNS_LOG_DEBUG, "Sending request %p via tcp connection %p", (void *)req, (void *)conn);
2918         packet_size = htons(req->request_len);
2919         if (bufferevent_write(conn->bev, &packet_size, sizeof(packet_size)) )
2920                 goto fail;
2921         if (bufferevent_write(conn->bev, (void*)req->request, req->request_len) )
2922                 goto fail;
2923         if (bufferevent_enable(conn->bev, EV_READ))
2924                 goto fail;
2925         if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0)
2926                 goto fail;
2927
2928         return 0;
2929 fail:
2930         log(EVDNS_LOG_WARN, "Failed to send request %p via tcp connection %p", (void *)req, (void *)conn);
2931         disconnect_and_free_connection(server->connection);
2932         server->connection = NULL;
2933         return 2;
2934 }
2935
2936 /* try to send a request, updating the fields of the request */
2937 /* as needed */
2938 /* */
2939 /* return: */
2940 /*   0 ok */
2941 /*   1 failed */
2942 static int
2943 evdns_request_transmit(struct request *req) {
2944         int retcode = 0, r;
2945
2946         ASSERT_LOCKED(req->base);
2947         ASSERT_VALID_REQUEST(req);
2948         /* if we fail to send this packet then this flag marks it */
2949         /* for evdns_transmit */
2950         req->transmit_me = 1;
2951         EVUTIL_ASSERT(req->trans_id != 0xffff);
2952
2953         if (!req->ns)
2954         {
2955                 /* unable to transmit request if no nameservers */
2956                 return 1;
2957         }
2958
2959         if (req->ns->choked) {
2960                 /* don't bother trying to write to a socket */
2961                 /* which we have had EAGAIN from */
2962                 return 1;
2963         }
2964
2965         if (req->handle->tcp_flags & DNS_QUERY_USEVC) {
2966                 r = evdns_request_transmit_through_tcp(req, req->ns);
2967                 /*
2968                 If connection didn't initiated now, so report about temporary problems.
2969                 We don't mark name server as chocked so udp packets possibly have no
2970                 problems during transmit. Simply we will retry attempt later */
2971                 if (r == 1) {
2972                         return r;
2973                 }
2974         } else {
2975                 r = evdns_request_transmit_to(req, req->ns);
2976         }
2977         switch (r) {
2978         case 1:
2979                 /* temp failure */
2980                 req->ns->choked = 1;
2981                 nameserver_write_waiting(req->ns, 1);
2982                 return 1;
2983         case 2:
2984                 /* failed to transmit the request entirely. we can fallthrough since
2985                  * we'll set a timeout, which will time out, and make us retransmit the
2986                  * request anyway. */
2987                 retcode = 1;
2988                 EVUTIL_FALLTHROUGH;
2989         default:
2990                 /* all ok */
2991                 log(EVDNS_LOG_DEBUG,
2992                     "Setting timeout for request %p, sent to nameserver %p", (void *)req, (void *)req->ns);
2993                 if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
2994                         log(EVDNS_LOG_WARN,
2995                       "Error from libevent when adding timer for request %p",
2996                             (void *)req);
2997                         /* ???? Do more? */
2998                 }
2999                 req->tx_count++;
3000                 req->transmit_me = 0;
3001                 return retcode;
3002         }
3003 }
3004
3005 static void
3006 nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
3007         struct nameserver *const ns = (struct nameserver *) arg;
3008         (void) type;
3009         (void) count;
3010         (void) ttl;
3011         (void) addresses;
3012
3013         if (result == DNS_ERR_CANCEL) {
3014                 /* We canceled this request because the nameserver came up
3015                  * for some other reason.  Do not change our opinion about
3016                  * the nameserver. */
3017                 return;
3018         }
3019
3020         EVDNS_LOCK(ns->base);
3021         ns->probe_request = NULL;
3022         if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
3023                 /* this is a good reply */
3024                 nameserver_up(ns);
3025         } else {
3026                 nameserver_probe_failed(ns);
3027         }
3028         EVDNS_UNLOCK(ns->base);
3029 }
3030
3031 static void
3032 nameserver_send_probe(struct nameserver *const ns) {
3033         struct evdns_request *handle;
3034         struct request *req;
3035         char addrbuf[128];
3036         /* here we need to send a probe to a given nameserver */
3037         /* in the hope that it is up now. */
3038
3039         ASSERT_LOCKED(ns->base);
3040         log(EVDNS_LOG_DEBUG, "Sending probe to %s",
3041             evutil_format_sockaddr_port_(
3042                     (struct sockaddr *)&ns->address,
3043                     addrbuf, sizeof(addrbuf)));
3044         handle = mm_calloc(1, sizeof(*handle));
3045         if (!handle) return;
3046         handle->user_callback = nameserver_probe_callback;
3047         handle->user_pointer = ns;
3048         req = request_new(ns->base, handle, TYPE_A, "google.com", DNS_QUERY_NO_SEARCH);
3049         if (!req) {
3050                 mm_free(handle);
3051                 return;
3052         }
3053         ns->probe_request = handle;
3054         /* we force this into the inflight queue no matter what */
3055         request_trans_id_set(req, transaction_id_pick(ns->base));
3056         req->ns = ns;
3057         request_submit(req);
3058 }
3059
3060 /* returns: */
3061 /*   0 didn't try to transmit anything */
3062 /*   1 tried to transmit something */
3063 static int
3064 evdns_transmit(struct evdns_base *base) {
3065         char did_try_to_transmit = 0;
3066         int i;
3067
3068         ASSERT_LOCKED(base);
3069         for (i = 0; i < base->n_req_heads; ++i) {
3070                 if (base->req_heads[i]) {
3071                         struct request *const started_at = base->req_heads[i], *req = started_at;
3072                         /* first transmit all the requests which are currently waiting */
3073                         do {
3074                                 if (req->transmit_me) {
3075                                         did_try_to_transmit = 1;
3076                                         evdns_request_transmit(req);
3077                                 }
3078
3079                                 req = req->next;
3080                         } while (req != started_at);
3081                 }
3082         }
3083
3084         return did_try_to_transmit;
3085 }
3086
3087 /* exported function */
3088 int
3089 evdns_base_count_nameservers(struct evdns_base *base)
3090 {
3091         const struct nameserver *server;
3092         int n = 0;
3093
3094         EVDNS_LOCK(base);
3095         server = base->server_head;
3096         if (!server)
3097                 goto done;
3098         do {
3099                 ++n;
3100                 server = server->next;
3101         } while (server != base->server_head);
3102 done:
3103         EVDNS_UNLOCK(base);
3104         return n;
3105 }
3106
3107 int
3108 evdns_count_nameservers(void)
3109 {
3110         return evdns_base_count_nameservers(current_base);
3111 }
3112
3113 /* exported function */
3114 int
3115 evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
3116 {
3117         struct nameserver *server, *started_at;
3118         int i;
3119
3120         EVDNS_LOCK(base);
3121         server = base->server_head;
3122         started_at = base->server_head;
3123         if (!server) {
3124                 EVDNS_UNLOCK(base);
3125                 return 0;
3126         }
3127         while (1) {
3128                 struct nameserver *next = server->next;
3129                 disconnect_and_free_connection(server->connection);
3130                 server->connection = NULL;
3131                 (void) event_del(&server->event);
3132                 if (evtimer_initialized(&server->timeout_event))
3133                         (void) evtimer_del(&server->timeout_event);
3134                 if (server->probe_request) {
3135                         evdns_cancel_request(server->base, server->probe_request);
3136                         server->probe_request = NULL;
3137                 }
3138                 if (server->socket >= 0)
3139                         evutil_closesocket(server->socket);
3140                 mm_free(server);
3141                 if (next == started_at)
3142                         break;
3143                 server = next;
3144         }
3145         base->server_head = NULL;
3146         base->global_good_nameservers = 0;
3147
3148         for (i = 0; i < base->n_req_heads; ++i) {
3149                 struct request *req, *req_started_at;
3150                 req = req_started_at = base->req_heads[i];
3151                 while (req) {
3152                         struct request *next = req->next;
3153                         req->tx_count = req->reissue_count = 0;
3154                         req->ns = NULL;
3155                         /* ???? What to do about searches? */
3156                         (void) evtimer_del(&req->timeout_event);
3157                         req->trans_id = 0;
3158                         req->transmit_me = 0;
3159
3160                         base->global_requests_waiting++;
3161                         evdns_request_insert(req, &base->req_waiting_head);
3162                         /* We want to insert these suspended elements at the front of
3163                          * the waiting queue, since they were pending before any of
3164                          * the waiting entries were added.  This is a circular list,
3165                          * so we can just shift the start back by one.*/
3166                         base->req_waiting_head = base->req_waiting_head->prev;
3167
3168                         if (next == req_started_at)
3169                                 break;
3170                         req = next;
3171                 }
3172                 base->req_heads[i] = NULL;
3173         }
3174
3175         base->global_requests_inflight = 0;
3176
3177         EVDNS_UNLOCK(base);
3178         return 0;
3179 }
3180
3181 int
3182 evdns_clear_nameservers_and_suspend(void)
3183 {
3184         return evdns_base_clear_nameservers_and_suspend(current_base);
3185 }
3186
3187
/* exported function */
/* Pump the base's waiting queue while holding the base lock.
 * Always returns 0. */
int
evdns_base_resume(struct evdns_base *base)
{
	const int result = 0;

	EVDNS_LOCK(base);
	evdns_requests_pump_waiting_queue(base);
	EVDNS_UNLOCK(base);

	return result;
}
3198
3199 int
3200 evdns_resume(void)
3201 {
3202         return evdns_base_resume(current_base);
3203 }
3204
3205 static int
3206 evdns_nameserver_add_impl_(struct evdns_base *base, const struct sockaddr *address, int addrlen) {
3207         /* first check to see if we already have this nameserver */
3208
3209         const struct nameserver *server = base->server_head, *const started_at = base->server_head;
3210         struct nameserver *ns;
3211         int err = 0;
3212         char addrbuf[128];
3213
3214         ASSERT_LOCKED(base);
3215         if (server) {
3216                 do {
3217                         if (!evutil_sockaddr_cmp((struct sockaddr*)&server->address, address, 1)) return 3;
3218                         server = server->next;
3219                 } while (server != started_at);
3220         }
3221         if (addrlen > (int)sizeof(ns->address)) {
3222                 log(EVDNS_LOG_DEBUG, "Addrlen %d too long.", (int)addrlen);
3223                 return 2;
3224         }
3225
3226         ns = (struct nameserver *) mm_malloc(sizeof(struct nameserver));
3227         if (!ns) return -1;
3228
3229         memset(ns, 0, sizeof(struct nameserver));
3230         ns->base = base;
3231
3232         evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns);
3233
3234         ns->socket = evutil_socket_(address->sa_family,
3235             SOCK_DGRAM|EVUTIL_SOCK_NONBLOCK|EVUTIL_SOCK_CLOEXEC, 0);
3236         if (ns->socket < 0) { err = 1; goto out1; }
3237
3238         if (base->global_outgoing_addrlen &&
3239             !evutil_sockaddr_is_loopback_(address)) {
3240                 if (bind(ns->socket,
3241                         (struct sockaddr*)&base->global_outgoing_address,
3242                         base->global_outgoing_addrlen) < 0) {
3243                         log(EVDNS_LOG_WARN,"Couldn't bind to outgoing address");
3244                         err = 2;
3245                         goto out2;
3246                 }
3247         }
3248
3249         if (base->so_rcvbuf) {
3250                 if (setsockopt(ns->socket, SOL_SOCKET, SO_RCVBUF,
3251                     (void *)&base->so_rcvbuf, sizeof(base->so_rcvbuf))) {
3252                         log(EVDNS_LOG_WARN, "Couldn't set SO_RCVBUF to %i", base->so_rcvbuf);
3253                         err = -SO_RCVBUF;
3254                         goto out2;
3255                 }
3256         }
3257         if (base->so_sndbuf) {
3258                 if (setsockopt(ns->socket, SOL_SOCKET, SO_SNDBUF,
3259                     (void *)&base->so_sndbuf, sizeof(base->so_sndbuf))) {
3260                         log(EVDNS_LOG_WARN, "Couldn't set SO_SNDBUF to %i", base->so_sndbuf);
3261                         err = -SO_SNDBUF;
3262                         goto out2;
3263                 }
3264         }
3265
3266         memcpy(&ns->address, address, addrlen);
3267         ns->addrlen = addrlen;
3268         ns->state = 1;
3269         ns->connection = NULL;
3270         event_assign(&ns->event, ns->base->event_base, ns->socket,
3271                                  EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
3272         if (!base->disable_when_inactive && event_add(&ns->event, NULL) < 0) {
3273                 err = 2;
3274                 goto out2;
3275         }
3276
3277         log(EVDNS_LOG_DEBUG, "Added nameserver %s as %p",
3278             evutil_format_sockaddr_port_(address, addrbuf, sizeof(addrbuf)), (void *)ns);
3279
3280         /* insert this nameserver into the list of them */
3281         if (!base->server_head) {
3282                 ns->next = ns->prev = ns;
3283                 base->server_head = ns;
3284         } else {
3285                 ns->next = base->server_head->next;
3286                 ns->prev = base->server_head;
3287                 base->server_head->next = ns;
3288                 ns->next->prev = ns;
3289         }
3290
3291         base->global_good_nameservers++;
3292
3293         return 0;
3294
3295 out2:
3296         evutil_closesocket(ns->socket);
3297 out1:
3298         event_debug_unassign(&ns->event);
3299         mm_free(ns);
3300         log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d",
3301             evutil_format_sockaddr_port_(address, addrbuf, sizeof(addrbuf)), err);
3302         return err;
3303 }
3304
3305 /* exported function */
3306 int
3307 evdns_base_nameserver_add(struct evdns_base *base, unsigned long int address)
3308 {
3309         struct sockaddr_in sin;
3310         int res;
3311         memset(&sin, 0, sizeof(sin));
3312         sin.sin_addr.s_addr = address;
3313         sin.sin_port = htons(53);
3314         sin.sin_family = AF_INET;
3315 #ifdef EVENT__HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
3316         sin.sin_len = sizeof(sin);
3317 #endif
3318         EVDNS_LOCK(base);
3319         res = evdns_nameserver_add_impl_(base, (struct sockaddr*)&sin, sizeof(sin));
3320         EVDNS_UNLOCK(base);
3321         return res;
3322 }
3323
3324 int
3325 evdns_nameserver_add(unsigned long int address) {
3326         if (!current_base)
3327                 current_base = evdns_base_new(NULL, 0);
3328         return evdns_base_nameserver_add(current_base, address);
3329 }
3330
3331 static void
3332 sockaddr_setport(struct sockaddr *sa, ev_uint16_t port)
3333 {
3334         if (sa->sa_family == AF_INET) {
3335                 ((struct sockaddr_in *)sa)->sin_port = htons(port);
3336         } else if (sa->sa_family == AF_INET6) {
3337                 ((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
3338         }
3339 }
3340
3341 static ev_uint16_t
3342 sockaddr_getport(struct sockaddr *sa)
3343 {
3344         if (sa->sa_family == AF_INET) {
3345                 return ntohs(((struct sockaddr_in *)sa)->sin_port);
3346         } else if (sa->sa_family == AF_INET6) {
3347                 return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
3348         } else {
3349                 return 0;
3350         }
3351 }
3352
3353 /* exported function */
3354 int
3355 evdns_base_nameserver_ip_add(struct evdns_base *base, const char *ip_as_string) {
3356         struct sockaddr_storage ss;
3357         struct sockaddr *sa;
3358         int len = sizeof(ss);
3359         int res;
3360         if (evutil_parse_sockaddr_port(ip_as_string, (struct sockaddr *)&ss,
3361                 &len)) {
3362                 log(EVDNS_LOG_WARN, "Unable to parse nameserver address %s",
3363                         ip_as_string);
3364                 return 4;
3365         }
3366         sa = (struct sockaddr *) &ss;
3367         if (sockaddr_getport(sa) == 0)
3368                 sockaddr_setport(sa, 53);
3369
3370         EVDNS_LOCK(base);
3371         res = evdns_nameserver_add_impl_(base, sa, len);
3372         EVDNS_UNLOCK(base);
3373         return res;
3374 }
3375
3376 int
3377 evdns_nameserver_ip_add(const char *ip_as_string) {
3378         if (!current_base)
3379                 current_base = evdns_base_new(NULL, 0);
3380         return evdns_base_nameserver_ip_add(current_base, ip_as_string);
3381 }
3382
3383 int
3384 evdns_base_nameserver_sockaddr_add(struct evdns_base *base,
3385     const struct sockaddr *sa, ev_socklen_t len, unsigned flags)
3386 {
3387         int res;
3388         EVUTIL_ASSERT(base);
3389         EVDNS_LOCK(base);
3390         res = evdns_nameserver_add_impl_(base, sa, len);
3391         EVDNS_UNLOCK(base);
3392         return res;
3393 }
3394
3395 int
3396 evdns_base_get_nameserver_addr(struct evdns_base *base, int idx,
3397     struct sockaddr *sa, ev_socklen_t len)
3398 {
3399         int result = -1;
3400         int i;
3401         struct nameserver *server;
3402         EVDNS_LOCK(base);
3403         server = base->server_head;
3404         for (i = 0; i < idx && server; ++i, server = server->next) {
3405                 if (server->next == base->server_head)
3406                         goto done;
3407         }
3408         if (! server)
3409                 goto done;
3410
3411         if (server->addrlen > len) {
3412                 result = (int) server->addrlen;
3413                 goto done;
3414         }
3415
3416         memcpy(sa, &server->address, server->addrlen);
3417         result = (int) server->addrlen;
3418 done:
3419         EVDNS_UNLOCK(base);
3420         return result;
3421 }
3422
3423 int
3424 evdns_base_get_nameserver_fd(struct evdns_base *base, int idx)
3425 {
3426         int result = -1;
3427         int i;
3428         struct nameserver *server;
3429         EVDNS_LOCK(base);
3430         server = base->server_head;
3431         for (i = 0; i < idx && server; ++i, server = server->next) {
3432                 if (server->next == base->server_head)
3433                         goto done;
3434         }
3435         if (! server)
3436                 goto done;
3437         result = server->socket;
3438 done:
3439         EVDNS_UNLOCK(base);
3440         return result;
3441 }
3442
3443
3444 /* remove from the queue */
3445 static void
3446 evdns_request_remove(struct request *req, struct request **head)
3447 {
3448         ASSERT_LOCKED(req->base);
3449         ASSERT_VALID_REQUEST(req);
3450
3451 #if 0
3452         {
3453                 struct request *ptr;
3454                 int found = 0;
3455                 EVUTIL_ASSERT(*head != NULL);
3456
3457                 ptr = *head;
3458                 do {
3459                         if (ptr == req) {
3460                                 found = 1;
3461                                 break;
3462                         }
3463                         ptr = ptr->next;
3464                 } while (ptr != *head);
3465                 EVUTIL_ASSERT(found);
3466
3467                 EVUTIL_ASSERT(req->next);
3468         }
3469 #endif
3470
3471         if (req->next == req) {
3472                 /* only item in the list */
3473                 *head = NULL;
3474         } else {
3475                 req->next->prev = req->prev;
3476                 req->prev->next = req->next;
3477                 if (*head == req) *head = req->next;
3478         }
3479         req->next = req->prev = NULL;
3480 }
3481
3482 /* insert into the tail of the queue */
3483 static void
3484 evdns_request_insert(struct request *req, struct request **head) {
3485         ASSERT_LOCKED(req->base);
3486         ASSERT_VALID_REQUEST(req);
3487         if (!*head) {
3488                 *head = req;
3489                 req->next = req->prev = req;
3490                 return;
3491         }
3492
3493         req->prev = (*head)->prev;
3494         req->prev->next = req;
3495         req->next = *head;
3496         (*head)->prev = req;
3497 }
3498
/* Return the number of '.' characters in the NUL-terminated string `s`. */
static int
string_num_dots(const char *s) {
	const char *dot;
	int dots = 0;

	for (dot = strchr(s, '.'); dot != NULL; dot = strchr(dot + 1, '.'))
		++dots;

	return dots;
}
3508
3509 static struct request *
3510 request_new(struct evdns_base *base, struct evdns_request *handle, int type,
3511             const char *name, int flags) {
3512
3513         const char issuing_now =
3514             (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
3515
3516         const size_t name_len = strlen(name);
3517         const size_t request_max_len = evdns_request_len(base, name_len);
3518         const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
3519         /* the request data is alloced in a single block with the header */
3520         struct request *const req =
3521             mm_malloc(sizeof(struct request) + request_max_len);
3522         int rlen;
3523         char namebuf[256];
3524         (void) flags;
3525
3526         ASSERT_LOCKED(base);
3527
3528         if (!req) return NULL;
3529
3530         if (name_len >= sizeof(namebuf)) {
3531                 mm_free(req);
3532                 return NULL;
3533         }
3534
3535         memset(req, 0, sizeof(struct request));
3536         req->request_size = (u16)(sizeof(struct request) + request_max_len);
3537         req->base = base;
3538
3539         evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
3540
3541         if (base->global_randomize_case) {
3542                 unsigned i;
3543                 char randbits[(sizeof(namebuf)+7)/8];
3544                 strlcpy(namebuf, name, sizeof(namebuf));
3545                 evutil_secure_rng_get_bytes(randbits, (name_len+7)/8);
3546                 for (i = 0; i < name_len; ++i) {
3547                         if (EVUTIL_ISALPHA_(namebuf[i])) {
3548                                 if ((randbits[i >> 3] & (1<<(i & 7))))
3549                                         namebuf[i] |= 0x20;
3550                                 else
3551                                         namebuf[i] &= ~0x20;
3552                         }
3553                 }
3554                 name = namebuf;
3555         }
3556
3557         /* request data lives just after the header */
3558         req->request = ((u8 *) req) + sizeof(struct request);
3559         /* denotes that the request data shouldn't be free()ed */
3560         req->request_appended = 1;
3561         rlen = evdns_request_data_build(base, name, name_len, trans_id,
3562             type, CLASS_INET, req->request, request_max_len);
3563         if (rlen < 0)
3564                 goto err1;
3565
3566         req->request_len = rlen;
3567         req->trans_id = trans_id;
3568         req->tx_count = 0;
3569         req->request_type = type;
3570         req->ns = issuing_now ? nameserver_pick(base) : NULL;
3571         req->next = req->prev = NULL;
3572         req->handle = handle;
3573         if (handle) {
3574                 handle->current_req = req;
3575                 handle->base = base;
3576         }
3577
3578         if (flags & DNS_CNAME_CALLBACK)
3579                 req->need_cname = 1;
3580
3581         return req;
3582 err1:
3583         mm_free(req);
3584         return NULL;
3585 }
3586
3587 static struct request *
3588 request_clone(struct evdns_base *base, struct request* current)
3589 {
3590         const char issuing_now =
3591             (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
3592         const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
3593         /* the request data is alloced in a single block with the header */
3594         struct request *const req = mm_malloc(current->request_size);
3595         EVUTIL_ASSERT(current && base);
3596         ASSERT_LOCKED(base);
3597
3598         if (!req)
3599                 return NULL;
3600         memcpy(req, current, current->request_size);
3601
3602         evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
3603
3604         /* request data lives just after the header */
3605         req->request = ((u8 *) req) + sizeof(struct request);
3606         /* We need to replace transact id */
3607         request_trans_id_set(req, trans_id);
3608
3609         req->tx_count = 0;
3610         req->ns = issuing_now ? nameserver_pick(base) : NULL;
3611         req->next = req->prev = NULL;
3612         req->handle = NULL;
3613         log(EVDNS_LOG_DEBUG, "Clone new request TID %d from TID %d", req->trans_id, current->trans_id);
3614
3615         return req;
3616 }
3617
3618 static void
3619 request_submit(struct request *const req) {
3620         struct evdns_base *base = req->base;
3621         ASSERT_LOCKED(base);
3622         ASSERT_VALID_REQUEST(req);
3623         if (req->ns) {
3624                 /* if it has a nameserver assigned then this is going */
3625                 /* straight into the inflight queue */
3626                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
3627
3628                 base->global_requests_inflight++;
3629                 req->ns->requests_inflight++;
3630
3631                 evdns_request_transmit(req);
3632         } else {
3633                 evdns_request_insert(req, &base->req_waiting_head);
3634                 base->global_requests_waiting++;
3635         }
3636 }
3637
3638 /* exported function */
3639 void
3640 evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
3641 {
3642         struct request *req;
3643
3644         if (!handle->current_req)
3645                 return;
3646
3647         if (!base) {
3648                 /* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
3649                 base = handle->base;
3650                 if (!base)
3651                         base = handle->current_req->base;
3652         }
3653
3654         EVDNS_LOCK(base);
3655         if (handle->pending_cb) {
3656                 EVDNS_UNLOCK(base);
3657                 return;
3658         }
3659
3660         req = handle->current_req;
3661         ASSERT_VALID_REQUEST(req);
3662
3663         reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
3664         if (req->ns) {
3665                 /* remove from inflight queue */
3666                 request_finished(req, &REQ_HEAD(base, req->trans_id), 1);
3667         } else {
3668                 /* remove from global_waiting head */
3669                 request_finished(req, &base->req_waiting_head, 1);
3670         }
3671         EVDNS_UNLOCK(base);
3672 }
3673
3674 /* exported function */
3675 struct evdns_request *
3676 evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
3677     evdns_callback_type callback, void *ptr) {
3678         struct evdns_request *handle;
3679         struct request *req;
3680         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
3681         handle = mm_calloc(1, sizeof(*handle));
3682         if (handle == NULL)
3683                 return NULL;
3684         handle->user_callback = callback;
3685         handle->user_pointer = ptr;
3686         EVDNS_LOCK(base);
3687         handle->tcp_flags = base->global_tcp_flags;
3688         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3689         if (flags & DNS_QUERY_NO_SEARCH) {
3690                 req =
3691                         request_new(base, handle, TYPE_A, name, flags);
3692                 if (req)
3693                         request_submit(req);
3694         } else {
3695                 search_request_new(base, handle, TYPE_A, name, flags);
3696         }
3697         if (handle->current_req == NULL) {
3698                 mm_free(handle);
3699                 handle = NULL;
3700         }
3701         EVDNS_UNLOCK(base);
3702         return handle;
3703 }
3704
3705 int evdns_resolve_ipv4(const char *name, int flags,
3706                                            evdns_callback_type callback, void *ptr)
3707 {
3708         return evdns_base_resolve_ipv4(current_base, name, flags, callback, ptr)
3709                 ? 0 : -1;
3710 }
3711
3712
3713 /* exported function */
3714 struct evdns_request *
3715 evdns_base_resolve_ipv6(struct evdns_base *base,
3716     const char *name, int flags,
3717     evdns_callback_type callback, void *ptr)
3718 {
3719         struct evdns_request *handle;
3720         struct request *req;
3721         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
3722         handle = mm_calloc(1, sizeof(*handle));
3723         if (handle == NULL)
3724                 return NULL;
3725         handle->user_callback = callback;
3726         handle->user_pointer = ptr;
3727         EVDNS_LOCK(base);
3728         handle->tcp_flags = base->global_tcp_flags;
3729         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3730         if (flags & DNS_QUERY_NO_SEARCH) {
3731                 req = request_new(base, handle, TYPE_AAAA, name, flags);
3732                 if (req)
3733                         request_submit(req);
3734         } else {
3735                 search_request_new(base, handle, TYPE_AAAA, name, flags);
3736         }
3737         if (handle->current_req == NULL) {
3738                 mm_free(handle);
3739                 handle = NULL;
3740         }
3741         EVDNS_UNLOCK(base);
3742         return handle;
3743 }
3744
3745 int evdns_resolve_ipv6(const char *name, int flags,
3746     evdns_callback_type callback, void *ptr) {
3747         return evdns_base_resolve_ipv6(current_base, name, flags, callback, ptr)
3748                 ? 0 : -1;
3749 }
3750
3751 struct evdns_request *
3752 evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3753         char buf[32];
3754         struct evdns_request *handle;
3755         struct request *req;
3756         u32 a;
3757         EVUTIL_ASSERT(in);
3758         a = ntohl(in->s_addr);
3759         evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
3760                         (int)(u8)((a    )&0xff),
3761                         (int)(u8)((a>>8 )&0xff),
3762                         (int)(u8)((a>>16)&0xff),
3763                         (int)(u8)((a>>24)&0xff));
3764         handle = mm_calloc(1, sizeof(*handle));
3765         if (handle == NULL)
3766                 return NULL;
3767         handle->user_callback = callback;
3768         handle->user_pointer = ptr;
3769         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
3770         EVDNS_LOCK(base);
3771         handle->tcp_flags = base->global_tcp_flags;
3772         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3773         req = request_new(base, handle, TYPE_PTR, buf, flags);
3774         if (req)
3775                 request_submit(req);
3776         if (handle->current_req == NULL) {
3777                 mm_free(handle);
3778                 handle = NULL;
3779         }
3780         EVDNS_UNLOCK(base);
3781         return (handle);
3782 }
3783
3784 int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3785         return evdns_base_resolve_reverse(current_base, in, flags, callback, ptr)
3786                 ? 0 : -1;
3787 }
3788
3789 struct evdns_request *
3790 evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3791         /* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
3792         char buf[73];
3793         char *cp;
3794         struct evdns_request *handle;
3795         struct request *req;
3796         int i;
3797         EVUTIL_ASSERT(in);
3798         cp = buf;
3799         for (i=15; i >= 0; --i) {
3800                 u8 byte = in->s6_addr[i];
3801                 *cp++ = "0123456789abcdef"[byte & 0x0f];
3802                 *cp++ = '.';
3803                 *cp++ = "0123456789abcdef"[byte >> 4];
3804                 *cp++ = '.';
3805         }
3806         EVUTIL_ASSERT(cp + strlen("ip6.arpa") < buf+sizeof(buf));
3807         memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
3808         handle = mm_calloc(1, sizeof(*handle));
3809         if (handle == NULL)
3810                 return NULL;
3811         handle->user_callback = callback;
3812         handle->user_pointer = ptr;
3813         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
3814         EVDNS_LOCK(base);
3815         handle->tcp_flags = base->global_tcp_flags;
3816         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3817         req = request_new(base, handle, TYPE_PTR, buf, flags);
3818         if (req)
3819                 request_submit(req);
3820         if (handle->current_req == NULL) {
3821                 mm_free(handle);
3822                 handle = NULL;
3823         }
3824         EVDNS_UNLOCK(base);
3825         return (handle);
3826 }
3827
3828 int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3829         return evdns_base_resolve_reverse_ipv6(current_base, in, flags, callback, ptr)
3830                 ? 0 : -1;
3831 }
3832
3833 /* ================================================================= */
3834 /* Search support */
3835 /* */
3836 /* the libc resolver has support for searching a number of domains */
3837 /* to find a name. If nothing else then it takes the single domain */
3838 /* from the gethostname() call. */
3839 /* */
3840 /* It can also be configured via the domain and search options in a */
3841 /* resolv.conf. */
3842 /* */
3843 /* The ndots option controls how many dots it takes for the resolver */
3844 /* to decide that a name is non-local and so try a raw lookup first. */
3845
/* One entry in a singly-linked list of search domains. */
struct search_domain {
	/* length in bytes of the domain text stored after this struct */
	int len;
	/* following entry, or NULL at the end of the list */
	struct search_domain *next;
	/* the text string is appended to this structure */
};
3851
/* Reference-counted search configuration: the ndots setting plus the list
 * of search domains. */
struct search_state {
	/* number of references; the state is freed when it drops to zero */
	int refcount;
	/* the ndots setting (see the section comment above) */
	int ndots;
	/* count of search domains added */
	int num_domains;
	/* first entry of the search domain list, or NULL */
	struct search_domain *head;
};
3858
3859 static void
3860 search_state_decref(struct search_state *const state) {
3861         if (!state) return;
3862         state->refcount--;
3863         if (!state->refcount) {
3864                 struct search_domain *next, *dom;
3865                 for (dom = state->head; dom; dom = next) {
3866                         next = dom->next;
3867                         mm_free(dom);
3868                 }
3869                 mm_free(state);
3870         }
3871 }
3872
3873 static struct search_state *
3874 search_state_new(void) {
3875         struct search_state *state = (struct search_state *) mm_malloc(sizeof(struct search_state));
3876         if (!state) return NULL;
3877         memset(state, 0, sizeof(struct search_state));
3878         state->refcount = 1;
3879         state->ndots = 1;
3880
3881         return state;
3882 }
3883
3884 static void
3885 search_postfix_clear(struct evdns_base *base) {
3886         search_state_decref(base->global_search_state);
3887
3888         base->global_search_state = search_state_new();
3889 }
3890
/* exported function */
/* Reset the base's search state while holding the base lock. */
void
evdns_base_search_clear(struct evdns_base *base)
{
	EVDNS_LOCK(base);

	search_postfix_clear(base);

	EVDNS_UNLOCK(base);
}
3899
3900 void
3901 evdns_search_clear(void) {
3902         evdns_base_search_clear(current_base);
3903 }
3904
3905 static void
3906 search_postfix_add(struct evdns_base *base, const char *domain) {
3907         size_t domain_len;
3908         struct search_domain *sdomain;
3909         while (domain[0] == '.') domain++;
3910         domain_len = strlen(domain);
3911
3912         ASSERT_LOCKED(base);
3913         if (!base->global_search_state) base->global_search_state = search_state_new();
3914         if (!base->global_search_state) return;
3915         base->global_search_state->num_domains++;
3916
3917         sdomain = (struct search_domain *) mm_malloc(sizeof(struct search_domain) + domain_len);
3918         if (!sdomain) return;
3919         memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
3920         sdomain->next = base->global_search_state->head;
3921         sdomain->len = (int) domain_len;
3922
3923         base->global_search_state->head = sdomain;
3924 }
3925
3926 /* reverse the order of members in the postfix list. This is needed because, */
3927 /* when parsing resolv.conf we push elements in the wrong order */
3928 static void
3929 search_reverse(struct evdns_base *base) {
3930         struct search_domain *cur, *prev = NULL, *next;
3931         ASSERT_LOCKED(base);
3932         cur = base->global_search_state->head;
3933         while (cur) {
3934                 next = cur->next;
3935                 cur->next = prev;
3936                 prev = cur;
3937                 cur = next;
3938         }
3939
3940         base->global_search_state->head = prev;
3941 }
3942
/* exported function */
/* Add `domain` to the search list of `base`, taking the base lock around
 * the update. */
void
evdns_base_search_add(struct evdns_base *base, const char *domain) {
	EVDNS_LOCK(base);
	search_postfix_add(base, domain);
	EVDNS_UNLOCK(base);
}
3950 void
3951 evdns_search_add(const char *domain) {
3952         evdns_base_search_add(current_base, domain);
3953 }
3954
3955 /* exported function */
3956 void
3957 evdns_base_search_ndots_set(struct evdns_base *base, const int ndots) {
3958         EVDNS_LOCK(base);
3959         if (!base->global_search_state) base->global_search_state = search_state_new();
3960         if (base->global_search_state)
3961                 base->global_search_state->ndots = ndots;
3962         EVDNS_UNLOCK(base);
3963 }
3964 void
3965 evdns_search_ndots_set(const int ndots) {
3966         evdns_base_search_ndots_set(current_base, ndots);
3967 }
3968
3969 static void
3970 search_set_from_hostname(struct evdns_base *base) {
3971         char hostname[EVDNS_NAME_MAX + 1], *domainname;
3972
3973         ASSERT_LOCKED(base);
3974         search_postfix_clear(base);
3975         if (gethostname(hostname, sizeof(hostname))) return;
3976         domainname = strchr(hostname, '.');
3977         if (!domainname) return;
3978         search_postfix_add(base, domainname);
3979 }
3980
3981 /* warning: returns malloced string */
3982 static char *
3983 search_make_new(const struct search_state *const state, int n, const char *const base_name) {
3984         const size_t base_len = strlen(base_name);
3985         char need_to_append_dot;
3986         struct search_domain *dom;
3987
3988         if (!base_len) return NULL;
3989         need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
3990
3991         for (dom = state->head; dom; dom = dom->next) {
3992                 if (!n--) {
3993                         /* this is the postfix we want */
3994                         /* the actual postfix string is kept at the end of the structure */
3995                         const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
3996                         const int postfix_len = dom->len;
3997                         char *const newname = (char *) mm_malloc(base_len + need_to_append_dot + postfix_len + 1);
3998                         if (!newname) return NULL;
3999                         memcpy(newname, base_name, base_len);
4000                         if (need_to_append_dot) newname[base_len] = '.';
4001                         memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
4002                         newname[base_len + need_to_append_dot + postfix_len] = 0;
4003                         return newname;
4004                 }
4005         }
4006
4007         /* we ran off the end of the list and still didn't find the requested string */
4008         EVUTIL_ASSERT(0);
4009         return NULL; /* unreachable; stops warnings in some compilers. */
4010 }
4011
4012 static struct request *
4013 search_request_new(struct evdns_base *base, struct evdns_request *handle,
4014                    int type, const char *const name, int flags) {
4015         ASSERT_LOCKED(base);
4016         EVUTIL_ASSERT(type == TYPE_A || type == TYPE_AAAA);
4017         EVUTIL_ASSERT(handle->current_req == NULL);
4018         if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
4019              base->global_search_state &&
4020                  base->global_search_state->num_domains) {
4021                 /* we have some domains to search */
4022                 struct request *req;
4023                 if (string_num_dots(name) >= base->global_search_state->ndots) {
4024                         req = request_new(base, handle, type, name, flags);
4025                         if (!req) return NULL;
4026                         handle->search_index = -1;
4027                 } else {
4028                         char *const new_name = search_make_new(base->global_search_state, 0, name);
4029                         if (!new_name) return NULL;
4030                         req = request_new(base, handle, type, new_name, flags);
4031                         mm_free(new_name);
4032                         if (!req) return NULL;
4033                         handle->search_index = 0;
4034                 }
4035                 EVUTIL_ASSERT(handle->search_origname == NULL);
4036                 handle->search_origname = mm_strdup(name);
4037                 if (handle->search_origname == NULL) {
4038                         /* XXX Should we dealloc req? If yes, how? */
4039                         if (req)
4040                                 mm_free(req);
4041                         return NULL;
4042                 }
4043                 handle->search_state = base->global_search_state;
4044                 handle->search_flags = flags;
4045                 base->global_search_state->refcount++;
4046                 request_submit(req);
4047                 return req;
4048         } else {
4049                 struct request *const req = request_new(base, handle, type, name, flags);
4050                 if (!req) return NULL;
4051                 request_submit(req);
4052                 return req;
4053         }
4054 }
4055
4056 /* this is called when a request has failed to find a name. We need to check */
4057 /* if it is part of a search and, if so, try the next name in the list */
4058 /* returns: */
4059 /*   0 another request has been submitted */
4060 /*   1 no more requests needed */
4061 static int
4062 search_try_next(struct evdns_request *const handle) {
4063         struct request *req = handle->current_req;
4064         struct evdns_base *base = req->base;
4065         struct request *newreq;
4066         ASSERT_LOCKED(base);
4067         if (handle->search_state) {
4068                 /* it is part of a search */
4069                 char *new_name;
4070                 handle->search_index++;
4071                 if (handle->search_index >= handle->search_state->num_domains) {
4072                         /* no more postfixes to try, however we may need to try */
4073                         /* this name without a postfix */
4074                         if (string_num_dots(handle->search_origname) < handle->search_state->ndots) {
4075                                 /* yep, we need to try it raw */
4076                                 newreq = request_new(base, NULL, req->request_type, handle->search_origname, handle->search_flags);
4077                                 log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", handle->search_origname);
4078                                 if (newreq) {
4079                                         search_request_finished(handle);
4080                                         goto submit_next;
4081                                 }
4082                         }
4083                         return 1;
4084                 }
4085
4086                 new_name = search_make_new(handle->search_state, handle->search_index, handle->search_origname);
4087                 if (!new_name) return 1;
4088                 log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, handle->search_index);
4089                 newreq = request_new(base, NULL, req->request_type, new_name, handle->search_flags);
4090                 mm_free(new_name);
4091                 if (!newreq) return 1;
4092                 goto submit_next;
4093         }
4094         return 1;
4095
4096 submit_next:
4097         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
4098         handle->current_req = newreq;
4099         newreq->handle = handle;
4100         request_submit(newreq);
4101         return 0;
4102 }
4103
4104 static void
4105 search_request_finished(struct evdns_request *const handle) {
4106         ASSERT_LOCKED(handle->current_req->base);
4107         if (handle->search_state) {
4108                 search_state_decref(handle->search_state);
4109                 handle->search_state = NULL;
4110         }
4111         if (handle->search_origname) {
4112                 mm_free(handle->search_origname);
4113                 handle->search_origname = NULL;
4114         }
4115 }
4116
4117 /* ================================================================= */
4118 /* Parsing resolv.conf files */
4119
4120 static void
4121 evdns_resolv_set_defaults(struct evdns_base *base, int flags) {
4122         int add_default = flags & DNS_OPTION_NAMESERVERS;
4123         if (flags & DNS_OPTION_NAMESERVERS_NO_DEFAULT)
4124                 add_default = 0;
4125
4126         /* if the file isn't found then we assume a local resolver */
4127         ASSERT_LOCKED(base);
4128         if (flags & DNS_OPTION_SEARCH)
4129                 search_set_from_hostname(base);
4130         if (add_default)
4131                 evdns_base_nameserver_ip_add(base, "127.0.0.1");
4132 }
4133
#ifndef EVENT__HAVE_STRTOK_R
/* Fallback strtok_r() for platforms that lack one.
 *
 * Returns the next token of `s` (or, when `s` is NULL, of the string
 * remembered in *state), splitting on any character in `delim`.  The
 * input is modified in place.  Returns NULL when no tokens remain.
 *
 * Like the POSIX function, runs of delimiters are skipped, so empty
 * tokens are never returned ("a  b" yields "a" then "b"). */
static char *
strtok_r(char *s, const char *delim, char **state) {
	char *token, *end;

	token = s ? s : *state;
	if (!token)
		return NULL;

	/* skip delimiters in front of the token */
	token += strspn(token, delim);
	if (!*token) {
		*state = NULL;
		return NULL;
	}

	end = token + strcspn(token, delim);
	if (*end) {
		*end++ = '\0';
		*state = end;
	} else {
		/* token runs to the end of the string: nothing left after it */
		*state = NULL;
	}
	return token;
}
#endif
4155
/* helper version of atoi which returns -1 on error */
/* An error is trailing non-numeric text or a value that does not fit in */
/* an int.  Note that a literal "-1" cannot be told apart from an error. */
static int
strtoint(const char *const str)
{
	char *endptr;
	const long parsed = strtol(str, &endptr, 10);
	const int r = (int) parsed;
	if (*endptr) return -1;
	/* Reject values that do not survive the narrowing to int instead of
	 * silently returning their truncated low bits. */
	if ((long) r != parsed) return -1;
	return r;
}
4165
/* Parse a number of seconds into a timeval; return -1 on error. */
/* Errors are: trailing garbage, a negative or not-a-number value, a value */
/* too large for the int conversions below, or a value under 1 msec. */
/* On error *out may be left untouched or partially written. */
static int
evdns_strtotimeval(const char *const str, struct timeval *out)
{
	double d;
	char *endptr;
	d = strtod(str, &endptr);
	if (*endptr) return -1;
	/* written as !(d >= 0) so that NaN is rejected together with
	 * negative values */
	if (!(d >= 0)) return -1;
	/* Converting an out-of-range double to int is undefined; refuse
	 * infinities and anything above the largest 32-bit int. */
	if (d > 2147483647.0) return -1;
	out->tv_sec = (int) d;
	out->tv_usec = (int) ((d - (int) d)*1000000);
	if (out->tv_sec == 0 && out->tv_usec < 1000) /* less than 1 msec */
		return -1;
	return 0;
}
4181
/* helper version of atoi that returns -1 on error and clips to bounds. */
/* A -1 coming back from strtoint() is passed through unchanged and never */
/* clipped; any other result is forced into the range [min, max]. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int parsed = strtoint(str);

	if (parsed == -1)
		return -1;
	if (parsed < min)
		return min;
	if (parsed > max)
		return max;
	return parsed;
}
4196
4197 static int
4198 evdns_base_set_max_requests_inflight(struct evdns_base *base, int maxinflight)
4199 {
4200         int old_n_heads = base->n_req_heads, n_heads;
4201         struct request **old_heads = base->req_heads, **new_heads, *req;
4202         int i;
4203
4204         ASSERT_LOCKED(base);
4205         if (maxinflight < 1)
4206                 maxinflight = 1;
4207         n_heads = (maxinflight+4) / 5;
4208         EVUTIL_ASSERT(n_heads > 0);
4209         new_heads = mm_calloc(n_heads, sizeof(struct request*));
4210         if (!new_heads)
4211                 return (-1);
4212         if (old_heads) {
4213                 for (i = 0; i < old_n_heads; ++i) {
4214                         while (old_heads[i]) {
4215                                 req = old_heads[i];
4216                                 evdns_request_remove(req, &old_heads[i]);
4217                                 evdns_request_insert(req, &new_heads[req->trans_id % n_heads]);
4218                         }
4219                 }
4220                 mm_free(old_heads);
4221         }
4222         base->req_heads = new_heads;
4223         base->n_req_heads = n_heads;
4224         base->global_max_requests_inflight = maxinflight;
4225         return (0);
4226 }
4227
4228 /* exported function */
4229 int
4230 evdns_base_set_option(struct evdns_base *base,
4231     const char *option, const char *val)
4232 {
4233         int res;
4234         EVDNS_LOCK(base);
4235         res = evdns_base_set_option_impl(base, option, val, DNS_OPTIONS_ALL);
4236         EVDNS_UNLOCK(base);
4237         return res;
4238 }
4239
/* Return nonzero if `s1` names the option `optionname`.
 *
 * Option names are given as "option:".  `s1` matches when it is exactly
 * "option:", when it is "option" without the colon, or when it starts
 * with "option:" followed by anything else; that last form exists for
 * the resolv.conf parser. */
static inline int
str_matches_option(const char *s1, const char *optionname)
{
	const size_t optlen = strlen(optionname);
	const size_t slen = strlen(s1);
	size_t cmplen;

	if (slen > optlen)
		cmplen = optlen;	/* "option:randomjunk": compare the prefix */
	else if (slen == optlen || slen == optlen - 1)
		cmplen = slen;		/* "option:" or bare "option" */
	else
		return 0;		/* too short to be this option */

	return strncmp(s1, optionname, cmplen) == 0;
}
4255
4256 /* exported function */
4257 int
4258 evdns_server_port_set_option(struct evdns_server_port *port,
4259         enum evdns_server_option option, size_t value)
4260 {
4261         int res = 0;
4262         EVDNS_LOCK(port);
4263         switch (option) {
4264         case EVDNS_SOPT_TCP_MAX_CLIENTS:
4265                 if (!port->listener) {
4266                         log(EVDNS_LOG_WARN, "EVDNS_SOPT_TCP_MAX_CLIENTS option can be set only on TCP server");
4267                         res = -1;
4268                         goto end;
4269                 }
4270                 port->max_client_connections = value;
4271                 log(EVDNS_LOG_DEBUG, "Setting EVDNS_SOPT_TCP_MAX_CLIENTS to %u", port->max_client_connections);
4272                 break;
4273         case EVDNS_SOPT_TCP_IDLE_TIMEOUT:
4274                 if (!port->listener) {
4275                         log(EVDNS_LOG_WARN, "EVDNS_SOPT_TCP_IDLE_TIMEOUT option can be set only on TCP server");
4276                         res = -1;
4277                         goto end;
4278                 }
4279                 port->tcp_idle_timeout.tv_sec = value;
4280                 port->tcp_idle_timeout.tv_usec = 0;
4281                 log(EVDNS_LOG_DEBUG, "Setting EVDNS_SOPT_TCP_IDLE_TIMEOUT to %u seconds",
4282                         (unsigned)port->tcp_idle_timeout.tv_sec);
4283                 break;
4284         default:
4285                 log(EVDNS_LOG_WARN, "Invalid DNS server option %d", (int)option);
4286                 res = -1;
4287                 break;
4288         }
4289 end:
4290         EVDNS_UNLOCK(port);
4291         return res;
4292 }
4293
/* Apply one configuration option to `base`.
 *
 * `option` is matched with str_matches_option(), so both "name" and
 * "name:..." select the option "name:"; `val` is the textual value.
 * `flags` selects which option classes may actually be changed
 * (DNS_OPTION_SEARCH, DNS_OPTION_MISC, DNS_OPTION_NAMESERVERS): in most
 * branches the value is validated first and then, if the class is not
 * enabled, the option is skipped with a return of 0.
 *
 * Returns 0 on success, for a skipped option and for an unrecognised
 * option name; returns -1 when the value is invalid or the option could
 * not be applied.  The caller must hold the base lock. */
static int
evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags)
{
	ASSERT_LOCKED(base);
	if (str_matches_option(option, "ndots:")) {
		const int ndots = strtoint(val);
		if (ndots == -1) return -1;
		if (!(flags & DNS_OPTION_SEARCH)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
		if (!base->global_search_state) base->global_search_state = search_state_new();
		if (!base->global_search_state) return -1;
		base->global_search_state->ndots = ndots;
	} else if (str_matches_option(option, "timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting timeout to %s", val);
		memcpy(&base->global_timeout, &tv, sizeof(struct timeval));
	} else if (str_matches_option(option, "getaddrinfo-allow-skew:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting getaddrinfo-allow-skew to %s",
		    val);
		memcpy(&base->global_getaddrinfo_allow_skew, &tv,
		    sizeof(struct timeval));
	} else if (str_matches_option(option, "max-timeouts:")) {
		const int maxtimeout = strtoint_clipped(val, 1, 255);
		if (maxtimeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
			maxtimeout);
		base->global_max_nameserver_timeout = maxtimeout;
	} else if (str_matches_option(option, "max-inflight:")) {
		const int maxinflight = strtoint_clipped(val, 1, 65000);
		if (maxinflight == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
			maxinflight);
		/* Resizing the request table can fail; report that instead of
		 * claiming the new limit is in effect. */
		if (evdns_base_set_max_requests_inflight(base, maxinflight) < 0)
			return -1;
	} else if (str_matches_option(option, "attempts:")) {
		int retries = strtoint(val);
		if (retries == -1) return -1;
		if (retries > 255) retries = 255;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
		base->global_max_retransmits = retries;
	} else if (str_matches_option(option, "randomize-case:")) {
		int randcase = strtoint(val);
		if (randcase == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		base->global_randomize_case = randcase;
	} else if (str_matches_option(option, "bind-to:")) {
		/* XXX This only applies to successive nameservers, not
		 * to already-configured ones.  We might want to fix that. */
		int len = sizeof(base->global_outgoing_address);
		if (!(flags & DNS_OPTION_NAMESERVERS)) return 0;
		if (evutil_parse_sockaddr_port(val,
			(struct sockaddr*)&base->global_outgoing_address, &len))
			return -1;
		base->global_outgoing_addrlen = len;
	} else if (str_matches_option(option, "initial-probe-timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		/* cap the seconds part at one hour */
		if (tv.tv_sec > 3600)
			tv.tv_sec = 3600;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
		    val);
		memcpy(&base->global_nameserver_probe_initial_timeout, &tv,
		    sizeof(tv));
	} else if (str_matches_option(option, "max-probe-timeout:")) {
		const int max_probe_timeout = strtoint_clipped(val, 1, 3600);
		if (max_probe_timeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum probe timeout to %d",
			max_probe_timeout);
		base->ns_max_probe_timeout = max_probe_timeout;
		/* keep the initial probe timeout no larger than the new maximum */
		if (base->global_nameserver_probe_initial_timeout.tv_sec > max_probe_timeout) {
			base->global_nameserver_probe_initial_timeout.tv_sec = max_probe_timeout;
			base->global_nameserver_probe_initial_timeout.tv_usec = 0;
			log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
				val);
		}
	} else if (str_matches_option(option, "probe-backoff-factor:")) {
		const int backoff_factor = strtoint_clipped(val, 1, 10);
		if (backoff_factor == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting probe timeout backoff factor to %d",
			backoff_factor);
		base->ns_timeout_backoff_factor = backoff_factor;
	} else if (str_matches_option(option, "so-rcvbuf:")) {
		int buf = strtoint(val);
		if (buf == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting SO_RCVBUF to %s", val);
		base->so_rcvbuf = buf;
	} else if (str_matches_option(option, "so-sndbuf:")) {
		int buf = strtoint(val);
		if (buf == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting SO_SNDBUF to %s", val);
		base->so_sndbuf = buf;
	} else if (str_matches_option(option, "tcp-idle-timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting tcp idle timeout to %s", val);
		memcpy(&base->global_tcp_idle_timeout, &tv, sizeof(tv));
	} else if (str_matches_option(option, "use-vc:")) {
		/* flag-style option: it must not carry a value */
		if (!(flags & DNS_OPTION_MISC)) return 0;
		if (val && strlen(val)) return -1;
		log(EVDNS_LOG_DEBUG, "Setting use-vc option");
		base->global_tcp_flags |= DNS_QUERY_USEVC;
	} else if (str_matches_option(option, "ignore-tc:")) {
		/* flag-style option: it must not carry a value */
		if (!(flags & DNS_OPTION_MISC)) return 0;
		if (val && strlen(val)) return -1;
		log(EVDNS_LOG_DEBUG, "Setting ignore-tc option");
		base->global_tcp_flags |= DNS_QUERY_IGNTC;
	} else if (str_matches_option(option, "edns-udp-size:")) {
		const int sz = strtoint_clipped(val, DNS_MAX_UDP_SIZE, EDNS_MAX_UDP_SIZE);
		if (sz == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting edns-udp-size to %d", sz);
		base->global_max_udp_size = sz;
	}
	return 0;
}
4423
4424 int
4425 evdns_set_option(const char *option, const char *val, int flags)
4426 {
4427         if (!current_base)
4428                 current_base = evdns_base_new(NULL, 0);
4429         return evdns_base_set_option(current_base, option, val);
4430 }
4431
4432 static void
4433 resolv_conf_parse_line(struct evdns_base *base, char *const start, int flags) {
4434         char *strtok_state;
4435         static const char *const delims = " \t";
4436 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
4437
4438
4439         char *const first_token = strtok_r(start, delims, &strtok_state);
4440         ASSERT_LOCKED(base);
4441         if (!first_token) return;
4442
4443         if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
4444                 const char *const nameserver = NEXT_TOKEN;
4445
4446                 if (nameserver)
4447                         evdns_base_nameserver_ip_add(base, nameserver);
4448         } else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
4449                 const char *const domain = NEXT_TOKEN;
4450                 if (domain) {
4451                         search_postfix_clear(base);
4452                         search_postfix_add(base, domain);
4453                 }
4454         } else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
4455                 const char *domain;
4456                 search_postfix_clear(base);
4457
4458                 while ((domain = NEXT_TOKEN)) {
4459                         search_postfix_add(base, domain);
4460                 }
4461                 search_reverse(base);
4462         } else if (!strcmp(first_token, "options")) {
4463                 const char *option;
4464                 while ((option = NEXT_TOKEN)) {
4465                         const char *val = strchr(option, ':');
4466                         evdns_base_set_option_impl(base, option, val ? val+1 : "", flags);
4467                 }
4468         }
4469 #undef NEXT_TOKEN
4470 }
4471
/* exported function */
/* Parse the resolv.conf-style file `filename` into `base`, holding the */
/* base lock while doing so. */
/* returns: */
/*   0 no errors */
/*   1 failed to open file */
/*   2 failed to stat file */
/*   3 file too large */
/*   4 out of memory */
/*   5 short read from file */
/*   6 no nameservers configured; 127.0.0.1 was added as a default */
int
evdns_base_resolv_conf_parse(struct evdns_base *base, int flags, const char *const filename) {
	int rc;

	EVDNS_LOCK(base);
	rc = evdns_base_resolv_conf_parse_impl(base, flags, filename);
	EVDNS_UNLOCK(base);

	return rc;
}
4488
/* Return a newly allocated string naming the system hosts file, or NULL
 * if it cannot be determined or allocated.  The caller frees the result
 * with mm_free(). */
static char *
evdns_get_default_hosts_filename(void)
{
#ifdef _WIN32
	/* Windows is a little coy about where it puts its configuration
	 * files.  Sure, they're _usually_ in C:\windows\system32, but
	 * there's no reason in principle they couldn't be in
	 * W:\hoboken chicken emergency\
	 */
	char path[MAX_PATH+1];
	static const char hostfile[] = "\\drivers\\etc\\hosts";
	char *path_out;
	size_t len_out;

	if (! SHGetSpecialFolderPathA(NULL, path, CSIDL_SYSTEM, 0))
		return NULL;
	len_out = strlen(path)+strlen(hostfile)+1;
	path_out = mm_malloc(len_out);
	/* do not format into a buffer we failed to allocate */
	if (!path_out)
		return NULL;
	evutil_snprintf(path_out, len_out, "%s%s", path, hostfile);
	return path_out;
#else
	return mm_strdup("/etc/hosts");
#endif
}
4513
4514 static int
4515 evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename) {
4516         size_t n;
4517         char *resolv;
4518         char *start;
4519         int err = 0;
4520         int add_default;
4521
4522         log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);
4523
4524         add_default = flags & DNS_OPTION_NAMESERVERS;
4525         if (flags & DNS_OPTION_NAMESERVERS_NO_DEFAULT)
4526                 add_default = 0;
4527
4528         if (flags & DNS_OPTION_HOSTSFILE) {
4529                 char *fname = evdns_get_default_hosts_filename();
4530                 evdns_base_load_hosts(base, fname);
4531                 if (fname)
4532                         mm_free(fname);
4533         }
4534
4535         if (!filename) {
4536                 evdns_resolv_set_defaults(base, flags);
4537                 return 1;
4538         }
4539
4540         if ((err = evutil_read_file_(filename, &resolv, &n, 0)) < 0) {
4541                 if (err == -1) {
4542                         /* No file. */
4543                         evdns_resolv_set_defaults(base, flags);
4544                         return 1;
4545                 } else {
4546                         return 2;
4547                 }
4548         }
4549
4550         start = resolv;
4551         for (;;) {
4552                 char *const newline = strchr(start, '\n');
4553                 if (!newline) {
4554                         resolv_conf_parse_line(base, start, flags);
4555                         break;
4556                 } else {
4557                         *newline = 0;
4558                         resolv_conf_parse_line(base, start, flags);
4559                         start = newline + 1;
4560                 }
4561         }
4562
4563         if (!base->server_head && add_default) {
4564                 /* no nameservers were configured. */
4565                 evdns_base_nameserver_ip_add(base, "127.0.0.1");
4566                 err = 6;
4567         }
4568         if (flags & DNS_OPTION_SEARCH && (!base->global_search_state || base->global_search_state->num_domains == 0)) {
4569                 search_set_from_hostname(base);
4570         }
4571
4572         mm_free(resolv);
4573         return err;
4574 }
4575
4576 int
4577 evdns_resolv_conf_parse(int flags, const char *const filename) {
4578         if (!current_base)
4579                 current_base = evdns_base_new(NULL, 0);
4580         return evdns_base_resolv_conf_parse(current_base, flags, filename);
4581 }
4582
4583
4584 #ifdef _WIN32
/* Add multiple nameservers from a space-or-comma-separated list. */
/* Each address is a run of digits, '.', ':', '[' and ']'.  Returns 0 when */
/* every address was added, 4 on allocation failure, or the first nonzero */
/* result of evdns_base_nameserver_ip_add().  Caller must hold the lock. */
static int
evdns_nameserver_ip_add_line(struct evdns_base *base, const char *ips) {
	const char *addr;
	char *buf;
	int r;
	ASSERT_LOCKED(base);
	while (*ips) {
		/* <ctype.h> functions need an unsigned char value; a plain
		 * char may be negative. */
		while (isspace((unsigned char)*ips) || *ips == ',' || *ips == '\t')
			++ips;
		/* trailing separators: nothing left to add, and an empty
		 * string must not be submitted as an address */
		if (!*ips)
			break;
		addr = ips;
		while (isdigit((unsigned char)*ips) || *ips == '.' || *ips == ':' ||
		    *ips=='[' || *ips==']')
			++ips;
		buf = mm_malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_base_nameserver_ip_add(base, buf);
		mm_free(buf);
		if (r) return r;
	}
	return 0;
}
4609
4610 typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);
4611
4612 /* Use the windows GetNetworkParams interface in iphlpapi.dll to */
4613 /* figure out what our nameservers are. */
4614 static int
4615 load_nameservers_with_getnetworkparams(struct evdns_base *base)
4616 {
4617         /* Based on MSDN examples and inspection of  c-ares code. */
4618         FIXED_INFO *fixed;
4619         HMODULE handle = 0;
4620         ULONG size = sizeof(FIXED_INFO);
4621         void *buf = NULL;
4622         int status = 0, r, added_any;
4623         IP_ADDR_STRING *ns;
4624         GetNetworkParams_fn_t fn;
4625
4626         ASSERT_LOCKED(base);
4627         if (!(handle = evutil_load_windows_system_library_(
4628                         TEXT("iphlpapi.dll")))) {
4629                 log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
4630                 status = -1;
4631                 goto done;
4632         }
4633         if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
4634                 log(EVDNS_LOG_WARN, "Could not get address of function.");
4635                 status = -1;
4636                 goto done;
4637         }
4638
4639         buf = mm_malloc(size);
4640         if (!buf) { status = 4; goto done; }
4641         fixed = buf;
4642         r = fn(fixed, &size);
4643         if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
4644                 status = -1;
4645                 goto done;
4646         }
4647         if (r != ERROR_SUCCESS) {
4648                 mm_free(buf);
4649                 buf = mm_malloc(size);
4650                 if (!buf) { status = 4; goto done; }
4651                 fixed = buf;
4652                 r = fn(fixed, &size);
4653                 if (r != ERROR_SUCCESS) {
4654                         log(EVDNS_LOG_DEBUG, "fn() failed.");
4655                         status = -1;
4656                         goto done;
4657                 }
4658         }
4659
4660         EVUTIL_ASSERT(fixed);
4661         added_any = 0;
4662         ns = &(fixed->DnsServerList);
4663         while (ns) {
4664                 r = evdns_nameserver_ip_add_line(base, ns->IpAddress.String);
4665                 if (r) {
4666                         log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
4667                                 (ns->IpAddress.String),(int)GetLastError());
4668                         status = r;
4669                 } else {
4670                         ++added_any;
4671                         log(EVDNS_LOG_DEBUG,"Successfully added %s as nameserver",ns->IpAddress.String);
4672                 }
4673
4674                 ns = ns->Next;
4675         }
4676
4677         if (!added_any) {
4678                 log(EVDNS_LOG_DEBUG, "No nameservers added.");
4679                 if (status == 0)
4680                         status = -1;
4681         } else {
4682                 status = 0;
4683         }
4684
4685  done:
4686         if (buf)
4687                 mm_free(buf);
4688         if (handle)
4689                 FreeLibrary(handle);
4690         return status;
4691 }
4692
4693 static int
4694 config_nameserver_from_reg_key(struct evdns_base *base, HKEY key, const TCHAR *subkey)
4695 {
4696         char *buf;
4697         DWORD bufsz = 0, type = 0;
4698         int status = 0;
4699
4700         ASSERT_LOCKED(base);
4701         if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
4702             != ERROR_MORE_DATA)
4703                 return -1;
4704         if (!(buf = mm_malloc(bufsz)))
4705                 return -1;
4706
4707         if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
4708             == ERROR_SUCCESS && bufsz > 1) {
4709                 status = evdns_nameserver_ip_add_line(base,buf);
4710         }
4711
4712         mm_free(buf);
4713         return status;
4714 }
4715
/* Registry paths, opened relative to HKEY_LOCAL_MACHINE, under which the
 * nameserver values are looked up: WIN_NS_NT_KEY is used on the NT code
 * path and WIN_NS_9X_KEY on the other one. */
#define SERVICES_KEY TEXT("System\\CurrentControlSet\\Services\\")
#define WIN_NS_9X_KEY  SERVICES_KEY TEXT("VxD\\MSTCP")
#define WIN_NS_NT_KEY  SERVICES_KEY TEXT("Tcpip\\Parameters")
4719
4720 static int
4721 load_nameservers_from_registry(struct evdns_base *base)
4722 {
4723         int found = 0;
4724         int r;
4725 #define TRY(k, name) \
4726         if (!found && config_nameserver_from_reg_key(base,k,TEXT(name)) == 0) { \
4727                 log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
4728                 found = 1;                                              \
4729         } else if (!found) {                                            \
4730                 log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
4731                     #k,#name);                                          \
4732         }
4733
4734         ASSERT_LOCKED(base);
4735
4736         if (((int)GetVersion()) > 0) { /* NT */
4737                 HKEY nt_key = 0, interfaces_key = 0;
4738
4739                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
4740                                  KEY_READ, &nt_key) != ERROR_SUCCESS) {
4741                         log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
4742                         return -1;
4743                 }
4744                 r = RegOpenKeyEx(nt_key, TEXT("Interfaces"), 0,
4745                              KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
4746                              &interfaces_key);
4747                 if (r != ERROR_SUCCESS) {
4748                         log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
4749                         return -1;
4750                 }
4751                 TRY(nt_key, "NameServer");
4752                 TRY(nt_key, "DhcpNameServer");
4753                 TRY(interfaces_key, "NameServer");
4754                 TRY(interfaces_key, "DhcpNameServer");
4755                 RegCloseKey(interfaces_key);
4756                 RegCloseKey(nt_key);
4757         } else {
4758                 HKEY win_key = 0;
4759                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
4760                                  KEY_READ, &win_key) != ERROR_SUCCESS) {
4761                         log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
4762                         return -1;
4763                 }
4764                 TRY(win_key, "NameServer");
4765                 RegCloseKey(win_key);
4766         }
4767
4768         if (found == 0) {
4769                 log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
4770         }
4771
4772         return found ? 0 : -1;
4773 #undef TRY
4774 }
4775
4776 int
4777 evdns_base_config_windows_nameservers(struct evdns_base *base)
4778 {
4779         int r;
4780         char *fname;
4781         if (base == NULL)
4782                 base = current_base;
4783         if (base == NULL)
4784                 return -1;
4785         EVDNS_LOCK(base);
4786         fname = evdns_get_default_hosts_filename();
4787         log(EVDNS_LOG_DEBUG, "Loading hosts entries from %s", fname);
4788         evdns_base_load_hosts(base, fname);
4789         if (fname)
4790                 mm_free(fname);
4791
4792         if (load_nameservers_with_getnetworkparams(base) == 0) {
4793                 EVDNS_UNLOCK(base);
4794                 return 0;
4795         }
4796         r = load_nameservers_from_registry(base);
4797
4798         EVDNS_UNLOCK(base);
4799         return r;
4800 }
4801
4802 int
4803 evdns_config_windows_nameservers(void)
4804 {
4805         if (!current_base) {
4806                 current_base = evdns_base_new(NULL, 1);
4807                 return current_base == NULL ? -1 : 0;
4808         } else {
4809                 return evdns_base_config_windows_nameservers(current_base);
4810         }
4811 }
4812 #endif
4813
/* Allocate and initialize a new evdns_base attached to 'event_base'.
 *
 * 'flags' is a bitmask of EVDNS_BASE_* values.  If it contains any bit this
 * function does not know, a warning is logged and the flags are replaced by
 * EVDNS_BASE_INITIALIZE_NAMESERVERS.
 *
 * Returns the new base, or NULL if the RNG cannot be seeded, allocation
 * fails, or nameserver initialization was requested and failed. */
struct evdns_base *
evdns_base_new(struct event_base *event_base, int flags)
{
        /* Every flag bit understood by this function. */
        const int known_flags =
            EVDNS_BASE_INITIALIZE_NAMESERVERS |
            EVDNS_BASE_DISABLE_WHEN_INACTIVE |
            EVDNS_BASE_NAMESERVERS_NO_DEFAULT;
        struct evdns_base *base;

        if (evutil_secure_rng_init() < 0) {
                log(EVDNS_LOG_WARN, "Unable to seed random number generator; "
                    "DNS can't run.");
                return NULL;
        }

        /* Give the evutil library a hook into its evdns-enabled
         * functionality.  We can't just call evdns_getaddrinfo directly or
         * else libevent-core will depend on libevent-extras. */
        evutil_set_evdns_getaddrinfo_fn_(evdns_getaddrinfo);
        evutil_set_evdns_getaddrinfo_cancel_fn_(evdns_getaddrinfo_cancel);

        base = mm_malloc(sizeof(*base));
        if (!base)
                return NULL;
        memset(base, 0, sizeof(*base));
        base->req_waiting_head = NULL;

        EVTHREAD_ALLOC_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
        EVDNS_LOCK(base);

        /* Set max requests inflight and allocate req_heads. */
        base->req_heads = NULL;
        evdns_base_set_max_requests_inflight(base, 64);

        base->server_head = NULL;
        base->event_base = event_base;
        base->global_requests_waiting = 0;
        base->global_requests_inflight = 0;
        base->global_good_nameservers = 0;

        /* Default tunables. */
        base->global_timeout.tv_sec = 5;
        base->global_timeout.tv_usec = 0;
        base->global_max_reissues = 1;
        base->global_max_retransmits = 3;
        base->global_max_nameserver_timeout = 3;
        base->global_search_state = NULL;
        base->global_randomize_case = 1;
        base->global_max_udp_size = DNS_MAX_UDP_SIZE;
        base->global_getaddrinfo_allow_skew.tv_sec = 3;
        base->global_getaddrinfo_allow_skew.tv_usec = 0;
        base->global_nameserver_probe_initial_timeout.tv_sec = 10;
        base->global_nameserver_probe_initial_timeout.tv_usec = 0;
        base->ns_max_probe_timeout = 3600;
        base->ns_timeout_backoff_factor = 3;
        base->global_tcp_idle_timeout.tv_sec = CLIENT_IDLE_CONN_TIMEOUT;

        TAILQ_INIT(&base->hostsdb);

        if ((flags & known_flags) != flags) {
                flags = EVDNS_BASE_INITIALIZE_NAMESERVERS;
                log(EVDNS_LOG_WARN,
                    "Unrecognized flag passed to evdns_base_new(). Assuming "
                    "you meant EVDNS_BASE_INITIALIZE_NAMESERVERS.");
        }

        if (flags & EVDNS_BASE_INITIALIZE_NAMESERVERS) {
                int r;
                int opts = DNS_OPTIONS_ALL;

                if (flags & EVDNS_BASE_NAMESERVERS_NO_DEFAULT)
                        opts |= DNS_OPTION_NAMESERVERS_NO_DEFAULT;

#ifdef _WIN32
                r = evdns_base_config_windows_nameservers(base);
#else
                r = evdns_base_resolv_conf_parse(base, opts, "/etc/resolv.conf");
#endif
                if (r != 0) {
                        /* Also drops the lock taken above. */
                        evdns_base_free_and_unlock(base, 0);
                        return NULL;
                }
        }

        if (flags & EVDNS_BASE_DISABLE_WHEN_INACTIVE)
                base->disable_when_inactive = 1;

        EVDNS_UNLOCK(base);
        return base;
}
4906
4907 int
4908 evdns_init(void)
4909 {
4910         struct evdns_base *base = evdns_base_new(NULL, 1);
4911         if (base) {
4912                 current_base = base;
4913                 return 0;
4914         } else {
4915                 return -1;
4916         }
4917 }
4918
4919 const char *
4920 evdns_err_to_string(int err)
4921 {
4922     switch (err) {
4923         case DNS_ERR_NONE: return "no error";
4924         case DNS_ERR_FORMAT: return "misformatted query";
4925         case DNS_ERR_SERVERFAILED: return "server failed";
4926         case DNS_ERR_NOTEXIST: return "name does not exist";
4927         case DNS_ERR_NOTIMPL: return "query not implemented";
4928         case DNS_ERR_REFUSED: return "refused";
4929
4930         case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
4931         case DNS_ERR_UNKNOWN: return "unknown";
4932         case DNS_ERR_TIMEOUT: return "request timed out";
4933         case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
4934         case DNS_ERR_CANCEL: return "dns request canceled";
4935         case DNS_ERR_NODATA: return "no records in the reply";
4936         default: return "[Unknown error code]";
4937     }
4938 }
4939
/* Release everything owned by 'server' -- its socket, its two events, any
 * probe request and its connection -- and then free the structure itself.
 * 'server' must not be used after this call. */
static void
evdns_nameserver_free(struct nameserver *server)
{
        /* A negative socket value means there is no socket to close. */
        if (server->socket >= 0)
                evutil_closesocket(server->socket);
        (void) event_del(&server->event);
        event_debug_unassign(&server->event);
        /* The timeout event is only deleted when state is 0.
         * NOTE(review): presumably state == 0 marks the server as down with
         * a retry timeout pending -- confirm against struct nameserver. */
        if (server->state == 0)
                (void) event_del(&server->timeout_event);
        /* Cancel an outstanding probe, if any, and forget the handle. */
        if (server->probe_request) {
                evdns_cancel_request(server->base, server->probe_request);
                server->probe_request = NULL;
        }
        event_debug_unassign(&server->timeout_event);
        disconnect_and_free_connection(server->connection);
        mm_free(server);
}
4957
/* Tear down 'base' completely: finish every waiting and in-flight request,
 * free all nameservers, the search-domain state, the hosts database and the
 * request-head array, then unlock and free the lock and finally free 'base'
 * itself.
 *
 * The caller must hold the base lock on entry; it is released here.  If
 * 'fail_requests' is nonzero, a DNS_ERR_SHUTDOWN callback is scheduled for
 * each outstanding request before that request is finished. */
static void
evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests)
{
        struct nameserver *server, *server_next;
        struct search_domain *dom, *dom_next;
        int i;

        /* Requires that we hold the lock. */

        /* TODO(nickm) we might need to refcount here. */

        /* Drain the waiting queue; the loop re-reads the head each time
         * around, so request_finished() is what makes it terminate. */
        while (base->req_waiting_head) {
                if (fail_requests)
                        reply_schedule_callback(base->req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
                request_finished(base->req_waiting_head, &base->req_waiting_head, 1);
        }
        /* Drain every in-flight bucket the same way. */
        for (i = 0; i < base->n_req_heads; ++i) {
                while (base->req_heads[i]) {
                        if (fail_requests)
                                reply_schedule_callback(base->req_heads[i], 0, DNS_ERR_SHUTDOWN, NULL);
                        request_finished(base->req_heads[i], &REQ_HEAD(base, base->req_heads[i]->trans_id), 1);
                }
        }
        base->global_requests_inflight = base->global_requests_waiting = 0;

        /* Free each nameserver.  The walk stops either at a NULL link or
         * when it comes back around to server_head. */
        for (server = base->server_head; server; server = server_next) {
                server_next = server->next;
                /* Clear probe_request first so that evdns_nameserver_free()
                 * does not try to cancel it.
                 * NOTE(review): presumably the probe was already finished by
                 * the request loops above -- confirm. */
                server->probe_request = NULL;
                evdns_nameserver_free(server);
                if (server_next == base->server_head)
                        break;
        }
        base->server_head = NULL;
        base->global_good_nameservers = 0;

        /* Free the search-domain list and its container. */
        if (base->global_search_state) {
                for (dom = base->global_search_state->head; dom; dom = dom_next) {
                        dom_next = dom->next;
                        mm_free(dom);
                }
                mm_free(base->global_search_state);
                base->global_search_state = NULL;
        }

        /* Free every entry in the hosts database. */
        {
                struct hosts_entry *victim;
                while ((victim = TAILQ_FIRST(&base->hostsdb))) {
                        TAILQ_REMOVE(&base->hostsdb, victim, next);
                        mm_free(victim);
                }
        }

        mm_free(base->req_heads);

        /* The lock must be released before it can be freed. */
        EVDNS_UNLOCK(base);
        EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);

        mm_free(base);
}
5018
/* Free 'base' and everything it owns.  Takes the base lock and then hands
 * off to evdns_base_free_and_unlock(), which releases the lock and frees the
 * base; 'fail_requests' is passed through unchanged.  'base' must not be
 * used after this call. */
void
evdns_base_free(struct evdns_base *base, int fail_requests)
{
        EVDNS_LOCK(base);
        evdns_base_free_and_unlock(base, fail_requests);
}
5025
5026 void
5027 evdns_base_clear_host_addresses(struct evdns_base *base)
5028 {
5029         struct hosts_entry *victim;
5030         EVDNS_LOCK(base);
5031         while ((victim = TAILQ_FIRST(&base->hostsdb))) {
5032                 TAILQ_REMOVE(&base->hostsdb, victim, next);
5033                 mm_free(victim);
5034         }
5035         EVDNS_UNLOCK(base);
5036 }
5037
5038 void
5039 evdns_shutdown(int fail_requests)
5040 {
5041         if (current_base) {
5042                 struct evdns_base *b = current_base;
5043                 current_base = NULL;
5044                 evdns_base_free(b, fail_requests);
5045         }
5046         evdns_log_fn = NULL;
5047 }
5048
5049 static int
5050 evdns_base_parse_hosts_line(struct evdns_base *base, char *line)
5051 {
5052         char *strtok_state;
5053         static const char *const delims = " \t";
5054         char *const addr = strtok_r(line, delims, &strtok_state);
5055         char *hostname, *hash;
5056         struct sockaddr_storage ss;
5057         int socklen = sizeof(ss);
5058         ASSERT_LOCKED(base);
5059
5060 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
5061
5062         if (!addr || *addr == '#')
5063                 return 0;
5064
5065         memset(&ss, 0, sizeof(ss));
5066         if (evutil_parse_sockaddr_port(addr, (struct sockaddr*)&ss, &socklen)<0)
5067                 return -1;
5068         if (socklen > (int)sizeof(struct sockaddr_in6))
5069                 return -1;
5070
5071         if (sockaddr_getport((struct sockaddr*)&ss))
5072                 return -1;
5073
5074         while ((hostname = NEXT_TOKEN)) {
5075                 struct hosts_entry *he;
5076                 size_t namelen;
5077                 if ((hash = strchr(hostname, '#'))) {
5078                         if (hash == hostname)
5079                                 return 0;
5080                         *hash = '\0';
5081                 }
5082
5083                 namelen = strlen(hostname);
5084
5085                 he = mm_calloc(1, sizeof(struct hosts_entry)+namelen);
5086                 if (!he)
5087                         return -1;
5088                 EVUTIL_ASSERT(socklen <= (int)sizeof(he->addr));
5089                 memcpy(&he->addr, &ss, socklen);
5090                 memcpy(he->hostname, hostname, namelen+1);
5091                 he->addrlen = socklen;
5092
5093                 TAILQ_INSERT_TAIL(&base->hostsdb, he, next);
5094
5095                 if (hash)
5096                         return 0;
5097         }
5098
5099         return 0;
5100 #undef NEXT_TOKEN
5101 }
5102
/* Load hosts entries into 'base' from the file 'hosts_fname'.  The base lock
 * must be held.
 *
 * If 'hosts_fname' is NULL, or the file cannot be read, entries mapping
 * "localhost" to 127.0.0.1 and ::1 are added instead.  Returns -1 only when
 * reading the file failed; a NULL file name and a successfully read file
 * both return 0.  Errors from individual lines are ignored. */
static int
evdns_base_load_hosts_impl(struct evdns_base *base, const char *hosts_fname)
{
        char *contents = NULL, *line, *newline;
        size_t len;
        int err = 0;

        ASSERT_LOCKED(base);

        if (hosts_fname != NULL)
                err = evutil_read_file_(hosts_fname, &contents, &len, 0);

        if (hosts_fname == NULL || err < 0) {
                /* No usable file: fall back to the two localhost entries. */
                char tmp[64];
                strlcpy(tmp, "127.0.0.1   localhost", sizeof(tmp));
                evdns_base_parse_hosts_line(base, tmp);
                strlcpy(tmp, "::1   localhost", sizeof(tmp));
                evdns_base_parse_hosts_line(base, tmp);
                return err ? -1 : 0;
        }

        /* This will break early if there is a NUL in the hosts file.
         * Probably not a problem.*/
        line = contents;
        while ((newline = strchr(line, '\n')) != NULL) {
                *newline = '\0';
                evdns_base_parse_hosts_line(base, line);
                line = newline + 1;
        }
        /* Whatever follows the last newline (possibly an empty string). */
        evdns_base_parse_hosts_line(base, line);

        mm_free(contents);
        return 0;
}
5141
5142 int
5143 evdns_base_load_hosts(struct evdns_base *base, const char *hosts_fname)
5144 {
5145         int res;
5146         if (!base)
5147                 base = current_base;
5148         EVDNS_LOCK(base);
5149         res = evdns_base_load_hosts_impl(base, hosts_fname);
5150         EVDNS_UNLOCK(base);
5151         return res;
5152 }
5153
/* A single request for a getaddrinfo, either v4 or v6. */
struct getaddrinfo_subrequest {
        /* The underlying evdns request; NULL when none is outstanding. */
        struct evdns_request *r;
        /* Record type of this sub-request: DNS_IPv4_A or DNS_IPv6_AAAA. */
        ev_uint32_t type;
};
5159
/* State data used to implement an in-progress getaddrinfo. */
struct evdns_getaddrinfo_request {
        /* The evdns_base this lookup runs on.  Set to NULL by the resolve
         * callback when it is invoked with DNS_ERR_SHUTDOWN. */
        struct evdns_base *evdns_base;
        /* Copy of the modified 'hints' data that we'll use to build
         * answers. */
        struct evutil_addrinfo hints;
        /* The callback to invoke when we're done.  Reset to NULL once the
         * timeout callback has answered the user, to prevent a second call. */
        evdns_getaddrinfo_cb user_cb;
        /* User-supplied data to give to the callback. */
        void *user_data;
        /* The port to use when building sockaddrs. */
        ev_uint16_t port;
        /* The sub_request for an A record (if any) */
        struct getaddrinfo_subrequest ipv4_request;
        /* The sub_request for an AAAA record (if any) */
        struct getaddrinfo_subrequest ipv6_request;

        /* The cname result that we were told (if any) */
        char *cname_result;

        /* If we have one request answered and one request still inflight,
         * then this field holds the answer from the first request... */
        struct evutil_addrinfo *pending_result;
        /* And this event is a timeout that will tell us to cancel the second
         * request if it's taking a long time. */
        struct event timeout;

        /* And this field holds the error code from the first request... */
        int pending_error;
        /* If this is set, the user canceled this request. */
        unsigned user_canceled : 1;
        /* If this is set, the user can no longer cancel this request; we're
         * just waiting for the free. */
        unsigned request_done : 1;
};
5195
/* Translate an evdns DNS_ERR_* code into the corresponding getaddrinfo
 * error: success stays 0, "name does not exist" becomes EVUTIL_EAI_NONAME,
 * and every other code becomes EVUTIL_EAI_FAIL. */
static int
evdns_err_to_getaddrinfo_err(int e1)
{
        /* XXX Do this better! */
        switch (e1) {
        case DNS_ERR_NONE:
                return 0;
        case DNS_ERR_NOTEXIST:
                return EVUTIL_EAI_NONAME;
        default:
                return EVUTIL_EAI_FAIL;
        }
}
5208
/* Combine two getaddrinfo error codes: the first one wins unless it is 0,
 * in which case the second one is returned. */
static int
getaddrinfo_merge_err(int e1, int e2)
{
        /* XXXX be cleverer here. */
        return e1 ? e1 : e2;
}
5219
/* Dispose of a getaddrinfo request: free any result and cname it is still
 * holding, remove its timeout event, and free the request itself. */
static void
free_getaddrinfo_request(struct evdns_getaddrinfo_request *data)
{
        /* DO NOT CALL this if either of the requests is pending.  Only once
         * both callbacks have been invoked is it safe to free the request */
        struct evutil_addrinfo *leftover = data->pending_result;
        char *cname = data->cname_result;

        if (leftover != NULL)
                evutil_freeaddrinfo(leftover);
        if (cname != NULL)
                mm_free(cname);
        event_del(&data->timeout);
        mm_free(data);
}
5233
/* Hand the stored cname string over to 'ai' as its ai_canonname.  Does
 * nothing if there is no stored cname or 'ai' is NULL.  After the transfer
 * 'data' no longer refers to the string. */
static void
add_cname_to_reply(struct evdns_getaddrinfo_request *data,
    struct evutil_addrinfo *ai)
{
        if (!data->cname_result || !ai)
                return;

        ai->ai_canonname = data->cname_result;
        data->cname_result = NULL;
}
5243
/* Callback: invoked when one request in a mixed-format A/AAAA getaddrinfo
 * request has finished, but the other one took too long to answer. Pass
 * along the answer we got, and cancel the other request.
 *
 * 'ptr' is the struct evdns_getaddrinfo_request; 'fd' and 'what' are not
 * used.
 */
static void
evdns_getaddrinfo_timeout_cb(evutil_socket_t fd, short what, void *ptr)
{
        int v4_timedout = 0, v6_timedout = 0;
        struct evdns_getaddrinfo_request *data = ptr;

        /* Cancel any pending requests, and note which one */
        if (data->ipv4_request.r) {
                /* XXXX This does nothing if the request's callback is already
                 * running (pending_cb is set). */
                evdns_cancel_request(NULL, data->ipv4_request.r);
                v4_timedout = 1;
                /* Update the per-base timeout statistic under the lock. */
                EVDNS_LOCK(data->evdns_base);
                ++data->evdns_base->getaddrinfo_ipv4_timeouts;
                EVDNS_UNLOCK(data->evdns_base);
        }
        if (data->ipv6_request.r) {
                /* XXXX This does nothing if the request's callback is already
                 * running (pending_cb is set). */
                evdns_cancel_request(NULL, data->ipv6_request.r);
                v6_timedout = 1;
                /* Update the per-base timeout statistic under the lock. */
                EVDNS_LOCK(data->evdns_base);
                ++data->evdns_base->getaddrinfo_ipv6_timeouts;
                EVDNS_UNLOCK(data->evdns_base);
        }

        /* We only use this timeout callback when we have an answer for
         * one address. */
        EVUTIL_ASSERT(!v4_timedout || !v6_timedout);

        /* Report the outcome of the other request that didn't time out. */
        if (data->pending_result) {
                add_cname_to_reply(data, data->pending_result);
                data->user_cb(0, data->pending_result, data->user_data);
                /* The result was handed to the user callback; drop our
                 * reference so it is not freed with the request. */
                data->pending_result = NULL;
        } else {
                /* No stored answer: report the stored error, or
                 * EVUTIL_EAI_AGAIN if none was recorded. */
                int e = data->pending_error;
                if (!e)
                        e = EVUTIL_EAI_AGAIN;
                data->user_cb(e, NULL, data->user_data);
        }

        data->user_cb = NULL; /* prevent double-call if evdns callbacks are
                               * in-progress. XXXX It would be better if this
                               * weren't necessary. */

        /* When a sub-request was canceled above, 'data' is not freed here.
         * NOTE(review): presumably the canceled request's resolve callback
         * frees it -- confirm. */
        if (!v4_timedout && !v6_timedout) {
                /* should be impossible? XXXX */
                free_getaddrinfo_request(data);
        }
}
5299
/* Schedule data->timeout to fire after the interval stored in
 * evdns_base->global_getaddrinfo_allow_skew.  Returns whatever event_add()
 * returns. */
static int
evdns_getaddrinfo_set_timeout(struct evdns_base *evdns_base,
    struct evdns_getaddrinfo_request *data)
{
        return event_add(&data->timeout, &evdns_base->global_getaddrinfo_allow_skew);
}
5306
5307 static inline int
5308 evdns_result_is_answer(int result)
5309 {
5310         return (result != DNS_ERR_NOTIMPL && result != DNS_ERR_REFUSED &&
5311             result != DNS_ERR_SERVERFAILED && result != DNS_ERR_CANCEL);
5312 }
5313
/* Resolve callback for one half (A or AAAA) of a getaddrinfo lookup.
 *
 * 'arg' points at the getaddrinfo_subrequest embedded in the owning
 * evdns_getaddrinfo_request.  'result' is a DNS_ERR_* code; on success
 * 'addresses' holds 'count' packed addresses (4 bytes each for
 * DNS_IPv4_A, 16 for DNS_IPv6_AAAA) as indicated by 'type'.  'ttl' is not
 * used here.
 *
 * Depending on the state of the sibling sub-request this either stores the
 * outcome and waits, or calls the user callback and frees the request. */
static void
evdns_getaddrinfo_gotresolve(int result, char type, int count,
    int ttl, void *addresses, void *arg)
{
        int i;
        struct getaddrinfo_subrequest *req = arg;
        struct getaddrinfo_subrequest *other_req;
        struct evdns_getaddrinfo_request *data;

        struct evutil_addrinfo *res;

        struct sockaddr_in sin;
        struct sockaddr_in6 sin6;
        struct sockaddr *sa;
        int socklen, addrlen;
        void *addrp;
        int err;
        int user_canceled;

        /* Recover the enclosing request from the embedded sub-request, and
         * find the sibling sub-request. */
        EVUTIL_ASSERT(req->type == DNS_IPv4_A || req->type == DNS_IPv6_AAAA);
        if (req->type == DNS_IPv4_A) {
                data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv4_request);
                other_req = &data->ipv6_request;
        } else {
                data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv6_request);
                other_req = &data->ipv4_request;
        }

        /* DNS_ERR_SHUTDOWN is delivered from evdns_base_free() when it is
         * called with fail_requests == 1.  In that case the base is not
         * locked or touched here, and the request's pointer to it is
         * cleared. */
        if (result != DNS_ERR_SHUTDOWN) {
                EVDNS_LOCK(data->evdns_base);
                /* Update the per-family "answered" statistic. */
                if (evdns_result_is_answer(result)) {
                        if (req->type == DNS_IPv4_A)
                                ++data->evdns_base->getaddrinfo_ipv4_answered;
                        else
                                ++data->evdns_base->getaddrinfo_ipv6_answered;
                }
                user_canceled = data->user_canceled;
                /* No sibling outstanding: the request as a whole is done. */
                if (other_req->r == NULL)
                        data->request_done = 1;
                EVDNS_UNLOCK(data->evdns_base);
        } else {
                data->evdns_base = NULL;
                user_canceled = data->user_canceled;
        }

        /* This sub-request is no longer outstanding. */
        req->r = NULL;

        if (result == DNS_ERR_CANCEL && ! user_canceled) {
                /* Internal cancel request from timeout or internal error.
                 * we already answered the user. */
                if (other_req->r == NULL)
                        free_getaddrinfo_request(data);
                return;
        }

        if (data->user_cb == NULL) {
                /* We already answered.  XXXX This shouldn't be needed; see
                 * comments in evdns_getaddrinfo_timeout_cb */
                free_getaddrinfo_request(data);
                return;
        }

        /* Translate the DNS outcome into a getaddrinfo error code; a
         * successful reply with no records counts as EVUTIL_EAI_NODATA. */
        if (result == DNS_ERR_NONE) {
                if (count == 0)
                        err = EVUTIL_EAI_NODATA;
                else
                        err = 0;
        } else {
                err = evdns_err_to_getaddrinfo_err(result);
        }

        if (err) {
                /* Looks like we got an error. */
                if (other_req->r) {
                        /* The other request is still working; maybe it will
                         * succeed. */
                        /* XXXX handle failure from set_timeout */
                        /* After a shutdown data->evdns_base is NULL, so the
                         * timeout cannot be armed. */
                        if (result != DNS_ERR_SHUTDOWN) {
                                evdns_getaddrinfo_set_timeout(data->evdns_base, data);
                        }
                        data->pending_error = err;
                        return;
                }

                if (user_canceled) {
                        data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
                } else if (data->pending_result) {
                        /* If we have an answer waiting, and we weren't
                         * canceled, ignore this error. */
                        add_cname_to_reply(data, data->pending_result);
                        data->user_cb(0, data->pending_result, data->user_data);
                        data->pending_result = NULL;
                } else {
                        if (data->pending_error)
                                err = getaddrinfo_merge_err(err,
                                    data->pending_error);
                        data->user_cb(err, NULL, data->user_data);
                }
                free_getaddrinfo_request(data);
                return;
        } else if (user_canceled) {
                if (other_req->r) {
                        /* The other request is still working; let it hit this
                         * callback with EVUTIL_EAI_CANCEL callback and report
                         * the failure. */
                        return;
                }
                data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
                free_getaddrinfo_request(data);
                return;
        }

        /* Looks like we got some answers. We should turn them into addrinfos
         * and then either queue those or return them all. */
        EVUTIL_ASSERT(type == DNS_IPv4_A || type == DNS_IPv6_AAAA);

        /* Prepare a template sockaddr for this family; 'addrp' points at the
         * address field inside it so each answer can be copied in below. */
        if (type == DNS_IPv4_A) {
                memset(&sin, 0, sizeof(sin));
                sin.sin_family = AF_INET;
                sin.sin_port = htons(data->port);
#ifdef EVENT__HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
                sin.sin_len = sizeof(sin);
#endif

                sa = (struct sockaddr *)&sin;
                socklen = sizeof(sin);
                addrlen = 4;
                addrp = &sin.sin_addr.s_addr;
        } else {
                memset(&sin6, 0, sizeof(sin6));
                sin6.sin6_family = AF_INET6;
                sin6.sin6_port = htons(data->port);
#ifdef EVENT__HAVE_STRUCT_SOCKADDR_IN6_SIN6_LEN
                sin6.sin6_len = sizeof(sin6);
#endif

                sa = (struct sockaddr *)&sin6;
                socklen = sizeof(sin6);
                addrlen = 16;
                addrp = &sin6.sin6_addr.s6_addr;
        }

        /* Build one addrinfo per returned address and chain them in 'res'. */
        res = NULL;
        for (i=0; i < count; ++i) {
                struct evutil_addrinfo *ai;
                memcpy(addrp, ((char*)addresses)+i*addrlen, addrlen);
                ai = evutil_new_addrinfo_(sa, socklen, &data->hints);
                if (!ai) {
                        /* Allocation failed: cancel the sibling, report
                         * EVUTIL_EAI_MEMORY, and discard what was built. */
                        if (other_req->r) {
                                evdns_cancel_request(NULL, other_req->r);
                        }
                        data->user_cb(EVUTIL_EAI_MEMORY, NULL, data->user_data);
                        if (res)
                                evutil_freeaddrinfo(res);

                        if (other_req->r == NULL)
                                free_getaddrinfo_request(data);
                        return;
                }
                res = evutil_addrinfo_append_(res, ai);
        }

        if (other_req->r) {
                /* The other request is still in progress; wait for it */
                /* XXXX handle failure from set_timeout */
                evdns_getaddrinfo_set_timeout(data->evdns_base, data);
                data->pending_result = res;
                return;
        } else {
                /* The other request is done or never started; append its
                 * results (if any) and return them. */
                if (data->pending_result) {
                        /* Either way the A-request results end up ahead of
                         * the AAAA-request results in the merged list. */
                        if (req->type == DNS_IPv4_A)
                                res = evutil_addrinfo_append_(res,
                                    data->pending_result);
                        else
                                res = evutil_addrinfo_append_(
                                    data->pending_result, res);
                        data->pending_result = NULL;
                }

                /* Call the user callback. */
                add_cname_to_reply(data, res);
                data->user_cb(0, res, data->user_data);

                /* Free data. */
                free_getaddrinfo_request(data);
        }
}
5504
5505 static struct hosts_entry *
5506 find_hosts_entry(struct evdns_base *base, const char *hostname,
5507     struct hosts_entry *find_after)
5508 {
5509         struct hosts_entry *e;
5510
5511         if (find_after)
5512                 e = TAILQ_NEXT(find_after, next);
5513         else
5514                 e = TAILQ_FIRST(&base->hostsdb);
5515
5516         for (; e; e = TAILQ_NEXT(e, next)) {
5517                 if (!evutil_ascii_strcasecmp(e->hostname, hostname))
5518                         return e;
5519         }
5520         return NULL;
5521 }
5522
5523 static int
5524 evdns_getaddrinfo_fromhosts(struct evdns_base *base,
5525     const char *nodename, struct evutil_addrinfo *hints, ev_uint16_t port,
5526     struct evutil_addrinfo **res)
5527 {
5528         int n_found = 0;
5529         struct hosts_entry *e;
5530         struct evutil_addrinfo *ai=NULL;
5531         int f = hints->ai_family;
5532
5533         EVDNS_LOCK(base);
5534         for (e = find_hosts_entry(base, nodename, NULL); e;
5535             e = find_hosts_entry(base, nodename, e)) {
5536                 struct evutil_addrinfo *ai_new;
5537                 ++n_found;
5538                 if ((e->addr.sa.sa_family == AF_INET && f == PF_INET6) ||
5539                     (e->addr.sa.sa_family == AF_INET6 && f == PF_INET))
5540                         continue;
5541                 ai_new = evutil_new_addrinfo_(&e->addr.sa, e->addrlen, hints);
5542                 if (!ai_new) {
5543                         n_found = 0;
5544                         goto out;
5545                 }
5546                 sockaddr_setport(ai_new->ai_addr, port);
5547                 ai = evutil_addrinfo_append_(ai, ai_new);
5548         }
5549         EVDNS_UNLOCK(base);
5550 out:
5551         if (n_found) {
5552                 /* Note that we return an empty answer if we found entries for
5553                  * this hostname but none were of the right address type. */
5554                 *res = ai;
5555                 return 0;
5556         } else {
5557                 if (ai)
5558                         evutil_freeaddrinfo(ai);
5559                 return -1;
5560         }
5561 }
5562
/* Asynchronous getaddrinfo built on top of an evdns_base.
 *
 * dns_base: the resolver to use; when NULL, current_base is used instead.
 * nodename, servname, hints_in: as for evutil_getaddrinfo().  hints_in may
 *     be NULL, in which case zeroed hints with PF_UNSPEC are used.
 * cb, arg: the callback to invoke with the result, and its user argument.
 *
 * The query is answered immediately (cb is called before this function
 * returns, and NULL is returned) when:
 *   - no evdns_base is available (EVUTIL_EAI_FAIL),
 *   - the hints carry EVUTIL_AI_NUMERICHOST (result of evutil_getaddrinfo),
 *   - evutil_getaddrinfo_common_() yields anything other than
 *     EVUTIL_EAI_NEED_RESOLVE,
 *   - the hosts entries of the base have a record for nodename,
 *   - the request structure cannot be allocated (EVUTIL_EAI_MEMORY),
 *   - neither the A nor the AAAA lookup could be started (EVUTIL_EAI_FAIL).
 *
 * Otherwise one or two DNS lookups are launched and the pending request is
 * returned; results are delivered later through
 * evdns_getaddrinfo_gotresolve(). */
struct evdns_getaddrinfo_request *
evdns_getaddrinfo(struct evdns_base *dns_base,
    const char *nodename, const char *servname,
    const struct evutil_addrinfo *hints_in,
    evdns_getaddrinfo_cb cb, void *arg)
{
	struct evdns_getaddrinfo_request *gai;
	struct evutil_addrinfo hints;
	struct evutil_addrinfo *answer = NULL;
	int rv;
	int port = 0;
	int want_cname = 0;
	int launched = 0;

	/* Fall back to the global base when the caller did not give one. */
	if (!dns_base)
		dns_base = current_base;
	if (!dns_base) {
		log(EVDNS_LOG_WARN,
		    "Call to getaddrinfo_async with no "
		    "evdns_base configured.");
		cb(EVUTIL_EAI_FAIL, NULL, arg); /* ??? better error? */
		return NULL;
	}

	/* A numeric-host-only query never needs DNS: answer it right away. */
	if (hints_in && (hints_in->ai_flags & EVUTIL_AI_NUMERICHOST)) {
		rv = evutil_getaddrinfo(nodename, servname, hints_in, &answer);
		cb(rv, answer, arg);
		return NULL;
	}

	/* Work on a private copy of the hints so we can adjust them. */
	if (hints_in) {
		memcpy(&hints, hints_in, sizeof(hints));
	} else {
		memset(&hints, 0, sizeof(hints));
		hints.ai_family = PF_UNSPEC;
	}

	evutil_adjust_hints_for_addrconfig_(&hints);

	/* See whether the query can be answered without resolving anything.
	 * (Calling the platform getaddrinfo with AI_NUMERICHOST would be
	 * nicer on platforms that have it, but there is no reliable way to
	 * tell "that wasn't a numeric host!" apart from every other
	 * EAI_NONAME case.) */
	rv = evutil_getaddrinfo_common_(nodename, servname, &hints, &answer, &port);
	if (rv != EVUTIL_EAI_NEED_RESOLVE) {
		cb(rv, answer, arg);
		return NULL;
	}

	/* An entry from the hosts file takes precedence over a DNS lookup. */
	if (evdns_getaddrinfo_fromhosts(dns_base, nodename, &hints, port, &answer) == 0) {
		cb(0, answer, arg);
		return NULL;
	}

	/* Nothing left but to launch real DNS requests; set up the state
	 * that tracks them. */
	gai = mm_calloc(1,sizeof(struct evdns_getaddrinfo_request));
	if (!gai) {
		cb(EVUTIL_EAI_MEMORY, NULL, arg);
		return NULL;
	}

	gai->evdns_base = dns_base;
	gai->user_cb = cb;
	gai->user_data = arg;
	gai->port = (ev_uint16_t)port;
	gai->ipv4_request.type = DNS_IPv4_A;
	gai->ipv6_request.type = DNS_IPv6_AAAA;
	memcpy(&gai->hints, &hints, sizeof(gai->hints));

	want_cname = (hints.ai_flags & EVUTIL_AI_CANONNAME);

	/* For PF_UNSPEC we launch an A and an AAAA request in parallel; a
	 * single request will not do, since many servers only answer one
	 * question per DNS request.
	 *
	 * When the first answer arrives we wait a short while for the other
	 * one, and report both together if it shows up in time.
	 *
	 * If too many addresses of one type time out or fail, we should stop
	 * launching those requests. (XXX we don't do that yet.)
	 */

	EVDNS_LOCK(dns_base);

	if (hints.ai_family != PF_INET6) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv4 as %p",
		    nodename, (void *)&gai->ipv4_request);

		gai->ipv4_request.r = evdns_base_resolve_ipv4(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &gai->ipv4_request);
		if (gai->ipv4_request.r && want_cname) {
			gai->ipv4_request.r->current_req->put_cname_in_ptr =
			    &gai->cname_result;
		}
	}
	if (hints.ai_family != PF_INET) {
		log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv6 as %p",
		    nodename, (void *)&gai->ipv6_request);

		gai->ipv6_request.r = evdns_base_resolve_ipv6(dns_base,
		    nodename, 0, evdns_getaddrinfo_gotresolve,
		    &gai->ipv6_request);
		if (gai->ipv6_request.r && want_cname) {
			gai->ipv6_request.r->current_req->put_cname_in_ptr =
			    &gai->cname_result;
		}
	}

	evtimer_assign(&gai->timeout, dns_base->event_base,
	    evdns_getaddrinfo_timeout_cb, gai);

	/* At least one of the two lookups must have started. */
	launched = (gai->ipv4_request.r || gai->ipv6_request.r);

	EVDNS_UNLOCK(dns_base);

	if (!launched) {
		mm_free(gai);
		cb(EVUTIL_EAI_FAIL, NULL, arg);
		return NULL;
	}
	return gai;
}
5694
/* Cancel a pending evdns_getaddrinfo() request.
 *
 * Does nothing if the request is already marked as done.  Otherwise the
 * request's timeout event is removed, the request is flagged as canceled by
 * the user, and whichever of the A and AAAA lookups still has a request
 * handle is canceled through evdns_cancel_request().
 *
 * The lock of the request's evdns_base is held for the whole operation. */
void
evdns_getaddrinfo_cancel(struct evdns_getaddrinfo_request *data)
{
	struct evdns_base *base = data->evdns_base;

	EVDNS_LOCK(base);
	if (!data->request_done) {
		event_del(&data->timeout);
		data->user_canceled = 1;
		if (data->ipv4_request.r)
			evdns_cancel_request(base, data->ipv4_request.r);
		if (data->ipv6_request.r)
			evdns_cancel_request(base, data->ipv6_request.r);
	}
	EVDNS_UNLOCK(base);
}