]> granicus.if.org Git - libevent/blob - evdns.c
epoll: use epoll_pwait2() if available
[libevent] / evdns.c
1 /* Copyright 2006-2007 Niels Provos
2  * Copyright 2007-2012 Nick Mathewson and Niels Provos
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  * 3. The name of the author may not be used to endorse or promote products
13  *    derived from this software without specific prior written permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /* Based on software by Adam Langley. Adam's original message:
28  *
29  * Async DNS Library
30  * Adam Langley <agl@imperialviolet.org>
31  * Public Domain code
32  *
33  * This software is Public Domain. To view a copy of the public domain dedication,
34  * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
35  * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
36  *
37  * I ask and expect, but do not require, that all derivative works contain an
38  * attribution similar to:
39  *      Parts developed by Adam Langley <agl@imperialviolet.org>
40  *
41  * You may wish to replace the word "Parts" with something else depending on
42  * the amount of original code.
43  *
44  * (Derivative works does not include programs which link against, run or include
45  * the source verbatim in their source distributions)
46  *
47  * Version: 0.1b
48  */
49
50 #include "event2/event-config.h"
51 #include "evconfig-private.h"
52
53 #include <sys/types.h>
54
55 #ifndef _FORTIFY_SOURCE
56 #define _FORTIFY_SOURCE 3
57 #endif
58
59 #include <string.h>
60 #include <fcntl.h>
61 #ifdef EVENT__HAVE_SYS_TIME_H
62 #include <sys/time.h>
63 #endif
64 #ifdef EVENT__HAVE_STDINT_H
65 #include <stdint.h>
66 #endif
67 #include <stdlib.h>
68 #include <string.h>
69 #include <errno.h>
70 #ifdef EVENT__HAVE_UNISTD_H
71 #include <unistd.h>
72 #endif
73 #include <limits.h>
74 #include <sys/stat.h>
75 #include <stdio.h>
76 #include <stdarg.h>
77 #ifdef _WIN32
78 #include <winsock2.h>
79 #include <winerror.h>
80 #include <ws2tcpip.h>
81 #ifndef _WIN32_IE
82 #define _WIN32_IE 0x400
83 #endif
84 #include <shlobj.h>
85 #endif
86
87 #include "event2/buffer.h"
88 #include "event2/bufferevent.h"
89 #include "event2/dns.h"
90 #include "event2/dns_struct.h"
91 #include "event2/dns_compat.h"
92 #include "event2/util.h"
93 #include "event2/event.h"
94 #include "event2/event_struct.h"
95 #include "event2/listener.h"
96 #include "event2/thread.h"
97
98 #include "defer-internal.h"
99 #include "log-internal.h"
100 #include "mm-internal.h"
101 #include "strlcpy-internal.h"
102 #include "ipv6-internal.h"
103 #include "util-internal.h"
104 #include "evthread-internal.h"
105 #ifdef _WIN32
106 #include <ctype.h>
107 #include <winsock2.h>
108 #include <windows.h>
109 #include <iphlpapi.h>
110 #include <io.h>
111 #else
112 #include <sys/socket.h>
113 #include <netinet/in.h>
114 #include <arpa/inet.h>
115 #endif
116
117 #ifdef EVENT__HAVE_NETINET_IN6_H
118 #include <netinet/in6.h>
119 #endif
120
/* Local aliases for the libevent log severities used in this file. */
#define EVDNS_LOG_MSG EVENT_LOG_MSG
#define EVDNS_LOG_WARN EVENT_LOG_WARN
#define EVDNS_LOG_DEBUG EVENT_LOG_DEBUG

/* Name-length limit; only defined here when nothing defined it earlier. */
#if !defined(EVDNS_NAME_MAX)
#define EVDNS_NAME_MAX 255
#endif
128
129 #include <stdio.h>
130
/* Replace any MIN/MAX that an earlier header supplied with our own.
 * Both macros may evaluate an argument more than once. */
#undef MAX
#undef MIN
#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Sanity check: the request has a handle, and that handle names this very
 * request as its current one. */
#define ASSERT_VALID_REQUEST(req) \
	EVUTIL_ASSERT((req)->handle && (req)->handle->current_req == (req))

/* Short spellings for libevent's ev_uintN_t types. */
#define u8  ev_uint8_t
#define u16 ev_uint16_t
#define u32 ev_uint32_t
#define u64 ev_uint64_t
143
/* Upper bound on the addresses from a single packet that we bother
 * recording. */
#define MAX_V4_ADDRS 32
#define MAX_V6_ADDRS 32

/* Largest DNS message allowed over UDP when EDNS is not in use. */
#define DNS_MAX_UDP_SIZE 512
/* Largest DNS message allowed over UDP when EDNS is in use. */
#define EDNS_MAX_UDP_SIZE 65535

/* Nonzero when the base's configured UDP size is above the plain-DNS
 * limit. */
#define EDNS_ENABLED(base) \
	(((base)->global_max_udp_size) > DNS_MAX_UDP_SIZE)

/* Record types used in this file. */
#define TYPE_A         EVDNS_TYPE_A
#define TYPE_CNAME     5
#define TYPE_PTR       EVDNS_TYPE_PTR
#define TYPE_SOA       EVDNS_TYPE_SOA
#define TYPE_AAAA      EVDNS_TYPE_AAAA
#define TYPE_OPT       41

/* Record class used in this file. */
#define CLASS_INET     EVDNS_CLASS_INET

/* Seconds the server keeps an idle TCP connection alive. */
#define SERVER_IDLE_CONN_TIMEOUT 10
/* Seconds the client keeps an idle TCP connection alive. */
#define CLIENT_IDLE_CONN_TIMEOUT 5
/* Default cap on simultaneous TCP client connections held by a DNS server. */
#define MAX_CLIENT_CONNECTIONS 10
170 /* Default maximum number of simultaneous TCP client connections that DNS server can hold. */
171 #define MAX_CLIENT_CONNECTIONS 10
172
173 /* Persistent handle.  We keep this separate from 'struct request' since we
174  * need some object to last for as long as an evdns_request is outstanding so
175  * that it can be canceled, whereas a search request can lead to multiple
176  * 'struct request' instances being created over its lifetime. */
177 struct evdns_request {
178         struct request *current_req;
179         struct evdns_base *base;
180
181         int pending_cb; /* Waiting for its callback to be invoked; not
182                          * owned by event base any more. */
183
184         /* elements used by the searching code */
185         int search_index;
186         struct search_state *search_state;
187         char *search_origname;  /* needs to be free()ed */
188         int search_flags;
189         u16 tcp_flags;
190 };
191
192 struct request {
193         u8 *request;  /* the dns packet data */
194         u16 request_size; /* size of memory block stored in request field */
195         u8 request_type; /* TYPE_PTR or TYPE_A or TYPE_AAAA */
196         unsigned int request_len;
197         int reissue_count;
198         int tx_count;  /* the number of times that this packet has been sent */
199         void *user_pointer;  /* the pointer given to us for this request */
200         evdns_callback_type user_callback;
201         struct nameserver *ns;  /* the server which we last sent it */
202
203         /* these objects are kept in a circular list */
204         /* XXX We could turn this into a CIRCLEQ. */
205         struct request *next, *prev;
206
207         struct event timeout_event;
208
209         u16 trans_id;  /* the transaction id */
210         unsigned request_appended :1;   /* true if the request pointer is data which follows this struct */
211         unsigned transmit_me :1;  /* needs to be transmitted */
212         unsigned need_cname :1;   /* make a separate callback for CNAME */
213
214         /* XXXX This is a horrible hack. */
215         char **put_cname_in_ptr; /* store the cname here if we get one. */
216
217         struct evdns_base *base;
218
219         struct evdns_request *handle;
220 };
221
222 struct reply {
223         unsigned int type;
224         unsigned int have_answer : 1;
225         u32 rr_count;
226         union {
227                 u32 *a;
228                 struct in6_addr *aaaa;
229                 char *ptr_name;
230                 void *raw;
231         } data;
232         char *cname;
233 };
234
/* State of a TCP connection as tracked in 'struct tcp_connection'. */
enum tcp_state {
	TS_DISCONNECTED,
	TS_CONNECTING,
	TS_CONNECTED
};
240
241 struct tcp_connection {
242         struct bufferevent *bev;
243         enum tcp_state state;
244         u16 awaiting_packet_size;
245 };
246
247 struct evdns_server_port;
248
249 struct client_tcp_connection {
250         LIST_ENTRY(client_tcp_connection) next;
251         struct tcp_connection connection;
252         struct evdns_server_port *port;
253 };
254
255 struct nameserver {
256         evutil_socket_t socket;  /* a connected UDP socket */
257         struct tcp_connection *connection; /* intended for TCP support */
258         struct sockaddr_storage address;
259         ev_socklen_t addrlen;
260         int failed_times;  /* number of times which we have given this server a chance */
261         int timedout;  /* number of times in a row a request has timed out */
262         struct event event;
263         /* these objects are kept in a circular list */
264         struct nameserver *next, *prev;
265         struct event timeout_event;  /* used to keep the timeout for */
266                                      /* when we next probe this server. */
267                                      /* Valid if state == 0 */
268         /* Outstanding probe request for this nameserver, if any */
269         struct evdns_request *probe_request;
270         char state;  /* zero if we think that this server is down */
271         char choked;  /* true if we have an EAGAIN from this server's socket */
272         char write_waiting;  /* true if we are waiting for EV_WRITE events */
273         struct evdns_base *base;
274
275         /* Number of currently inflight requests: used
276          * to track when we should add/del the event. */
277         int requests_inflight;
278 };
279
280
281 /* Represents a local port where we're listening for DNS requests. */
282 struct evdns_server_port {
283         evutil_socket_t socket; /* socket we use to read queries and write replies. */
284         int refcnt; /* reference count. */
285         char choked; /* Are we currently blocked from writing? */
286         char closing; /* Are we trying to close this port, pending writes? */
287         evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
288         void *user_data; /* Opaque pointer passed to user_callback */
289         struct event event; /* Read/write event */
290         /* circular list of replies that we want to write. */
291         struct server_request *pending_replies;
292         struct event_base *event_base;
293
294         /* Structures for tcp support */
295         struct evconnlistener *listener;
296         LIST_HEAD(client_list, client_tcp_connection) client_connections;
297         unsigned client_connections_count;
298         unsigned max_client_connections;
299         struct timeval tcp_idle_timeout;
300
301 #ifndef EVENT__DISABLE_THREAD_SUPPORT
302         void *lock;
303 #endif
304 };
305
306 /* Represents part of a reply being built.      (That is, a single RR.) */
307 struct server_reply_item {
308         struct server_reply_item *next; /* next item in sequence. */
309         char *name; /* name part of the RR */
310         u16 type; /* The RR type */
311         u16 class; /* The RR class (usually CLASS_INET) */
312         u32 ttl; /* The RR TTL */
313         char is_name; /* True iff data is a label */
314         u16 datalen; /* Length of data; -1 if data is a label */
315         void *data; /* The contents of the RR */
316 };
317
318 /* Represents a request that we've received as a DNS server, and holds */
319 /* the components of the reply as we're constructing it. */
320 struct server_request {
321         /* Pointers to the next and previous entries on the list of replies */
322         /* that we're waiting to write.  Only set if we have tried to respond */
323         /* and gotten EAGAIN. */
324         struct server_request *next_pending;
325         struct server_request *prev_pending;
326
327         u16 trans_id; /* Transaction id. */
328         struct evdns_server_port *port; /* Which port received this request on? */
329         struct client_tcp_connection *client; /* Equal to NULL in case of UDP connection. */
330         struct sockaddr_storage addr; /* Where to send the response in case of UDP. Equal to NULL in case of TCP connection.*/
331         ev_socklen_t addrlen; /* length of addr */
332         u16 max_udp_reply_size; /* Maximum size of udp reply that client can handle. */
333
334         int n_answer; /* how many answer RRs have been set? */
335         int n_authority; /* how many authority RRs have been set? */
336         int n_additional; /* how many additional RRs have been set? */
337
338         struct server_reply_item *answer; /* linked list of answer RRs */
339         struct server_reply_item *authority; /* linked list of authority RRs */
340         struct server_reply_item *additional; /* linked list of additional RRs */
341
342         /* Constructed response.  Only set once we're ready to send a reply. */
343         /* Once this is set, the RR fields are cleared, and no more should be set. */
344         char *response;
345         size_t response_len;
346
347         /* Caller-visible fields: flags, questions. */
348         struct evdns_server_request base;
349 };
350
351 struct evdns_base {
352         /* An array of n_req_heads circular lists for inflight requests.
353          * Each inflight request req is in req_heads[req->trans_id % n_req_heads].
354          */
355         struct request **req_heads;
356         /* A circular list of requests that we're waiting to send, but haven't
357          * sent yet because there are too many requests inflight */
358         struct request *req_waiting_head;
359         /* A circular list of nameservers. */
360         struct nameserver *server_head;
361         int n_req_heads;
362
363         struct event_base *event_base;
364
365         /* The number of good nameservers that we have */
366         int global_good_nameservers;
367
368         /* inflight requests are contained in the req_head list */
369         /* and are actually going out across the network */
370         int global_requests_inflight;
371         /* requests which aren't inflight are in the waiting list */
372         /* and are counted here */
373         int global_requests_waiting;
374
375         int global_max_requests_inflight;
376
377         struct timeval global_timeout;  /* 5 seconds by default */
378         int global_max_reissues;  /* a reissue occurs when we get some errors from the server */
379         int global_max_retransmits;  /* number of times we'll retransmit a request which timed out */
380         /* number of timeouts in a row before we consider this server to be down */
381         int global_max_nameserver_timeout;
382         /* true iff we will use the 0x20 hack to prevent poisoning attacks. */
383         int global_randomize_case;
384         /* Maximum size of a UDP DNS packet. */
385         u16 global_max_udp_size;
386
387         /* The first time that a nameserver fails, how long do we wait before
388          * probing to see if it has returned?  */
389         struct timeval global_nameserver_probe_initial_timeout;
390
391         /* Combination of DNS_QUERY_USEVC, DNS_QUERY_IGNTC flags
392          * to control requests via TCP. */
393         u16 global_tcp_flags;
394         /* Idle timeout for outgoing TCP connections. */
395         struct timeval global_tcp_idle_timeout;
396
397         /** Port to bind to for outgoing DNS packets. */
398         struct sockaddr_storage global_outgoing_address;
399         /** ev_socklen_t for global_outgoing_address. 0 if it isn't set. */
400         ev_socklen_t global_outgoing_addrlen;
401
402         struct timeval global_getaddrinfo_allow_skew;
403
404         int so_rcvbuf;
405         int so_sndbuf;
406
407         int getaddrinfo_ipv4_timeouts;
408         int getaddrinfo_ipv6_timeouts;
409         int getaddrinfo_ipv4_answered;
410         int getaddrinfo_ipv6_answered;
411
412         struct search_state *global_search_state;
413
414         TAILQ_HEAD(hosts_list, hosts_entry) hostsdb;
415
416 #ifndef EVENT__DISABLE_THREAD_SUPPORT
417         void *lock;
418 #endif
419
420         int disable_when_inactive;
421
422         /* Maximum timeout between two probe packets
423          * will change `global_nameserver_probe_initial_timeout`
424          * when this value is smaller */
425         int ns_max_probe_timeout;
426         /* Backoff factor of probe timeout */
427         int ns_timeout_backoff_factor;
428 };
429
430 struct hosts_entry {
431         TAILQ_ENTRY(hosts_entry) next;
432         union {
433                 struct sockaddr sa;
434                 struct sockaddr_in sin;
435                 struct sockaddr_in6 sin6;
436         } addr;
437         int addrlen;
438         char hostname[1];
439 };
440
/* The file-wide default base; NULL until something assigns it. */
static struct evdns_base *current_base = NULL;

/* Return the current value of the file-wide default base (may be NULL). */
struct evdns_base *
evdns_get_global_base(void)
{
	struct evdns_base *const global = current_base;

	return global;
}
448
/* Given a pointer to the 'base' member embedded in a server_request, */
/* recover the enclosing server_request by subtracting the member offset. */
#define TO_SERVER_REQUEST(base_ptr)					\
	((struct server_request*)					\
	  (((char*)(base_ptr) - evutil_offsetof(struct server_request, base))))

/* Head of the inflight list that holds transaction id 'id'.  The id is */
/* parenthesized so that an expression argument (e.g. a + b) is reduced */
/* modulo n_req_heads as a whole, not just its last operand. */
#define REQ_HEAD(base, id) ((base)->req_heads[(id) % (base)->n_req_heads])
456
457 static struct nameserver *nameserver_pick(struct evdns_base *base);
458 static void evdns_request_insert(struct request *req, struct request **head);
459 static void evdns_request_remove(struct request *req, struct request **head);
460 static void nameserver_ready_callback(evutil_socket_t fd, short events, void *arg);
461 static int evdns_transmit(struct evdns_base *base);
462 static int evdns_request_transmit(struct request *req);
463 static void nameserver_send_probe(struct nameserver *const ns);
464 static void search_request_finished(struct evdns_request *const);
465 static int search_try_next(struct evdns_request *const req);
466 static struct request *search_request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
467 static void evdns_requests_pump_waiting_queue(struct evdns_base *base);
468 static u16 transaction_id_pick(struct evdns_base *base);
469 static struct request *request_new(struct evdns_base *base, struct evdns_request *handle, int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
470 static struct request *request_clone(struct evdns_base *base, struct request* current);
471 static void request_submit(struct request *const req);
472
473 static int server_request_free(struct server_request *req);
474 static void server_request_free_answers(struct server_request *req);
475 static void server_port_free(struct evdns_server_port *port);
476 static void server_port_ready_callback(evutil_socket_t fd, short events, void *arg);
477 static int evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename);
478 static int evdns_base_set_option_impl(struct evdns_base *base,
479     const char *option, const char *val, int flags);
480 static void evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests);
481 static void evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg);
482 static int evdns_server_request_format_response(struct server_request *req, int err);
483 static void incoming_conn_cb(struct evconnlistener *listener, evutil_socket_t fd,
484     struct sockaddr *address, int socklen, void *arg);
485
486 static int strtoint(const char *const str);
487
/* Locking helpers for objects that carry a 'lock' member.  With thread
 * support compiled out they expand to the no-op statement. */
#ifndef EVENT__DISABLE_THREAD_SUPPORT
#define EVDNS_LOCK(base)			\
	EVLOCK_LOCK((base)->lock, 0)
#define EVDNS_UNLOCK(base)			\
	EVLOCK_UNLOCK((base)->lock, 0)
#define ASSERT_LOCKED(base)			\
	EVLOCK_ASSERT_LOCKED((base)->lock)
#else
#define EVDNS_LOCK(base)  EVUTIL_NIL_STMT_
#define EVDNS_UNLOCK(base) EVUTIL_NIL_STMT_
#define ASSERT_LOCKED(base) EVUTIL_NIL_STMT_
#endif
500
501 static evdns_debug_log_fn_type evdns_log_fn = NULL;
502
503 void
504 evdns_set_log_fn(evdns_debug_log_fn_type fn)
505 {
506         evdns_log_fn = fn;
507 }
508
509 #ifdef __GNUC__
510 #define EVDNS_LOG_CHECK  __attribute__ ((format(printf, 2, 3)))
511 #else
512 #define EVDNS_LOG_CHECK
513 #endif
514
515 static void evdns_log_(int severity, const char *fmt, ...) EVDNS_LOG_CHECK;
516 static void
517 evdns_log_(int severity, const char *fmt, ...)
518 {
519         va_list args;
520         va_start(args,fmt);
521         if (evdns_log_fn) {
522                 char buf[512];
523                 int is_warn = (severity == EVDNS_LOG_WARN);
524                 evutil_vsnprintf(buf, sizeof(buf), fmt, args);
525                 evdns_log_fn(is_warn, buf);
526         } else {
527                 event_logv_(severity, NULL, fmt, args);
528         }
529         va_end(args);
530 }
531
532 #define log evdns_log_
533
534 /* Initialize tcp_connection structure. */
535 static void
536 init_tcp_connection(struct tcp_connection *conn, struct bufferevent *bev)
537 {
538         memset(conn, 0, sizeof(*conn));
539         conn->state = TS_DISCONNECTED;
540         conn->bev = bev;
541         conn->awaiting_packet_size = 0;
542 }
543
544 /* Disconnect tcp connection. */
545 static void
546 evdns_tcp_disconnect(struct tcp_connection *conn)
547 {
548         if (!conn)
549                 return;
550         conn->state = TS_DISCONNECTED;
551         conn->awaiting_packet_size = 0;
552         if (conn->bev) {
553                 bufferevent_free(conn->bev);
554                 conn->bev = NULL;
555         }
556 }
557
558 /* Add new tcp client to the list of TCP clients in the TCP DNS server. */
559 static struct client_tcp_connection*
560 evdns_add_tcp_client(struct evdns_server_port *port, struct bufferevent *bev)
561 {
562         struct client_tcp_connection *client;
563         EVUTIL_ASSERT(port && bev);
564         if (port->max_client_connections == port->client_connections_count)
565                 goto error;
566
567         client = mm_calloc(1, sizeof(*client));
568         if (!client)
569                 goto error;
570         init_tcp_connection(&client->connection, bev);
571         client->port = port;
572         LIST_INSERT_HEAD(&port->client_connections, client, next);
573
574         ++port->client_connections_count;
575         /* we need to hold evdns_server_port as long as one connection at least stays alive */
576         ++port->refcnt;
577         return client;
578 error:
579         return NULL;
580 }
581
582 /* Remove tcp client and free all associated data from the TCP DNS server. */
583 static int
584 evdns_remove_tcp_client(struct evdns_server_port *port, struct client_tcp_connection *client)
585 {
586         if (!port || !client)
587                 goto error;
588
589         evdns_tcp_disconnect(&client->connection);
590         LIST_REMOVE(client, next);
591         mm_free(client);
592         --port->client_connections_count;
593         --port->refcnt;
594         return 0;
595 error:
596         return -1;
597 }
598
599 /* Remove all tcp clients and free all associated data from the TCP DNS server. */
600 static void
601 evdns_remove_all_tcp_clients(struct evdns_server_port *port)
602 {
603         struct client_tcp_connection *client;
604         while ((client = LIST_FIRST(&port->client_connections))) {
605                 evdns_remove_tcp_client(port, client);
606         }
607 }
608
609 /* Create new tcp connection structure for DNS client. */
610 static struct tcp_connection *
611 new_tcp_connecton(struct bufferevent *bev)
612 {
613         struct tcp_connection *conn;
614         if (!bev)
615                 return NULL;
616
617         conn = mm_calloc(1, sizeof(*conn));
618         if (!conn)
619                 return NULL;
620         init_tcp_connection(conn, bev);
621         return conn;
622 }
623
/* Tear down a DNS-client TCP connection: disconnect it, then release the
 * structure itself.  A NULL 'conn' is ignored. */
static void
disconnect_and_free_connection(struct tcp_connection *conn)
{
	if (conn != NULL) {
		evdns_tcp_disconnect(conn);
		mm_free(conn);
	}
}
633
634 /* This walks the list of inflight requests to find the */
635 /* one with a matching transaction id. Returns NULL on */
636 /* failure */
637 static struct request *
638 request_find_from_trans_id(struct evdns_base *base, u16 trans_id) {
639         struct request *req = REQ_HEAD(base, trans_id);
640         struct request *const started_at = req;
641
642         ASSERT_LOCKED(base);
643
644         if (req) {
645                 do {
646                         if (req->trans_id == trans_id) return req;
647                         req = req->next;
648                 } while (req != started_at);
649         }
650
651         return NULL;
652 }
653
654 /* a libevent callback function which is called when a nameserver */
655 /* has gone down and we want to test if it has came back to life yet */
656 static void
657 nameserver_prod_callback(evutil_socket_t fd, short events, void *arg) {
658         struct nameserver *const ns = (struct nameserver *) arg;
659         (void)fd;
660         (void)events;
661
662         EVDNS_LOCK(ns->base);
663         nameserver_send_probe(ns);
664         EVDNS_UNLOCK(ns->base);
665 }
666
667 /* a libevent callback which is called when a nameserver probe (to see if */
668 /* it has come back to life) times out. We increment the count of failed_times */
669 /* and wait longer to send the next probe packet. */
670 static void
671 nameserver_probe_failed(struct nameserver *const ns) {
672         struct timeval timeout;
673         int i;
674
675         ASSERT_LOCKED(ns->base);
676         (void) evtimer_del(&ns->timeout_event);
677         if (ns->state == 1) {
678                 /* This can happen if the nameserver acts in a way which makes us mark */
679                 /* it as bad and then starts sending good replies. */
680                 return;
681         }
682
683         memcpy(&timeout, &ns->base->global_nameserver_probe_initial_timeout,
684             sizeof(struct timeval));
685         for (i = ns->failed_times; i > 0 && timeout.tv_sec < ns->base->ns_max_probe_timeout; --i) {
686                 timeout.tv_sec *= ns->base->ns_timeout_backoff_factor;
687                 timeout.tv_usec *= ns->base->ns_timeout_backoff_factor;
688                 if (timeout.tv_usec > 1000000) {
689                         timeout.tv_sec += timeout.tv_usec / 1000000;
690                         timeout.tv_usec %= 1000000;
691                 }
692         }
693         if (timeout.tv_sec > ns->base->ns_max_probe_timeout) {
694                 timeout.tv_sec = ns->base->ns_max_probe_timeout;
695                 timeout.tv_usec = 0;
696         }
697
698         ns->failed_times++;
699
700         if (evtimer_add(&ns->timeout_event, &timeout) < 0) {
701                 char addrbuf[128];
702                 log(EVDNS_LOG_WARN,
703                     "Error from libevent when adding timer event for %s",
704                     evutil_format_sockaddr_port_(
705                             (struct sockaddr *)&ns->address,
706                             addrbuf, sizeof(addrbuf)));
707         }
708 }
709
710 static void
711 request_swap_ns(struct request *req, struct nameserver *ns) {
712         if (ns && req->ns != ns) {
713                 EVUTIL_ASSERT(req->ns->requests_inflight > 0);
714                 req->ns->requests_inflight--;
715                 ns->requests_inflight++;
716
717                 req->ns = ns;
718         }
719 }
720
721 /* called when a nameserver has been deemed to have failed. For example, too */
722 /* many packets have timed out etc */
723 static void
724 nameserver_failed(struct nameserver *const ns, const char *msg, int err) {
725         struct request *req, *started_at;
726         struct evdns_base *base = ns->base;
727         int i;
728         char addrbuf[128];
729
730         ASSERT_LOCKED(base);
731         /* if this nameserver has already been marked as failed */
732         /* then don't do anything */
733         if (!ns->state) return;
734
735         log(EVDNS_LOG_MSG, "Nameserver %s has failed: %s",
736             evutil_format_sockaddr_port_(
737                     (struct sockaddr *)&ns->address,
738                     addrbuf, sizeof(addrbuf)),
739             msg);
740
741         base->global_good_nameservers--;
742         EVUTIL_ASSERT(base->global_good_nameservers >= 0);
743         if (base->global_good_nameservers == 0) {
744                 log(EVDNS_LOG_MSG, "All nameservers have failed");
745         }
746
747         ns->state = 0;
748         ns->failed_times = 1;
749
750         if (ns->connection) {
751                 disconnect_and_free_connection(ns->connection);
752                 ns->connection = NULL;
753         } else if (err == ENOTCONN) {
754                 /* XXX: If recvfrom results in ENOTCONN, the socket remains readable
755                  * which triggers another recvfrom. The observed behavior is 100% CPU use.
756                  * This occurs on iOS (kqueue) after the process has been backgrounded
757                  * for a long time (~300 seconds) and then resumed.
758                  * All sockets, TCP and UDP, seem to get ENOTCONN and must be closed.
759                  * https://github.com/libevent/libevent/issues/265 */
760                 const struct sockaddr *address = (const struct sockaddr *)&ns->address;
761                 evutil_closesocket(ns->socket);
762                 ns->socket = evutil_socket_(address->sa_family,
763                         SOCK_DGRAM | EVUTIL_SOCK_NONBLOCK | EVUTIL_SOCK_CLOEXEC, 0);
764
765                 if (base->global_outgoing_addrlen &&
766                         !evutil_sockaddr_is_loopback_(address)) {
767                         if (bind(ns->socket,
768                                         (struct sockaddr *)&base->global_outgoing_address,
769                                         base->global_outgoing_addrlen) < 0) {
770                                 log(EVDNS_LOG_WARN, "Couldn't bind to outgoing address");
771                         }
772                 }
773
774                 event_del(&ns->event);
775                 event_assign(&ns->event, ns->base->event_base, ns->socket,
776                         EV_READ | (ns->write_waiting ? EV_WRITE : 0) | EV_PERSIST,
777                         nameserver_ready_callback, ns);
778                 if (!base->disable_when_inactive && event_add(&ns->event, NULL) < 0) {
779                         log(EVDNS_LOG_WARN, "Couldn't add %s event",
780                                 ns->write_waiting ? "rw": "read");
781                 }
782         }
783         if (evtimer_add(&ns->timeout_event,
784                 &base->global_nameserver_probe_initial_timeout) < 0) {
785                 log(EVDNS_LOG_WARN,
786                     "Error from libevent when adding timer event for %s",
787                     evutil_format_sockaddr_port_(
788                             (struct sockaddr *)&ns->address,
789                             addrbuf, sizeof(addrbuf)));
790                 /* ???? Do more? */
791         }
792
793         /* walk the list of inflight requests to see if any can be reassigned to */
794         /* a different server. Requests in the waiting queue don't have a */
795         /* nameserver assigned yet */
796
797         /* if we don't have *any* good nameservers then there's no point */
798         /* trying to reassign requests to one */
799         if (!base->global_good_nameservers) return;
800
801         for (i = 0; i < base->n_req_heads; ++i) {
802                 req = started_at = base->req_heads[i];
803                 if (req) {
804                         do {
805                                 if (req->tx_count == 0 && req->ns == ns) {
806                                         /* still waiting to go out, can be moved */
807                                         /* to another server */
808                                         request_swap_ns(req, nameserver_pick(base));
809                                 }
810                                 req = req->next;
811                         } while (req != started_at);
812                 }
813         }
814 }
815
816 static void
817 nameserver_up(struct nameserver *const ns)
818 {
819         char addrbuf[128];
820         ASSERT_LOCKED(ns->base);
821         if (ns->state) return;
822         log(EVDNS_LOG_MSG, "Nameserver %s is back up",
823             evutil_format_sockaddr_port_(
824                     (struct sockaddr *)&ns->address,
825                     addrbuf, sizeof(addrbuf)));
826         evtimer_del(&ns->timeout_event);
827         if (ns->probe_request) {
828                 evdns_cancel_request(ns->base, ns->probe_request);
829                 ns->probe_request = NULL;
830         }
831         ns->state = 1;
832         ns->failed_times = 0;
833         ns->timedout = 0;
834         ns->base->global_good_nameservers++;
835 }
836
837 static void
838 request_trans_id_set(struct request *const req, const u16 trans_id) {
839         req->trans_id = trans_id;
840         *((u16 *) req->request) = htons(trans_id);
841 }
842
843 /* Called to remove a request from a list and dealloc it. */
844 /* head is a pointer to the head of the list it should be */
845 /* removed from or NULL if the request isn't in a list. */
846 /* when free_handle is one, free the handle as well. */
847 static void
848 request_finished(struct request *const req, struct request **head, int free_handle) {
849         struct evdns_base *base = req->base;
850         int was_inflight = (head != &base->req_waiting_head);
851         EVDNS_LOCK(base);
852         ASSERT_VALID_REQUEST(req);
853
854         if (head)
855                 evdns_request_remove(req, head);
856
857         log(EVDNS_LOG_DEBUG, "Removing timeout for request %p", (void *)req);
858         if (was_inflight) {
859                 evtimer_del(&req->timeout_event);
860                 base->global_requests_inflight--;
861                 req->ns->requests_inflight--;
862         } else {
863                 base->global_requests_waiting--;
864         }
865         /* it was initialized during request_new / evtimer_assign */
866         event_debug_unassign(&req->timeout_event);
867
868         if (req->ns &&
869             req->ns->requests_inflight == 0 &&
870             req->base->disable_when_inactive) {
871                 event_del(&req->ns->event);
872                 evtimer_del(&req->ns->timeout_event);
873         }
874
875         if (!req->request_appended) {
876                 /* need to free the request data on it's own */
877                 mm_free(req->request);
878         } else {
879                 /* the request data is appended onto the header */
880                 /* so everything gets free()ed when we: */
881         }
882
883         if (req->handle) {
884                 EVUTIL_ASSERT(req->handle->current_req == req);
885
886                 if (free_handle) {
887                         search_request_finished(req->handle);
888                         req->handle->current_req = NULL;
889                         if (! req->handle->pending_cb) {
890                                 /* If we're planning to run the callback,
891                                  * don't free the handle until later. */
892                                 mm_free(req->handle);
893                         }
894                         req->handle = NULL; /* If we have a bug, let's crash
895                                              * early */
896                 } else {
897                         req->handle->current_req = NULL;
898                 }
899         }
900
901         mm_free(req);
902
903         evdns_requests_pump_waiting_queue(base);
904         EVDNS_UNLOCK(base);
905 }
906
907 /* This is called when a server returns a funny error code. */
908 /* We try the request again with another server. */
909 /* */
910 /* return: */
911 /*   0 ok */
912 /*   1 failed/reissue is pointless */
913 static int
914 request_reissue(struct request *req) {
915         const struct nameserver *const last_ns = req->ns;
916         ASSERT_LOCKED(req->base);
917         ASSERT_VALID_REQUEST(req);
918         /* the last nameserver should have been marked as failing */
919         /* by the caller of this function, therefore pick will try */
920         /* not to return it */
921         request_swap_ns(req, nameserver_pick(req->base));
922         if (req->ns == last_ns) {
923                 /* ... but pick did return it */
924                 /* not a lot of point in trying again with the */
925                 /* same server */
926                 return 1;
927         }
928
929         req->reissue_count++;
930         req->tx_count = 0;
931         req->transmit_me = 1;
932
933         return 0;
934 }
935
936 /* this function looks for space on the inflight queue and promotes */
937 /* requests from the waiting queue if it can. */
938 /* */
939 /* TODO: */
940 /* add return code, see at nameserver_pick() and other functions. */
941 static void
942 evdns_requests_pump_waiting_queue(struct evdns_base *base) {
943         ASSERT_LOCKED(base);
944         while (base->global_requests_inflight < base->global_max_requests_inflight &&
945                    base->global_requests_waiting) {
946                 struct request *req;
947
948                 EVUTIL_ASSERT(base->req_waiting_head);
949                 req = base->req_waiting_head;
950
951                 req->ns = nameserver_pick(base);
952                 if (!req->ns)
953                         return;
954
955                 /* move a request from the waiting queue to the inflight queue */
956                 req->ns->requests_inflight++;
957
958                 evdns_request_remove(req, &base->req_waiting_head);
959
960                 base->global_requests_waiting--;
961                 base->global_requests_inflight++;
962
963                 request_trans_id_set(req, transaction_id_pick(base));
964
965                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
966                 evdns_request_transmit(req);
967                 evdns_transmit(base);
968         }
969 }
970
/* State for a user callback whose invocation has been deferred.
 * Filled in by reply_schedule_callback() and consumed (and freed) by
 * reply_run_callback(). */
struct deferred_reply_callback {
	/* Embedded deferred-callback object; reply_run_callback() recovers
	 * the enclosing structure from it with EVUTIL_UPCAST. */
	struct event_callback deferred;
	/* Handle of the originating request, or NULL if it had none.  Freed
	 * by reply_run_callback() when its pending_cb flag is set. */
	struct evdns_request *handle;
	/* Copy of the request's request_type (TYPE_A, TYPE_PTR or
	 * TYPE_AAAA); selects how the user callback is invoked. */
	u8 request_type;
	/* Non-zero when `reply` holds an answer to report; zero when only
	 * `err` is to be reported. */
	u8 have_reply;
	/* TTL value handed to the user callback. */
	u32 ttl;
	/* Error code handed to the user callback when have_reply is zero. */
	u32 err;
	/* The user's callback function. */
	evdns_callback_type user_callback;
	/* Copy of the parsed reply.  Its data.raw and cname allocations are
	 * released by reply_run_callback(). */
	struct reply reply;
};
982
983 static void
984 reply_run_callback(struct event_callback *d, void *user_pointer)
985 {
986         struct deferred_reply_callback *cb =
987             EVUTIL_UPCAST(d, struct deferred_reply_callback, deferred);
988
989         switch (cb->request_type) {
990         case TYPE_A:
991                 if (cb->have_reply) {
992                         cb->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
993                             cb->reply.rr_count, cb->ttl,
994                             cb->reply.data.a,
995                             user_pointer);
996                         if (cb->reply.cname)
997                                 cb->user_callback(DNS_ERR_NONE, DNS_CNAME, 1,
998                                     cb->ttl, cb->reply.cname, user_pointer);
999                 } else
1000                         cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
1001                 break;
1002         case TYPE_PTR:
1003                 if (cb->have_reply) {
1004                         char *name = cb->reply.data.ptr_name;
1005                         cb->user_callback(DNS_ERR_NONE, DNS_PTR, 1, cb->ttl,
1006                             &name, user_pointer);
1007                 } else {
1008                         cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
1009                 }
1010                 break;
1011         case TYPE_AAAA:
1012                 if (cb->have_reply) {
1013                         cb->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
1014                             cb->reply.rr_count, cb->ttl,
1015                             cb->reply.data.aaaa,
1016                             user_pointer);
1017                         if (cb->reply.cname)
1018                                 cb->user_callback(DNS_ERR_NONE, DNS_CNAME, 1,
1019                                     cb->ttl, cb->reply.cname, user_pointer);
1020                 } else
1021                         cb->user_callback(cb->err, 0, 0, cb->ttl, NULL, user_pointer);
1022                 break;
1023         default:
1024                 EVUTIL_ASSERT(0);
1025         }
1026
1027         if (cb->handle && cb->handle->pending_cb) {
1028                 mm_free(cb->handle);
1029         }
1030
1031         if (cb->reply.data.raw) {
1032                 mm_free(cb->reply.data.raw);
1033         }
1034
1035         if (cb->reply.cname) {
1036                 mm_free(cb->reply.cname);
1037         }
1038
1039         mm_free(cb);
1040 }
1041
1042 static void
1043 reply_schedule_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply)
1044 {
1045         struct deferred_reply_callback *d = mm_calloc(1, sizeof(*d));
1046
1047         if (!d) {
1048                 event_warn("%s: Couldn't allocate space for deferred callback.",
1049                     __func__);
1050                 return;
1051         }
1052
1053         ASSERT_LOCKED(req->base);
1054
1055         d->request_type = req->request_type;
1056         d->user_callback = req->user_callback;
1057         d->ttl = ttl;
1058         d->err = err;
1059         if (reply) {
1060                 d->have_reply = 1;
1061                 memcpy(&d->reply, reply, sizeof(struct reply));
1062                 /* We've taken ownership of the data. */
1063                 reply->data.raw = NULL;
1064         }
1065
1066         if (req->handle) {
1067                 req->handle->pending_cb = 1;
1068                 d->handle = req->handle;
1069         }
1070
1071         event_deferred_cb_init_(
1072             &d->deferred,
1073             event_get_priority(&req->timeout_event),
1074             reply_run_callback,
1075             req->user_pointer);
1076         event_deferred_cb_schedule_(
1077                 req->base->event_base,
1078                 &d->deferred);
1079 }
1080
1081 static int
1082 client_retransmit_through_tcp(struct evdns_request *handle)
1083 {
1084         struct request *req = handle->current_req;
1085         struct evdns_base *base = req->base;
1086         struct request *newreq = request_clone(base, req);
1087         ASSERT_LOCKED(base);
1088         if (!newreq)
1089                 return 1;
1090         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
1091         handle->current_req = newreq;
1092         newreq->handle = handle;
1093         request_submit(newreq);
1094         return 0;
1095 }
1096
/* Bit masks applied to the 16-bit `flags` word of a DNS message header
 * (the second 16-bit header field read by reply_parse()/request_parse()).
 *
 * As used in this file:
 *   _QR_MASK    - set on answers, clear on queries.
 *   _TC_MASK    - reply was truncated (reported as DNS_ERR_TRUNCATED).
 *   _RCODE_MASK - response code; non-zero values are mapped to DNS_ERR_*.
 *   _RD_MASK, _CD_MASK - the only flags request_parse() preserves from an
 *                 incoming request.
 * NOTE(review): the remaining masks presumably follow the standard DNS
 * header bit names (opcode, AA, RA, Z, AD) - confirm against the RFCs. */
#define _QR_MASK    0x8000U
#define _OP_MASK    0x7800U
#define _AA_MASK    0x0400U
#define _TC_MASK    0x0200U
#define _RD_MASK    0x0100U
#define _RA_MASK    0x0080U
#define _Z_MASK     0x0040U
#define _AD_MASK    0x0020U
#define _CD_MASK    0x0010U
#define _RCODE_MASK 0x000fU
#define _Z_MASK_DEPRECATED 0x0070U
1108
1109 /* this processes a parsed reply packet */
1110 static void
1111 reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
1112         int error;
1113         char addrbuf[128];
1114         int retransmit_via_tcp = 0;
1115         static const int error_codes[] = {
1116                 DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
1117                 DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
1118         };
1119
1120         ASSERT_LOCKED(req->base);
1121         ASSERT_VALID_REQUEST(req);
1122
1123         if (flags & (_RCODE_MASK | _TC_MASK) || !reply || !reply->have_answer) {
1124                 /* there was an error */
1125                 if (flags & _TC_MASK) {
1126                         error = DNS_ERR_TRUNCATED;
1127                         retransmit_via_tcp = (req->handle->tcp_flags & (DNS_QUERY_IGNTC | DNS_QUERY_USEVC)) == 0;
1128                 } else if (flags & _RCODE_MASK) {
1129                         u16 error_code = (flags & _RCODE_MASK) - 1;
1130                         if (error_code > 4) {
1131                                 error = DNS_ERR_UNKNOWN;
1132                         } else {
1133                                 error = error_codes[error_code];
1134                         }
1135                 } else if (reply && !reply->have_answer) {
1136                         error = DNS_ERR_NODATA;
1137                 } else {
1138                         error = DNS_ERR_UNKNOWN;
1139                 }
1140
1141                 switch (error) {
1142                 case DNS_ERR_NOTIMPL:
1143                 case DNS_ERR_REFUSED:
1144                         /* we regard these errors as marking a bad nameserver */
1145                         if (req->reissue_count < req->base->global_max_reissues) {
1146                                 char msg[64];
1147                                 evutil_snprintf(msg, sizeof(msg), "Bad response %d (%s)",
1148                                          error, evdns_err_to_string(error));
1149                                 nameserver_failed(req->ns, msg, 0);
1150                                 if (!request_reissue(req)) return;
1151                         }
1152                         break;
1153                 case DNS_ERR_SERVERFAILED:
1154                         /* rcode 2 (servfailed) sometimes means "we
1155                          * are broken" and sometimes (with some binds)
1156                          * means "that request was very confusing."
1157                          * Treat this as a timeout, not a failure.
1158                          */
1159                         log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver"
1160                                 "at %s; will allow the request to time out.",
1161                             evutil_format_sockaddr_port_(
1162                                     (struct sockaddr *)&req->ns->address,
1163                                     addrbuf, sizeof(addrbuf)));
1164                         /* Call the timeout function */
1165                         evdns_request_timeout_callback(0, 0, req);
1166                         return;
1167                 default:
1168                         /* we got a good reply from the nameserver: it is up. */
1169                         if (req->handle == req->ns->probe_request) {
1170                                 /* Avoid double-free */
1171                                 req->ns->probe_request = NULL;
1172                         }
1173
1174                         nameserver_up(req->ns);
1175                 }
1176
1177                 if (retransmit_via_tcp) {
1178                         log(EVDNS_LOG_DEBUG, "Recieved truncated reply(flags 0x%x, transanc ID: %d). Retransmiting via TCP.",
1179                                 req->handle->tcp_flags, req->trans_id);
1180                         req->handle->tcp_flags |= DNS_QUERY_USEVC;
1181                         client_retransmit_through_tcp(req->handle);
1182                         return;
1183                 }
1184
1185                 if (req->handle->search_state &&
1186                     req->request_type != TYPE_PTR) {
1187                         /* if we have a list of domains to search in,
1188                          * try the next one */
1189                         if (!search_try_next(req->handle)) {
1190                                 /* a new request was issued so this
1191                                  * request is finished and */
1192                                 /* the user callback will be made when
1193                                  * that request (or a */
1194                                 /* child of it) finishes. */
1195                                 return;
1196                         }
1197                 }
1198
1199                 /* all else failed. Pass the failure up */
1200                 reply_schedule_callback(req, ttl, error, NULL);
1201                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
1202         } else {
1203                 /* all ok, tell the user */
1204                 reply_schedule_callback(req, ttl, 0, reply);
1205                 if (req->handle == req->ns->probe_request)
1206                         req->ns->probe_request = NULL; /* Avoid double-free */
1207                 nameserver_up(req->ns);
1208                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
1209         }
1210 }
1211
/* Decode a (possibly compressed) DNS name from a packet into a dotted,
 * NUL-terminated string.
 *
 * packet        - the whole DNS message.
 * length        - number of bytes in packet.
 * idx           - in: offset at which the name starts; out (on success):
 *                 offset of the first byte after the name as it appears at
 *                 the original position (i.e. after the first compression
 *                 pointer, if one was followed).
 * name_out      - destination buffer.
 * name_out_len  - size of name_out in bytes, including room for the NUL.
 *
 * Returns 0 on success, -1 if the name is malformed, runs outside the
 * packet, loops, or does not fit into name_out.  *idx is left untouched on
 * failure.
 *
 * The GET32/GET16/GET8 helper macros are defined here and are also used by
 * the parsing functions that follow in this file. */
static int
name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
	int name_end = -1;
	int j = *idx;
	int ptr_count = 0;
#define GET32(x) do { if (j + 4 > length) goto err; memcpy(&t32_, packet + j, 4); j += 4; x = ntohl(t32_); } while (0)
#define GET16(x) do { if (j + 2 > length) goto err; memcpy(&t_, packet + j, 2); j += 2; x = ntohs(t_); } while (0)
#define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while (0)

	char *cp = name_out;
	const char *const end = name_out + name_out_len;

	/* Normally, names are a series of length prefixed strings terminated */
	/* with a length of 0 (the lengths are u8's < 64). */
	/* However, the length can start with a pair of 1 bits and that */
	/* means that the next 14 bits are a pointer within the current */
	/* packet. */

	for (;;) {
		u8 label_len;
		GET8(label_len);
		if (!label_len) break;
		/* Only the 11 prefix marks a compression pointer; the 01 and
		 * 10 prefixes are reserved and rejected below. */
		if ((label_len & 0xc0) == 0xc0) {
			u8 ptr_low;
			GET8(ptr_low);
			if (name_end < 0) name_end = j;
			j = (((int)label_len & 0x3f) << 8) + ptr_low;
			/* Make sure that the target offset is in-bounds. */
			if (j < 0 || j >= length) return -1;
			/* If we've jumped more times than there are characters in the
			 * message, we must have a loop. */
			if (++ptr_count > length) return -1;
			continue;
		}
		if (label_len > 63) return -1;
		/* Space checks compare remaining room (end - cp) instead of
		 * forming cp + n, which could point past the buffer. */
		if (cp != name_out) {
			if (end - cp <= 1) return -1;
			*cp++ = '.';
		}
		if (end - cp <= label_len) return -1;
		if (j + label_len > length) return -1;
		memcpy(cp, packet + j, label_len);
		cp += label_len;
		j += label_len;
	}
	if (cp >= end) return -1;
	*cp = '\0';
	if (name_end < 0)
		*idx = j;
	else
		*idx = name_end;
	return 0;
 err:
	return -1;
}
1267
1268 /* parses a raw request from a nameserver */
1269 static int
1270 reply_parse(struct evdns_base *base, u8 *packet, int length)
1271 {
1272         int j = 0, k = 0;  /* index into packet */
1273         u16 t_;  /* used by the macros */
1274         u32 t32_;  /* used by the macros */
1275         char tmp_name[256], cmp_name[256]; /* used by the macros */
1276         int name_matches = 0;
1277
1278         u16 trans_id, questions, answers, authority, additional, datalength;
1279         u16 flags = 0;
1280         u32 ttl, ttl_r = 0xffffffff;
1281         struct reply reply;
1282         struct request *req = NULL;
1283         unsigned int i, buf_size;
1284
1285         ASSERT_LOCKED(base);
1286
1287         GET16(trans_id);
1288         GET16(flags);
1289         GET16(questions);
1290         GET16(answers);
1291         GET16(authority);
1292         GET16(additional);
1293         (void) authority; /* suppress "unused variable" warnings. */
1294         (void) additional; /* suppress "unused variable" warnings. */
1295
1296         req = request_find_from_trans_id(base, trans_id);
1297         if (!req) return -1;
1298         EVUTIL_ASSERT(req->base == base);
1299
1300         memset(&reply, 0, sizeof(reply));
1301
1302         /* If it's not an answer, it doesn't correspond to any request. */
1303         if (!(flags & _QR_MASK)) return -1;  /* must be an answer */
1304         if ((flags & (_RCODE_MASK|_TC_MASK)) && (flags & (_RCODE_MASK|_TC_MASK)) != DNS_ERR_NOTEXIST) {
1305                 /* there was an error and it's not NXDOMAIN */
1306                 goto err;
1307         }
1308         /* if (!answers) return; */  /* must have an answer of some form */
1309
1310         /* This macro skips a name in the DNS reply. */
1311 #define SKIP_NAME                                               \
1312         do { tmp_name[0] = '\0';                                \
1313                 if (name_parse(packet, length, &j, tmp_name,    \
1314                         sizeof(tmp_name))<0)                    \
1315                         goto err;                               \
1316         } while (0)
1317
1318         reply.type = req->request_type;
1319
1320         /* skip over each question in the reply */
1321         for (i = 0; i < questions; ++i) {
1322                 /* the question looks like
1323                  *   <label:name><u16:type><u16:class>
1324                  */
1325                 tmp_name[0] = '\0';
1326                 cmp_name[0] = '\0';
1327                 k = j;
1328                 if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name)) < 0)
1329                         goto err;
1330                 if (name_parse(req->request, req->request_len, &k,
1331                         cmp_name, sizeof(cmp_name))<0)
1332                         goto err;
1333                 if (!base->global_randomize_case) {
1334                         if (strcmp(tmp_name, cmp_name) == 0)
1335                                 name_matches = 1;
1336                 } else {
1337                         if (evutil_ascii_strcasecmp(tmp_name, cmp_name) == 0)
1338                                 name_matches = 1;
1339                 }
1340
1341                 j += 4;
1342                 if (j > length)
1343                         goto err;
1344         }
1345
1346         if (!name_matches)
1347                 goto err;
1348
1349         /* We can allocate less for the reply data, but to do it we'll have
1350          * to parse the response. To simplify things let's just allocate
1351          * a little bit more to avoid complex evaluations.
1352          */
1353         buf_size = MAX(length - j, EVDNS_NAME_MAX);
1354         reply.data.raw = mm_malloc(buf_size);
1355
1356         /* now we have the answer section which looks like
1357          * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
1358          */
1359
1360         for (i = 0; i < answers; ++i) {
1361                 u16 type, class;
1362
1363                 SKIP_NAME;
1364                 GET16(type);
1365                 GET16(class);
1366                 GET32(ttl);
1367                 GET16(datalength);
1368
1369                 if (type == TYPE_A && class == CLASS_INET) {
1370                         int addrcount;
1371                         if (req->request_type != TYPE_A) {
1372                                 j += datalength; continue;
1373                         }
1374                         if ((datalength & 3) != 0) /* not an even number of As. */
1375                             goto err;
1376                         addrcount = datalength >> 2;
1377
1378                         ttl_r = MIN(ttl_r, ttl);
1379                         /* we only bother with the first four addresses. */
1380                         if (j + 4*addrcount > length) goto err;
1381                         memcpy(&reply.data.a[reply.rr_count],
1382                                    packet + j, 4*addrcount);
1383                         j += 4*addrcount;
1384                         reply.rr_count += addrcount;
1385                         reply.have_answer = 1;
1386                 } else if (type == TYPE_PTR && class == CLASS_INET) {
1387                         if (req->request_type != TYPE_PTR) {
1388                                 j += datalength; continue;
1389                         }
1390                         if (name_parse(packet, length, &j, reply.data.ptr_name,
1391                                                    buf_size)<0)
1392                                 goto err;
1393                         ttl_r = MIN(ttl_r, ttl);
1394                         reply.have_answer = 1;
1395                         break;
1396                 } else if (type == TYPE_CNAME) {
1397                         char cname[EVDNS_NAME_MAX];
1398                         if (name_parse(packet, length, &j, cname,
1399                                 sizeof(cname))<0)
1400                                 goto err;
1401                         if (req->need_cname)
1402                                 reply.cname = mm_strdup(cname);
1403                         if (req->put_cname_in_ptr && !*req->put_cname_in_ptr)
1404                                 *req->put_cname_in_ptr = mm_strdup(cname);
1405                 } else if (type == TYPE_AAAA && class == CLASS_INET) {
1406                         int addrcount;
1407                         if (req->request_type != TYPE_AAAA) {
1408                                 j += datalength; continue;
1409                         }
1410                         if ((datalength & 15) != 0) /* not an even number of AAAAs. */
1411                                 goto err;
1412                         addrcount = datalength >> 4;  /* each address is 16 bytes long */
1413                         ttl_r = MIN(ttl_r, ttl);
1414
1415                         /* we only bother with the first four addresses. */
1416                         if (j + 16*addrcount > length) goto err;
1417                         memcpy(&reply.data.aaaa[reply.rr_count],
1418                                    packet + j, 16*addrcount);
1419                         reply.rr_count += addrcount;
1420                         j += 16*addrcount;
1421                         reply.have_answer = 1;
1422                 } else {
1423                         /* skip over any other type of resource */
1424                         j += datalength;
1425                 }
1426         }
1427
1428         if (!reply.have_answer) {
1429                 for (i = 0; i < authority; ++i) {
1430                         u16 type, class;
1431                         SKIP_NAME;
1432                         GET16(type);
1433                         GET16(class);
1434                         GET32(ttl);
1435                         GET16(datalength);
1436                         if (type == TYPE_SOA && class == CLASS_INET) {
1437                                 u32 serial, refresh, retry, expire, minimum;
1438                                 SKIP_NAME;
1439                                 SKIP_NAME;
1440                                 GET32(serial);
1441                                 GET32(refresh);
1442                                 GET32(retry);
1443                                 GET32(expire);
1444                                 GET32(minimum);
1445                                 (void)expire;
1446                                 (void)retry;
1447                                 (void)refresh;
1448                                 (void)serial;
1449                                 ttl_r = MIN(ttl_r, ttl);
1450                                 ttl_r = MIN(ttl_r, minimum);
1451                         } else {
1452                                 /* skip over any other type of resource */
1453                                 j += datalength;
1454                         }
1455                 }
1456         }
1457
1458         if (ttl_r == 0xffffffff)
1459                 ttl_r = 0;
1460
1461         reply_handle(req, flags, ttl_r, &reply);
1462         if (reply.data.raw)
1463                 mm_free(reply.data.raw);
1464         return 0;
1465  err:
1466         if (req)
1467                 reply_handle(req, flags, 0, NULL);
1468         if (reply.data.raw)
1469                 mm_free(reply.data.raw);
1470         return -1;
1471 }
1472
1473 /* Parse a raw request (packet,length) sent to a nameserver port (port) from */
1474 /* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
1475 /* callback. */
1476 static int
1477 request_parse(u8 *packet, int length, struct evdns_server_port *port,
1478                                 struct sockaddr *addr, ev_socklen_t addrlen, struct client_tcp_connection *client)
1479 {
1480         int j = 0;      /* index into packet */
1481         u16 t_;  /* used by the macros */
1482         u32 t32_;  /* used by the macros */
1483         char tmp_name[256]; /* used by the macros */
1484
1485         int i;
1486         u16 trans_id, flags, questions, answers, authority, additional;
1487         struct server_request *server_req = NULL;
1488         u32 ttl;
1489         u16 type, class, rdlen;
1490
1491         ASSERT_LOCKED(port);
1492
1493         /* Get the header fields */
1494         GET16(trans_id);
1495         GET16(flags);
1496         GET16(questions);
1497         GET16(answers);
1498         GET16(authority);
1499         GET16(additional);
1500
1501         if (flags & _QR_MASK) return -1; /* Must not be an answer. */
1502         flags &= (_RD_MASK|_CD_MASK); /* Only RD and CD get preserved. */
1503
1504         server_req = mm_malloc(sizeof(struct server_request));
1505         if (server_req == NULL) return -1;
1506         memset(server_req, 0, sizeof(struct server_request));
1507
1508         server_req->trans_id = trans_id;
1509         if (addr) {
1510                 memcpy(&server_req->addr, addr, addrlen);
1511                 server_req->addrlen = addrlen;
1512         }
1513
1514         server_req->port = port;
1515         server_req->client = client;
1516         server_req->base.flags = flags;
1517         server_req->base.nquestions = 0;
1518         server_req->base.questions = mm_calloc(sizeof(struct evdns_server_question *), questions);
1519         if (server_req->base.questions == NULL)
1520                 goto err;
1521
1522         for (i = 0; i < questions; ++i) {
1523                 u16 type, class;
1524                 struct evdns_server_question *q;
1525                 int namelen;
1526                 if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
1527                         goto err;
1528                 GET16(type);
1529                 GET16(class);
1530                 namelen = (int)strlen(tmp_name);
1531                 q = mm_malloc(sizeof(struct evdns_server_question) + namelen);
1532                 if (!q)
1533                         goto err;
1534                 q->type = type;
1535                 q->dns_question_class = class;
1536                 memcpy(q->name, tmp_name, namelen+1);
1537                 server_req->base.questions[server_req->base.nquestions++] = q;
1538         }
1539
1540 #define SKIP_RR \
1541         do { \
1542                 SKIP_NAME; \
1543                 j += 2 /* type */ + 2 /* class */ + 4 /* ttl */; \
1544                 GET16(rdlen); \
1545                 j += rdlen; \
1546         } while (0)
1547
1548         for (i = 0; i < answers; ++i) {
1549                 SKIP_RR;
1550         }
1551
1552         for (i = 0; i < authority; ++i) {
1553                 SKIP_RR;
1554         }
1555
1556         server_req->max_udp_reply_size = DNS_MAX_UDP_SIZE;
1557         for (i = 0; i < additional; ++i) {
1558                 SKIP_NAME;
1559                 GET16(type);
1560                 GET16(class);
1561                 GET32(ttl);
1562                 GET16(rdlen);
1563                 (void)ttl;
1564                 j += rdlen;
1565                 if (type == TYPE_OPT) {
1566                         /* In case of OPT pseudo-RR `class` field is treated
1567                          * as a requestor's UDP payload size. */
1568                         server_req->max_udp_reply_size = MAX(class, DNS_MAX_UDP_SIZE);
1569                         evdns_server_request_add_reply(&(server_req->base),
1570                                 EVDNS_ADDITIONAL_SECTION,
1571                                 "", /* name */
1572                                 TYPE_OPT, /* type */
1573                                 DNS_MAX_UDP_SIZE, /* class */
1574                                 0, /* ttl */
1575                                 0, /* datalen */
1576                                 0, /* is_name */
1577                                 NULL /* data */
1578                         );
1579                         break;
1580                 }
1581         }
1582
1583         port->refcnt++;
1584
1585         /* Only standard queries are supported. */
1586         if (flags & _OP_MASK) {
1587                 evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
1588                 return -1;
1589         }
1590
1591         port->user_callback(&(server_req->base), port->user_data);
1592
1593         return 0;
1594 err:
1595         if (server_req) {
1596                 if (server_req->base.questions) {
1597                         for (i = 0; i < server_req->base.nquestions; ++i)
1598                                 mm_free(server_req->base.questions[i]);
1599                         mm_free(server_req->base.questions);
1600                 }
1601                 mm_free(server_req);
1602         }
1603         return -1;
1604
1605 #undef SKIP_RR
1606 #undef SKIP_NAME
1607 #undef GET32
1608 #undef GET16
1609 #undef GET8
1610 }
1611
1612 /* Try to choose a strong transaction id which isn't already in flight */
1613 static u16
1614 transaction_id_pick(struct evdns_base *base) {
1615         ASSERT_LOCKED(base);
1616         for (;;) {
1617                 u16 trans_id;
1618                 evutil_secure_rng_get_bytes(&trans_id, sizeof(trans_id));
1619
1620                 if (trans_id == 0xffff) continue;
1621                 /* now check to see if that id is already inflight */
1622                 if (request_find_from_trans_id(base, trans_id) == NULL)
1623                         return trans_id;
1624         }
1625 }
1626
1627 /* choose a namesever to use. This function will try to ignore */
1628 /* nameservers which we think are down and load balance across the rest */
1629 /* by updating the server_head global each time. */
1630 static struct nameserver *
1631 nameserver_pick(struct evdns_base *base) {
1632         struct nameserver *started_at = base->server_head, *picked;
1633         ASSERT_LOCKED(base);
1634         if (!base->server_head) return NULL;
1635
1636         /* if we don't have any good nameservers then there's no */
1637         /* point in trying to find one. */
1638         if (!base->global_good_nameservers) {
1639                 base->server_head = base->server_head->next;
1640                 return base->server_head;
1641         }
1642
1643         /* remember that nameservers are in a circular list */
1644         for (;;) {
1645                 if (base->server_head->state) {
1646                         /* we think this server is currently good */
1647                         picked = base->server_head;
1648                         base->server_head = base->server_head->next;
1649                         return picked;
1650                 }
1651
1652                 base->server_head = base->server_head->next;
1653                 if (base->server_head == started_at) {
1654                         /* all the nameservers seem to be down */
1655                         /* so we just return this one and hope for the */
1656                         /* best */
1657                         EVUTIL_ASSERT(base->global_good_nameservers == 0);
1658                         picked = base->server_head;
1659                         base->server_head = base->server_head->next;
1660                         return picked;
1661                 }
1662         }
1663 }
1664
1665 /* this is called when a namesever socket is ready for reading */
1666 static void
1667 nameserver_read(struct nameserver *ns) {
1668         struct sockaddr_storage ss;
1669         ev_socklen_t addrlen = sizeof(ss);
1670         char addrbuf[128];
1671         const size_t max_packet_size = ns->base->global_max_udp_size;
1672         u8 *packet = mm_malloc(max_packet_size);
1673         ASSERT_LOCKED(ns->base);
1674
1675         if (!packet) {
1676                 nameserver_failed(ns, "not enough memory", 0);
1677                 return;
1678         }
1679
1680         for (;;) {
1681                 const int r = recvfrom(ns->socket, (void*)packet,
1682                     max_packet_size, 0,
1683                     (struct sockaddr*)&ss, &addrlen);
1684                 if (r < 0) {
1685                         int err = evutil_socket_geterror(ns->socket);
1686                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1687                                 goto done;
1688                         nameserver_failed(ns,
1689                             evutil_socket_error_to_string(err), err);
1690                         goto done;
1691                 }
1692                 if (evutil_sockaddr_cmp((struct sockaddr*)&ss,
1693                         (struct sockaddr*)&ns->address, 0)) {
1694                         log(EVDNS_LOG_WARN, "Address mismatch on received "
1695                             "DNS packet.  Apparent source was %s",
1696                             evutil_format_sockaddr_port_(
1697                                     (struct sockaddr *)&ss,
1698                                     addrbuf, sizeof(addrbuf)));
1699                         goto done;
1700                 }
1701
1702                 ns->timedout = 0;
1703                 reply_parse(ns->base, packet, r);
1704         }
1705 done:
1706         mm_free(packet);
1707 }
1708
1709 /* Read a packet from a DNS client on a server port s, parse it, and */
1710 /* act accordingly. */
1711 static void
1712 server_udp_port_read(struct evdns_server_port *s) {
1713         u8 packet[1500];
1714         struct sockaddr_storage addr;
1715         ev_socklen_t addrlen;
1716         int r;
1717         ASSERT_LOCKED(s);
1718
1719         for (;;) {
1720                 addrlen = sizeof(struct sockaddr_storage);
1721                 r = recvfrom(s->socket, (void*)packet, sizeof(packet), 0,
1722                                          (struct sockaddr*) &addr, &addrlen);
1723                 if (r < 0) {
1724                         int err = evutil_socket_geterror(s->socket);
1725                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1726                                 return;
1727                         log(EVDNS_LOG_WARN,
1728                             "Error %s (%d) while reading request.",
1729                             evutil_socket_error_to_string(err), err);
1730                         return;
1731                 }
1732                 request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen, NULL);
1733         }
1734 }
1735
1736 static int
1737 server_send_response(struct evdns_server_port *port, struct server_request *req)
1738 {
1739         u16 packet_size = 0;
1740         struct bufferevent *bev = NULL;
1741         if (req->client) {
1742                 bev = req->client->connection.bev;
1743                 EVUTIL_ASSERT(bev);
1744                 EVUTIL_ASSERT(req->response_len <= 65535);
1745                 packet_size = htons((u16)req->response_len);
1746                 if (bufferevent_write(bev, &packet_size, sizeof(packet_size)))
1747                         goto beferevent_error;
1748                 if (bufferevent_write(bev, (void*)req->response, req->response_len))
1749                         goto beferevent_error;
1750                 return (int)req->response_len;
1751         } else {
1752                 int r = sendto(port->socket, req->response, (int)req->response_len, 0,
1753                                         (struct sockaddr*) &req->addr, (ev_socklen_t)req->addrlen);
1754                 return r;
1755         }
1756
1757 beferevent_error:
1758         log(EVDNS_LOG_WARN, "Failed to send reply to request %p for client %p", (void *)req, (void *)req->client);
1759         /* disconnect if we got bufferevent error */
1760         evdns_remove_tcp_client(port, req->client);
1761         return -1;
1762 }
1763
1764 /* Try to write all pending replies on a given DNS server port. */
1765 static void
1766 server_port_flush(struct evdns_server_port *port)
1767 {
1768         struct server_request *req = port->pending_replies;
1769         ASSERT_LOCKED(port);
1770         while (req) {
1771                 int r = server_send_response(port, req);
1772                 if (r < 0) {
1773                         int err = evutil_socket_geterror(port->socket);
1774                         if (EVUTIL_ERR_RW_RETRIABLE(err))
1775                                 return;
1776                         log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", evutil_socket_error_to_string(err), err);
1777                 }
1778                 if (server_request_free(req)) {
1779                         /* we released the last reference to req->port. */
1780                         return;
1781                 } else {
1782                         EVUTIL_ASSERT(req != port->pending_replies);
1783                         req = port->pending_replies;
1784                 }
1785         }
1786
1787         /* We have no more pending requests; stop listening for 'writeable' events. */
1788         (void) event_del(&port->event);
1789         event_assign(&port->event, port->event_base,
1790                                  port->socket, EV_READ | EV_PERSIST,
1791                                  server_port_ready_callback, port);
1792
1793         if (event_add(&port->event, NULL) < 0) {
1794                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
1795                 /* ???? Do more? */
1796         }
1797 }
1798
1799 /* set if we are waiting for the ability to write to this server. */
1800 /* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
1801 /* we stop these events. */
1802 static void
1803 nameserver_write_waiting(struct nameserver *ns, char waiting) {
1804         ASSERT_LOCKED(ns->base);
1805         if (ns->write_waiting == waiting) return;
1806
1807         ns->write_waiting = waiting;
1808         (void) event_del(&ns->event);
1809         event_assign(&ns->event, ns->base->event_base,
1810             ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
1811             nameserver_ready_callback, ns);
1812         if (event_add(&ns->event, NULL) < 0) {
1813                 char addrbuf[128];
1814                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
1815                     evutil_format_sockaddr_port_(
1816                             (struct sockaddr *)&ns->address,
1817                             addrbuf, sizeof(addrbuf)));
1818                 /* ???? Do more? */
1819         }
1820 }
1821
1822 /* a callback function. Called by libevent when the kernel says that */
1823 /* a nameserver socket is ready for writing or reading */
1824 static void
1825 nameserver_ready_callback(evutil_socket_t fd, short events, void *arg) {
1826         struct nameserver *ns = (struct nameserver *) arg;
1827         (void)fd;
1828
1829         EVDNS_LOCK(ns->base);
1830         if (events & EV_WRITE) {
1831                 ns->choked = 0;
1832                 if (!evdns_transmit(ns->base)) {
1833                         nameserver_write_waiting(ns, 0);
1834                 }
1835         }
1836         if (events & EV_READ) {
1837                 nameserver_read(ns);
1838         }
1839         EVDNS_UNLOCK(ns->base);
1840 }
1841
1842 /* a callback function. Called by libevent when the kernel says that */
1843 /* a server socket is ready for writing or reading. */
1844 static void
1845 server_port_ready_callback(evutil_socket_t fd, short events, void *arg) {
1846         struct evdns_server_port *port = (struct evdns_server_port *) arg;
1847         (void) fd;
1848
1849         EVDNS_LOCK(port);
1850         if (events & EV_WRITE) {
1851                 port->choked = 0;
1852                 server_port_flush(port);
1853         }
1854         if (events & EV_READ) {
1855                 server_udp_port_read(port);
1856         }
1857         EVDNS_UNLOCK(port);
1858 }
1859
/* This is an inefficient representation; only use it via the dnslabel_table_*
 * functions, so that it can be safely replaced with something smarter later. */
#define MAX_LABELS 128
/* Structures used to implement name compression */
/* One remembered name: v is the copy made by dnslabel_table_add(), pos is
 * the position that was recorded together with it. */
struct dnslabel_entry { char *v; off_t pos; };
/* Fixed-capacity list of remembered names; searched linearly. */
struct dnslabel_table {
	int n_labels; /* number of current entries */
	/* map from name to position in message */
	struct dnslabel_entry labels[MAX_LABELS];
};
1870
/* Initialize dnslabel_table: mark it as holding no entries.  The labels[]
 * slots themselves are not touched. */
static void
dnslabel_table_init(struct dnslabel_table *table)
{
	table->n_labels = 0;
}
1877
1878 /* Free all storage held by table, but not the table itself. */
1879 static void
1880 dnslabel_clear(struct dnslabel_table *table)
1881 {
1882         int i;
1883         for (i = 0; i < table->n_labels; ++i)
1884                 mm_free(table->labels[i].v);
1885         table->n_labels = 0;
1886 }
1887
1888 /* return the position of the label in the current message, or -1 if the label */
1889 /* hasn't been used yet. */
1890 static int
1891 dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
1892 {
1893         int i;
1894         for (i = 0; i < table->n_labels; ++i) {
1895                 if (!strcmp(label, table->labels[i].v))
1896                         return table->labels[i].pos;
1897         }
1898         return -1;
1899 }
1900
1901 /* remember that we've used the label at position pos */
1902 static int
1903 dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
1904 {
1905         char *v;
1906         int p;
1907         if (table->n_labels == MAX_LABELS)
1908                 return (-1);
1909         v = mm_strdup(label);
1910         if (v == NULL)
1911                 return (-1);
1912         p = table->n_labels++;
1913         table->labels[p].v = v;
1914         table->labels[p].pos = pos;
1915
1916         return (0);
1917 }
1918
1919 /* Converts a string to a length-prefixed set of DNS labels, starting */
1920 /* at buf[j]. name and buf must not overlap. name_len should be the length */
1921 /* of name.      table is optional, and is used for compression. */
1922 /* */
1923 /* Input: abc.def */
1924 /* Output: <3>abc<3>def<0> */
1925 /* */
1926 /* Returns the first index after the encoded name, or negative on error. */
1927 /*       -1      label was > 63 bytes */
1928 /*       -2      name too long to fit in buffer. */
1929 /* */
1930 static off_t
1931 dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
1932                                   const char *name, const size_t name_len,
1933                                   struct dnslabel_table *table) {
1934         const char *end = name + name_len;
1935         int ref = 0;
1936         u16 t_;
1937
1938 #define APPEND16(x) do {                                                \
1939                 if (j + 2 > (off_t)buf_len)                             \
1940                         goto overflow;                                  \
1941                 t_ = htons(x);                                          \
1942                 memcpy(buf + j, &t_, 2);                                \
1943                 j += 2;                                                 \
1944         } while (0)
1945 #define APPEND32(x) do {                                                \
1946                 if (j + 4 > (off_t)buf_len)                             \
1947                         goto overflow;                                  \
1948                 t32_ = htonl(x);                                        \
1949                 memcpy(buf + j, &t32_, 4);                              \
1950                 j += 4;                                                 \
1951         } while (0)
1952
1953         if (name_len > 255) return -2;
1954
1955         for (;;) {
1956                 const char *const start = name;
1957                 if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
1958                         APPEND16(ref | 0xc000);
1959                         return j;
1960                 }
1961                 name = strchr(name, '.');
1962                 if (!name) {
1963                         const size_t label_len = end - start;
1964                         if (label_len > 63) return -1;
1965                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1966                         if (table) dnslabel_table_add(table, start, j);
1967                         buf[j++] = (ev_uint8_t)label_len;
1968
1969                         memcpy(buf + j, start, label_len);
1970                         j += (int) label_len;
1971                         break;
1972                 } else {
1973                         /* append length of the label. */
1974                         const size_t label_len = name - start;
1975                         if (label_len > 63) return -1;
1976                         if ((size_t)(j+label_len+1) > buf_len) return -2;
1977                         if (table) dnslabel_table_add(table, start, j);
1978                         buf[j++] = (ev_uint8_t)label_len;
1979
1980                         memcpy(buf + j, start, label_len);
1981                         j += (int) label_len;
1982                         /* hop over the '.' */
1983                         name++;
1984                 }
1985         }
1986
1987         /* the labels must be terminated by a 0. */
1988         /* It's possible that the name ended in a . */
1989         /* in which case the zero is already there */
1990         if (!j || buf[j-1]) buf[j++] = 0;
1991         return j;
1992  overflow:
1993         return (-2);
1994 }
1995
/* Returns an upper bound on the size of a DNS request for a name of
 * name_len bytes; the request actually built may be smaller.  The bound
 * consists of a fixed 96-byte allowance for the standard header (the six
 * 16-bit header fields written by evdns_request_data_build() occupy 12 of
 * them), the name plus two bytes, four bytes for the resource type and
 * class, and - when EDNS is enabled for base - the OPT pseudo-record. */
static size_t
evdns_request_len(const struct evdns_base *base, const size_t name_len)
{
	size_t total = 96 + name_len + 2 + 4;

	if (EDNS_ENABLED(base)) {
		total += 1;	/* domain name of the OPT record, always 0 */
		total += 2;	/* resource type */
		total += 2;	/* UDP payload size */
		total += 4;	/* extended RCODE flags */
		total += 2;	/* length of RDATA, always 0 */
	}
	return total;
}
2015
2016 /* build a dns request packet into buf. buf should be at least as long */
2017 /* as evdns_request_len told you it should be. */
2018 /* */
2019 /* Returns the amount of space used. Negative on error. */
2020 static int
2021 evdns_request_data_build(const struct evdns_base *base,
2022         const char *const name, const size_t name_len,
2023         const u16 trans_id, const u16 type, const u16 class, u8 *const buf,
2024         size_t buf_len)
2025 {
2026         off_t j = 0;  /* current offset into buf */
2027         u16 t_;  /* used by the macros */
2028         u32 t32_;  /* used by the macros */
2029
2030         APPEND16(trans_id);
2031         APPEND16(0x0100);  /* standard query, recusion needed */
2032         APPEND16(1);  /* one question */
2033         APPEND16(0);  /* no answers */
2034         APPEND16(0);  /* no authority */
2035         APPEND16(EDNS_ENABLED(base) ? 1 : 0); /* additional */
2036
2037         j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
2038         if (j < 0) {
2039                 return (int)j;
2040         }
2041
2042         APPEND16(type);
2043         APPEND16(class);
2044
2045         if (EDNS_ENABLED(base)) {
2046                 /* The OPT pseudo-RR format 
2047                  * (https://tools.ietf.org/html/rfc6891#section-6.1.2)
2048                  * +------------+--------------+------------------------------+
2049                  * | Field Name | Field Type   | Description                  |
2050                  * +------------+--------------+------------------------------+
2051                  * | NAME       | domain name  | MUST be 0 (root domain)      |
2052                  * | TYPE       | u_int16_t    | OPT (41)                     |
2053                  * | CLASS      | u_int16_t    | requestor's UDP payload size |
2054                  * | TTL        | u_int32_t    | extended RCODE and flags     |
2055                  * | RDLEN      | u_int16_t    | length of all RDATA          |
2056                  * | RDATA      | octet stream | {attribute,value} pairs      |
2057                  * +------------+--------------+------------------------------+ */
2058                 buf[j++] = 0;  /* NAME, always 0 */
2059                 APPEND16(TYPE_OPT);  /* OPT type */
2060                 APPEND16(base->global_max_udp_size);  /* max UDP payload size */
2061                 APPEND32(0);  /* No extended RCODE flags set */
2062                 APPEND16(0);  /* length of RDATA is 0 */
2063         }
2064
2065         return (int)j;
2066  overflow:
2067         return (-1);
2068 }
2069
2070 /* exported function */
2071 struct evdns_server_port *
2072 evdns_add_server_port_with_base(struct event_base *base, evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
2073 {
2074         struct evdns_server_port *port;
2075         if (flags)
2076                 return NULL; /* flags not yet implemented */
2077         if (!(port = mm_malloc(sizeof(struct evdns_server_port))))
2078                 return NULL;
2079         memset(port, 0, sizeof(struct evdns_server_port));
2080
2081
2082         port->socket = socket;
2083         port->refcnt = 1;
2084         port->choked = 0;
2085         port->closing = 0;
2086         port->user_callback = cb;
2087         port->user_data = user_data;
2088         port->pending_replies = NULL;
2089         port->event_base = base;
2090         port->max_client_connections = MAX_CLIENT_CONNECTIONS;
2091         port->tcp_idle_timeout.tv_sec = SERVER_IDLE_CONN_TIMEOUT;
2092         port->tcp_idle_timeout.tv_usec = 0;
2093         port->client_connections_count = 0;
2094         LIST_INIT(&port->client_connections);
2095         event_assign(&port->event, port->event_base,
2096                                  port->socket, EV_READ | EV_PERSIST,
2097                                  server_port_ready_callback, port);
2098         if (event_add(&port->event, NULL) < 0) {
2099                 mm_free(port);
2100                 return NULL;
2101         }
2102         EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2103         return port;
2104 }
2105
2106 /* exported function */
2107 struct evdns_server_port *
2108 evdns_add_server_port_with_listener(struct event_base *base, struct evconnlistener *listener, int flags, evdns_request_callback_fn_type cb, void *user_data)
2109 {
2110         struct evdns_server_port *port;
2111         if (!listener)
2112                 return NULL;
2113         if (flags)
2114                 return NULL; /* flags not yet implemented */
2115
2116         if (!(port = mm_calloc(1, sizeof(struct evdns_server_port))))
2117                 return NULL;
2118         port->socket = -1;
2119         port->refcnt = 1;
2120         port->choked = 0;
2121         port->closing = 0;
2122         port->user_callback = cb;
2123         port->user_data = user_data;
2124         port->pending_replies = NULL;
2125         port->event_base = base;
2126         port->max_client_connections = MAX_CLIENT_CONNECTIONS;
2127         port->client_connections_count = 0;
2128         LIST_INIT(&port->client_connections);
2129         port->listener = listener;
2130         evconnlistener_set_cb(port->listener, incoming_conn_cb, port);
2131
2132         EVTHREAD_ALLOC_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2133         return port;
2134 }
2135
/* Forward declaration: server_tcp_read_packet_cb() below installs this
 * callback before its definition appears. */
static void
server_tcp_event_cb(struct bufferevent *bev, short events, void *ctx);
2138
2139 static int
2140 tcp_read_message(struct tcp_connection *conn, u8 **msg, int *msg_len)
2141 {
2142         struct bufferevent *bev = conn->bev;
2143         struct evbuffer *input = bufferevent_get_input(bev);
2144         u8 *packet = NULL;
2145         int r = 0;
2146
2147         EVUTIL_ASSERT(conn);
2148         EVUTIL_ASSERT(conn->state == TS_CONNECTED);
2149
2150         /* reading new packet size */
2151         if (!conn->awaiting_packet_size) {
2152                 if (evbuffer_get_length(input) < sizeof(ev_uint16_t))
2153                         goto awaiting_next;
2154
2155                 bufferevent_read(bev, (void*)&conn->awaiting_packet_size,
2156                         sizeof(conn->awaiting_packet_size));
2157                 conn->awaiting_packet_size = ntohs(conn->awaiting_packet_size);
2158                 if (conn->awaiting_packet_size <= 0)
2159                         goto fail;
2160         }
2161
2162         /* reading new packet content */
2163         if (evbuffer_get_length(input) < conn->awaiting_packet_size)
2164                 goto awaiting_next;
2165
2166         packet = mm_malloc(conn->awaiting_packet_size);
2167         if (!packet)
2168                 goto fail;
2169
2170         r = (int)bufferevent_read(bev, (void*)packet, conn->awaiting_packet_size);
2171         if (r != conn->awaiting_packet_size) {
2172                 mm_free(packet);
2173                 packet = NULL;
2174                 goto fail;
2175         }
2176
2177         *msg = packet;
2178         *msg_len = r;
2179 awaiting_next:
2180         return 0;
2181 fail:
2182         return 1;
2183 }
2184
2185 static void
2186 server_tcp_read_packet_cb(struct bufferevent *bev, void *ctx)
2187 {
2188         u8 *msg = NULL;
2189         int msg_len = 0;
2190         int rc;
2191         struct client_tcp_connection *client = (struct client_tcp_connection *)ctx;
2192         struct evdns_server_port *port = client->port;
2193         struct tcp_connection *conn = &client->connection;
2194         EVUTIL_ASSERT(port && bev);
2195         EVDNS_LOCK(port);
2196
2197         while (1) {
2198                 if (tcp_read_message(conn, &msg, &msg_len)) {
2199                         log(EVDNS_LOG_MSG, "Closing client connection %p due to error", (void *)bev);
2200                         evdns_remove_tcp_client(port, client);
2201                         rc = port->refcnt;
2202                         EVDNS_UNLOCK(port);
2203                         if (!rc)
2204                                 server_port_free(port);
2205                         return;
2206                 }
2207
2208                 /* Only part of the message was recieved. */
2209                 if (!msg)
2210                         break;
2211
2212                 request_parse(msg, msg_len, port, NULL, 0, client);
2213                 mm_free(msg);
2214                 msg = NULL;
2215                 conn->awaiting_packet_size = 0;
2216         }
2217
2218         bufferevent_setwatermark(bev, EV_READ,
2219                         conn->awaiting_packet_size ? conn->awaiting_packet_size : sizeof(ev_uint16_t), 0);
2220         bufferevent_setcb(bev, server_tcp_read_packet_cb, NULL, server_tcp_event_cb, ctx);
2221         EVDNS_UNLOCK(port);
2222 }
2223
2224 static void
2225 server_tcp_event_cb(struct bufferevent *bev, short events, void *ctx)
2226 {
2227         struct client_tcp_connection *client = (struct client_tcp_connection *)ctx;
2228         struct evdns_server_port *port = client->port;
2229         int rc;
2230         EVUTIL_ASSERT(port && bev);
2231         EVDNS_LOCK(port);
2232         if (events & (BEV_EVENT_EOF | BEV_EVENT_ERROR | BEV_EVENT_TIMEOUT)) {
2233                 log(EVDNS_LOG_DEBUG, "Closing connection %p", (void *)bev);
2234                 evdns_remove_tcp_client(port, client);
2235         }
2236         rc = port->refcnt;
2237         EVDNS_UNLOCK(port);
2238         if (!rc)
2239                 server_port_free(port);
2240 }
2241
2242 static void
2243 incoming_conn_cb(struct evconnlistener *listener, evutil_socket_t fd,
2244                                   struct sockaddr *address, int socklen, void *arg)
2245 {
2246         struct evdns_server_port *port = (struct evdns_server_port*)arg;
2247         struct bufferevent *bev = bufferevent_socket_new(port->event_base, fd, BEV_OPT_CLOSE_ON_FREE);
2248         struct client_tcp_connection *client = NULL;
2249         struct tcp_connection *cd = NULL;
2250
2251         if (!bev)
2252                 goto error;
2253         log(EVDNS_LOG_DEBUG, "New incoming client connection %p", (void *)bev);
2254
2255         bufferevent_set_timeouts(bev, &port->tcp_idle_timeout, &port->tcp_idle_timeout);
2256
2257         client = evdns_add_tcp_client(port, bev);
2258         if (!client)
2259                 goto error;
2260         cd = &client->connection;
2261
2262         cd->state = TS_CONNECTED;
2263         bufferevent_setwatermark(bev, EV_READ, sizeof(ev_uint16_t), 0);
2264         bufferevent_setcb(bev, server_tcp_read_packet_cb, NULL, server_tcp_event_cb, (void *)client);
2265         bufferevent_enable(bev, EV_READ);
2266
2267         return;
2268 error:
2269         if (bev)
2270                 bufferevent_free(bev);
2271         return;
2272 }
2273
/* exported function */
/* Same as evdns_add_server_port_with_base(), passing NULL as the
 * event_base; all other arguments are forwarded unchanged. */
struct evdns_server_port *
evdns_add_server_port(evutil_socket_t socket, int flags, evdns_request_callback_fn_type cb, void *user_data)
{
	return evdns_add_server_port_with_base(NULL, socket, flags, cb, user_data);
}
2279
2280 /* exported function */
2281 void
2282 evdns_close_server_port(struct evdns_server_port *port)
2283 {
2284         EVDNS_LOCK(port);
2285         evdns_remove_all_tcp_clients(port);
2286         if (--port->refcnt == 0) {
2287                 EVDNS_UNLOCK(port);
2288                 server_port_free(port);
2289         } else {
2290                 port->closing = 1;
2291                 EVDNS_UNLOCK(port);
2292         }
2293 }
2294
2295 /* exported function */
2296 int
2297 evdns_server_request_add_reply(struct evdns_server_request *req_, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
2298 {
2299         struct server_request *req = TO_SERVER_REQUEST(req_);
2300         struct server_reply_item **itemp, *item;
2301         int *countp;
2302         int result = -1;
2303
2304         EVDNS_LOCK(req->port);
2305         if (req->response) /* have we already answered? */
2306                 goto done;
2307
2308         switch (section) {
2309         case EVDNS_ANSWER_SECTION:
2310                 itemp = &req->answer;
2311                 countp = &req->n_answer;
2312                 break;
2313         case EVDNS_AUTHORITY_SECTION:
2314                 itemp = &req->authority;
2315                 countp = &req->n_authority;
2316                 break;
2317         case EVDNS_ADDITIONAL_SECTION:
2318                 itemp = &req->additional;
2319                 countp = &req->n_additional;
2320                 break;
2321         default:
2322                 goto done;
2323         }
2324         while (*itemp) {
2325                 itemp = &((*itemp)->next);
2326         }
2327         item = mm_malloc(sizeof(struct server_reply_item));
2328         if (!item)
2329                 goto done;
2330         item->next = NULL;
2331         if (!(item->name = mm_strdup(name))) {
2332                 mm_free(item);
2333                 goto done;
2334         }
2335         item->type = type;
2336         item->dns_question_class = class;
2337         item->ttl = ttl;
2338         item->is_name = is_name != 0;
2339         item->datalen = 0;
2340         item->data = NULL;
2341         if (data) {
2342                 if (item->is_name) {
2343                         if (!(item->data = mm_strdup(data))) {
2344                                 mm_free(item->name);
2345                                 mm_free(item);
2346                                 goto done;
2347                         }
2348                         item->datalen = (u16)-1;
2349                 } else {
2350                         if (!(item->data = mm_malloc(datalen))) {
2351                                 mm_free(item->name);
2352                                 mm_free(item);
2353                                 goto done;
2354                         }
2355                         item->datalen = datalen;
2356                         memcpy(item->data, data, datalen);
2357                 }
2358         }
2359
2360         *itemp = item;
2361         ++(*countp);
2362         result = 0;
2363 done:
2364         EVDNS_UNLOCK(req->port);
2365         return result;
2366 }
2367
2368 /* exported function */
2369 int
2370 evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
2371 {
2372         return evdns_server_request_add_reply(
2373                   req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
2374                   ttl, n*4, 0, addrs);
2375 }
2376
2377 /* exported function */
2378 int
2379 evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, const void *addrs, int ttl)
2380 {
2381         return evdns_server_request_add_reply(
2382                   req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
2383                   ttl, n*16, 0, addrs);
2384 }
2385
2386 /* exported function */
2387 int
2388 evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
2389 {
2390         u32 a;
2391         char buf[32];
2392         if (in && inaddr_name)
2393                 return -1;
2394         else if (!in && !inaddr_name)
2395                 return -1;
2396         if (in) {
2397                 a = ntohl(in->s_addr);
2398                 evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
2399                                 (int)(u8)((a    )&0xff),
2400                                 (int)(u8)((a>>8 )&0xff),
2401                                 (int)(u8)((a>>16)&0xff),
2402                                 (int)(u8)((a>>24)&0xff));
2403                 inaddr_name = buf;
2404         }
2405         return evdns_server_request_add_reply(
2406                   req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
2407                   ttl, -1, 1, hostname);
2408 }
2409
2410 /* exported function */
2411 int
2412 evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
2413 {
2414         return evdns_server_request_add_reply(
2415                   req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
2416                   ttl, -1, 1, cname);
2417 }
2418
2419 /* exported function */
2420 void
2421 evdns_server_request_set_flags(struct evdns_server_request *exreq, int flags)
2422 {
2423         struct server_request *req = TO_SERVER_REQUEST(exreq);
2424         req->base.flags &= ~(EVDNS_FLAGS_AA|EVDNS_FLAGS_RD);
2425         req->base.flags |= flags;
2426 }
2427
2428 static int
2429 evdns_server_request_format_response(struct server_request *req, int err)
2430 {
2431         unsigned char buf[1024 * 64];
2432         size_t buf_len = sizeof(buf);
2433         off_t j = 0, r;
2434         u16 t_;
2435         u32 t32_;
2436         int i;
2437         u16 flags;
2438         struct dnslabel_table table;
2439
2440         if (err < 0 || err > 15) return -1;
2441
2442         /* Set response bit and error code; copy OPCODE and RD fields from
2443          * question; copy RA and AA if set by caller. */
2444         flags = req->base.flags;
2445         flags |= (_QR_MASK | err);
2446
2447         dnslabel_table_init(&table);
2448         APPEND16(req->trans_id);
2449         APPEND16(flags);
2450         APPEND16(req->base.nquestions);
2451         APPEND16(req->n_answer);
2452         APPEND16(req->n_authority);
2453         APPEND16(req->n_additional);
2454
2455         /* Add questions. */
2456         for (i=0; i < req->base.nquestions; ++i) {
2457                 const char *s = req->base.questions[i]->name;
2458                 j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
2459                 if (j < 0) {
2460                         dnslabel_clear(&table);
2461                         return (int) j;
2462                 }
2463                 APPEND16(req->base.questions[i]->type);
2464                 APPEND16(req->base.questions[i]->dns_question_class);
2465         }
2466
2467         /* Add answer, authority, and additional sections. */
2468         for (i=0; i<3; ++i) {
2469                 struct server_reply_item *item;
2470                 if (i==0)
2471                         item = req->answer;
2472                 else if (i==1)
2473                         item = req->authority;
2474                 else
2475                         item = req->additional;
2476                 while (item) {
2477                         r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
2478                         if (r < 0)
2479                                 goto overflow;
2480                         j = r;
2481
2482                         APPEND16(item->type);
2483                         APPEND16(item->dns_question_class);
2484                         APPEND32(item->ttl);
2485                         if (item->is_name) {
2486                                 off_t len_idx = j, name_start;
2487                                 j += 2;
2488                                 name_start = j;
2489                                 r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
2490                                 if (r < 0)
2491                                         goto overflow;
2492                                 j = r;
2493                                 t_ = htons( (short) (j-name_start) );
2494                                 memcpy(buf+len_idx, &t_, 2);
2495                         } else {
2496                                 APPEND16(item->datalen);
2497                                 if (j+item->datalen > (off_t)buf_len)
2498                                         goto overflow;
2499                                 if (item->data) {
2500                                         memcpy(buf+j, item->data, item->datalen);
2501                                         j += item->datalen;
2502                                 } else {
2503                                         EVUTIL_ASSERT(item->datalen == 0);
2504                                 }
2505                         }
2506                         item = item->next;
2507                 }
2508         }
2509
2510         if (j > req->max_udp_reply_size && !req->client) {
2511 overflow:
2512                 j = req->max_udp_reply_size;
2513                 buf[2] |= 0x02; /* set the truncated bit. */
2514         }
2515
2516         req->response_len = j;
2517
2518         if (!(req->response = mm_malloc(req->response_len))) {
2519                 server_request_free_answers(req);
2520                 dnslabel_clear(&table);
2521                 return (-1);
2522         }
2523         memcpy(req->response, buf, req->response_len);
2524         server_request_free_answers(req);
2525         dnslabel_clear(&table);
2526         return (0);
2527 }
2528
2529 /* exported function */
2530 int
2531 evdns_server_request_respond(struct evdns_server_request *req_, int err)
2532 {
2533         struct server_request *req = TO_SERVER_REQUEST(req_);
2534         struct evdns_server_port *port = req->port;
2535         int r = -1;
2536
2537         EVDNS_LOCK(port);
2538         if (!req->response) {
2539                 if ((r = evdns_server_request_format_response(req, err))<0)
2540                         goto done;
2541         }
2542
2543         r = server_send_response(port, req);
2544         if (r < 0 && req->client) {
2545                 int sock_err = evutil_socket_geterror(port->socket);
2546                 if (EVUTIL_ERR_RW_RETRIABLE(sock_err))
2547                         goto done;
2548
2549                 if (port->pending_replies) {
2550                         req->prev_pending = port->pending_replies->prev_pending;
2551                         req->next_pending = port->pending_replies;
2552                         req->prev_pending->next_pending =
2553                                 req->next_pending->prev_pending = req;
2554                 } else {
2555                         req->prev_pending = req->next_pending = req;
2556                         port->pending_replies = req;
2557                         port->choked = 1;
2558
2559                         (void) event_del(&port->event);
2560                         event_assign(&port->event, port->event_base, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);
2561
2562                         if (event_add(&port->event, NULL) < 0) {
2563                                 log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
2564                         }
2565
2566                 }
2567
2568                 r = 1;
2569                 goto done;
2570         }
2571         if (server_request_free(req)) {
2572                 r = 0;
2573                 goto done;
2574         }
2575
2576         if (port->pending_replies)
2577                 server_port_flush(port);
2578
2579         r = 0;
2580 done:
2581         EVDNS_UNLOCK(port);
2582         return r;
2583 }
2584
2585 /* Free all storage held by RRs in req. */
2586 static void
2587 server_request_free_answers(struct server_request *req)
2588 {
2589         struct server_reply_item *victim, *next, **list;
2590         int i;
2591         for (i = 0; i < 3; ++i) {
2592                 if (i==0)
2593                         list = &req->answer;
2594                 else if (i==1)
2595                         list = &req->authority;
2596                 else
2597                         list = &req->additional;
2598
2599                 victim = *list;
2600                 while (victim) {
2601                         next = victim->next;
2602                         mm_free(victim->name);
2603                         victim->name = NULL;
2604                         if (victim->data) {
2605                                 mm_free(victim->data);
2606                                 victim->data = NULL;
2607                         }
2608                         mm_free(victim);
2609                         victim = next;
2610                 }
2611                 *list = NULL;
2612         }
2613 }
2614
2615 /* Free all storage held by req, and remove links to it. */
2616 /* return true iff we just wound up freeing the server_port. */
2617 static int
2618 server_request_free(struct server_request *req)
2619 {
2620         int i, rc=1, lock=0;
2621         if (req->base.questions) {
2622                 for (i = 0; i < req->base.nquestions; ++i) {
2623                         mm_free(req->base.questions[i]);
2624                         req->base.questions[i] = NULL;
2625                 }
2626                 mm_free(req->base.questions);
2627                 req->base.questions = NULL;
2628         }
2629
2630         if (req->port) {
2631                 EVDNS_LOCK(req->port);
2632                 lock=1;
2633                 if (req->port->pending_replies == req) {
2634                         if (req->next_pending && req->next_pending != req)
2635                                 req->port->pending_replies = req->next_pending;
2636                         else
2637                                 req->port->pending_replies = NULL;
2638                 }
2639                 rc = --req->port->refcnt;
2640         }
2641
2642         if (req->response) {
2643                 mm_free(req->response);
2644                 req->response = NULL;
2645         }
2646
2647         server_request_free_answers(req);
2648
2649         if (req->next_pending && req->next_pending != req) {
2650                 req->next_pending->prev_pending = req->prev_pending;
2651                 req->prev_pending->next_pending = req->next_pending;
2652         }
2653
2654         if (rc == 0) {
2655                 EVDNS_UNLOCK(req->port); /* ????? nickm */
2656                 server_port_free(req->port);
2657                 mm_free(req);
2658                 return (1);
2659         }
2660         if (lock)
2661                 EVDNS_UNLOCK(req->port);
2662         mm_free(req);
2663         return (0);
2664 }
2665
2666 /* Free all storage held by an evdns_server_port.  Only called when  */
2667 static void
2668 server_port_free(struct evdns_server_port *port)
2669 {
2670         EVUTIL_ASSERT(port);
2671         EVUTIL_ASSERT(!port->refcnt);
2672         EVUTIL_ASSERT(!port->pending_replies);
2673         if (port->socket > 0) {
2674                 evutil_closesocket(port->socket);
2675                 port->socket = -1;
2676         }
2677
2678         /* if tcp server */
2679         if (port->listener) {
2680                 evconnlistener_free(port->listener);
2681         } else {
2682                 (void) event_del(&port->event);
2683                 event_debug_unassign(&port->event);
2684         }
2685
2686         EVTHREAD_FREE_LOCK(port->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
2687         mm_free(port);
2688 }
2689
/* exported function */
/* Discard a server request without sending anything: the request and all
 * of its storage are released.  Always returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *req_)
{
	/* Whether the port was freed along with the request is of no
	 * interest to the caller. */
	(void) server_request_free(TO_SERVER_REQUEST(req_));
	return 0;
}
2698
2699 /* exported function */
2700 int
2701 evdns_server_request_get_requesting_addr(struct evdns_server_request *req_, struct sockaddr *sa, int addr_len)
2702 {
2703         struct server_request *req = TO_SERVER_REQUEST(req_);
2704         if (addr_len < (int)req->addrlen)
2705                 return -1;
2706         memcpy(sa, &(req->addr), req->addrlen);
2707         return req->addrlen;
2708 }
2709
2710 static void
2711 retransmit_all_tcp_requests_for(struct nameserver *server)
2712 {
2713         int i = 0;
2714         for (i = 0; i < server->base->n_req_heads; ++i) {
2715                 struct request *started_at = server->base->req_heads[i];
2716                 struct request *req = started_at;
2717                 if (!req)
2718                         continue;
2719
2720                 do {
2721                         if (req->ns == server && (req->handle->tcp_flags & DNS_QUERY_USEVC)) {
2722                                 if (req->tx_count >= req->base->global_max_retransmits) {
2723                                         log(EVDNS_LOG_DEBUG, "Giving up on request %p; tx_count==%d",
2724                                                 (void *)req, req->tx_count);
2725                                         reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2726                                         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2727                                 } else {
2728                                         (void) evtimer_del(&req->timeout_event);
2729                                         evdns_request_transmit(req);
2730                                 }
2731                         }
2732                         req = req->next;
2733                 } while (req != started_at);
2734         }
2735 }
2736
2737 /* this is a libevent callback function which is called when a request */
2738 /* has timed out. */
2739 static void
2740 evdns_request_timeout_callback(evutil_socket_t fd, short events, void *arg) {
2741         struct request *const req = (struct request *) arg;
2742         struct evdns_base *base = req->base;
2743
2744         (void) fd;
2745         (void) events;
2746
2747         log(EVDNS_LOG_DEBUG, "Request %p timed out", arg);
2748         EVDNS_LOCK(base);
2749
2750         if (req->tx_count >= req->base->global_max_retransmits) {
2751                 struct nameserver *ns = req->ns;
2752                 /* this request has failed */
2753                 log(EVDNS_LOG_DEBUG, "Giving up on request %p; tx_count==%d",
2754                     arg, req->tx_count);
2755                 reply_schedule_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
2756
2757                 request_finished(req, &REQ_HEAD(req->base, req->trans_id), 1);
2758                 nameserver_failed(ns, "request timed out.", 0);
2759         } else {
2760                 /* if request is using tcp connection, so tear connection */
2761                 if (req->handle->tcp_flags & DNS_QUERY_USEVC) {
2762                         disconnect_and_free_connection(req->ns->connection);
2763                         req->ns->connection = NULL;
2764
2765                         /* client can have the only connection to DNS server */
2766                         retransmit_all_tcp_requests_for(req->ns);
2767                 } else {
2768                         /* retransmit it */
2769                         log(EVDNS_LOG_DEBUG, "Retransmitting request %p; tx_count==%d by udp", arg, req->tx_count);
2770                         (void) evtimer_del(&req->timeout_event);
2771                         request_swap_ns(req, nameserver_pick(base));
2772                         evdns_request_transmit(req);
2773
2774                         req->ns->timedout++;
2775                         if (req->ns->timedout > req->base->global_max_nameserver_timeout) {
2776                                 req->ns->timedout = 0;
2777                                 nameserver_failed(req->ns, "request timed out.", 0);
2778                         }
2779                 }
2780         }
2781
2782         EVDNS_UNLOCK(base);
2783 }
2784
2785 /* try to send a request to a given server. */
2786 /* */
2787 /* return: */
2788 /*   0 ok */
2789 /*   1 temporary failure */
2790 /*   2 other failure */
2791 static int
2792 evdns_request_transmit_to(struct request *req, struct nameserver *server) {
2793         int r;
2794         ASSERT_LOCKED(req->base);
2795         ASSERT_VALID_REQUEST(req);
2796
2797         if (server->requests_inflight == 1 &&
2798                 req->base->disable_when_inactive &&
2799                 event_add(&server->event, NULL) < 0) {
2800                 return 1;
2801         }
2802
2803         r = sendto(server->socket, (void*)req->request, req->request_len, 0,
2804             (struct sockaddr *)&server->address, server->addrlen);
2805         if (r < 0) {
2806                 int err = evutil_socket_geterror(server->socket);
2807                 if (EVUTIL_ERR_RW_RETRIABLE(err))
2808                         return 1;
2809                 nameserver_failed(req->ns, evutil_socket_error_to_string(err), err);
2810                 return 2;
2811         } else if (r != (int)req->request_len) {
2812                 return 1;  /* short write */
2813         } else {
2814                 return 0;
2815         }
2816 }
2817
2818 /* try to connect to a given server. */
2819 /* */
2820 /* return: */
2821 /*   0 ok */
2822 /*   1 temporary failure */
2823 /*   2 other failure */
2824 static int
2825 evdns_tcp_connect_if_disconnected(struct nameserver *server)
2826 {
2827         struct tcp_connection *conn = server->connection;
2828         struct timeval *timeout = &server->base->global_tcp_idle_timeout;
2829         if (conn && conn->state != TS_DISCONNECTED && conn->bev != NULL)
2830                 return 0;
2831
2832         disconnect_and_free_connection(conn);
2833         conn = new_tcp_connecton(bufferevent_socket_new(server->base->event_base, -1, BEV_OPT_CLOSE_ON_FREE));
2834         if (!conn)
2835                 return 2;
2836         server->connection = conn;
2837
2838         if (bufferevent_set_timeouts(conn->bev, timeout, timeout))
2839                 return 1;
2840
2841         EVUTIL_ASSERT(conn->state == TS_DISCONNECTED);
2842         if (bufferevent_socket_connect(conn->bev, (struct sockaddr *)&server->address, server->addrlen))
2843                 return 1;
2844
2845         conn->state = TS_CONNECTING;
2846         log(EVDNS_LOG_DEBUG, "New tcp connection %p created", (void *)conn);
2847         return 0;
2848 }
2849
2850 static void
2851 client_tcp_event_cb(struct bufferevent *bev, short events, void *ctx);
2852
2853
2854 static void
2855 client_tcp_read_packet_cb(struct bufferevent *bev, void *ctx)
2856 {
2857         u8 *msg = NULL;
2858         int msg_len = 0;
2859         struct nameserver *server = (struct nameserver*)ctx;
2860         struct tcp_connection *conn = server->connection;
2861         EVUTIL_ASSERT(server && bev);
2862         EVDNS_LOCK(server->base);
2863
2864         while (1) {
2865                 if (tcp_read_message(conn, &msg, &msg_len)) {
2866                         disconnect_and_free_connection(server->connection);
2867                         server->connection = NULL;
2868                         EVDNS_UNLOCK(server->base);
2869                         return;
2870                 }
2871
2872                 /* Only part of the message was recieved. */
2873                 if (!msg)
2874                         break;
2875
2876                 reply_parse(server->base, msg, msg_len);
2877                 mm_free(msg);
2878                 msg = NULL;
2879                 conn->awaiting_packet_size = 0;
2880         }
2881
2882         bufferevent_setwatermark(bev, EV_READ,
2883                 conn->awaiting_packet_size ? conn->awaiting_packet_size : sizeof(ev_uint16_t), 0);
2884         bufferevent_setcb(bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, ctx);
2885         EVDNS_UNLOCK(server->base);
2886 }
2887
2888 static void
2889 client_tcp_event_cb(struct bufferevent *bev, short events, void *ctx) {
2890         struct nameserver *server = (struct nameserver*)ctx;
2891         struct tcp_connection *conn = server->connection;
2892         EVUTIL_ASSERT(server);
2893         EVDNS_LOCK(server->base);
2894         EVUTIL_ASSERT(conn && conn->bev == bev && bev);
2895
2896         log(EVDNS_LOG_DEBUG, "Event %d on connection %p", events, (void *)conn);
2897
2898         if (events & (BEV_EVENT_TIMEOUT)) {
2899                 disconnect_and_free_connection(server->connection);
2900                 server->connection = NULL;
2901         } else if (events & (BEV_EVENT_EOF | BEV_EVENT_ERROR)) {
2902                 disconnect_and_free_connection(server->connection);
2903                 server->connection = NULL;
2904         } else if (events & BEV_EVENT_CONNECTED) {
2905                 EVUTIL_ASSERT (conn->state == TS_CONNECTING);
2906                 conn->state = TS_CONNECTED;
2907                 evutil_make_socket_nonblocking(bufferevent_getfd(bev));
2908                 bufferevent_setcb(bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, server);
2909                 bufferevent_setwatermark(bev, EV_READ, sizeof(ev_uint16_t), 0);
2910         }
2911         EVDNS_UNLOCK(server->base);
2912 }
2913
2914 /* try to send a request to a given server. */
2915 /* */
2916 /* return: */
2917 /*   0 ok */
2918 /*   1 temporary failure */
2919 /*   2 other failure */
2920 static int
2921 evdns_request_transmit_through_tcp(struct request *req, struct nameserver *server) {
2922         uint16_t packet_size;
2923         struct tcp_connection *conn = NULL;
2924         int r;
2925         ASSERT_LOCKED(req->base);
2926         ASSERT_VALID_REQUEST(req);
2927
2928         if ((r = evdns_tcp_connect_if_disconnected(server)))
2929                 return r;
2930
2931         conn = server->connection;
2932         bufferevent_setcb(conn->bev, client_tcp_read_packet_cb, NULL, client_tcp_event_cb, server);
2933
2934         log(EVDNS_LOG_DEBUG, "Sending request %p via tcp connection %p", (void *)req, (void *)conn);
2935         packet_size = htons(req->request_len);
2936         if (bufferevent_write(conn->bev, &packet_size, sizeof(packet_size)) )
2937                 goto fail;
2938         if (bufferevent_write(conn->bev, (void*)req->request, req->request_len) )
2939                 goto fail;
2940         if (bufferevent_enable(conn->bev, EV_READ))
2941                 goto fail;
2942         if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0)
2943                 goto fail;
2944
2945         return 0;
2946 fail:
2947         log(EVDNS_LOG_WARN, "Failed to send request %p via tcp connection %p", (void *)req, (void *)conn);
2948         disconnect_and_free_connection(server->connection);
2949         server->connection = NULL;
2950         return 2;
2951 }
2952
2953 /* try to send a request, updating the fields of the request */
2954 /* as needed */
2955 /* */
2956 /* return: */
2957 /*   0 ok */
2958 /*   1 failed */
2959 static int
2960 evdns_request_transmit(struct request *req) {
2961         int retcode = 0, r;
2962
2963         ASSERT_LOCKED(req->base);
2964         ASSERT_VALID_REQUEST(req);
2965         /* if we fail to send this packet then this flag marks it */
2966         /* for evdns_transmit */
2967         req->transmit_me = 1;
2968         EVUTIL_ASSERT(req->trans_id != 0xffff);
2969
2970         if (!req->ns)
2971         {
2972                 /* unable to transmit request if no nameservers */
2973                 return 1;
2974         }
2975
2976         if (req->ns->choked) {
2977                 /* don't bother trying to write to a socket */
2978                 /* which we have had EAGAIN from */
2979                 return 1;
2980         }
2981
2982         if (req->handle->tcp_flags & DNS_QUERY_USEVC) {
2983                 r = evdns_request_transmit_through_tcp(req, req->ns);
2984                 /*
2985                 If connection didn't initiated now, so report about temporary problems.
2986                 We don't mark name server as chocked so udp packets possibly have no
2987                 problems during transmit. Simply we will retry attempt later */
2988                 if (r == 1) {
2989                         return r;
2990                 }
2991         } else {
2992                 r = evdns_request_transmit_to(req, req->ns);
2993         }
2994         switch (r) {
2995         case 1:
2996                 /* temp failure */
2997                 req->ns->choked = 1;
2998                 nameserver_write_waiting(req->ns, 1);
2999                 return 1;
3000         case 2:
3001                 /* failed to transmit the request entirely. we can fallthrough since
3002                  * we'll set a timeout, which will time out, and make us retransmit the
3003                  * request anyway. */
3004                 retcode = 1;
3005                 EVUTIL_FALLTHROUGH;
3006         default:
3007                 /* all ok */
3008                 log(EVDNS_LOG_DEBUG,
3009                     "Setting timeout for request %p, sent to nameserver %p", (void *)req, (void *)req->ns);
3010                 if (evtimer_add(&req->timeout_event, &req->base->global_timeout) < 0) {
3011                         log(EVDNS_LOG_WARN,
3012                       "Error from libevent when adding timer for request %p",
3013                             (void *)req);
3014                         /* ???? Do more? */
3015                 }
3016                 req->tx_count++;
3017                 req->transmit_me = 0;
3018                 return retcode;
3019         }
3020 }
3021
3022 static void
3023 nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
3024         struct nameserver *const ns = (struct nameserver *) arg;
3025         (void) type;
3026         (void) count;
3027         (void) ttl;
3028         (void) addresses;
3029
3030         if (result == DNS_ERR_CANCEL) {
3031                 /* We canceled this request because the nameserver came up
3032                  * for some other reason.  Do not change our opinion about
3033                  * the nameserver. */
3034                 return;
3035         }
3036
3037         EVDNS_LOCK(ns->base);
3038         ns->probe_request = NULL;
3039         if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
3040                 /* this is a good reply */
3041                 nameserver_up(ns);
3042         } else {
3043                 nameserver_probe_failed(ns);
3044         }
3045         EVDNS_UNLOCK(ns->base);
3046 }
3047
3048 static void
3049 nameserver_send_probe(struct nameserver *const ns) {
3050         struct evdns_request *handle;
3051         struct request *req;
3052         char addrbuf[128];
3053         /* here we need to send a probe to a given nameserver */
3054         /* in the hope that it is up now. */
3055
3056         ASSERT_LOCKED(ns->base);
3057         log(EVDNS_LOG_DEBUG, "Sending probe to %s",
3058             evutil_format_sockaddr_port_(
3059                     (struct sockaddr *)&ns->address,
3060                     addrbuf, sizeof(addrbuf)));
3061         handle = mm_calloc(1, sizeof(*handle));
3062         if (!handle) return;
3063         req = request_new(ns->base, handle, TYPE_A, "google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
3064         if (!req) {
3065                 mm_free(handle);
3066                 return;
3067         }
3068         ns->probe_request = handle;
3069         /* we force this into the inflight queue no matter what */
3070         request_trans_id_set(req, transaction_id_pick(ns->base));
3071         req->ns = ns;
3072         request_submit(req);
3073 }
3074
3075 /* returns: */
3076 /*   0 didn't try to transmit anything */
3077 /*   1 tried to transmit something */
3078 static int
3079 evdns_transmit(struct evdns_base *base) {
3080         char did_try_to_transmit = 0;
3081         int i;
3082
3083         ASSERT_LOCKED(base);
3084         for (i = 0; i < base->n_req_heads; ++i) {
3085                 if (base->req_heads[i]) {
3086                         struct request *const started_at = base->req_heads[i], *req = started_at;
3087                         /* first transmit all the requests which are currently waiting */
3088                         do {
3089                                 if (req->transmit_me) {
3090                                         did_try_to_transmit = 1;
3091                                         evdns_request_transmit(req);
3092                                 }
3093
3094                                 req = req->next;
3095                         } while (req != started_at);
3096                 }
3097         }
3098
3099         return did_try_to_transmit;
3100 }
3101
3102 /* exported function */
3103 int
3104 evdns_base_count_nameservers(struct evdns_base *base)
3105 {
3106         const struct nameserver *server;
3107         int n = 0;
3108
3109         EVDNS_LOCK(base);
3110         server = base->server_head;
3111         if (!server)
3112                 goto done;
3113         do {
3114                 ++n;
3115                 server = server->next;
3116         } while (server != base->server_head);
3117 done:
3118         EVDNS_UNLOCK(base);
3119         return n;
3120 }
3121
3122 int
3123 evdns_count_nameservers(void)
3124 {
3125         return evdns_base_count_nameservers(current_base);
3126 }
3127
3128 /* exported function */
3129 int
3130 evdns_base_clear_nameservers_and_suspend(struct evdns_base *base)
3131 {
3132         struct nameserver *server, *started_at;
3133         int i;
3134
3135         EVDNS_LOCK(base);
3136         server = base->server_head;
3137         started_at = base->server_head;
3138         if (!server) {
3139                 EVDNS_UNLOCK(base);
3140                 return 0;
3141         }
3142         while (1) {
3143                 struct nameserver *next = server->next;
3144                 disconnect_and_free_connection(server->connection);
3145                 server->connection = NULL;
3146                 (void) event_del(&server->event);
3147                 if (evtimer_initialized(&server->timeout_event))
3148                         (void) evtimer_del(&server->timeout_event);
3149                 if (server->probe_request) {
3150                         evdns_cancel_request(server->base, server->probe_request);
3151                         server->probe_request = NULL;
3152                 }
3153                 if (server->socket >= 0)
3154                         evutil_closesocket(server->socket);
3155                 mm_free(server);
3156                 if (next == started_at)
3157                         break;
3158                 server = next;
3159         }
3160         base->server_head = NULL;
3161         base->global_good_nameservers = 0;
3162
3163         for (i = 0; i < base->n_req_heads; ++i) {
3164                 struct request *req, *req_started_at;
3165                 req = req_started_at = base->req_heads[i];
3166                 while (req) {
3167                         struct request *next = req->next;
3168                         req->tx_count = req->reissue_count = 0;
3169                         req->ns = NULL;
3170                         /* ???? What to do about searches? */
3171                         (void) evtimer_del(&req->timeout_event);
3172                         req->trans_id = 0;
3173                         req->transmit_me = 0;
3174
3175                         base->global_requests_waiting++;
3176                         evdns_request_insert(req, &base->req_waiting_head);
3177                         /* We want to insert these suspended elements at the front of
3178                          * the waiting queue, since they were pending before any of
3179                          * the waiting entries were added.  This is a circular list,
3180                          * so we can just shift the start back by one.*/
3181                         base->req_waiting_head = base->req_waiting_head->prev;
3182
3183                         if (next == req_started_at)
3184                                 break;
3185                         req = next;
3186                 }
3187                 base->req_heads[i] = NULL;
3188         }
3189
3190         base->global_requests_inflight = 0;
3191
3192         EVDNS_UNLOCK(base);
3193         return 0;
3194 }
3195
3196 int
3197 evdns_clear_nameservers_and_suspend(void)
3198 {
3199         return evdns_base_clear_nameservers_and_suspend(current_base);
3200 }
3201
3202
/* exported function */
/* Pump base's waiting queue while holding the base lock.
 * Always returns 0. */
int
evdns_base_resume(struct evdns_base *base)
{
	const int rc = 0;

	EVDNS_LOCK(base);
	evdns_requests_pump_waiting_queue(base);
	EVDNS_UNLOCK(base);

	return rc;
}
3213
3214 int
3215 evdns_resume(void)
3216 {
3217         return evdns_base_resume(current_base);
3218 }
3219
3220 static int
3221 evdns_nameserver_add_impl_(struct evdns_base *base, const struct sockaddr *address, int addrlen) {
3222         /* first check to see if we already have this nameserver */
3223
3224         const struct nameserver *server = base->server_head, *const started_at = base->server_head;
3225         struct nameserver *ns;
3226         int err = 0;
3227         char addrbuf[128];
3228
3229         ASSERT_LOCKED(base);
3230         if (server) {
3231                 do {
3232                         if (!evutil_sockaddr_cmp((struct sockaddr*)&server->address, address, 1)) return 3;
3233                         server = server->next;
3234                 } while (server != started_at);
3235         }
3236         if (addrlen > (int)sizeof(ns->address)) {
3237                 log(EVDNS_LOG_DEBUG, "Addrlen %d too long.", (int)addrlen);
3238                 return 2;
3239         }
3240
3241         ns = (struct nameserver *) mm_malloc(sizeof(struct nameserver));
3242         if (!ns) return -1;
3243
3244         memset(ns, 0, sizeof(struct nameserver));
3245         ns->base = base;
3246
3247         evtimer_assign(&ns->timeout_event, ns->base->event_base, nameserver_prod_callback, ns);
3248
3249         ns->socket = evutil_socket_(address->sa_family,
3250             SOCK_DGRAM|EVUTIL_SOCK_NONBLOCK|EVUTIL_SOCK_CLOEXEC, 0);
3251         if (ns->socket < 0) { err = 1; goto out1; }
3252
3253         if (base->global_outgoing_addrlen &&
3254             !evutil_sockaddr_is_loopback_(address)) {
3255                 if (bind(ns->socket,
3256                         (struct sockaddr*)&base->global_outgoing_address,
3257                         base->global_outgoing_addrlen) < 0) {
3258                         log(EVDNS_LOG_WARN,"Couldn't bind to outgoing address");
3259                         err = 2;
3260                         goto out2;
3261                 }
3262         }
3263
3264         if (base->so_rcvbuf) {
3265                 if (setsockopt(ns->socket, SOL_SOCKET, SO_RCVBUF,
3266                     (void *)&base->so_rcvbuf, sizeof(base->so_rcvbuf))) {
3267                         log(EVDNS_LOG_WARN, "Couldn't set SO_RCVBUF to %i", base->so_rcvbuf);
3268                         err = -SO_RCVBUF;
3269                         goto out2;
3270                 }
3271         }
3272         if (base->so_sndbuf) {
3273                 if (setsockopt(ns->socket, SOL_SOCKET, SO_SNDBUF,
3274                     (void *)&base->so_sndbuf, sizeof(base->so_sndbuf))) {
3275                         log(EVDNS_LOG_WARN, "Couldn't set SO_SNDBUF to %i", base->so_sndbuf);
3276                         err = -SO_SNDBUF;
3277                         goto out2;
3278                 }
3279         }
3280
3281         memcpy(&ns->address, address, addrlen);
3282         ns->addrlen = addrlen;
3283         ns->state = 1;
3284         ns->connection = NULL;
3285         event_assign(&ns->event, ns->base->event_base, ns->socket,
3286                                  EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
3287         if (!base->disable_when_inactive && event_add(&ns->event, NULL) < 0) {
3288                 err = 2;
3289                 goto out2;
3290         }
3291
3292         log(EVDNS_LOG_DEBUG, "Added nameserver %s as %p",
3293             evutil_format_sockaddr_port_(address, addrbuf, sizeof(addrbuf)), (void *)ns);
3294
3295         /* insert this nameserver into the list of them */
3296         if (!base->server_head) {
3297                 ns->next = ns->prev = ns;
3298                 base->server_head = ns;
3299         } else {
3300                 ns->next = base->server_head->next;
3301                 ns->prev = base->server_head;
3302                 base->server_head->next = ns;
3303                 ns->next->prev = ns;
3304         }
3305
3306         base->global_good_nameservers++;
3307
3308         return 0;
3309
3310 out2:
3311         evutil_closesocket(ns->socket);
3312 out1:
3313         event_debug_unassign(&ns->event);
3314         mm_free(ns);
3315         log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d",
3316             evutil_format_sockaddr_port_(address, addrbuf, sizeof(addrbuf)), err);
3317         return err;
3318 }
3319
/* exported function */
/* Add a nameserver given as an IPv4 address; `address` is stored in
 * sin_addr.s_addr unchanged and the port is set to 53.  Returns the
 * result of evdns_nameserver_add_impl_(). */
int
evdns_base_nameserver_add(struct evdns_base *base, unsigned long int address)
{
	struct sockaddr_in addr4;
	int rc;

	memset(&addr4, 0, sizeof(addr4));
	addr4.sin_family = AF_INET;
	addr4.sin_port = htons(53);
	addr4.sin_addr.s_addr = address;
#ifdef EVENT__HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
	addr4.sin_len = sizeof(addr4);
#endif

	EVDNS_LOCK(base);
	rc = evdns_nameserver_add_impl_(base, (struct sockaddr*)&addr4, sizeof(addr4));
	EVDNS_UNLOCK(base);
	return rc;
}
3338
3339 int
3340 evdns_nameserver_add(unsigned long int address) {
3341         if (!current_base)
3342                 current_base = evdns_base_new(NULL, 0);
3343         return evdns_base_nameserver_add(current_base, address);
3344 }
3345
3346 static void
3347 sockaddr_setport(struct sockaddr *sa, ev_uint16_t port)
3348 {
3349         if (sa->sa_family == AF_INET) {
3350                 ((struct sockaddr_in *)sa)->sin_port = htons(port);
3351         } else if (sa->sa_family == AF_INET6) {
3352                 ((struct sockaddr_in6 *)sa)->sin6_port = htons(port);
3353         }
3354 }
3355
3356 static ev_uint16_t
3357 sockaddr_getport(struct sockaddr *sa)
3358 {
3359         if (sa->sa_family == AF_INET) {
3360                 return ntohs(((struct sockaddr_in *)sa)->sin_port);
3361         } else if (sa->sa_family == AF_INET6) {
3362                 return ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
3363         } else {
3364                 return 0;
3365         }
3366 }
3367
3368 /* exported function */
3369 int
3370 evdns_base_nameserver_ip_add(struct evdns_base *base, const char *ip_as_string) {
3371         struct sockaddr_storage ss;
3372         struct sockaddr *sa;
3373         int len = sizeof(ss);
3374         int res;
3375         if (evutil_parse_sockaddr_port(ip_as_string, (struct sockaddr *)&ss,
3376                 &len)) {
3377                 log(EVDNS_LOG_WARN, "Unable to parse nameserver address %s",
3378                         ip_as_string);
3379                 return 4;
3380         }
3381         sa = (struct sockaddr *) &ss;
3382         if (sockaddr_getport(sa) == 0)
3383                 sockaddr_setport(sa, 53);
3384
3385         EVDNS_LOCK(base);
3386         res = evdns_nameserver_add_impl_(base, sa, len);
3387         EVDNS_UNLOCK(base);
3388         return res;
3389 }
3390
3391 int
3392 evdns_nameserver_ip_add(const char *ip_as_string) {
3393         if (!current_base)
3394                 current_base = evdns_base_new(NULL, 0);
3395         return evdns_base_nameserver_ip_add(current_base, ip_as_string);
3396 }
3397
3398 int
3399 evdns_base_nameserver_sockaddr_add(struct evdns_base *base,
3400     const struct sockaddr *sa, ev_socklen_t len, unsigned flags)
3401 {
3402         int res;
3403         EVUTIL_ASSERT(base);
3404         EVDNS_LOCK(base);
3405         res = evdns_nameserver_add_impl_(base, sa, len);
3406         EVDNS_UNLOCK(base);
3407         return res;
3408 }
3409
3410 int
3411 evdns_base_get_nameserver_addr(struct evdns_base *base, int idx,
3412     struct sockaddr *sa, ev_socklen_t len)
3413 {
3414         int result = -1;
3415         int i;
3416         struct nameserver *server;
3417         EVDNS_LOCK(base);
3418         server = base->server_head;
3419         for (i = 0; i < idx && server; ++i, server = server->next) {
3420                 if (server->next == base->server_head)
3421                         goto done;
3422         }
3423         if (! server)
3424                 goto done;
3425
3426         if (server->addrlen > len) {
3427                 result = (int) server->addrlen;
3428                 goto done;
3429         }
3430
3431         memcpy(sa, &server->address, server->addrlen);
3432         result = (int) server->addrlen;
3433 done:
3434         EVDNS_UNLOCK(base);
3435         return result;
3436 }
3437
3438 int
3439 evdns_base_get_nameserver_fd(struct evdns_base *base, int idx)
3440 {
3441         int result = -1;
3442         int i;
3443         struct nameserver *server;
3444         EVDNS_LOCK(base);
3445         server = base->server_head;
3446         for (i = 0; i < idx && server; ++i, server = server->next) {
3447                 if (server->next == base->server_head)
3448                         goto done;
3449         }
3450         if (! server)
3451                 goto done;
3452         result = server->socket;
3453 done:
3454         EVDNS_UNLOCK(base);
3455         return result;
3456 }
3457
3458
3459 /* remove from the queue */
3460 static void
3461 evdns_request_remove(struct request *req, struct request **head)
3462 {
3463         ASSERT_LOCKED(req->base);
3464         ASSERT_VALID_REQUEST(req);
3465
3466 #if 0
3467         {
3468                 struct request *ptr;
3469                 int found = 0;
3470                 EVUTIL_ASSERT(*head != NULL);
3471
3472                 ptr = *head;
3473                 do {
3474                         if (ptr == req) {
3475                                 found = 1;
3476                                 break;
3477                         }
3478                         ptr = ptr->next;
3479                 } while (ptr != *head);
3480                 EVUTIL_ASSERT(found);
3481
3482                 EVUTIL_ASSERT(req->next);
3483         }
3484 #endif
3485
3486         if (req->next == req) {
3487                 /* only item in the list */
3488                 *head = NULL;
3489         } else {
3490                 req->next->prev = req->prev;
3491                 req->prev->next = req->next;
3492                 if (*head == req) *head = req->next;
3493         }
3494         req->next = req->prev = NULL;
3495 }
3496
3497 /* insert into the tail of the queue */
3498 static void
3499 evdns_request_insert(struct request *req, struct request **head) {
3500         ASSERT_LOCKED(req->base);
3501         ASSERT_VALID_REQUEST(req);
3502         if (!*head) {
3503                 *head = req;
3504                 req->next = req->prev = req;
3505                 return;
3506         }
3507
3508         req->prev = (*head)->prev;
3509         req->prev->next = req;
3510         req->next = *head;
3511         (*head)->prev = req;
3512 }
3513
/* Return the number of '.' characters in the NUL-terminated string s. */
static int
string_num_dots(const char *s) {
	int dots = 0;
	const char *hit;

	for (hit = strchr(s, '.'); hit != NULL; hit = strchr(hit + 1, '.'))
		++dots;

	return dots;
}
3523
3524 static struct request *
3525 request_new(struct evdns_base *base, struct evdns_request *handle, int type,
3526             const char *name, int flags, evdns_callback_type callback,
3527             void *user_ptr) {
3528
3529         const char issuing_now =
3530             (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
3531
3532         const size_t name_len = strlen(name);
3533         const size_t request_max_len = evdns_request_len(base, name_len);
3534         const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
3535         /* the request data is alloced in a single block with the header */
3536         struct request *const req =
3537             mm_malloc(sizeof(struct request) + request_max_len);
3538         int rlen;
3539         char namebuf[256];
3540         (void) flags;
3541
3542         ASSERT_LOCKED(base);
3543
3544         if (!req) return NULL;
3545
3546         if (name_len >= sizeof(namebuf)) {
3547                 mm_free(req);
3548                 return NULL;
3549         }
3550
3551         memset(req, 0, sizeof(struct request));
3552         req->request_size = (u16)(sizeof(struct request) + request_max_len);
3553         req->base = base;
3554
3555         evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
3556
3557         if (base->global_randomize_case) {
3558                 unsigned i;
3559                 char randbits[(sizeof(namebuf)+7)/8];
3560                 strlcpy(namebuf, name, sizeof(namebuf));
3561                 evutil_secure_rng_get_bytes(randbits, (name_len+7)/8);
3562                 for (i = 0; i < name_len; ++i) {
3563                         if (EVUTIL_ISALPHA_(namebuf[i])) {
3564                                 if ((randbits[i >> 3] & (1<<(i & 7))))
3565                                         namebuf[i] |= 0x20;
3566                                 else
3567                                         namebuf[i] &= ~0x20;
3568                         }
3569                 }
3570                 name = namebuf;
3571         }
3572
3573         /* request data lives just after the header */
3574         req->request = ((u8 *) req) + sizeof(struct request);
3575         /* denotes that the request data shouldn't be free()ed */
3576         req->request_appended = 1;
3577         rlen = evdns_request_data_build(base, name, name_len, trans_id,
3578             type, CLASS_INET, req->request, request_max_len);
3579         if (rlen < 0)
3580                 goto err1;
3581
3582         req->request_len = rlen;
3583         req->trans_id = trans_id;
3584         req->tx_count = 0;
3585         req->request_type = type;
3586         req->user_pointer = user_ptr;
3587         req->user_callback = callback;
3588         req->ns = issuing_now ? nameserver_pick(base) : NULL;
3589         req->next = req->prev = NULL;
3590         req->handle = handle;
3591         if (handle) {
3592                 handle->current_req = req;
3593                 handle->base = base;
3594         }
3595
3596         if (flags & DNS_CNAME_CALLBACK)
3597                 req->need_cname = 1;
3598
3599         return req;
3600 err1:
3601         mm_free(req);
3602         return NULL;
3603 }
3604
3605 static struct request *
3606 request_clone(struct evdns_base *base, struct request* current)
3607 {
3608         const char issuing_now =
3609             (base->global_requests_inflight < base->global_max_requests_inflight) ? 1 : 0;
3610         const u16 trans_id = issuing_now ? transaction_id_pick(base) : 0xffff;
3611         /* the request data is alloced in a single block with the header */
3612         struct request *const req = mm_malloc(current->request_size);
3613         EVUTIL_ASSERT(current && base);
3614         ASSERT_LOCKED(base);
3615
3616         if (!req)
3617                 return NULL;
3618         memcpy(req, current, current->request_size);
3619
3620         evtimer_assign(&req->timeout_event, req->base->event_base, evdns_request_timeout_callback, req);
3621
3622         /* request data lives just after the header */
3623         req->request = ((u8 *) req) + sizeof(struct request);
3624         /* We need to replace transact id */
3625         request_trans_id_set(req, trans_id);
3626
3627         req->tx_count = 0;
3628         req->ns = issuing_now ? nameserver_pick(base) : NULL;
3629         req->next = req->prev = NULL;
3630         req->handle = NULL;
3631         log(EVDNS_LOG_DEBUG, "Clone new request TID %d from TID %d", req->trans_id, current->trans_id);
3632
3633         return req;
3634 }
3635
3636 static void
3637 request_submit(struct request *const req) {
3638         struct evdns_base *base = req->base;
3639         ASSERT_LOCKED(base);
3640         ASSERT_VALID_REQUEST(req);
3641         if (req->ns) {
3642                 /* if it has a nameserver assigned then this is going */
3643                 /* straight into the inflight queue */
3644                 evdns_request_insert(req, &REQ_HEAD(base, req->trans_id));
3645
3646                 base->global_requests_inflight++;
3647                 req->ns->requests_inflight++;
3648
3649                 evdns_request_transmit(req);
3650         } else {
3651                 evdns_request_insert(req, &base->req_waiting_head);
3652                 base->global_requests_waiting++;
3653         }
3654 }
3655
3656 /* exported function */
3657 void
3658 evdns_cancel_request(struct evdns_base *base, struct evdns_request *handle)
3659 {
3660         struct request *req;
3661
3662         if (!handle->current_req)
3663                 return;
3664
3665         if (!base) {
3666                 /* This redundancy is silly; can we fix it? (Not for 2.0) XXXX */
3667                 base = handle->base;
3668                 if (!base)
3669                         base = handle->current_req->base;
3670         }
3671
3672         EVDNS_LOCK(base);
3673         if (handle->pending_cb) {
3674                 EVDNS_UNLOCK(base);
3675                 return;
3676         }
3677
3678         req = handle->current_req;
3679         ASSERT_VALID_REQUEST(req);
3680
3681         reply_schedule_callback(req, 0, DNS_ERR_CANCEL, NULL);
3682         if (req->ns) {
3683                 /* remove from inflight queue */
3684                 request_finished(req, &REQ_HEAD(base, req->trans_id), 1);
3685         } else {
3686                 /* remove from global_waiting head */
3687                 request_finished(req, &base->req_waiting_head, 1);
3688         }
3689         EVDNS_UNLOCK(base);
3690 }
3691
3692 /* exported function */
3693 struct evdns_request *
3694 evdns_base_resolve_ipv4(struct evdns_base *base, const char *name, int flags,
3695     evdns_callback_type callback, void *ptr) {
3696         struct evdns_request *handle;
3697         struct request *req;
3698         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
3699         handle = mm_calloc(1, sizeof(*handle));
3700         if (handle == NULL)
3701                 return NULL;
3702         EVDNS_LOCK(base);
3703         handle->tcp_flags = base->global_tcp_flags;
3704         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3705         if (flags & DNS_QUERY_NO_SEARCH) {
3706                 req =
3707                         request_new(base, handle, TYPE_A, name, flags,
3708                                     callback, ptr);
3709                 if (req)
3710                         request_submit(req);
3711         } else {
3712                 search_request_new(base, handle, TYPE_A, name, flags,
3713                     callback, ptr);
3714         }
3715         if (handle->current_req == NULL) {
3716                 mm_free(handle);
3717                 handle = NULL;
3718         }
3719         EVDNS_UNLOCK(base);
3720         return handle;
3721 }
3722
3723 int evdns_resolve_ipv4(const char *name, int flags,
3724                                            evdns_callback_type callback, void *ptr)
3725 {
3726         return evdns_base_resolve_ipv4(current_base, name, flags, callback, ptr)
3727                 ? 0 : -1;
3728 }
3729
3730
3731 /* exported function */
3732 struct evdns_request *
3733 evdns_base_resolve_ipv6(struct evdns_base *base,
3734     const char *name, int flags,
3735     evdns_callback_type callback, void *ptr)
3736 {
3737         struct evdns_request *handle;
3738         struct request *req;
3739         log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
3740         handle = mm_calloc(1, sizeof(*handle));
3741         if (handle == NULL)
3742                 return NULL;
3743         EVDNS_LOCK(base);
3744         handle->tcp_flags = base->global_tcp_flags;
3745         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3746         if (flags & DNS_QUERY_NO_SEARCH) {
3747                 req = request_new(base, handle, TYPE_AAAA, name, flags,
3748                                   callback, ptr);
3749                 if (req)
3750                         request_submit(req);
3751         } else {
3752                 search_request_new(base, handle, TYPE_AAAA, name, flags,
3753                     callback, ptr);
3754         }
3755         if (handle->current_req == NULL) {
3756                 mm_free(handle);
3757                 handle = NULL;
3758         }
3759         EVDNS_UNLOCK(base);
3760         return handle;
3761 }
3762
3763 int evdns_resolve_ipv6(const char *name, int flags,
3764     evdns_callback_type callback, void *ptr) {
3765         return evdns_base_resolve_ipv6(current_base, name, flags, callback, ptr)
3766                 ? 0 : -1;
3767 }
3768
3769 struct evdns_request *
3770 evdns_base_resolve_reverse(struct evdns_base *base, const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3771         char buf[32];
3772         struct evdns_request *handle;
3773         struct request *req;
3774         u32 a;
3775         EVUTIL_ASSERT(in);
3776         a = ntohl(in->s_addr);
3777         evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
3778                         (int)(u8)((a    )&0xff),
3779                         (int)(u8)((a>>8 )&0xff),
3780                         (int)(u8)((a>>16)&0xff),
3781                         (int)(u8)((a>>24)&0xff));
3782         handle = mm_calloc(1, sizeof(*handle));
3783         if (handle == NULL)
3784                 return NULL;
3785         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
3786         EVDNS_LOCK(base);
3787         handle->tcp_flags = base->global_tcp_flags;
3788         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3789         req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
3790         if (req)
3791                 request_submit(req);
3792         if (handle->current_req == NULL) {
3793                 mm_free(handle);
3794                 handle = NULL;
3795         }
3796         EVDNS_UNLOCK(base);
3797         return (handle);
3798 }
3799
3800 int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3801         return evdns_base_resolve_reverse(current_base, in, flags, callback, ptr)
3802                 ? 0 : -1;
3803 }
3804
3805 struct evdns_request *
3806 evdns_base_resolve_reverse_ipv6(struct evdns_base *base, const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3807         /* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
3808         char buf[73];
3809         char *cp;
3810         struct evdns_request *handle;
3811         struct request *req;
3812         int i;
3813         EVUTIL_ASSERT(in);
3814         cp = buf;
3815         for (i=15; i >= 0; --i) {
3816                 u8 byte = in->s6_addr[i];
3817                 *cp++ = "0123456789abcdef"[byte & 0x0f];
3818                 *cp++ = '.';
3819                 *cp++ = "0123456789abcdef"[byte >> 4];
3820                 *cp++ = '.';
3821         }
3822         EVUTIL_ASSERT(cp + strlen("ip6.arpa") < buf+sizeof(buf));
3823         memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
3824         handle = mm_calloc(1, sizeof(*handle));
3825         if (handle == NULL)
3826                 return NULL;
3827         log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
3828         EVDNS_LOCK(base);
3829         handle->tcp_flags = base->global_tcp_flags;
3830         handle->tcp_flags |= flags & (DNS_QUERY_USEVC | DNS_QUERY_IGNTC);
3831         req = request_new(base, handle, TYPE_PTR, buf, flags, callback, ptr);
3832         if (req)
3833                 request_submit(req);
3834         if (handle->current_req == NULL) {
3835                 mm_free(handle);
3836                 handle = NULL;
3837         }
3838         EVDNS_UNLOCK(base);
3839         return (handle);
3840 }
3841
3842 int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
3843         return evdns_base_resolve_reverse_ipv6(current_base, in, flags, callback, ptr)
3844                 ? 0 : -1;
3845 }
3846
3847 /* ================================================================= */
3848 /* Search support */
3849 /* */
3850 /* the libc resolver has support for searching a number of domains */
3851 /* to find a name. If nothing else then it takes the single domain */
3852 /* from the gethostname() call. */
3853 /* */
3854 /* It can also be configured via the domain and search options in a */
3855 /* resolv.conf. */
3856 /* */
3857 /* The ndots option controls how many dots it takes for the resolver */
3858 /* to decide that a name is non-local and so try a raw lookup first. */
3859
/* One entry of a search list: a header followed directly, in the same
 * allocation, by the bytes of the domain name. */
struct search_domain {
	/* number of bytes of domain text that follow this struct */
	int len;
	/* next entry in the singly linked list, or NULL at the end */
	struct search_domain *next;
	/* the text string is appended to this structure */
};
3865
/* Reference-counted search configuration: the ndots setting and the list
 * of search domains. */
struct search_state {
	/* number of references; the state and its domains are freed when
	 * this drops to zero */
	int refcount;
	/* the ndots option (see the "Search support" notes above) */
	int ndots;
	/* count of domains added to the list */
	int num_domains;
	/* first entry of the domain list, or NULL when empty */
	struct search_domain *head;
};
3872
3873 static void
3874 search_state_decref(struct search_state *const state) {
3875         if (!state) return;
3876         state->refcount--;
3877         if (!state->refcount) {
3878                 struct search_domain *next, *dom;
3879                 for (dom = state->head; dom; dom = next) {
3880                         next = dom->next;
3881                         mm_free(dom);
3882                 }
3883                 mm_free(state);
3884         }
3885 }
3886
3887 static struct search_state *
3888 search_state_new(void) {
3889         struct search_state *state = (struct search_state *) mm_malloc(sizeof(struct search_state));
3890         if (!state) return NULL;
3891         memset(state, 0, sizeof(struct search_state));
3892         state->refcount = 1;
3893         state->ndots = 1;
3894
3895         return state;
3896 }
3897
3898 static void
3899 search_postfix_clear(struct evdns_base *base) {
3900         search_state_decref(base->global_search_state);
3901
3902         base->global_search_state = search_state_new();
3903 }
3904
/* exported function */
/* Replace base's search state with a fresh one, under the base lock. */
void
evdns_base_search_clear(struct evdns_base *base)
{
	EVDNS_LOCK(base);
	{
		search_postfix_clear(base);
	}
	EVDNS_UNLOCK(base);
}
3913
3914 void
3915 evdns_search_clear(void) {
3916         evdns_base_search_clear(current_base);
3917 }
3918
3919 static void
3920 search_postfix_add(struct evdns_base *base, const char *domain) {
3921         size_t domain_len;
3922         struct search_domain *sdomain;
3923         while (domain[0] == '.') domain++;
3924         domain_len = strlen(domain);
3925
3926         ASSERT_LOCKED(base);
3927         if (!base->global_search_state) base->global_search_state = search_state_new();
3928         if (!base->global_search_state) return;
3929         base->global_search_state->num_domains++;
3930
3931         sdomain = (struct search_domain *) mm_malloc(sizeof(struct search_domain) + domain_len);
3932         if (!sdomain) return;
3933         memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
3934         sdomain->next = base->global_search_state->head;
3935         sdomain->len = (int) domain_len;
3936
3937         base->global_search_state->head = sdomain;
3938 }
3939
3940 /* reverse the order of members in the postfix list. This is needed because, */
3941 /* when parsing resolv.conf we push elements in the wrong order */
3942 static void
3943 search_reverse(struct evdns_base *base) {
3944         struct search_domain *cur, *prev = NULL, *next;
3945         ASSERT_LOCKED(base);
3946         cur = base->global_search_state->head;
3947         while (cur) {
3948                 next = cur->next;
3949                 cur->next = prev;
3950                 prev = cur;
3951                 cur = next;
3952         }
3953
3954         base->global_search_state->head = prev;
3955 }
3956
/* exported function: add `domain` to the search list of `base` while holding
 * the base lock. */
void
evdns_base_search_add(struct evdns_base *base, const char *domain)
{
	EVDNS_LOCK(base);

	search_postfix_add(base, domain);

	EVDNS_UNLOCK(base);
}
3964 void
3965 evdns_search_add(const char *domain) {
3966         evdns_base_search_add(current_base, domain);
3967 }
3968
3969 /* exported function */
3970 void
3971 evdns_base_search_ndots_set(struct evdns_base *base, const int ndots) {
3972         EVDNS_LOCK(base);
3973         if (!base->global_search_state) base->global_search_state = search_state_new();
3974         if (base->global_search_state)
3975                 base->global_search_state->ndots = ndots;
3976         EVDNS_UNLOCK(base);
3977 }
3978 void
3979 evdns_search_ndots_set(const int ndots) {
3980         evdns_base_search_ndots_set(current_base, ndots);
3981 }
3982
3983 static void
3984 search_set_from_hostname(struct evdns_base *base) {
3985         char hostname[EVDNS_NAME_MAX + 1], *domainname;
3986
3987         ASSERT_LOCKED(base);
3988         search_postfix_clear(base);
3989         if (gethostname(hostname, sizeof(hostname))) return;
3990         domainname = strchr(hostname, '.');
3991         if (!domainname) return;
3992         search_postfix_add(base, domainname);
3993 }
3994
3995 /* warning: returns malloced string */
3996 static char *
3997 search_make_new(const struct search_state *const state, int n, const char *const base_name) {
3998         const size_t base_len = strlen(base_name);
3999         char need_to_append_dot;
4000         struct search_domain *dom;
4001
4002         if (!base_len) return NULL;
4003         need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
4004
4005         for (dom = state->head; dom; dom = dom->next) {
4006                 if (!n--) {
4007                         /* this is the postfix we want */
4008                         /* the actual postfix string is kept at the end of the structure */
4009                         const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
4010                         const int postfix_len = dom->len;
4011                         char *const newname = (char *) mm_malloc(base_len + need_to_append_dot + postfix_len + 1);
4012                         if (!newname) return NULL;
4013                         memcpy(newname, base_name, base_len);
4014                         if (need_to_append_dot) newname[base_len] = '.';
4015                         memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
4016                         newname[base_len + need_to_append_dot + postfix_len] = 0;
4017                         return newname;
4018                 }
4019         }
4020
4021         /* we ran off the end of the list and still didn't find the requested string */
4022         EVUTIL_ASSERT(0);
4023         return NULL; /* unreachable; stops warnings in some compilers. */
4024 }
4025
4026 static struct request *
4027 search_request_new(struct evdns_base *base, struct evdns_request *handle,
4028                    int type, const char *const name, int flags,
4029                    evdns_callback_type user_callback, void *user_arg) {
4030         ASSERT_LOCKED(base);
4031         EVUTIL_ASSERT(type == TYPE_A || type == TYPE_AAAA);
4032         EVUTIL_ASSERT(handle->current_req == NULL);
4033         if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
4034              base->global_search_state &&
4035                  base->global_search_state->num_domains) {
4036                 /* we have some domains to search */
4037                 struct request *req;
4038                 if (string_num_dots(name) >= base->global_search_state->ndots) {
4039                         req = request_new(base, handle, type, name, flags, user_callback, user_arg);
4040                         if (!req) return NULL;
4041                         handle->search_index = -1;
4042                 } else {
4043                         char *const new_name = search_make_new(base->global_search_state, 0, name);
4044                         if (!new_name) return NULL;
4045                         req = request_new(base, handle, type, new_name, flags, user_callback, user_arg);
4046                         mm_free(new_name);
4047                         if (!req) return NULL;
4048                         handle->search_index = 0;
4049                 }
4050                 EVUTIL_ASSERT(handle->search_origname == NULL);
4051                 handle->search_origname = mm_strdup(name);
4052                 if (handle->search_origname == NULL) {
4053                         /* XXX Should we dealloc req? If yes, how? */
4054                         if (req)
4055                                 mm_free(req);
4056                         return NULL;
4057                 }
4058                 handle->search_state = base->global_search_state;
4059                 handle->search_flags = flags;
4060                 base->global_search_state->refcount++;
4061                 request_submit(req);
4062                 return req;
4063         } else {
4064                 struct request *const req = request_new(base, handle, type, name, flags, user_callback, user_arg);
4065                 if (!req) return NULL;
4066                 request_submit(req);
4067                 return req;
4068         }
4069 }
4070
4071 /* this is called when a request has failed to find a name. We need to check */
4072 /* if it is part of a search and, if so, try the next name in the list */
4073 /* returns: */
4074 /*   0 another request has been submitted */
4075 /*   1 no more requests needed */
4076 static int
4077 search_try_next(struct evdns_request *const handle) {
4078         struct request *req = handle->current_req;
4079         struct evdns_base *base = req->base;
4080         struct request *newreq;
4081         ASSERT_LOCKED(base);
4082         if (handle->search_state) {
4083                 /* it is part of a search */
4084                 char *new_name;
4085                 handle->search_index++;
4086                 if (handle->search_index >= handle->search_state->num_domains) {
4087                         /* no more postfixes to try, however we may need to try */
4088                         /* this name without a postfix */
4089                         if (string_num_dots(handle->search_origname) < handle->search_state->ndots) {
4090                                 /* yep, we need to try it raw */
4091                                 newreq = request_new(base, NULL, req->request_type, handle->search_origname, handle->search_flags, req->user_callback, req->user_pointer);
4092                                 log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", handle->search_origname);
4093                                 if (newreq) {
4094                                         search_request_finished(handle);
4095                                         goto submit_next;
4096                                 }
4097                         }
4098                         return 1;
4099                 }
4100
4101                 new_name = search_make_new(handle->search_state, handle->search_index, handle->search_origname);
4102                 if (!new_name) return 1;
4103                 log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, handle->search_index);
4104                 newreq = request_new(base, NULL, req->request_type, new_name, handle->search_flags, req->user_callback, req->user_pointer);
4105                 mm_free(new_name);
4106                 if (!newreq) return 1;
4107                 goto submit_next;
4108         }
4109         return 1;
4110
4111 submit_next:
4112         request_finished(req, &REQ_HEAD(req->base, req->trans_id), 0);
4113         handle->current_req = newreq;
4114         newreq->handle = handle;
4115         request_submit(newreq);
4116         return 0;
4117 }
4118
4119 static void
4120 search_request_finished(struct evdns_request *const handle) {
4121         ASSERT_LOCKED(handle->current_req->base);
4122         if (handle->search_state) {
4123                 search_state_decref(handle->search_state);
4124                 handle->search_state = NULL;
4125         }
4126         if (handle->search_origname) {
4127                 mm_free(handle->search_origname);
4128                 handle->search_origname = NULL;
4129         }
4130 }
4131
4132 /* ================================================================= */
4133 /* Parsing resolv.conf files */
4134
4135 static void
4136 evdns_resolv_set_defaults(struct evdns_base *base, int flags) {
4137         int add_default = flags & DNS_OPTION_NAMESERVERS;
4138         if (flags & DNS_OPTION_NAMESERVERS_NO_DEFAULT)
4139                 add_default = 0;
4140
4141         /* if the file isn't found then we assume a local resolver */
4142         ASSERT_LOCKED(base);
4143         if (flags & DNS_OPTION_SEARCH)
4144                 search_set_from_hostname(base);
4145         if (add_default)
4146                 evdns_base_nameserver_ip_add(base, "127.0.0.1");
4147 }
4148
#ifndef EVENT__HAVE_STRTOK_R
/* Replacement for POSIX strtok_r() on platforms that lack it.
 *
 * Splits the string into tokens separated by any of the characters in
 * `delim`.  Pass the string as `s` on the first call and NULL afterwards;
 * `*state` carries the position between calls.  The input is modified in
 * place.  Returns the next token, or NULL when none is left.
 *
 * Like the real strtok_r(), runs of delimiters (including leading ones) are
 * skipped, so no empty tokens are ever returned. */
static char *
strtok_r(char *s, const char *delim, char **state) {
	char *start, *end;

	start = s ? s : *state;
	if (!start)
		return NULL;

	/* skip delimiters in front of the token */
	start += strspn(start, delim);
	if (!*start) {
		*state = NULL;
		return NULL;
	}

	end = start + strcspn(start, delim);
	if (*end) {
		*end = '\0';
		*state = end + 1;
	} else {
		/* the token runs to the end of the string */
		*state = NULL;
	}
	return start;
}
#endif
4170
/* helper version of atoi which returns -1 on error.  Errors are: a NULL or
 * empty string, anything that is not entirely a base-10 number, and a value
 * that does not fit into an int. */
static int
strtoint(const char *const str)
{
	/* int range, derived from unsigned so that no extra header is needed */
	const long int_max = (long) (~0U >> 1);
	const long int_min = -int_max - 1;
	char *endptr;
	long r;

	if (!str) return -1;
	r = strtol(str, &endptr, 10);
	/* endptr == str: no digits were consumed at all (e.g. "") */
	if (endptr == str || *endptr) return -1;
	/* refuse to silently truncate a long that does not fit an int */
	if (r > int_max || r < int_min) return -1;
	return (int) r;
}
4180
/* Parse a number of seconds (possibly fractional) into a timeval; return -1
 * on error and 0 on success.  Rejected are: trailing garbage, negative
 * values, NaN, values too large for the integer conversion below, and
 * anything shorter than one millisecond. */
static int
evdns_strtotimeval(const char *const str, struct timeval *out)
{
	double d;
	char *endptr;
	d = strtod(str, &endptr);
	if (*endptr) return -1;
	/* written as a negated >= so that NaN is rejected as well */
	if (!(d >= 0)) return -1;
	/* converting a double that does not fit to int is undefined
	 * behavior; 2147483647 is the largest value a 32-bit int holds
	 * (this also catches infinity) */
	if (d > 2147483647.0) return -1;
	out->tv_sec = (int) d;
	out->tv_usec = (int) ((d - (int) d)*1000000);
	if (out->tv_sec == 0 && out->tv_usec < 1000) /* less than 1 msec */
		return -1;
	return 0;
}
4196
/* Like strtoint(), but a successfully parsed value is forced into the
 * range [min, max].  The error value -1 from strtoint() is passed through
 * unchanged. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int parsed = strtoint(str);

	if (parsed == -1)
		return -1;
	if (parsed < min)
		return min;
	return (parsed > max) ? max : parsed;
}
4211
4212 static int
4213 evdns_base_set_max_requests_inflight(struct evdns_base *base, int maxinflight)
4214 {
4215         int old_n_heads = base->n_req_heads, n_heads;
4216         struct request **old_heads = base->req_heads, **new_heads, *req;
4217         int i;
4218
4219         ASSERT_LOCKED(base);
4220         if (maxinflight < 1)
4221                 maxinflight = 1;
4222         n_heads = (maxinflight+4) / 5;
4223         EVUTIL_ASSERT(n_heads > 0);
4224         new_heads = mm_calloc(n_heads, sizeof(struct request*));
4225         if (!new_heads)
4226                 return (-1);
4227         if (old_heads) {
4228                 for (i = 0; i < old_n_heads; ++i) {
4229                         while (old_heads[i]) {
4230                                 req = old_heads[i];
4231                                 evdns_request_remove(req, &old_heads[i]);
4232                                 evdns_request_insert(req, &new_heads[req->trans_id % n_heads]);
4233                         }
4234                 }
4235                 mm_free(old_heads);
4236         }
4237         base->req_heads = new_heads;
4238         base->n_req_heads = n_heads;
4239         base->global_max_requests_inflight = maxinflight;
4240         return (0);
4241 }
4242
4243 /* exported function */
4244 int
4245 evdns_base_set_option(struct evdns_base *base,
4246     const char *option, const char *val)
4247 {
4248         int res;
4249         EVDNS_LOCK(base);
4250         res = evdns_base_set_option_impl(base, option, val, DNS_OPTIONS_ALL);
4251         EVDNS_UNLOCK(base);
4252         return res;
4253 }
4254
/* Return nonzero iff `s1` names the option `optionname`.
 *
 * Option names are given as "option:".  `s1` is accepted as "option",
 * "option:" or "option:randomjunk"; the last form exists for the
 * resolv.conf parser. */
static inline int
str_matches_option(const char *s1, const char *optionname)
{
	const size_t name_len = strlen(optionname);
	const size_t given_len = strlen(s1);
	size_t cmp_len;

	if (given_len > name_len) {
		/* "option:value": only the "option:" prefix has to agree */
		cmp_len = name_len;
	} else if (given_len == name_len || given_len == name_len - 1) {
		/* "option:" or "option": all of s1 has to agree */
		cmp_len = given_len;
	} else {
		return 0;
	}
	return strncmp(s1, optionname, cmp_len) == 0;
}
4270
4271 /* exported function */
4272 int
4273 evdns_server_port_set_option(struct evdns_server_port *port,
4274         enum evdns_server_option option, size_t value)
4275 {
4276         int res = 0;
4277         EVDNS_LOCK(port);
4278         switch (option) {
4279         case EVDNS_SOPT_TCP_MAX_CLIENTS:
4280                 if (!port->listener) {
4281                         log(EVDNS_LOG_WARN, "EVDNS_SOPT_TCP_MAX_CLIENTS option can be set only on TCP server");
4282                         res = -1;
4283                         goto end;
4284                 }
4285                 port->max_client_connections = value;
4286                 log(EVDNS_LOG_DEBUG, "Setting EVDNS_SOPT_TCP_MAX_CLIENTS to %u", port->max_client_connections);
4287                 break;
4288         case EVDNS_SOPT_TCP_IDLE_TIMEOUT:
4289                 if (!port->listener) {
4290                         log(EVDNS_LOG_WARN, "EVDNS_SOPT_TCP_IDLE_TIMEOUT option can be set only on TCP server");
4291                         res = -1;
4292                         goto end;
4293                 }
4294                 port->tcp_idle_timeout.tv_sec = value;
4295                 port->tcp_idle_timeout.tv_usec = 0;
4296                 log(EVDNS_LOG_DEBUG, "Setting EVDNS_SOPT_TCP_IDLE_TIMEOUT to %u seconds",
4297                         (unsigned)port->tcp_idle_timeout.tv_sec);
4298                 break;
4299         default:
4300                 log(EVDNS_LOG_WARN, "Invalid DNS server option %d", (int)option);
4301                 res = -1;
4302                 break;
4303         }
4304 end:
4305         EVDNS_UNLOCK(port);
4306         return res;
4307 }
4308
/* Apply one configuration option to `base`.
 *
 * `option` is matched with str_matches_option(), so both "name" and
 * "name:anything" select the option "name:"; `val` is the value as text.
 * `flags` is a mask of DNS_OPTION_* categories: an option whose category
 * (SEARCH, MISC or NAMESERVERS) is not enabled is recognized but not applied.
 *
 * Returns -1 when the value is malformed or the option could not be applied,
 * 0 otherwise -- including for options that were skipped because of `flags`
 * and for option names that are not recognized at all.  For most options the
 * value is validated before `flags` is consulted.  The caller must hold the
 * base lock. */
static int
evdns_base_set_option_impl(struct evdns_base *base,
    const char *option, const char *val, int flags)
{
	ASSERT_LOCKED(base);
	if (str_matches_option(option, "ndots:")) {
		const int ndots = strtoint(val);
		if (ndots == -1) return -1;
		if (!(flags & DNS_OPTION_SEARCH)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
		if (!base->global_search_state) base->global_search_state = search_state_new();
		if (!base->global_search_state) return -1;
		base->global_search_state->ndots = ndots;
	} else if (str_matches_option(option, "timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting timeout to %s", val);
		memcpy(&base->global_timeout, &tv, sizeof(struct timeval));
	} else if (str_matches_option(option, "getaddrinfo-allow-skew:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting getaddrinfo-allow-skew to %s",
		    val);
		memcpy(&base->global_getaddrinfo_allow_skew, &tv,
		    sizeof(struct timeval));
	} else if (str_matches_option(option, "max-timeouts:")) {
		const int maxtimeout = strtoint_clipped(val, 1, 255);
		if (maxtimeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
			maxtimeout);
		base->global_max_nameserver_timeout = maxtimeout;
	} else if (str_matches_option(option, "max-inflight:")) {
		const int maxinflight = strtoint_clipped(val, 1, 65000);
		if (maxinflight == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
			maxinflight);
		/* resizing the request table allocates and can fail; do not
		 * report success when the limit was not actually changed */
		if (evdns_base_set_max_requests_inflight(base, maxinflight) < 0)
			return -1;
	} else if (str_matches_option(option, "attempts:")) {
		int retries = strtoint(val);
		if (retries == -1) return -1;
		if (retries > 255) retries = 255;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
		base->global_max_retransmits = retries;
	} else if (str_matches_option(option, "randomize-case:")) {
		int randcase = strtoint(val);
		if (randcase == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		base->global_randomize_case = randcase;
	} else if (str_matches_option(option, "bind-to:")) {
		/* XXX This only applies to successive nameservers, not
		 * to already-configured ones.  We might want to fix that. */
		int len = sizeof(base->global_outgoing_address);
		if (!(flags & DNS_OPTION_NAMESERVERS)) return 0;
		if (evutil_parse_sockaddr_port(val,
			(struct sockaddr*)&base->global_outgoing_address, &len))
			return -1;
		base->global_outgoing_addrlen = len;
	} else if (str_matches_option(option, "initial-probe-timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		/* capped at one hour */
		if (tv.tv_sec > 3600)
			tv.tv_sec = 3600;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
		    val);
		memcpy(&base->global_nameserver_probe_initial_timeout, &tv,
		    sizeof(tv));
	} else if (str_matches_option(option, "max-probe-timeout:")) {
		const int max_probe_timeout = strtoint_clipped(val, 1, 3600);
		if (max_probe_timeout == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting maximum probe timeout to %d",
			max_probe_timeout);
		base->ns_max_probe_timeout = max_probe_timeout;
		/* keep the initial probe timeout within the new maximum */
		if (base->global_nameserver_probe_initial_timeout.tv_sec > max_probe_timeout) {
			base->global_nameserver_probe_initial_timeout.tv_sec = max_probe_timeout;
			base->global_nameserver_probe_initial_timeout.tv_usec = 0;
			log(EVDNS_LOG_DEBUG, "Setting initial probe timeout to %s",
				val);
		}
	} else if (str_matches_option(option, "probe-backoff-factor:")) {
		const int backoff_factor = strtoint_clipped(val, 1, 10);
		if (backoff_factor == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting probe timeout backoff factor to %d",
			backoff_factor);
		base->ns_timeout_backoff_factor = backoff_factor;
	} else if (str_matches_option(option, "so-rcvbuf:")) {
		int buf = strtoint(val);
		if (buf == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting SO_RCVBUF to %s", val);
		base->so_rcvbuf = buf;
	} else if (str_matches_option(option, "so-sndbuf:")) {
		int buf = strtoint(val);
		if (buf == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting SO_SNDBUF to %s", val);
		base->so_sndbuf = buf;
	} else if (str_matches_option(option, "tcp-idle-timeout:")) {
		struct timeval tv;
		if (evdns_strtotimeval(val, &tv) == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting tcp idle timeout to %s", val);
		memcpy(&base->global_tcp_idle_timeout, &tv, sizeof(tv));
	} else if (str_matches_option(option, "use-vc:")) {
		/* flag-style option: it must not carry a value */
		if (!(flags & DNS_OPTION_MISC)) return 0;
		if (val && strlen(val)) return -1;
		log(EVDNS_LOG_DEBUG, "Setting use-vc option");
		base->global_tcp_flags |= DNS_QUERY_USEVC;
	} else if (str_matches_option(option, "ignore-tc:")) {
		/* flag-style option: it must not carry a value */
		if (!(flags & DNS_OPTION_MISC)) return 0;
		if (val && strlen(val)) return -1;
		log(EVDNS_LOG_DEBUG, "Setting ignore-tc option");
		base->global_tcp_flags |= DNS_QUERY_IGNTC;
	} else if (str_matches_option(option, "edns-udp-size:")) {
		const int sz = strtoint_clipped(val, DNS_MAX_UDP_SIZE, EDNS_MAX_UDP_SIZE);
		if (sz == -1) return -1;
		if (!(flags & DNS_OPTION_MISC)) return 0;
		log(EVDNS_LOG_DEBUG, "Setting edns-udp-size to %d", sz);
		base->global_max_udp_size = sz;
	}
	return 0;
}
4438
4439 int
4440 evdns_set_option(const char *option, const char *val, int flags)
4441 {
4442         if (!current_base)
4443                 current_base = evdns_base_new(NULL, 0);
4444         return evdns_base_set_option(current_base, option, val);
4445 }
4446
4447 static void
4448 resolv_conf_parse_line(struct evdns_base *base, char *const start, int flags) {
4449         char *strtok_state;
4450         static const char *const delims = " \t";
4451 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
4452
4453
4454         char *const first_token = strtok_r(start, delims, &strtok_state);
4455         ASSERT_LOCKED(base);
4456         if (!first_token) return;
4457
4458         if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
4459                 const char *const nameserver = NEXT_TOKEN;
4460
4461                 if (nameserver)
4462                         evdns_base_nameserver_ip_add(base, nameserver);
4463         } else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
4464                 const char *const domain = NEXT_TOKEN;
4465                 if (domain) {
4466                         search_postfix_clear(base);
4467                         search_postfix_add(base, domain);
4468                 }
4469         } else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
4470                 const char *domain;
4471                 search_postfix_clear(base);
4472
4473                 while ((domain = NEXT_TOKEN)) {
4474                         search_postfix_add(base, domain);
4475                 }
4476                 search_reverse(base);
4477         } else if (!strcmp(first_token, "options")) {
4478                 const char *option;
4479                 while ((option = NEXT_TOKEN)) {
4480                         const char *val = strchr(option, ':');
4481                         evdns_base_set_option_impl(base, option, val ? val+1 : "", flags);
4482                 }
4483         }
4484 #undef NEXT_TOKEN
4485 }
4486
/* exported function: parse the resolv.conf style file `filename` into `base`
 * while holding the base lock.  The result comes straight from
 * evdns_base_resolv_conf_parse_impl().
 * returns:
 *   0 no errors
 *   1 failed to open file
 *   2 failed to stat file
 *   3 file too large
 *   4 out of memory
 *   5 short read from file
 *   6 no nameservers were configured */
int
evdns_base_resolv_conf_parse(struct evdns_base *base, int flags, const char *const filename)
{
	int status;

	EVDNS_LOCK(base);
	status = evdns_base_resolv_conf_parse_impl(base, flags, filename);
	EVDNS_UNLOCK(base);

	return status;
}
4503
/* Return the path of the system hosts file as a newly allocated string that
 * the caller must release with mm_free().  Returns NULL when the path cannot
 * be determined or memory is exhausted. */
static char *
evdns_get_default_hosts_filename(void)
{
#ifdef _WIN32
	/* Windows is a little coy about where it puts its configuration
	 * files.  Sure, they're _usually_ in C:\windows\system32, but
	 * there's no reason in principle they couldn't be in
	 * W:\hoboken chicken emergency\
	 */
	char path[MAX_PATH+1];
	static const char hostfile[] = "\\drivers\\etc\\hosts";
	char *path_out;
	size_t len_out;

	if (! SHGetSpecialFolderPathA(NULL, path, CSIDL_SYSTEM, 0))
		return NULL;
	len_out = strlen(path)+strlen(hostfile)+1;
	path_out = mm_malloc(len_out);
	/* do not format into a failed allocation */
	if (!path_out)
		return NULL;
	evutil_snprintf(path_out, len_out, "%s%s", path, hostfile);
	return path_out;
#else
	return mm_strdup("/etc/hosts");
#endif
}
4528
4529 static int
4530 evdns_base_resolv_conf_parse_impl(struct evdns_base *base, int flags, const char *const filename) {
4531         size_t n;
4532         char *resolv;
4533         char *start;
4534         int err = 0;
4535         int add_default;
4536
4537         log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);
4538
4539         add_default = flags & DNS_OPTION_NAMESERVERS;
4540         if (flags & DNS_OPTION_NAMESERVERS_NO_DEFAULT)
4541                 add_default = 0;
4542
4543         if (flags & DNS_OPTION_HOSTSFILE) {
4544                 char *fname = evdns_get_default_hosts_filename();
4545                 evdns_base_load_hosts(base, fname);
4546                 if (fname)
4547                         mm_free(fname);
4548         }
4549
4550         if (!filename) {
4551                 evdns_resolv_set_defaults(base, flags);
4552                 return 1;
4553         }
4554
4555         if ((err = evutil_read_file_(filename, &resolv, &n, 0)) < 0) {
4556                 if (err == -1) {
4557                         /* No file. */
4558                         evdns_resolv_set_defaults(base, flags);
4559                         return 1;
4560                 } else {
4561                         return 2;
4562                 }
4563         }
4564
4565         start = resolv;
4566         for (;;) {
4567                 char *const newline = strchr(start, '\n');
4568                 if (!newline) {
4569                         resolv_conf_parse_line(base, start, flags);
4570                         break;
4571                 } else {
4572                         *newline = 0;
4573                         resolv_conf_parse_line(base, start, flags);
4574                         start = newline + 1;
4575                 }
4576         }
4577
4578         if (!base->server_head && add_default) {
4579                 /* no nameservers were configured. */
4580                 evdns_base_nameserver_ip_add(base, "127.0.0.1");
4581                 err = 6;
4582         }
4583         if (flags & DNS_OPTION_SEARCH && (!base->global_search_state || base->global_search_state->num_domains == 0)) {
4584                 search_set_from_hostname(base);
4585         }
4586
4587         mm_free(resolv);
4588         return err;
4589 }
4590
4591 int
4592 evdns_resolv_conf_parse(int flags, const char *const filename) {
4593         if (!current_base)
4594                 current_base = evdns_base_new(NULL, 0);
4595         return evdns_base_resolv_conf_parse(current_base, flags, filename);
4596 }
4597
4598
4599 #ifdef _WIN32
/* Add multiple nameservers from a space-or-comma-separated list.
 *
 * Every run of digits, '.', ':', '[' and ']' in `ips` is copied out and
 * passed to evdns_base_nameserver_ip_add().  Returns 0 when all addresses
 * were added, 4 when memory runs out, or the first nonzero result of
 * evdns_base_nameserver_ip_add().  The caller must hold the base lock. */
static int
evdns_nameserver_ip_add_line(struct evdns_base *base, const char *ips) {
	const char *addr;
	char *buf;
	int r;
	ASSERT_LOCKED(base);
	while (*ips) {
		/* the ctype functions need an unsigned char value; a plain
		 * (possibly negative) char is undefined behavior */
		while (isspace((unsigned char)*ips) || *ips == ',' || *ips == '\t')
			++ips;
		/* only separators were left (e.g. a trailing blank): done.
		 * Without this an empty address would be submitted and its
		 * failure reported for an otherwise valid list. */
		if (!*ips)
			break;
		addr = ips;
		while (isdigit((unsigned char)*ips) || *ips == '.' || *ips == ':' ||
		    *ips=='[' || *ips==']')
			++ips;
		buf = mm_malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_base_nameserver_ip_add(base, buf);
		mm_free(buf);
		if (r) return r;
	}
	return 0;
}
4624
/* Pointer type for the "GetNetworkParams" function, which
 * load_nameservers_with_getnetworkparams() looks up with GetProcAddress()
 * at run time. */
typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);
4626
4627 /* Use the windows GetNetworkParams interface in iphlpapi.dll to */
4628 /* figure out what our nameservers are. */
4629 static int
4630 load_nameservers_with_getnetworkparams(struct evdns_base *base)
4631 {
4632         /* Based on MSDN examples and inspection of  c-ares code. */
4633         FIXED_INFO *fixed;
4634         HMODULE handle = 0;
4635         ULONG size = sizeof(FIXED_INFO);
4636         void *buf = NULL;
4637         int status = 0, r, added_any;
4638         IP_ADDR_STRING *ns;
4639         GetNetworkParams_fn_t fn;
4640
4641         ASSERT_LOCKED(base);
4642         if (!(handle = evutil_load_windows_system_library_(
4643                         TEXT("iphlpapi.dll")))) {
4644                 log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
4645                 status = -1;
4646                 goto done;
4647         }
4648         if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
4649                 log(EVDNS_LOG_WARN, "Could not get address of function.");
4650                 status = -1;
4651                 goto done;
4652         }
4653
4654         buf = mm_malloc(size);
4655         if (!buf) { status = 4; goto done; }
4656         fixed = buf;
4657         r = fn(fixed, &size);
4658         if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
4659                 status = -1;
4660                 goto done;
4661         }
4662         if (r != ERROR_SUCCESS) {
4663                 mm_free(buf);
4664                 buf = mm_malloc(size);
4665                 if (!buf) { status = 4; goto done; }
4666                 fixed = buf;
4667                 r = fn(fixed, &size);
4668                 if (r != ERROR_SUCCESS) {
4669                         log(EVDNS_LOG_DEBUG, "fn() failed.");
4670                         status = -1;
4671                         goto done;
4672                 }
4673         }
4674
4675         EVUTIL_ASSERT(fixed);
4676         added_any = 0;
4677         ns = &(fixed->DnsServerList);
4678         while (ns) {
4679                 r = evdns_nameserver_ip_add_line(base, ns->IpAddress.String);
4680                 if (r) {
4681                         log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
4682                                 (ns->IpAddress.String),(int)GetLastError());
4683                         status = r;
4684                 } else {
4685                         ++added_any;
4686                         log(EVDNS_LOG_DEBUG,"Successfully added %s as nameserver",ns->IpAddress.String);
4687                 }
4688
4689                 ns = ns->Next;
4690         }
4691
4692         if (!added_any) {
4693                 log(EVDNS_LOG_DEBUG, "No nameservers added.");
4694                 if (status == 0)
4695                         status = -1;
4696         } else {
4697                 status = 0;
4698         }
4699
4700  done:
4701         if (buf)
4702                 mm_free(buf);
4703         if (handle)
4704                 FreeLibrary(handle);
4705         return status;
4706 }
4707
4708 static int
4709 config_nameserver_from_reg_key(struct evdns_base *base, HKEY key, const TCHAR *subkey)
4710 {
4711         char *buf;
4712         DWORD bufsz = 0, type = 0;
4713         int status = 0;
4714
4715         ASSERT_LOCKED(base);
4716         if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
4717             != ERROR_MORE_DATA)
4718                 return -1;
4719         if (!(buf = mm_malloc(bufsz)))
4720                 return -1;
4721
4722         if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
4723             == ERROR_SUCCESS && bufsz > 1) {
4724                 status = evdns_nameserver_ip_add_line(base,buf);
4725         }
4726
4727         mm_free(buf);
4728         return status;
4729 }
4730
4731 #define SERVICES_KEY TEXT("System\\CurrentControlSet\\Services\\")
4732 #define WIN_NS_9X_KEY  SERVICES_KEY TEXT("VxD\\MSTCP")
4733 #define WIN_NS_NT_KEY  SERVICES_KEY TEXT("Tcpip\\Parameters")
4734
4735 static int
4736 load_nameservers_from_registry(struct evdns_base *base)
4737 {
4738         int found = 0;
4739         int r;
4740 #define TRY(k, name) \
4741         if (!found && config_nameserver_from_reg_key(base,k,TEXT(name)) == 0) { \
4742                 log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
4743                 found = 1;                                              \
4744         } else if (!found) {                                            \
4745                 log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
4746                     #k,#name);                                          \
4747         }
4748
4749         ASSERT_LOCKED(base);
4750
4751         if (((int)GetVersion()) > 0) { /* NT */
4752                 HKEY nt_key = 0, interfaces_key = 0;
4753
4754                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
4755                                  KEY_READ, &nt_key) != ERROR_SUCCESS) {
4756                         log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
4757                         return -1;
4758                 }
4759                 r = RegOpenKeyEx(nt_key, TEXT("Interfaces"), 0,
4760                              KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
4761                              &interfaces_key);
4762                 if (r != ERROR_SUCCESS) {
4763                         log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
4764                         return -1;
4765                 }
4766                 TRY(nt_key, "NameServer");
4767                 TRY(nt_key, "DhcpNameServer");
4768                 TRY(interfaces_key, "NameServer");
4769                 TRY(interfaces_key, "DhcpNameServer");
4770                 RegCloseKey(interfaces_key);
4771                 RegCloseKey(nt_key);
4772         } else {
4773                 HKEY win_key = 0;
4774                 if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
4775                                  KEY_READ, &win_key) != ERROR_SUCCESS) {
4776                         log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
4777                         return -1;
4778                 }
4779                 TRY(win_key, "NameServer");
4780                 RegCloseKey(win_key);
4781         }
4782
4783         if (found == 0) {
4784                 log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
4785         }
4786
4787         return found ? 0 : -1;
4788 #undef TRY
4789 }
4790
4791 int
4792 evdns_base_config_windows_nameservers(struct evdns_base *base)
4793 {
4794         int r;
4795         char *fname;
4796         if (base == NULL)
4797                 base = current_base;
4798         if (base == NULL)
4799                 return -1;
4800         EVDNS_LOCK(base);
4801         fname = evdns_get_default_hosts_filename();
4802         log(EVDNS_LOG_DEBUG, "Loading hosts entries from %s", fname);
4803         evdns_base_load_hosts(base, fname);
4804         if (fname)
4805                 mm_free(fname);
4806
4807         if (load_nameservers_with_getnetworkparams(base) == 0) {
4808                 EVDNS_UNLOCK(base);
4809                 return 0;
4810         }
4811         r = load_nameservers_from_registry(base);
4812
4813         EVDNS_UNLOCK(base);
4814         return r;
4815 }
4816
4817 int
4818 evdns_config_windows_nameservers(void)
4819 {
4820         if (!current_base) {
4821                 current_base = evdns_base_new(NULL, 1);
4822                 return current_base == NULL ? -1 : 0;
4823         } else {
4824                 return evdns_base_config_windows_nameservers(current_base);
4825         }
4826 }
4827 #endif
4828
/* Allocate a new evdns_base bound to 'event_base' and fill in its defaults.
 *
 * 'flags' is a bitmask of EVDNS_BASE_INITIALIZE_NAMESERVERS,
 * EVDNS_BASE_DISABLE_WHEN_INACTIVE and EVDNS_BASE_NAMESERVERS_NO_DEFAULT.
 * Any other bit makes the whole mask fall back to
 * EVDNS_BASE_INITIALIZE_NAMESERVERS, with a warning.
 *
 * Returns the new base, or NULL if the secure RNG cannot be seeded, memory
 * cannot be allocated, or the requested nameserver initialization fails. */
struct evdns_base *
evdns_base_new(struct event_base *event_base, int flags)
{
	/* Every flag bit this function understands. */
	const int known_flags =
	    EVDNS_BASE_INITIALIZE_NAMESERVERS |
	    EVDNS_BASE_DISABLE_WHEN_INACTIVE |
	    EVDNS_BASE_NAMESERVERS_NO_DEFAULT;
	struct evdns_base *base;

	if (evutil_secure_rng_init() < 0) {
		log(EVDNS_LOG_WARN, "Unable to seed random number generator; "
		    "DNS can't run.");
		return NULL;
	}

	/* Give the evutil library a hook into its evdns-enabled
	 * functionality.  We can't just call evdns_getaddrinfo directly or
	 * else libevent-core will depend on libevent-extras. */
	evutil_set_evdns_getaddrinfo_fn_(evdns_getaddrinfo);
	evutil_set_evdns_getaddrinfo_cancel_fn_(evdns_getaddrinfo_cancel);

	base = mm_malloc(sizeof(*base));
	if (!base)
		return NULL;
	memset(base, 0, sizeof(*base));
	base->req_waiting_head = NULL;

	/* The base stays locked from here until we return. */
	EVTHREAD_ALLOC_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
	EVDNS_LOCK(base);

	/* Set max requests inflight and allocate req_heads. */
	base->req_heads = NULL;
	evdns_base_set_max_requests_inflight(base, 64);

	base->server_head = NULL;
	base->event_base = event_base;
	base->global_requests_waiting = 0;
	base->global_requests_inflight = 0;
	base->global_good_nameservers = 0;

	/* Default tunables. */
	base->global_timeout.tv_sec = 5;
	base->global_timeout.tv_usec = 0;
	base->global_max_reissues = 1;
	base->global_max_retransmits = 3;
	base->global_max_nameserver_timeout = 3;
	base->global_search_state = NULL;
	base->global_randomize_case = 1;
	base->global_max_udp_size = DNS_MAX_UDP_SIZE;
	base->global_getaddrinfo_allow_skew.tv_sec = 3;
	base->global_getaddrinfo_allow_skew.tv_usec = 0;
	base->global_nameserver_probe_initial_timeout.tv_sec = 10;
	base->global_nameserver_probe_initial_timeout.tv_usec = 0;
	base->ns_max_probe_timeout = 3600;
	base->ns_timeout_backoff_factor = 3;
	base->global_tcp_idle_timeout.tv_sec = CLIENT_IDLE_CONN_TIMEOUT;

	TAILQ_INIT(&base->hostsdb);

	if ((flags & ~known_flags) != 0) {
		flags = EVDNS_BASE_INITIALIZE_NAMESERVERS;
		log(EVDNS_LOG_WARN,
		    "Unrecognized flag passed to evdns_base_new(). Assuming "
		    "you meant EVDNS_BASE_INITIALIZE_NAMESERVERS.");
	}

	if (flags & EVDNS_BASE_INITIALIZE_NAMESERVERS) {
		int opts = DNS_OPTIONS_ALL;
		int rc;

		if (flags & EVDNS_BASE_NAMESERVERS_NO_DEFAULT)
			opts |= DNS_OPTION_NAMESERVERS_NO_DEFAULT;

#ifdef _WIN32
		rc = evdns_base_config_windows_nameservers(base);
#else
		rc = evdns_base_resolv_conf_parse(base, opts, "/etc/resolv.conf");
#endif
		if (rc != 0) {
			/* Releases the lock and frees the half-built base. */
			evdns_base_free_and_unlock(base, 0);
			return NULL;
		}
	}

	if (flags & EVDNS_BASE_DISABLE_WHEN_INACTIVE)
		base->disable_when_inactive = 1;

	EVDNS_UNLOCK(base);
	return base;
}
4921
4922 int
4923 evdns_init(void)
4924 {
4925         struct evdns_base *base = evdns_base_new(NULL, 1);
4926         if (base) {
4927                 current_base = base;
4928                 return 0;
4929         } else {
4930                 return -1;
4931         }
4932 }
4933
4934 const char *
4935 evdns_err_to_string(int err)
4936 {
4937     switch (err) {
4938         case DNS_ERR_NONE: return "no error";
4939         case DNS_ERR_FORMAT: return "misformatted query";
4940         case DNS_ERR_SERVERFAILED: return "server failed";
4941         case DNS_ERR_NOTEXIST: return "name does not exist";
4942         case DNS_ERR_NOTIMPL: return "query not implemented";
4943         case DNS_ERR_REFUSED: return "refused";
4944
4945         case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
4946         case DNS_ERR_UNKNOWN: return "unknown";
4947         case DNS_ERR_TIMEOUT: return "request timed out";
4948         case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
4949         case DNS_ERR_CANCEL: return "dns request canceled";
4950         case DNS_ERR_NODATA: return "no records in the reply";
4951         default: return "[Unknown error code]";
4952     }
4953 }
4954
4955 static void
4956 evdns_nameserver_free(struct nameserver *server)
4957 {
4958         if (server->socket >= 0)
4959                 evutil_closesocket(server->socket);
4960         (void) event_del(&server->event);
4961         event_debug_unassign(&server->event);
4962         if (server->state == 0)
4963                 (void) event_del(&server->timeout_event);
4964         if (server->probe_request) {
4965                 evdns_cancel_request(server->base, server->probe_request);
4966                 server->probe_request = NULL;
4967         }
4968         event_debug_unassign(&server->timeout_event);
4969         disconnect_and_free_connection(server->connection);
4970         mm_free(server);
4971 }
4972
/* Tear down 'base' completely and free it.
 *
 * The caller must hold the lock on 'base'; this function releases the lock,
 * destroys it, and frees the base, so 'base' is invalid on return.
 *
 * If 'fail_requests' is nonzero, reply_schedule_callback() is invoked with
 * DNS_ERR_SHUTDOWN for every waiting and in-flight request before that
 * request is finished; otherwise requests are finished without it. */
4973 static void
4974 evdns_base_free_and_unlock(struct evdns_base *base, int fail_requests)
4975 {
4976         struct nameserver *server, *server_next;
4977         struct search_domain *dom, *dom_next;
4978         int i;
4979
4980         /* Requires that we hold the lock. */
4981
4982         /* TODO(nickm) we might need to refcount here. */
4983
             /* Drain the waiting queue.  The loop re-reads the list head on
              * every pass and relies on request_finished() to advance it. */
4984         while (base->req_waiting_head) {
4985                 if (fail_requests)
4986                         reply_schedule_callback(base->req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
4987                 request_finished(base->req_waiting_head, &base->req_waiting_head, 1);
4988         }
             /* Drain every in-flight bucket the same way. */
4989         for (i = 0; i < base->n_req_heads; ++i) {
4990                 while (base->req_heads[i]) {
4991                         if (fail_requests)
4992                                 reply_schedule_callback(base->req_heads[i], 0, DNS_ERR_SHUTDOWN, NULL);
4993                         request_finished(base->req_heads[i], &REQ_HEAD(base, base->req_heads[i]->trans_id), 1);
4994                 }
4995         }
4996         base->global_requests_inflight = base->global_requests_waiting = 0;
4997
             /* Free the nameservers.  The next pointer is saved before each
              * free, and the walk stops once it arrives back at server_head,
              * so a list that links back to its head is handled as well as a
              * NULL-terminated one. */
4998         for (server = base->server_head; server; server = server_next) {
4999                 server_next = server->next;
5000                 /* Drop the probe pointer so evdns_nameserver_free() does not cancel it; presumably it was already released by the request loops above -- TODO confirm. */
5001                 server->probe_request = NULL;
5002                 evdns_nameserver_free(server);
5003                 if (server_next == base->server_head)
5004                         break;
5005         }
5006         base->server_head = NULL;
5007         base->global_good_nameservers = 0;
5008
             /* Free the search-domain list and the state object that owns it. */
5009         if (base->global_search_state) {
5010                 for (dom = base->global_search_state->head; dom; dom = dom_next) {
5011                         dom_next = dom->next;
5012                         mm_free(dom);
5013                 }
5014                 mm_free(base->global_search_state);
5015                 base->global_search_state = NULL;
5016         }
5017
             /* Free every entry in the hosts database. */
5018         {
5019                 struct hosts_entry *victim;
5020                 while ((victim = TAILQ_FIRST(&base->hostsdb))) {
5021                         TAILQ_REMOVE(&base->hostsdb, victim, next);
5022                         mm_free(victim);
5023                 }
5024         }
5025
5026         mm_free(base->req_heads);
5027
             /* Release the lock before destroying it, then free the base. */
5028         EVDNS_UNLOCK(base);
5029         EVTHREAD_FREE_LOCK(base->lock, EVTHREAD_LOCKTYPE_RECURSIVE);
5030
5031         mm_free(base);
5032 }
5033
/* Public destructor for an evdns_base: takes the lock on 'base' and hands
 * off to evdns_base_free_and_unlock(), which releases the lock and frees
 * the base.  'base' must not be used after this returns.  'fail_requests'
 * is passed through unchanged. */
5034 void
5035 evdns_base_free(struct evdns_base *base, int fail_requests)
5036 {
5037         EVDNS_LOCK(base);
5038         evdns_base_free_and_unlock(base, fail_requests);
5039 }
5040
5041 void
5042 evdns_base_clear_host_addresses(struct evdns_base *base)
5043 {
5044         struct hosts_entry *victim;
5045         EVDNS_LOCK(base);
5046         while ((victim = TAILQ_FIRST(&base->hostsdb))) {
5047                 TAILQ_REMOVE(&base->hostsdb, victim, next);
5048                 mm_free(victim);
5049         }
5050         EVDNS_UNLOCK(base);
5051 }
5052
5053 void
5054 evdns_shutdown(int fail_requests)
5055 {
5056         if (current_base) {
5057                 struct evdns_base *b = current_base;
5058                 current_base = NULL;
5059                 evdns_base_free(b, fail_requests);
5060         }
5061         evdns_log_fn = NULL;
5062 }
5063
5064 static int
5065 evdns_base_parse_hosts_line(struct evdns_base *base, char *line)
5066 {
5067         char *strtok_state;
5068         static const char *const delims = " \t";
5069         char *const addr = strtok_r(line, delims, &strtok_state);
5070         char *hostname, *hash;
5071         struct sockaddr_storage ss;
5072         int socklen = sizeof(ss);
5073         ASSERT_LOCKED(base);
5074
5075 #define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
5076
5077         if (!addr || *addr == '#')
5078                 return 0;
5079
5080         memset(&ss, 0, sizeof(ss));
5081         if (evutil_parse_sockaddr_port(addr, (struct sockaddr*)&ss, &socklen)<0)
5082                 return -1;
5083         if (socklen > (int)sizeof(struct sockaddr_in6))
5084                 return -1;
5085
5086         if (sockaddr_getport((struct sockaddr*)&ss))
5087                 return -1;
5088
5089         while ((hostname = NEXT_TOKEN)) {
5090                 struct hosts_entry *he;
5091                 size_t namelen;
5092                 if ((hash = strchr(hostname, '#'))) {
5093                         if (hash == hostname)
5094                                 return 0;
5095                         *hash = '\0';
5096                 }
5097
5098                 namelen = strlen(hostname);
5099
5100                 he = mm_calloc(1, sizeof(struct hosts_entry)+namelen);
5101                 if (!he)
5102                         return -1;
5103                 EVUTIL_ASSERT(socklen <= (int)sizeof(he->addr));
5104                 memcpy(&he->addr, &ss, socklen);
5105                 memcpy(he->hostname, hostname, namelen+1);
5106                 he->addrlen = socklen;
5107
5108                 TAILQ_INSERT_TAIL(&base->hostsdb, he, next);
5109
5110                 if (hash)
5111                         return 0;
5112         }
5113
5114         return 0;
5115 #undef NEXT_TOKEN
5116 }
5117
/* Load hosts entries from the file 'hosts_fname' into 'base'.
 *
 * If 'hosts_fname' is NULL, or the file cannot be read, two built-in
 * "localhost" entries (127.0.0.1 and ::1) are added instead.
 *
 * Returns 0 when the file was read or when 'hosts_fname' was NULL, and -1
 * when reading the file failed.  Errors on individual lines are ignored.
 * Requires that the caller holds the lock on 'base'. */
static int
evdns_base_load_hosts_impl(struct evdns_base *base, const char *hosts_fname)
{
	char *contents = NULL, *line, *newline;
	size_t len;
	int err = 0;

	ASSERT_LOCKED(base);

	if (hosts_fname != NULL)
		err = evutil_read_file_(hosts_fname, &contents, &len, 0);

	if (hosts_fname == NULL || err < 0) {
		static const char *const fallback[] = {
			"127.0.0.1   localhost",
			"::1   localhost",
		};
		char tmp[64];
		size_t k;

		/* The parser writes into its argument, so each entry is
		 * copied into scratch space first. */
		for (k = 0; k < sizeof(fallback) / sizeof(fallback[0]); ++k) {
			strlcpy(tmp, fallback[k], sizeof(tmp));
			evdns_base_parse_hosts_line(base, tmp);
		}
		return err ? -1 : 0;
	}

	/* This will break early if there is a NUL in the hosts file.
	 * Probably not a problem.*/
	for (line = contents; (newline = strchr(line, '\n')) != NULL;
	    line = newline + 1) {
		*newline = '\0';
		evdns_base_parse_hosts_line(base, line);
	}
	/* Whatever follows the last newline (possibly an empty string). */
	evdns_base_parse_hosts_line(base, line);

	mm_free(contents);
	return 0;
}
5156
5157 int
5158 evdns_base_load_hosts(struct evdns_base *base, const char *hosts_fname)
5159 {
5160         int res;
5161         if (!base)
5162                 base = current_base;
5163         EVDNS_LOCK(base);
5164         res = evdns_base_load_hosts_impl(base, hosts_fname);
5165         EVDNS_UNLOCK(base);
5166         return res;
5167 }
5168
5169 /* A single request for a getaddrinfo, either v4 or v6. */
     /* Two of these are embedded in struct evdns_getaddrinfo_request (fields
      * ipv4_request and ipv6_request); the resolve callback recovers the
      * enclosing request from a pointer to one of them with EVUTIL_UPCAST. */
5170 struct getaddrinfo_subrequest {
             /* The underlying DNS request.  The resolve callback sets this to
              * NULL once this half has reported, so "r != NULL" is how the code
              * tests whether this half is still outstanding. */
5171         struct evdns_request *r;
             /* Record type of this half: DNS_IPv4_A or DNS_IPv6_AAAA. */
5172         ev_uint32_t type;
5173 };
5174
5175 /* State data used to implement an in-progress getaddrinfo. */
5176 struct evdns_getaddrinfo_request {
             /* The base this lookup runs on.  The resolve callback sets it to
              * NULL when it is invoked with DNS_ERR_SHUTDOWN. */
5177         struct evdns_base *evdns_base;
5178         /* Copy of the modified 'hints' data that we'll use to build
5179          * answers. */
5180         struct evutil_addrinfo hints;
5181         /* The callback to invoke when we're done */
             /* The timeout callback resets this to NULL after it has answered
              * the user, and the resolve callback treats NULL as "already
              * answered". */
5182         evdns_getaddrinfo_cb user_cb;
5183         /* User-supplied data to give to the callback. */
5184         void *user_data;
5185         /* The port to use when building sockaddrs. */
             /* Converted with htons() when a sockaddr is built. */
5186         ev_uint16_t port;
5187         /* The sub_request for an A record (if any) */
5188         struct getaddrinfo_subrequest ipv4_request;
5189         /* The sub_request for an AAAA record (if any) */
5190         struct getaddrinfo_subrequest ipv6_request;
5191
5192         /* The cname result that we were told (if any) */
             /* Freed together with the request unless add_cname_to_reply() has
              * moved it into an answer's ai_canonname first. */
5193         char *cname_result;
5194
5195         /* If we have one request answered and one request still inflight,
5196          * then this field holds the answer from the first request... */
5197         struct evutil_addrinfo *pending_result;
5198         /* And this event is a timeout that will tell us to cancel the second
5199          * request if it's taking a long time. */
5200         struct event timeout;
5201
5202         /* And this field holds the error code from the first request... */
5203         int pending_error;
5204         /* If this is set, the user canceled this request. */
5205         unsigned user_canceled : 1;
5206         /* If this is set, the user can no longer cancel this request; we're
5207          * just waiting for the free. */
5208         unsigned request_done : 1;
5209 };
5210
/* Translate an evdns DNS_ERR_* code into the getaddrinfo-style error to
 * report: 0 for DNS_ERR_NONE, EVUTIL_EAI_NONAME for DNS_ERR_NOTEXIST, and
 * EVUTIL_EAI_FAIL for everything else. */
static int
evdns_err_to_getaddrinfo_err(int e1)
{
	/* XXX Do this better! */
	switch (e1) {
	case DNS_ERR_NONE:
		return 0;
	case DNS_ERR_NOTEXIST:
		return EVUTIL_EAI_NONAME;
	default:
		return EVUTIL_EAI_FAIL;
	}
}
5223
/* Combine two getaddrinfo error codes into the one to report: 'e1' wins
 * whenever it is nonzero, otherwise 'e2' is returned. */
static int
getaddrinfo_merge_err(int e1, int e2)
{
	/* XXXX be cleverer here. */
	return (e1 != 0) ? e1 : e2;
}
5234
/* Release a getaddrinfo request and everything it still owns: a stashed
 * partial answer, a stashed CNAME, and its timeout event.
 *
 * DO NOT CALL this if either of the requests is pending.  Only once
 * both callbacks have been invoked is it safe to free the request */
static void
free_getaddrinfo_request(struct evdns_getaddrinfo_request *data)
{
	struct evutil_addrinfo *leftover = data->pending_result;
	char *cname = data->cname_result;

	if (leftover != NULL)
		evutil_freeaddrinfo(leftover);
	if (cname != NULL)
		mm_free(cname);

	event_del(&data->timeout);
	mm_free(data);
}
5248
/* Move the stored CNAME (if any) from 'data' into ai->ai_canonname.  The
 * string is handed over rather than copied, so data->cname_result is NULL
 * afterwards.  Does nothing when there is no CNAME or 'ai' is NULL. */
static void
add_cname_to_reply(struct evdns_getaddrinfo_request *data,
    struct evutil_addrinfo *ai)
{
	if (!data->cname_result || !ai)
		return;

	ai->ai_canonname = data->cname_result;
	data->cname_result = NULL;
}
5258
/* Timer callback for a mixed A/AAAA getaddrinfo: one half has already
 * reported and the other has taken too long.  Cancels whichever half is
 * still outstanding, counts the timeout on the base, and hands the user
 * the stashed answer, or failing that the stashed error (EVUTIL_EAI_AGAIN
 * if none was recorded).
 *
 * 'ptr' is the struct evdns_getaddrinfo_request; 'fd' and 'what' are
 * unused.
 */
static void
evdns_getaddrinfo_timeout_cb(evutil_socket_t fd, short what, void *ptr)
{
	struct evdns_getaddrinfo_request *const req = ptr;
	int v4_timedout = 0, v6_timedout = 0;

	/* Cancel any pending requests, and note which one */
	if (req->ipv4_request.r != NULL) {
		/* XXXX This does nothing if the request's callback is already
		 * running (pending_cb is set). */
		evdns_cancel_request(NULL, req->ipv4_request.r);
		v4_timedout = 1;
		EVDNS_LOCK(req->evdns_base);
		req->evdns_base->getaddrinfo_ipv4_timeouts += 1;
		EVDNS_UNLOCK(req->evdns_base);
	}
	if (req->ipv6_request.r != NULL) {
		/* XXXX This does nothing if the request's callback is already
		 * running (pending_cb is set). */
		evdns_cancel_request(NULL, req->ipv6_request.r);
		v6_timedout = 1;
		EVDNS_LOCK(req->evdns_base);
		req->evdns_base->getaddrinfo_ipv6_timeouts += 1;
		EVDNS_UNLOCK(req->evdns_base);
	}

	/* We only use this timeout callback when we have an answer for
	 * one address. */
	EVUTIL_ASSERT(!v4_timedout || !v6_timedout);

	/* Report the outcome of the other request that didn't time out. */
	if (req->pending_result == NULL) {
		int e = req->pending_error;

		if (e == 0)
			e = EVUTIL_EAI_AGAIN;
		req->user_cb(e, NULL, req->user_data);
	} else {
		add_cname_to_reply(req, req->pending_result);
		req->user_cb(0, req->pending_result, req->user_data);
		req->pending_result = NULL;
	}

	/* prevent double-call if evdns callbacks are in-progress. XXXX It
	 * would be better if this weren't necessary. */
	req->user_cb = NULL;

	if (!(v4_timedout || v6_timedout)) {
		/* should be impossible? XXXX */
		free_getaddrinfo_request(req);
	}
}
5314
/* Schedule data->timeout with event_add(), using the delay stored in
 * evdns_base->global_getaddrinfo_allow_skew.  Returns whatever event_add()
 * returns. */
5315 static int
5316 evdns_getaddrinfo_set_timeout(struct evdns_base *evdns_base,
5317     struct evdns_getaddrinfo_request *data)
5318 {
5319         return event_add(&data->timeout, &evdns_base->global_getaddrinfo_allow_skew);
5320 }
5321
5322 static inline int
5323 evdns_result_is_answer(int result)
5324 {
5325         return (result != DNS_ERR_NOTIMPL && result != DNS_ERR_REFUSED &&
5326             result != DNS_ERR_SERVERFAILED && result != DNS_ERR_CANCEL);
5327 }
5328
5329 static void
5330 evdns_getaddrinfo_gotresolve(int result, char type, int count,
5331     int ttl, void *addresses, void *arg)
5332 {
5333         int i;
5334         struct getaddrinfo_subrequest *req = arg;
5335         struct getaddrinfo_subrequest *other_req;
5336         struct evdns_getaddrinfo_request *data;
5337
5338         struct evutil_addrinfo *res;
5339
5340         struct sockaddr_in sin;
5341         struct sockaddr_in6 sin6;
5342         struct sockaddr *sa;
5343         int socklen, addrlen;
5344         void *addrp;
5345         int err;
5346         int user_canceled;
5347
5348         EVUTIL_ASSERT(req->type == DNS_IPv4_A || req->type == DNS_IPv6_AAAA);
5349         if (req->type == DNS_IPv4_A) {
5350                 data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv4_request);
5351                 other_req = &data->ipv6_request;
5352         } else {
5353                 data = EVUTIL_UPCAST(req, struct evdns_getaddrinfo_request, ipv6_request);
5354                 other_req = &data->ipv4_request;
5355         }
5356
5357         /** Called from evdns_base_free() with @fail_requests == 1 */
5358         if (result != DNS_ERR_SHUTDOWN) {
5359                 EVDNS_LOCK(data->evdns_base);
5360                 if (evdns_result_is_answer(result)) {
5361                         if (req->type == DNS_IPv4_A)
5362                                 ++data->evdns_base->getaddrinfo_ipv4_answered;
5363                         else
5364                                 ++data->evdns_base->getaddrinfo_ipv6_answered;
5365                 }
5366                 user_canceled = data->user_canceled;
5367                 if (other_req->r == NULL)
5368                         data->request_done = 1;
5369                 EVDNS_UNLOCK(data->evdns_base);
5370         } else {
5371                 data->evdns_base = NULL;
5372                 user_canceled = data->user_canceled;
5373         }
5374
5375         req->r = NULL;
5376
5377         if (result == DNS_ERR_CANCEL && ! user_canceled) {
5378                 /* Internal cancel request from timeout or internal error.
5379                  * we already answered the user. */
5380                 if (other_req->r == NULL)
5381                         free_getaddrinfo_request(data);
5382                 return;
5383         }
5384
5385         if (data->user_cb == NULL) {
5386                 /* We already answered.  XXXX This shouldn't be needed; see
5387                  * comments in evdns_getaddrinfo_timeout_cb */
5388                 free_getaddrinfo_request(data);
5389                 return;
5390         }
5391
5392         if (result == DNS_ERR_NONE) {
5393                 if (count == 0)
5394                         err = EVUTIL_EAI_NODATA;
5395                 else
5396                         err = 0;
5397         } else {
5398                 err = evdns_err_to_getaddrinfo_err(result);
5399         }
5400
5401         if (err) {
5402                 /* Looks like we got an error. */
5403                 if (other_req->r) {
5404                         /* The other request is still working; maybe it will
5405                          * succeed. */
5406                         /* XXXX handle failure from set_timeout */
5407                         if (result != DNS_ERR_SHUTDOWN) {
5408                                 evdns_getaddrinfo_set_timeout(data->evdns_base, data);
5409                         }
5410                         data->pending_error = err;
5411                         return;
5412                 }
5413
5414                 if (user_canceled) {
5415                         data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
5416                 } else if (data->pending_result) {
5417                         /* If we have an answer waiting, and we weren't
5418                          * canceled, ignore this error. */
5419                         add_cname_to_reply(data, data->pending_result);
5420                         data->user_cb(0, data->pending_result, data->user_data);
5421                         data->pending_result = NULL;
5422                 } else {
5423                         if (data->pending_error)
5424                                 err = getaddrinfo_merge_err(err,
5425                                     data->pending_error);
5426                         data->user_cb(err, NULL, data->user_data);
5427                 }
5428                 free_getaddrinfo_request(data);
5429                 return;
5430         } else if (user_canceled) {
5431                 if (other_req->r) {
5432                         /* The other request is still working; let it hit this
5433                          * callback with EVUTIL_EAI_CANCEL callback and report
5434                          * the failure. */
5435                         return;
5436                 }
5437                 data->user_cb(EVUTIL_EAI_CANCEL, NULL, data->user_data);
5438                 free_getaddrinfo_request(data);
5439                 return;
5440         }
5441
5442         /* Looks like we got some answers. We should turn them into addrinfos
5443          * and then either queue those or return them all. */
5444         EVUTIL_ASSERT(type == DNS_IPv4_A || type == DNS_IPv6_AAAA);
5445
5446         if (type == DNS_IPv4_A) {
5447                 memset(&sin, 0, sizeof(sin));
5448                 sin.sin_family = AF_INET;
5449                 sin.sin_port = htons(data->port);
5450 #ifdef EVENT__HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
5451                 sin.sin_len = sizeof(sin);
5452 #endif
5453
5454                 sa = (struct sockaddr *)&sin;
5455                 socklen = sizeof(sin);
5456                 addrlen = 4;
5457                 addrp = &sin.sin_addr.s_addr;
5458         } else {
5459                 memset(&sin6, 0, sizeof(sin6));
5460                 sin6.sin6_family = AF_INET6;
5461                 sin6.sin6_port = htons(data->port);
5462 #ifdef EVENT__HAVE_STRUCT_SOCKADDR_IN6_SIN6_LEN
5463                 sin6.sin6_len = sizeof(sin6);
5464 #endif
5465
5466                 sa = (struct sockaddr *)&sin6;
5467                 socklen = sizeof(sin6);
5468                 addrlen = 16;
5469                 addrp = &sin6.sin6_addr.s6_addr;
5470         }
5471
5472         res = NULL;
5473         for (i=0; i < count; ++i) {
5474                 struct evutil_addrinfo *ai;
5475                 memcpy(addrp, ((char*)addresses)+i*addrlen, addrlen);
5476                 ai = evutil_new_addrinfo_(sa, socklen, &data->hints);
5477                 if (!ai) {
5478                         if (other_req->r) {
5479                                 evdns_cancel_request(NULL, other_req->r);
5480                         }
5481                         data->user_cb(EVUTIL_EAI_MEMORY, NULL, data->user_data);
5482                         if (res)
5483                                 evutil_freeaddrinfo(res);
5484
5485                         if (other_req->r == NULL)
5486                                 free_getaddrinfo_request(data);
5487                         return;
5488                 }
5489                 res = evutil_addrinfo_append_(res, ai);
5490         }
5491
5492         if (other_req->r) {
5493                 /* The other request is still in progress; wait for it */
5494                 /* XXXX handle failure from set_timeout */
5495                 evdns_getaddrinfo_set_timeout(data->evdns_base, data);
5496                 data->pending_result = res;
5497                 return;
5498         } else {
5499                 /* The other request is done or never started; append its
5500                  * results (if any) and return them. */
5501                 if (data->pending_result) {
5502                         if (req->type == DNS_IPv4_A)
5503                                 res = evutil_addrinfo_append_(res,
5504                                     data->pending_result);
5505                         else
5506                                 res = evutil_addrinfo_append_(
5507                                     data->pending_result, res);
5508                         data->pending_result = NULL;
5509                 }
5510
5511                 /* Call the user callback. */
5512                 add_cname_to_reply(data, res);
5513                 data->user_cb(0, res, data->user_data);
5514
5515                 /* Free data. */
5516                 free_getaddrinfo_request(data);
5517         }
5518 }
5519
5520 static struct hosts_entry *
5521 find_hosts_entry(struct evdns_base *base, const char *hostname,
5522     struct hosts_entry *find_after)
5523 {
5524         struct hosts_entry *e;
5525
5526         if (find_after)
5527                 e = TAILQ_NEXT(find_after, next);
5528         else
5529                 e = TAILQ_FIRST(&base->hostsdb);
5530
5531         for (; e; e = TAILQ_NEXT(e, next)) {
5532                 if (!evutil_ascii_strcasecmp(e->hostname, hostname))
5533                         return e;
5534         }
5535         return NULL;
5536 }
5537
5538 static int
5539 evdns_getaddrinfo_fromhosts(struct evdns_base *base,
5540     const char *nodename, struct evutil_addrinfo *hints, ev_uint16_t port,
5541     struct evutil_addrinfo **res)
5542 {
5543         int n_found = 0;
5544         struct hosts_entry *e;
5545         struct evutil_addrinfo *ai=NULL;
5546         int f = hints->ai_family;
5547
5548         EVDNS_LOCK(base);
5549         for (e = find_hosts_entry(base, nodename, NULL); e;
5550             e = find_hosts_entry(base, nodename, e)) {
5551                 struct evutil_addrinfo *ai_new;
5552                 ++n_found;
5553                 if ((e->addr.sa.sa_family == AF_INET && f == PF_INET6) ||
5554                     (e->addr.sa.sa_family == AF_INET6 && f == PF_INET))
5555                         continue;
5556                 ai_new = evutil_new_addrinfo_(&e->addr.sa, e->addrlen, hints);
5557                 if (!ai_new) {
5558                         n_found = 0;
5559                         goto out;
5560                 }
5561                 sockaddr_setport(ai_new->ai_addr, port);
5562                 ai = evutil_addrinfo_append_(ai, ai_new);
5563         }
5564         EVDNS_UNLOCK(base);
5565 out:
5566         if (n_found) {
5567                 /* Note that we return an empty answer if we found entries for
5568                  * this hostname but none were of the right address type. */
5569                 *res = ai;
5570                 return 0;
5571         } else {
5572                 if (ai)
5573                         evutil_freeaddrinfo(ai);
5574                 return -1;
5575         }
5576 }
5577
/* Asynchronous getaddrinfo() on top of an evdns_base.
 *
 * dns_base: the base to resolve with; if NULL, current_base is used.
 * nodename, servname, hints_in: as for getaddrinfo(); hints_in may be NULL,
 *     in which case zeroed hints with ai_family = PF_UNSPEC are used.
 * cb, arg: callback and its opaque argument; invoked as cb(err, res, arg).
 *
 * The lookup is answered immediately (cb is called before this function
 * returns, and NULL is returned) when:
 *   - no evdns_base is available,
 *   - the hints carry EVUTIL_AI_NUMERICHOST,
 *   - evutil_getaddrinfo_common_() reports anything other than
 *     EVUTIL_EAI_NEED_RESOLVE,
 *   - the hosts database has an entry for nodename,
 *   - allocating the request fails, or
 *   - none of the DNS requests could be launched.
 *
 * Otherwise an A and/or AAAA request is launched (depending on ai_family)
 * and the new request object is returned; the answer is delivered later
 * through evdns_getaddrinfo_gotresolve. */
struct evdns_getaddrinfo_request *
evdns_getaddrinfo(struct evdns_base *dns_base,
    const char *nodename, const char *servname,
    const struct evutil_addrinfo *hints_in,
    evdns_getaddrinfo_cb cb, void *arg)
{
        struct evdns_getaddrinfo_request *req;
        struct evutil_addrinfo hints;
        struct evutil_addrinfo *answer = NULL;
        int rc;
        int port = 0;
        int want_cname;
        int launched;

        /* Fall back to the global base when the caller did not supply one. */
        if (dns_base == NULL)
                dns_base = current_base;
        if (dns_base == NULL) {
                log(EVDNS_LOG_WARN,
                    "Call to getaddrinfo_async with no "
                    "evdns_base configured.");
                cb(EVUTIL_EAI_FAIL, NULL, arg); /* ??? better error? */
                return NULL;
        }

        /* A numeric-host-only lookup never needs DNS: answer it right away
         * with the synchronous resolver. */
        if (hints_in != NULL &&
            (hints_in->ai_flags & EVUTIL_AI_NUMERICHOST) != 0) {
                rc = evutil_getaddrinfo(nodename, servname, hints_in, &answer);
                cb(rc, answer, arg);
                return NULL;
        }

        /* Work on a private copy of the hints so they can be adjusted. */
        if (hints_in == NULL) {
                memset(&hints, 0, sizeof(hints));
                hints.ai_family = PF_UNSPEC;
        } else {
                memcpy(&hints, hints_in, sizeof(hints));
        }

        evutil_adjust_hints_for_addrconfig_(&hints);

        /* See whether the common code can answer without a resolve.
         * (Calling the platform getaddrinfo with AI_NUMERICHOST would be
         * nicer, but there is no reliable way to tell "that wasn't a numeric
         * host" apart from the other EAI_NONAME cases.) */
        rc = evutil_getaddrinfo_common_(nodename, servname, &hints, &answer,
            &port);
        if (rc != EVUTIL_EAI_NEED_RESOLVE) {
                cb(rc, answer, arg);
                return NULL;
        }

        /* An entry in the hosts database takes precedence over DNS. */
        if (evdns_getaddrinfo_fromhosts(dns_base, nodename, &hints, port,
                &answer) == 0) {
                cb(0, answer, arg);
                return NULL;
        }

        /* Nothing left but to actually launch DNS requests. */
        req = mm_calloc(1, sizeof(*req));
        if (req == NULL) {
                cb(EVUTIL_EAI_MEMORY, NULL, arg);
                return NULL;
        }

        memcpy(&req->hints, &hints, sizeof(req->hints));
        req->port = (ev_uint16_t)port;
        req->ipv4_request.type = DNS_IPv4_A;
        req->ipv6_request.type = DNS_IPv6_AAAA;
        req->user_cb = cb;
        req->user_data = arg;
        req->evdns_base = dns_base;

        want_cname = (hints.ai_flags & EVUTIL_AI_CANONNAME) != 0;

        /* For PF_UNSPEC we fire an A and an AAAA request in parallel; a
         * single request will not do, since many servers answer only one
         * question per DNS request.
         *
         * When the first answer arrives we wait a short while for the other
         * before reporting, so that both can be returned together if they
         * show up in time.
         *
         * If too many addresses of one type time out or fail, we should stop
         * launching those requests. (XXX we don't do that yet.)
         */

        EVDNS_LOCK(dns_base);

        if (hints.ai_family != PF_INET6) {
                log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv4 as %p",
                    nodename, (void *)&req->ipv4_request);

                req->ipv4_request.r = evdns_base_resolve_ipv4(dns_base,
                    nodename, 0, evdns_getaddrinfo_gotresolve,
                    &req->ipv4_request);
                if (want_cname && req->ipv4_request.r != NULL) {
                        req->ipv4_request.r->current_req->put_cname_in_ptr =
                            &req->cname_result;
                }
        }
        if (hints.ai_family != PF_INET) {
                log(EVDNS_LOG_DEBUG, "Sending request for %s on ipv6 as %p",
                    nodename, (void *)&req->ipv6_request);

                req->ipv6_request.r = evdns_base_resolve_ipv6(dns_base,
                    nodename, 0, evdns_getaddrinfo_gotresolve,
                    &req->ipv6_request);
                if (want_cname && req->ipv6_request.r != NULL) {
                        req->ipv6_request.r->current_req->put_cname_in_ptr =
                            &req->cname_result;
                }
        }

        evtimer_assign(&req->timeout, dns_base->event_base,
            evdns_getaddrinfo_timeout_cb, req);

        /* The request is live if at least one sub-request was started. */
        launched = (req->ipv4_request.r || req->ipv6_request.r);

        EVDNS_UNLOCK(dns_base);

        if (!launched) {
                mm_free(req);
                cb(EVUTIL_EAI_FAIL, NULL, arg);
                return NULL;
        }
        return req;
}
5709
/* Cancel an in-progress evdns_getaddrinfo() request.
 *
 * With the base lock held: if the request is already flagged as done,
 * nothing happens.  Otherwise the request's timeout event is deleted, the
 * request is flagged as canceled by the user, and each of the A/AAAA
 * sub-requests that still has a handle is canceled. */
void
evdns_getaddrinfo_cancel(struct evdns_getaddrinfo_request *data)
{
        EVDNS_LOCK(data->evdns_base);
        if (!data->request_done) {
                event_del(&data->timeout);
                data->user_canceled = 1;
                if (data->ipv4_request.r != NULL) {
                        evdns_cancel_request(data->evdns_base,
                            data->ipv4_request.r);
                }
                if (data->ipv6_request.r != NULL) {
                        evdns_cancel_request(data->evdns_base,
                            data->ipv6_request.r);
                }
        }
        EVDNS_UNLOCK(data->evdns_base);
}