4 * Tom Rathborne - tomr@uunet.ca - http://www.uunet.ca/~tomr/
5 * UUNET Canada, April 16, 1995
7 * Rewritten by David Robinson. (drtr@ast.cam.ac.uk)
9 * Usage: logresolve [-s filename] [-c] < access_log > new_log
12 * -s filename name of a file to record statistics
13 * -c check the DNS for a matching A record for the host.
17 * To generate meaningful statistics from an HTTPD log file, it's good
18 * to have the domain name of each machine that accessed your site, but
19 * doing this on the fly can slow HTTPD down.
21 * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
22 * resolution off. Before running your stats program, just run your log
23 * file through this program (logresolve) and all of your IP numbers will
24 * be resolved into hostnames (where possible).
26 * logresolve takes an HTTPD access log (in the COMMON log file format,
27 * or any other format that has the IP number/domain name as the first
28 * field for that matter), and outputs the same file with all of the
29 * domain names looked up. Where no domain name can be found, the IP number is left unchanged.
32 * To minimize impact on your nameserver, logresolve has its very own
33 * internal hash-table cache. This means that each IP number will only
34 * be looked up the first time it is found in the log file.
36 * The -c option causes logresolve to apply the same check as httpd
37 * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
38 * address, it looks up the IP addresses for the hostname and checks
39 * that one of these matches the original address.
42 #include "ap_config.h"
46 #if !defined(MPE) && !defined(BEOS) && !defined(WIN32)
47 #include <arpa/inet.h>
50 static void cgethost(struct in_addr ipnum, char *string, int check);
51 static int getline(char *s, int n);
52 static void stats(FILE *output);
55 #define NO_ADDRESS NO_DATA
59 /* maximum line length */
62 /* maximum length of a domain name */
67 /* number of buckets in cache hash table */
70 #if defined(NEED_STRDUP)
/*
 * strdup - fallback implementation, compiled only when NEED_STRDUP is
 * defined (see the enclosing #if).
 *
 *   str - NUL-terminated string to duplicate
 *
 * NOTE(review): the return statements and the allocation-failure path are
 * not visible in this excerpt; confirm them against the full file.
 */
71 char *strdup (const char *str)
/* Allocate strlen(str) + 1 bytes so the terminating NUL fits as well. */
75 if (!(dup = (char *) malloc(strlen(str) + 1)))
/* strcpy returns its destination, so dup still points at the new buffer. */
77 dup = strcpy(dup, str);
84 * struct nsrec - record of nameservice for cache linked list
86 * ipnum - IP number; hostname - hostname; noname - nonzero if the IP number
87 * has no hostname, i.e. hostname=IP number
98 * statistics - obvious
102 extern int h_errno; /* some machines don't have this in their headers */
105 /* largest value for h_errno */
/*
 * UNKNOWN_ERR and NO_REVERSE are two program-specific codes placed directly
 * above MAX_ERR, so they can share the errors[] array with the h_errno values.
 */
107 #define MAX_ERR (NO_ADDRESS)
108 #define UNKNOWN_ERR (MAX_ERR+1)
109 #define NO_REVERSE (MAX_ERR+2)
/*
 * Run counters. stats() prints them under the labels Cache hits, Cache size,
 * Entries, Resolves and With name respectively.
 * NOTE(review): most of the statements that increment them are not visible
 * in this excerpt.
 */
111 static int cachehits = 0;
112 static int cachesize = 0;
113 static int entries = 0;
114 static int resolves = 0;
115 static int withname = 0;
/* One counter per error code: valid indices are 0 .. MAX_ERR + 2 (NO_REVERSE). */
116 static int errors[MAX_ERR + 3];
119 * cgethost - gets hostname by IP address, caching, and adding unresolvable
120 * IP numbers with their IP number as hostname, setting noname flag
/*
 * cgethost - resolve ipnum to a host name through the nscache hash table
 * and copy the result into string.
 *
 *   ipnum  - address to resolve; its s_addr value is the cache key
 *   string - output buffer; receives at most MAXDNAME characters followed
 *            by a terminating NUL, so it must hold MAXDNAME + 1 bytes
 *   check  - NOTE(review): presumably enables the forward-lookup
 *            verification below; the statement that tests it is not
 *            visible in this excerpt
 */
123 static void cgethost (struct in_addr ipnum, char *string, int check)
125 struct nsrec **current, *new;
126 struct hostent *hostdata;
/*
 * Select the bucket: add the address to its 8-, 16- and 24-bit right
 * shifts and reduce the sum modulo BUCKETS.
 */
129 current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
130 (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
/*
 * Walk the bucket chain until a record with the same address is found or
 * the end of the chain is reached. current addresses the link itself, so a
 * new record can be attached through it.
 */
132 while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
133 current = &(*current)->next;
/* Cache miss: no record for this address yet, so build one. */
135 if (*current == NULL) {
137 new = (struct nsrec *) malloc(sizeof(struct nsrec));
140 fprintf(stderr, "Insufficient memory\n");
/* Reverse lookup: address to host name. */
148 hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
/*
 * Reverse lookup failed: h_errno values above MAX_ERR are counted under
 * UNKNOWN_ERR, the h_errno value is kept in noname, and the dotted-quad text
 * of the address is used as the name.
 */
150 if (hostdata == NULL) {
151 if (h_errno > MAX_ERR)
152 errors[UNKNOWN_ERR]++;
155 new->noname = h_errno;
156 name = strdup(inet_ntoa(ipnum));
/* Reverse lookup succeeded: keep a private copy of the returned name. */
160 name = strdup(hostdata->h_name);
164 fprintf(stderr, "Insufficient memory\n");
/*
 * Forward lookup of that name, then a scan of its address list for an
 * entry equal to ipnum.
 */
167 hostdata = gethostbyname(name);
168 if (hostdata != NULL) {
171 for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
172 if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
/*
 * Verification failed: report it on stderr, mark the record NO_REVERSE,
 * fall back to the dotted-quad text and count the failure.
 * NOTE(review): how a list without a matching address leads to
 * hostdata == NULL is not visible in this excerpt.
 */
177 if (hostdata == NULL) {
178 fprintf(stderr, "Bad host: %s != %s\n", name,
180 new->noname = NO_REVERSE;
182 name = strdup(inet_ntoa(ipnum));
183 errors[NO_REVERSE]++;
/* The record takes over the duplicated name; NULL here means strdup failed. */
187 new->hostname = name;
188 if (new->hostname == NULL) {
190 fprintf(stderr, "Insufficient memory\n");
/*
 * Hand the cached name back to the caller. strncpy copies at most MAXDNAME
 * characters and does not terminate on truncation, hence the explicit NUL
 * stored at index MAXDNAME.
 */
197 /* size of string == MAXDNAME +1 */
198 strncpy(string, (*current)->hostname, MAXDNAME);
199 string[MAXDNAME] = '\0';
203 * prints various statistics to output
/*
 * stats - write the run counters and a dump of the name cache to output.
 *
 *   output - stream that receives the report
 */
206 static void stats (FILE *output)
210 struct nsrec *current;
211 char *errstring[MAX_ERR + 3];
/*
 * Message table indexed by error code: every slot starts with the generic
 * text, then the known codes are overridden. If NO_ADDRESS and NO_DATA
 * share a value, the later assignment wins for that slot.
 */
213 for (i = 0; i < MAX_ERR + 3; i++)
214 errstring[i] = "Unknown error";
215 errstring[HOST_NOT_FOUND] = "Host not found";
216 errstring[TRY_AGAIN] = "Try again";
217 errstring[NO_RECOVERY] = "Non recoverable error";
218 errstring[NO_DATA] = "No data record";
219 errstring[NO_ADDRESS] = "No address";
220 errstring[NO_REVERSE] = "No reverse entry";
/* Summary counters. */
222 fprintf(output, "logresolve Statistics:\n");
224 fprintf(output, "Entries: %d\n", entries);
225 fprintf(output, " With name : %d\n", withname);
226 fprintf(output, " Resolves : %d\n", resolves);
/* Per-error breakdown; each visible guard prints its line only when the counter is nonzero. */
227 if (errors[HOST_NOT_FOUND])
228 fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
229 if (errors[TRY_AGAIN])
230 fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
232 fprintf(output, " - No data : %d\n", errors[NO_DATA]);
233 if (errors[NO_ADDRESS])
234 fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
235 if (errors[NO_REVERSE])
236 fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
237 fprintf(output, "Cache hits : %d\n", cachehits);
238 fprintf(output, "Cache size : %d\n", cachesize);
239 fprintf(output, "Cache buckets : IP number * hostname\n");
/* Dump every cached record, bucket by bucket, following each chain. */
241 for (i = 0; i < BUCKETS; i++)
242 for (current = nscache[i]; current != NULL; current = current->next) {
243 ipstring = inet_ntoa(current->ipnum);
/*
 * noname == 0 selects the plain name line. A code beyond the errstring
 * table (above MAX_ERR + 2) gets the generic text; any other code indexes
 * errstring directly.
 */
244 if (current->noname == 0)
245 fprintf(output, " %3d %15s - %s\n", i, ipstring,
248 if (current->noname > MAX_ERR + 2)
249 fprintf(output, " %3d %15s : Unknown error\n", i,
252 fprintf(output, " %3d %15s : %s\n", i, ipstring,
253 errstring[current->noname]);
260 * gets a line from stdin
/*
 * getline - read one line from stdin into s.
 *
 *   s - destination buffer
 *   n - size of s in bytes; fgets stores at most n - 1 characters plus a NUL
 *
 * NOTE(review): the return statements are not visible in this excerpt. The
 * caller uses the result as a loop condition, so presumably zero means end
 * of input or a read error and nonzero means a line was read.
 * NOTE(review): this name collides with the POSIX getline function declared
 * in stdio.h on many systems; confirm the build does not expose that
 * declaration.
 */
263 static int getline (char *s, int n)
267 if (!fgets(s, n, stdin))
/*
 * Locate the newline that fgets keeps at the end of a complete line.
 * NOTE(review): what is done with cp is not visible here (presumably the
 * newline is removed).
 */
269 cp = strchr(s, '\n');
275 int main (int argc, char *argv[])
277 struct in_addr ipnum;
278 char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
282 /* If we apr'ify this code, ap_create_pool/ap_destroy_pool
283 * should perform the WSAStartup/WSACleanup for us.
286 WSAStartup(0x101, &wsaData);
291 for (i = 1; i < argc; i++) {
292 if (strcmp(argv[i], "-c") == 0)
294 else if (strcmp(argv[i], "-s") == 0) {
296 fprintf(stderr, "logresolve: missing filename to -s\n");
303 fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
308 for (i = 0; i < BUCKETS; i++)
310 for (i = 0; i < MAX_ERR + 2; i++)
313 while (getline(line, MAXLINE)) {
317 if (!isdigit(line[0])) { /* short cut */
322 bar = strchr(line, ' ');
325 ipnum.s_addr = inet_addr(line);
326 if (ipnum.s_addr == 0xffffffffu) {
336 cgethost(ipnum, hoststring, check);
338 printf("%s %s\n", hoststring, bar + 1);
347 if (statfile != NULL) {
349 fp = fopen(statfile, "w");
351 fprintf(stderr, "logresolve: could not open statistics file '%s'\n"