1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
21 * UUNET Canada, April 16, 1995
23 * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
24 * Rewritten again, and ported to APR by Colm MacCarthaigh
26 * Usage: logresolve [-s filename] [-c] < access_log > new_log
29 * -s filename name of a file to record statistics
30 * -c check the DNS for a matching A record for the host.
32 * Notes: (For historical interest)
34 * To generate meaningful statistics from an HTTPD log file, it's good
35 * to have the domain name of each machine that accessed your site, but
36 * doing this on the fly can slow HTTPD down.
38 * Compiling NCSA HTTPD with the -DMINIMAL_DNS flag turns IP#->hostname
39 * resolution off. Before running your stats program, just run your log
40 * file through this program (logresolve) and all of your IP numbers will
41 * be resolved into hostnames (where possible).
43 * logresolve takes an HTTPD access log (in the COMMON log file format,
44 * or any other format that has the IP number/domain name as the first
45 * field for that matter), and outputs the same file with all of the
46 * domain names looked up. Where no domain name can be found, the IP number is left unchanged.
49 * To minimize impact on your nameserver, logresolve has its very own
50 * internal hash-table cache. This means that each IP number will only
51 * be looked up the first time it is found in the log file.
53 * The -c option causes logresolve to apply the same check as httpd
54 * compiled with -DMAXIMUM_DNS; after finding the hostname from the IP
55 * address, it looks up the IP addresses for the hostname and checks
56 * that one of these matches the original address.
62 #include "apr_getopt.h"
63 #include "apr_strings.h"
64 #include "apr_file_io.h"
65 #include "apr_network_io.h"
/*
 * Sizes, in bytes, of the input file buffer, the output file buffer and
 * the per-line scratch buffer (128 KiB each).
 *
 * Each expansion is parenthesized so that the macro behaves as a single
 * value inside larger expressions (e.g. `total / READ_BUF_SIZE`).
 */
#define READ_BUF_SIZE  (128 * 1024)
#define WRITE_BUF_SIZE (128 * 1024)
#define LINE_BUF_SIZE  (128 * 1024)
75 static apr_file_t *errfile;
76 static const char *shortname = "logresolve";
77 static apr_hash_t *cache;
80 static int cachehits = 0;
81 static int cachesize = 0;
82 static int entries = 0;
83 static int resolves = 0;
84 static int withname = 0;
85 static int doublefailed = 0;
86 static int noreverse = 0;
89 * prints various statistics to output
91 #define NL APR_EOL_STR
92 static void print_statistics (apr_file_t *output)
94 apr_file_printf(output, "logresolve Statistics:" NL);
95 apr_file_printf(output, "Entries: %d" NL, entries);
96 apr_file_printf(output, " With name : %d" NL, withname);
97 apr_file_printf(output, " Resolves : %d" NL, resolves);
100 apr_file_printf(output, " - No reverse : %d" NL,
105 apr_file_printf(output, " - Double lookup failed : %d" NL,
109 apr_file_printf(output, "Cache hits : %d" NL, cachehits);
110 apr_file_printf(output, "Cache size : %d" NL, cachesize);
116 static void usage(void)
118 apr_file_printf(errfile,
119 "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
120 "Usage: %s [-s STATFILE] [-c]" NL
123 " -s Record statistics to STATFILE when finished." NL
125 " -c Perform double lookups when resolving IP addresses." NL,
126 shortname, shortname);
131 int main(int argc, const char * const argv[])
133 apr_file_t * outfile;
144 int doublelookups = 0;
146 if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
149 atexit(apr_terminate);
152 shortname = apr_filepath_name_get(argv[0]);
155 if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
158 apr_file_open_stderr(&errfile, pool);
159 apr_getopt_init(&o, pool, argc, argv);
163 status = apr_getopt(o, "s:c", &opt, &arg);
164 if (status == APR_EOF) {
167 else if (status != APR_SUCCESS) {
182 stats = apr_pstrdup(pool, arg);
188 apr_file_open_stdout(&outfile, pool);
189 apr_file_open_stdin(&infile, pool);
191 /* Allocate the output, input and line buffers (128 KiB each) */
192 if ( (outbuffer = apr_palloc(pool, WRITE_BUF_SIZE)) == NULL
193 || (inbuffer = apr_palloc(pool, READ_BUF_SIZE)) == NULL
194 || (line = apr_palloc(pool, LINE_BUF_SIZE)) == NULL) {
198 /* Set the buffers */
199 apr_file_buffer_set(infile, inbuffer, READ_BUF_SIZE);
200 apr_file_buffer_set(outfile, outbuffer, WRITE_BUF_SIZE);
202 cache = apr_hash_make(pool);
203 if (apr_pool_create(&pline, pool) != APR_SUCCESS) {
207 while (apr_file_gets(line, LINE_BUF_SIZE, infile) == APR_SUCCESS) {
211 apr_sockaddr_t *ipdouble;
212 char dummy[] = " " APR_EOL_STR;
214 if (line[0] == '\0') {
218 /* Count our log entries */
221 /* Check if this could even be an IP address */
222 if (!apr_isxdigit(line[0]) && line[0] != ':') {
224 apr_file_puts(line, outfile);
228 /* Terminate the line at the next space */
229 if ((space = strchr(line, ' ')) != NULL) {
236 /* See if we have it in our cache */
237 hostname = (char *) apr_hash_get(cache, line, APR_HASH_KEY_STRING);
239 apr_file_printf(outfile, "%s %s", hostname, space + 1);
244 /* Parse the IP address */
245 status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC, 0, 0, pline);
246 if (status != APR_SUCCESS) {
247 /* Not an IP address */
250 apr_file_puts(line, outfile);
254 /* This does not make much sense, but historically "resolves" means
255 * "parsed as an IP address". It does not mean we actually resolved
256 * the IP address into a hostname.
260 /* From here on we cache each result, even if it was not
265 /* Try and perform a reverse lookup */
266 status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
267 if (status || hostname == NULL) {
268 /* Could not perform a reverse lookup */
270 apr_file_puts(line, outfile);
275 apr_hash_set(cache, line, APR_HASH_KEY_STRING,
276 apr_pstrdup(apr_hash_pool_get(cache), line));
280 /* Perform a double lookup */
282 /* Do a forward lookup on our hostname, and see if that matches our
283 * original IP address.
285 status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
287 if (status != APR_SUCCESS ||
288 memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr, ip->ipaddr_len)) {
289 /* Double-lookup failed */
291 apr_file_puts(line, outfile);
296 apr_hash_set(cache, line, APR_HASH_KEY_STRING,
297 apr_pstrdup(apr_hash_pool_get(cache), line));
302 /* Output the resolved name */
303 apr_file_printf(outfile, "%s %s", hostname, space + 1);
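305 /* Store it in the cache. NOTE(review): apr_hash_set() stores the key pointer as-is and `line` is the buffer reused for every read, so all entries end up sharing one key; the key likely needs its own apr_pstrdup() copy -- confirm. */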
306 apr_hash_set(cache, line, APR_HASH_KEY_STRING,
307 apr_pstrdup(apr_hash_pool_get(cache), hostname));
309 apr_pool_clear(pline);
312 /* Flush any remaining output */
313 apr_file_flush(outfile);
316 apr_file_t *statsfile;
317 if (apr_file_open(&statsfile, stats,
318 APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
319 APR_OS_DEFAULT, pool) != APR_SUCCESS) {
320 apr_file_printf(errfile, "%s: Could not open %s for writing.",
324 print_statistics(statsfile);
325 apr_file_close(statsfile);