2 # ====================================================================
3 # The Apache Software License, Version 1.1
5 # Copyright (c) 2000 The Apache Software Foundation. All rights
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions
12 # 1. Redistributions of source code must retain the above copyright
13 # notice, this list of conditions and the following disclaimer.
15 # 2. Redistributions in binary form must reproduce the above copyright
16 # notice, this list of conditions and the following disclaimer in
17 # the documentation and/or other materials provided with the
20 # 3. The end-user documentation included with the redistribution,
21 # if any, must include the following acknowledgment:
22 # "This product includes software developed by the
23 # Apache Software Foundation (http://www.apache.org/)."
24 # Alternately, this acknowledgment may appear in the software itself,
25 # if and wherever such third-party acknowledgments normally appear.
27 # 4. The names "Apache" and "Apache Software Foundation" must
28 # not be used to endorse or promote products derived from this
29 # software without prior written permission. For written
30 # permission, please contact apache@apache.org.
32 # 5. Products derived from this software may not be called "Apache",
33 # nor may "Apache" appear in their name, without prior written
34 # permission of the Apache Software Foundation.
36 # THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37 # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38 # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39 # DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40 # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43 # USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46 # OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 # ====================================================================
50 # This software consists of voluntary contributions made by many
51 # individuals on behalf of the Apache Software Foundation. For more
52 # information on the Apache Software Foundation, please see
53 # <http://www.apache.org/>.
57 # v 1.2 by robh @ imdb.com
59 # usage: logresolve.pl <infile >outfile
61 # input = Apache/NCSA/.. logfile with IP numbers at start of lines
62 # output = same logfile with IP addresses resolved to hostnames where
63 # name lookups succeeded.
65 # this differs from the C based 'logresolve' in that this script
66 # spawns a number ($CHILDREN) of subprocesses to resolve addresses
67 # concurrently and sets a short timeout ($TIMEOUT) for each lookup in
68 # order to keep things moving quickly.
70 # the parent process handles caching of IP->hostnames using a Perl hash
71 # it also avoids sending the same IP to multiple child processes to be
72 # resolved multiple times concurrently.
74 # Depending on the settings of $CHILDREN and $TIMEOUT you should see
75 # significant reductions in the overall time taken to resolve your
76 # logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
77 # logfile lines processed per hour compared to ~45,000 per hour
80 # I haven't yet seen any noticable reduction in the percentage of IPs
81 # that fail to get resolved. Your mileage will no doubt vary. 5s is long
84 # Known to work with FreeBSD 2.2
85 # Known to have problems with Solaris
87 # 980417 - use 'sockaddr_un' for bind/connect to make the script work
88 # with linux. Fix from Luuk de Boer <luuk_de_boer@pi.net>
100 use vars qw($PROTOCOL);
111 for (my $child = 1; $child <=$CHILDREN; $child++) {
114 $filename = "./.socket.$parent.$child";
115 if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";}
125 ## remove all temporary files before shutting down
129 for (my $child = 1; $child <=$CHILDREN; $child++) {
130 if (-e "./.socket.$parent.$child") {
131 unlink("./.socket.$parent.$child")
132 || warn ".socket.$parent.$child $!";
138 # Trap some possible signals to trigger temp file cleanup
139 $SIG{'KILL'} = $SIG{'INT'} = $SIG{'PIPE'} = \&cleanup;
144 ## fork child processes. Each child will create a socket connection
145 ## to this parent and use an unique temp filename to do so.
146 for (my $child = 1; $child <=$CHILDREN; $child++) {
147 $CHILDSOCK{$child}= FileHandle->new;
149 if (!socket($CHILDSOCK{$child}, AF_UNIX, SOCK_STREAM, $PROTOCOL)) {
150 warn "parent socket to child failed $!";
152 $filename = "./.socket.$parent.$child";
155 $response = connect($CHILDSOCK{$child}, sockaddr_un($filename));
156 if ($response != 1) {
159 } while ($response != 1);
160 $CHILDSOCK{$child}->autoflush;
162 ## All child processes should now be ready or at worst warming up
164 my (@buffer, $child, $ip, $rest, $hostname, $response);
165 ## read the logfile lines from STDIN
167 @buffer = (); # empty the logfile line buffer array.
168 $child = 1; # children are numbered 1..N, start with #1
170 # while we have a child to talk to and data to give it..
172 push(@buffer, $_); # buffer the line
173 ($ip, $rest) = split(/ /, $_, 2); # separate IP form rest
175 unless ($hash{$ip}) { # resolve if unseen IP
176 $CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
177 $hash{$ip} = $ip; # don't look it up again.
180 } while (($child < ($CHILDREN-1)) and ($_ = <STDIN>));
182 ## now poll each child for a response
183 while (--$child > 0) {
184 $response = $CHILDSOCK{$child}->getline;
186 # child sends us back both the IP and HOSTNAME, no need for us
187 # to remember what child received any given IP, and no worries
188 # what order we talk to the children
189 ($ip, $hostname) = split(/\|/, $response, 2);
190 $hash{$ip} = $hostname;
193 # resolve all the logfiles lines held in the log buffer array..
194 for (my $line = 0; $line <=$#buffer; $line++) {
195 # get next buffered line
196 ($ip, $rest) = split(/ /, $buffer[$line], 2);
197 # separate IP from rest and replace with cached hostname
198 printf STDOUT ("%s %s", $hash{$ip}, $rest);
203 ########################################
206 # arg = numeric ID - how the parent refers to me
209 # add trap for alarm signals.
210 $SIG{'ALRM'} = sub { die "alarmed"; };
212 # create a socket to communicate with parent
213 socket(INBOUND, AF_UNIX, SOCK_STREAM, $PROTOCOL)
214 || die "Error with Socket: !$\n";
215 $filename = "./.socket.$parent.$me";
216 bind(INBOUND, sockaddr_un($filename))
217 || die "Error Binding $filename: $!\n";
218 listen(INBOUND, 5) || die "Error Listening: $!\n";
220 my ($ip, $send_back);
221 my $talk = FileHandle->new;
223 # accept a connection from the parent process. We only ever have
224 # have one connection where we exchange 1 line of info with the
225 # parent.. 1 line in (IP address), 1 line out (IP + hostname).
226 accept($talk, INBOUND) || die "Error Accepting: $!\n";
227 # disable I/O buffering just in case
229 # while the parent keeps sending data, we keep responding..
230 while(($ip = $talk->getline)) {
232 # resolve the IP if time permits and send back what we found..
233 $send_back = sprintf("%s|%s", $ip, &nslookup($ip));
234 $talk->print($send_back."\n");
238 # perform a time restricted hostname lookup.
240 # get the IP as an arg
242 my $hostname = undef;
244 # do the hostname lookup inside an eval. The eval will use the
245 # already configured SIGnal handler and drop out of the {} block
246 # regardless of whether the alarm occured or not.
249 $hostname = gethostbyaddr(gethostbyname($ip), AF_INET);
253 # useful for debugging perhaps..
254 # print "alarming, isn't it? ($ip)";
257 # return the hostname or the IP address itself if there is no hostname
258 $hostname ne "" ? $hostname : $ip;