3 # Copyright 2001-2004 The Apache Software Foundation
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 # v 1.2 by robh imdb.com
22 # usage: logresolve.pl <infile >outfile
24 # input = Apache/NCSA/.. logfile with IP numbers at start of lines
25 # output = same logfile with IP addresses resolved to hostnames where
26 # name lookups succeeded.
28 # this differs from the C based 'logresolve' in that this script
29 # spawns a number ($CHILDREN) of subprocesses to resolve addresses
30 # concurrently and sets a short timeout ($TIMEOUT) for each lookup in
31 # order to keep things moving quickly.
33 # the parent process handles caching of IP->hostnames using a Perl hash
34 # it also avoids sending the same IP to multiple child processes to be
35 # resolved multiple times concurrently.
37 # Depending on the settings of $CHILDREN and $TIMEOUT you should see
38 # significant reductions in the overall time taken to resolve your
39 # logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
40 # logfile lines processed per hour compared to ~45,000 per hour
43 # I haven't yet seen any noticable reduction in the percentage of IPs
44 # that fail to get resolved. Your mileage will no doubt vary. 5s is long
47 # Known to work with FreeBSD 2.2
48 # Known to have problems with Solaris
50 # 980417 - use 'sockaddr_un' for bind/connect to make the script work
51 # with linux. Fix from Luuk de Boer <luuk_de_boer pi.net>
63 use vars qw($PROTOCOL);
74 for (my $child = 1; $child <=$CHILDREN; $child++) {
77 $filename = "./.socket.$parent.$child";
78 if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";}
88 ## remove all temporary files before shutting down
92 for (my $child = 1; $child <=$CHILDREN; $child++) {
93 if (-e "./.socket.$parent.$child") {
94 unlink("./.socket.$parent.$child")
95 || warn ".socket.$parent.$child $!";
101 # Trap some possible signals to trigger temp file cleanup
102 $SIG{'KILL'} = $SIG{'INT'} = $SIG{'PIPE'} = \&cleanup;
107 ## fork child processes. Each child will create a socket connection
108 ## to this parent and use an unique temp filename to do so.
109 for (my $child = 1; $child <=$CHILDREN; $child++) {
110 $CHILDSOCK{$child}= FileHandle->new;
112 if (!socket($CHILDSOCK{$child}, AF_UNIX, SOCK_STREAM, $PROTOCOL)) {
113 warn "parent socket to child failed $!";
115 $filename = "./.socket.$parent.$child";
118 $response = connect($CHILDSOCK{$child}, sockaddr_un($filename));
119 if ($response != 1) {
122 } while ($response != 1);
123 $CHILDSOCK{$child}->autoflush;
125 ## All child processes should now be ready or at worst warming up
127 my (@buffer, $child, $ip, $rest, $hostname, $response);
128 ## read the logfile lines from STDIN
130 @buffer = (); # empty the logfile line buffer array.
131 $child = 1; # children are numbered 1..N, start with #1
133 # while we have a child to talk to and data to give it..
135 push(@buffer, $_); # buffer the line
136 ($ip, $rest) = split(/ /, $_, 2); # separate IP form rest
138 unless ($hash{$ip}) { # resolve if unseen IP
139 $CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
140 $hash{$ip} = $ip; # don't look it up again.
143 } while (($child < ($CHILDREN-1)) and ($_ = <STDIN>));
145 ## now poll each child for a response
146 while (--$child > 0) {
147 $response = $CHILDSOCK{$child}->getline;
149 # child sends us back both the IP and HOSTNAME, no need for us
150 # to remember what child received any given IP, and no worries
151 # what order we talk to the children
152 ($ip, $hostname) = split(/\|/, $response, 2);
153 $hash{$ip} = $hostname;
156 # resolve all the logfiles lines held in the log buffer array..
157 for (my $line = 0; $line <=$#buffer; $line++) {
158 # get next buffered line
159 ($ip, $rest) = split(/ /, $buffer[$line], 2);
160 # separate IP from rest and replace with cached hostname
161 printf STDOUT ("%s %s", $hash{$ip}, $rest);
166 ########################################
169 # arg = numeric ID - how the parent refers to me
172 # add trap for alarm signals.
173 $SIG{'ALRM'} = sub { die "alarmed"; };
175 # create a socket to communicate with parent
176 socket(INBOUND, AF_UNIX, SOCK_STREAM, $PROTOCOL)
177 || die "Error with Socket: !$\n";
178 $filename = "./.socket.$parent.$me";
179 bind(INBOUND, sockaddr_un($filename))
180 || die "Error Binding $filename: $!\n";
181 listen(INBOUND, 5) || die "Error Listening: $!\n";
183 my ($ip, $send_back);
184 my $talk = FileHandle->new;
186 # accept a connection from the parent process. We only ever have
187 # have one connection where we exchange 1 line of info with the
188 # parent.. 1 line in (IP address), 1 line out (IP + hostname).
189 accept($talk, INBOUND) || die "Error Accepting: $!\n";
190 # disable I/O buffering just in case
192 # while the parent keeps sending data, we keep responding..
193 while(($ip = $talk->getline)) {
195 # resolve the IP if time permits and send back what we found..
196 $send_back = sprintf("%s|%s", $ip, &nslookup($ip));
197 $talk->print($send_back."\n");
201 # perform a time restricted hostname lookup.
203 # get the IP as an arg
205 my $hostname = undef;
207 # do the hostname lookup inside an eval. The eval will use the
208 # already configured SIGnal handler and drop out of the {} block
209 # regardless of whether the alarm occured or not.
212 $hostname = gethostbyaddr(gethostbyname($ip), AF_INET);
216 # useful for debugging perhaps..
217 # print "alarming, isn't it? ($ip)";
220 # return the hostname or the IP address itself if there is no hostname
221 $hostname ne "" ? $hostname : $ip;