my $PID_DIR = $TMP_DIR;
my $PID_FILE = undef;
+# Factors used to estimate the total uncompressed size of a file
+# when the real size cannot be obtained (bz2 or remote files)
+my $BZ_FACTOR = 25;
+my $GZ_FACTOR = 15;
+my $XZ_FACTOR = 18;
+
my @E2A = (
0, 1, 2, 3,156, 9,134,127,151,141,142, 11, 12, 13, 14, 15,
16, 17, 18, 19,157, 10, 8,135, 24, 25,146,143, 28, 29, 30, 31,
# Read list of log file to parse from a file
if ($logfile_list) {
- if (!-e $logfile_list) {
+ if (!-e $logfile_list)
+ {
localdie("FATAL: logfile list $logfile_list must exist!\n");
}
my $in = undef;
- if (not open($in, "<", $logfile_list)) {
+ if (not open($in, "<", $logfile_list))
+ {
localdie("FATAL: can not read logfile list $logfile_list, $!.\n");
}
my @files = <$in>;
close($in);
- foreach my $file (@files) {
+ foreach my $file (@files)
+ {
chomp($file);
$file =~ s/\r//;
- if ($file eq '-') {
+ if ($file eq '-')
+ {
localdie("FATAL: stdin input - can not be used with logfile list.\n");
}
push(@log_files, &set_file_list($file));
{
my @tmpfilelist = ();
# Removed files that have already been parsed during previous runs
- foreach my $f (@given_log_files) {
- if ($f eq '-') {
+ foreach my $f (@given_log_files)
+ {
+ if ($f eq '-')
+ {
&logmsg('DEBUG', "waiting for log entries from stdin.");
$saved_last_line{current_pos} = 0;
push(@tmpfilelist, $f);
- } elsif ( $journalctl_cmd && ($f eq $journalctl_cmd) ) {
+ }
+ elsif ($f =~ /\.bin$/)
+ {
+ &logmsg('DEBUG', "binary file as input, there is no log to parse.");
+ $saved_last_line{current_pos} = 0;
+ push(@tmpfilelist, $f);
+ }
+ elsif ( $journalctl_cmd && ($f eq $journalctl_cmd) )
+ {
my $since = '';
- if ( ($journalctl_cmd !~ /--since|-S/) && ($saved_last_line{datetime} =~ /^(\d+)-(\d+)-(\d+).(\d+):(\d+):(\d+)/) ) {
+ if ( ($journalctl_cmd !~ /--since|-S/) &&
+ ($saved_last_line{datetime} =~ /^(\d+)-(\d+)-(\d+).(\d+):(\d+):(\d+)/) )
+ {
$since = " --since='$1-$2-$3 $4:$5:$6'";
}
&logmsg('DEBUG', "journalctl call will start since: $saved_last_line{datetime}");
push(@tmpfilelist, "$f $since");
-
- } else {
-
+ }
+ else
+ {
# Auto detect log format for proper parsing
my $fmt = autodetect_format($f);
# Set regex to parse the log file
$fmt = set_parser_regex($fmt);
- if (($fmt ne 'pgbouncer') && ($saved_last_line{current_pos} > 0)) {
+ if (($fmt ne 'pgbouncer') && ($saved_last_line{current_pos} > 0))
+ {
my ($retcode, $msg) = &check_file_changed($f, $file_size{$f}, $fmt, $saved_last_line{datetime}, $saved_last_line{current_pos});
- if (!$retcode) {
+ if (!$retcode)
+ {
&logmsg('DEBUG', "this file has already been parsed: $f, $msg");
- } else {
+ }
+ else
+ {
push(@tmpfilelist, $f);
}
- } elsif (($fmt eq 'pgbouncer') && ($pgb_saved_last_line{current_pos} > 0)) {
+ }
+ elsif (($fmt eq 'pgbouncer') && ($pgb_saved_last_line{current_pos} > 0))
+ {
my ($retcode, $msg) = &check_file_changed($f, $file_size{$f}, $fmt, $pgb_saved_last_line{datetime}, $pgb_saved_last_line{current_pos});
- if (!$retcode) {
+ if (!$retcode)
+ {
&logmsg('DEBUG', "this file has already been parsed: $f, $msg");
- } else {
+ }
+ else
+ {
push(@tmpfilelist, $f);
}
- } else {
+ }
+ else
+ {
push(@tmpfilelist, $f);
}
}
$writer->autoflush(1);
# Fork the logger process
-if ($progress) {
- spawn sub {
+if ($progress)
+{
+ spawn sub
+ {
&multiprocess_progressbar($global_totalsize);
};
}
{
# Confirm if we can use multiprocess for this file
my $pstatus = confirm_multiprocess($logfile);
- if ($pstatus >= 0) {
- if ($pstatus = 1 && $job_per_file > 1) {
+ if ($pstatus >= 0)
+ {
+ if ($pstatus == 1 && $job_per_file > 1)
+ {
$parallel_process = $job_per_file;
- } else {
+ }
+ else
+ {
$parallel_process = $queue_size;
}
- } else {
+ }
+ else
+ {
$parallel_process = 1;
}
# Wait until a child dies if max parallel processes is reach
- while ($child_count >= $parallel_process) {
+ while ($child_count >= $parallel_process)
+ {
my $kid = waitpid(-1, WNOHANG);
- if ($kid > 0) {
+ if ($kid > 0)
+ {
$child_count--;
delete $RUNNING_PIDS{$kid};
}
# Get log format of the current file
my $fmt = $format || 'stderr';
my $logfile_orig = $logfile;
- if ($logfile ne '-' && !$journalctl_cmd) {
+ if ($logfile ne '-' && !$journalctl_cmd)
+ {
$fmt = &autodetect_format($logfile, $file_size{$logfile});
$fmt ||= $format;
# Remove log format from filename if any
$logfile =~ s/:(stderr|csv|syslog|pgbouncer)\d*$//i;
&logmsg('DEBUG', "pgBadger will use log format $fmt to parse $logfile.");
- } else {
+ }
+ else
+ {
&logmsg('DEBUG', "Can not autodetect log format, assuming $fmt.");
}
# Do not use split method with remote and compressed files, stdin or journalctl command
if ( ($parallel_process > 1) && ($queue_size > 1) &&
- ($logfile !~ /\.(gz|bz2|zip|xz)$/i) && ($logfile ne '-') &&
+ ($logfile !~ /\.(gz|bz2|zip|xz|bin)$/i) && ($logfile ne '-') &&
($logfile !~ /^(http[s]*|ftp[s]*|ssh):/i) &&
(!$journalctl_cmd || ($logfile !~ /\Q$journalctl_cmd\E/))
- ) {
+ )
+ {
# Create multiple processes to parse one log file by chunks of data
my @chunks = split_logfile($logfile, $file_size{$logfile_orig}, ($fmt eq 'pgbouncer') ? $pgb_saved_last_line{current_pos} : $saved_last_line{current_pos});
&logmsg('DEBUG', "The following boundaries will be used to parse file $logfile, " . join('|', @chunks));
- for (my $i = 0; $i < $#chunks; $i++) {
- while ($child_count >= $parallel_process) {
+ for (my $i = 0; $i < $#chunks; $i++)
+ {
+ while ($child_count >= $parallel_process)
+ {
my $kid = waitpid(-1, WNOHANG);
- if ($kid > 0) {
+ if ($kid > 0)
+ {
$child_count--;
delete $RUNNING_PIDS{$kid};
}
localdie("FATAL: Abort signal received when processing to next chunk\n") if ($interrupt == 2);
last if ($interrupt);
push(@tempfiles, [ tempfile('tmp_pgbadgerXXXX', SUFFIX => '.bin', DIR => $TMP_DIR, UNLINK => 1 ) ]);
- spawn sub {
+ spawn sub
+ {
&process_file($logfile, $file_size{$logfile_orig}, $fmt, $tempfiles[-1]->[0], $chunks[$i], $chunks[$i+1], $i);
};
$child_count++;
}
- } else {
+ }
+ else
+ {
# Start parsing one file per parallel process
push(@tempfiles, [ tempfile('tmp_pgbadgerXXXX', SUFFIX => '.bin', DIR => $TMP_DIR, UNLINK => 1 ) ]);
- spawn sub {
+ spawn sub
+ {
&process_file($logfile, $file_size{$logfile_orig}, $fmt, $tempfiles[-1]->[0], ($fmt eq 'pgbouncer') ? $pgb_saved_last_line{current_pos} : $saved_last_line{current_pos});
};
$child_count++;
my $file_orig = $file;
my $fmt = '';
# Remove log format from log file if any
- if ($file =~ s/(:(?:stderr|csv|syslog|pgbouncer)\d*)$//i) {
+ if ($file =~ s/(:(?:stderr|csv|syslog|pgbouncer|jsonlog)\d*)$//i)
+ {
$fmt = $1;
}
# Store the journalctl command as is we will create a pipe from this command
- if ( $journalctl_cmd && ($file =~ m/\Q$journalctl_cmd\E/) ) {
+ if ( $journalctl_cmd && ($file =~ m/\Q$journalctl_cmd\E/) )
+ {
push(@lfiles, $file_orig);
$empty_files = 0;
+ }
# Input from stdin
- } elsif ($file eq '-') {
- if ($logfile_list) {
+ elsif ($file eq '-')
+ {
+ if ($logfile_list)
+ {
localdie("FATAL: stdin input - can not be used with logfile list (-L).\n");
}
push(@lfiles, $file_orig);
$empty_files = 0;
+ }
# For input from other sources than stdin
- } else {
+ else
+ {
# if it is not a remote file store the file if it is not an empty file
- if (!$remote_host && $file !~ /^(http[s]*:|[s]*ftp:|ssh:)/i) {
+ if (!$remote_host && $file !~ /^(http[s]*:|[s]*ftp:|ssh:)/i)
+ {
localdie("FATAL: logfile \"$file\" must exist!\n") if (not -f $file);
- if (-z $file) {
+ if (-z $file)
+ {
print "WARNING: file $file is empty\n" if (!$quiet);
next;
}
push(@lfiles, $file_orig);
$empty_files = 0;
+ }
# if this is a remote file extract the list of files using a ssh command
- } elsif ($file !~ /^(http[s]*:|[s]*ftp:)/i) {
+ elsif ($file !~ /^(http[s]*:|[s]*ftp:)/i)
+ {
# Get files from remote host
- if ($file !~ /^ssh:/) {
+ if ($file !~ /^ssh:/)
+ {
&logmsg('DEBUG', "Looking for remote filename using command: $remote_command \"ls $file\"");
my @rfiles = `$remote_command "ls $file"`;
- foreach my $f (@rfiles) {
+ foreach my $f (@rfiles)
+ {
push(@lfiles, "$f$fmt");
}
- } elsif ($file =~ m#^ssh://([^\/]+)/(.*)#) {
+ }
+ elsif ($file =~ m#^ssh://([^\/]+)/(.*)#)
+ {
my $host_info = $1;
my $file = $2;
my $ssh = $ssh_command || 'ssh';
&logmsg('DEBUG', "Looking for remote filename using command: $ssh $host_info \"ls $file\"");
my @rfiles = `$ssh $host_info "ls $file"`;
- foreach my $f (@rfiles) {
+ foreach my $f (@rfiles)
+ {
push(@lfiles, "ssh://$host_info/$f$fmt");
}
}
$empty_files = 0;
+ }
# this is remote file extracted using http/ftp protocol, store the uri
- } else {
+ else
+ {
push(@lfiles, $file_orig);
$empty_files = 0;
}
}
+
return @lfiles;
}
{
if ($progress && ($getout != 1))
{
- if (!$tmpoutfile) {
- if ($totalsize) {
+ if (!$tmpoutfile)
+ {
+ if ($totalsize)
+ {
print STDERR &progress_bar($cursize, $stop_offset || $totalsize, 25, '=',$overall_stat{'queries_number'},($overall_stat{'errors_number'}+$pgb_overall_stat{'errors_number'}), $logfile);
}
- } else {
+ }
+ else
+ {
$pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . (($overall_stat{'errors_number'}+$pgb_overall_stat{'errors_number'}) - $old_errors_count) . "\n");
}
}
- if (!$totalsize && $tmpoutfile) {
+ if (!$totalsize && $tmpoutfile)
+ {
&dump_as_binary($tmpoutfile);
$tmpoutfile->close();
}
# Reset the start position if file is smaller that the current start offset
- if ($totalsize > -1 && $start_offset > $totalsize) {
+ if ($totalsize > -1 && $start_offset > $totalsize)
+ {
&logmsg('DEBUG', "Starting offset $start_offset is greater than total size $totalsize for file $logfile");
&logmsg('DEBUG', "Reverting start offset $start_offset to 0 for file $logfile, stoppping offset is " . ($stop_offset || $totalsize));
$start_offset = 0 ;
}
# Check if the first date in the log are after the last date saved
- if (($logfile ne '-') && ($fmt ne 'binary') && ($fmt ne 'csv') && !$http_download) {
+ if (($logfile ne '-') && ($fmt ne 'binary') && ($fmt ne 'csv') && !$http_download)
+ {
if ($start_offset && !$chunk_pos) {
my ($retcode, $msg) = check_file_changed($logfile, $file_size{$logfile}, $fmt, ($fmt =~ /pgbouncer/) ? $pgb_saved_last_line{datetime} : $saved_last_line{datetime}, $start_offset, 1);
if ($retcode) {
}
$cursize = $start_offset;
}
- } else {
+ }
+ else
+ {
$start_offset = 0;
$stop_offset = 0;
}
my $is_syslog = 0;
$is_syslog = 1 if ($fmt =~ /syslog/);
- if ($stop_offset > 0) {
+ if ($stop_offset > 0)
+ {
$totalsize = $stop_offset - $start_offset;
}
my $current_offset = $start_offset || 0;
- if (!$remote_host) {
+ if (!$remote_host)
+ {
&logmsg('DEBUG', "Starting reading file $logfile...");
- } else {
+ }
+ else
+ {
&logmsg('DEBUG', "Starting reading file $remote_host:$logfile...");
}
# Parse pgbouncer logfile
- if ($fmt =~ /pgbouncer/) {
-
+ if ($fmt =~ /pgbouncer/)
+ {
my $time_pattern = qr/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/;
my $cur_pid = '';
my @matches = ();
my $has_exclusion = 0;
- if ($#exclude_line >= 0) {
+ if ($#exclude_line >= 0)
+ {
$has_exclusion = 1;
}
&logmsg('DEBUG', "Start parsing pgbouncer log at offset $start_offset of file $logfile to " . ($stop_offset || $totalsize));
- if ($start_offset) {
+ if ($start_offset)
+ {
# Move to the starting offset position in file
$lfile->seek($start_offset, 0);
}
$line =~ s/\r//;
# Start to exclude from parsing any desired lines
- if ($has_exclusion >= 0) {
-
+ if ($has_exclusion >= 0)
+ {
# Log line matches the excluded regex
map { next if ($line =~ /$_/is); } @exclude_line;
}
%prefix_vars = ();
@matches = ($line =~ $pgbouncer_log_parse1);
- if ($#matches >= 0) {
- for (my $i = 0 ; $i <= $#pgb_prefix_parse1 ; $i++) {
+ if ($#matches >= 0)
+ {
+ for (my $i = 0 ; $i <= $#pgb_prefix_parse1 ; $i++)
+ {
$prefix_vars{$pgb_prefix_parse1[$i]} = $matches[$i];
}
# Get detailled information from timestamp
- if (!$prefix_vars{'t_month'}) {
+ if (!$prefix_vars{'t_month'})
+ {
($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'},
$prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = ($prefix_vars{'t_timestamp'} =~ $time_pattern);
- } else {
-
+ }
+ else
+ {
# Standard syslog format does not have year information, months are
# three letters and days are not always with 2 digits.
- if ($prefix_vars{'t_month'} !~ /\d/) {
+ if ($prefix_vars{'t_month'} !~ /\d/)
+ {
$prefix_vars{'t_year'} = $gyear;
$prefix_vars{'t_day'} = sprintf("%02d", $prefix_vars{'t_day'});
$prefix_vars{'t_month'} = $month_abbr{$prefix_vars{'t_month'}};
"$prefix_vars{'t_year'}-$prefix_vars{'t_month'}-$prefix_vars{'t_day'} $prefix_vars{'t_hour'}:$prefix_vars{'t_min'}:$prefix_vars{'t_sec'}";
}
$prefix_vars{'t_loglevel'} = 'LOG';
- if ($prefix_vars{'t_session_id'} eq 'Stats') {
+ if ($prefix_vars{'t_session_id'} eq 'Stats')
+ {
$prefix_vars{'t_loglevel'} = 'STATS';
$prefix_vars{'t_session_id'} = '';
$prefix_vars{'t_query'} = 'Stats: ' . $prefix_vars{'t_query'};
# Skip unwanted lines
my $res = &skip_unwanted_line();
next if ($res == 1);
- if ($res == -1) {
+ if ($res == -1)
+ {
&update_progress_bar($tmpoutfile, $nlines, $stop_offset, $totalsize, \$cursize, \$old_queries_count, \$old_errors_count);
$getout = 2;
last;
&store_current_timestamp($prefix_vars{'t_timestamp'});
# Override timestamp when we have to adjust datetime to the log timezone
- if ($log_timezone) {
+ if ($log_timezone)
+ {
($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = change_timezone($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'});
$prefix_vars{'t_timestamp'} = "$prefix_vars{'t_year'}-$prefix_vars{'t_month'}-$prefix_vars{'t_day'} $prefix_vars{'t_hour'}:$prefix_vars{'t_min'}:$prefix_vars{'t_sec'}";
}
# Extract other information from the line
@matches = ($line =~ $pgbouncer_log_parse2);
- if ($#matches >= 0) {
- for (my $i = 0 ; $i <= $#pgb_prefix_parse2 ; $i++) {
+ if ($#matches >= 0)
+ {
+ for (my $i = 0 ; $i <= $#pgb_prefix_parse2 ; $i++)
+ {
$prefix_vars{$pgb_prefix_parse2[$i]} = $matches[$i];
}
$prefix_vars{'t_client'} = _gethostbyaddr($prefix_vars{'t_client'}) if ($dns_resolv && $prefix_vars{'t_client'});
- } else {
+ }
+ else
+ {
# pgBouncer Statistics appears each minutes in the log
- if ($prefix_vars{'t_query'} =~ /[Ss]tats: (\d+) req\/s, in (\d+) b\/s, out (\d+) b\/s,query (\d+) us/) {
+ if ($prefix_vars{'t_query'} =~ /[Ss]tats: (\d+) req\/s, in (\d+) b\/s, out (\d+) b\/s,query (\d+) us/)
+ {
$prefix_vars{'t_loglevel'} = 'STATS';
$prefix_vars{'t_req/s'} = $1;
$prefix_vars{'t_inbytes/s'} = $2;
$prefix_vars{'t_outbytes/s'} = $3;
$prefix_vars{'t_avgduration'} = $4;
- } elsif ($prefix_vars{'t_query'} =~ /[Ss]tats: (\d+) xacts\/s, (\d+) queries\/s, in (\d+) B\/s, out (\d+) B\/s, xact (\d+) us, query (\d+) us/) {
+ }
+ elsif ($prefix_vars{'t_query'} =~ /[Ss]tats: (\d+) xacts\/s, (\d+) queries\/s, in (\d+) B\/s, out (\d+) B\/s, xact (\d+) us, query (\d+) us/)
+ {
$prefix_vars{'t_loglevel'} = 'STATS';
$prefix_vars{'t_xact/s'} = $1;
$prefix_vars{'t_req/s'} = $2;
}
# Check if the log line should be excluded from the report
- if (&validate_log_line($prefix_vars{'t_pid'})) {
+ if (&validate_log_line($prefix_vars{'t_pid'}))
+ {
$prefix_vars{'t_host'} = 'stderr'; # this unused variable is used to store format information when log format is not syslog
# Process the log line
&parse_pgbouncer($fmt);
}
- } else {
+ }
+ else
+ {
# unknown format
&logmsg('DEBUG', "Unknown pgbouncer line format: $line");
}
}
}
-
- elsif ($fmt eq 'binary') {
-
+ elsif ($fmt eq 'binary')
+ {
&load_stats($lfile);
+ $pipe->print("$totalsize 0 0\n");
}
# Format is not CSV and in incremental mode we are not at end of the file
- else {
+ else
+ {
my $time_pattern = qr/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/;
my $cur_pid = '';
# If log format is given at end of the filename, remove it and return the format
# Ex: ssh://remotehost/postgresql-10.log:csv
- if ($file =~ s#:(syslog|csv|stderr|pgbouncer)\d*$##) {
+ if ($file =~ s#:(syslog|csv|stderr|pgbouncer)\d*$##)
+ {
&logmsg('DEBUG', "Autodetected log format '$1' from URI '$file'");
return $1;
}
- if (!$remote_host && !$http_download && !$ssh_download) {
- if (open(my $in, '<', $file)) {
+ if (!$remote_host && !$http_download && !$ssh_download)
+ {
+ if (open(my $in, '<', $file))
+ {
$fltf = <$in>;
close($in);
- } else {
+ }
+ else
+ {
localdie("FATAL: when looking for log file format, can't open file $file, $!\n");
}
}
# is file in binary format ?
- if ( $fltf =~ /^pst\d/ ) {
+ if ( $fltf =~ /^pst\d/ )
+ {
+ &logmsg('DEBUG', "found binary file with $file");
$fmt = 'binary';
}
- elsif (!$http_download) {
+ elsif (!$http_download)
+ {
# try to detect syslogs, stderr, csv, jsonlog or pgbouncer format
my $tfile = &get_log_file($file, $totalsize, $remote_host);
- if (defined $tfile) {
- while (my $line = <$tfile>) {
+ if (defined $tfile)
+ {
+ while (my $line = <$tfile>)
+ {
chomp($line);
$line =~ s/\r//;
next if (!$line);
last if (($nfound > 10) || ($nline > 5000));
}
$tfile->close();
- } else {
+ }
+ else
+ {
&logmsg('DEBUG', "Can not autodetected log format from $file, using default");
return 'default';
}
- } elsif (!$format) {
- if (!$http_download) {
+ }
+ elsif (!$format)
+ {
+ if (!$http_download)
+ {
localdie("FATAL: with http files you need to specify the log format, please use -f option.\n");
- } else {
+ }
+ else
+ {
localdie("FATAL: with http files you need to specify the log format, append it to the uri.\n");
}
}
# When --pgbouncer-only is used force the format
- if (!$format && !$fmt && $pgbouncer_only) {
+ if (!$format && !$fmt && $pgbouncer_only)
+ {
$pgbouncer_only = 1;
$fmt = 'pgbouncer';
- } elsif (!$format) {
- if (!$fmt || ($nfound < 10)) {
+ }
+ elsif (!$format)
+ {
+ if (!$fmt || ($nfound < 10 && $fmt ne 'binary'))
+ {
localdie("FATAL: unable to detect log file format from $file, please use -f option.\n");
}
}
- if (($fmt =~ /syslog/) && !$ident && (scalar keys %ident_name == 1)) {
+ if (($fmt =~ /syslog/) && !$ident && (scalar keys %ident_name == 1))
+ {
$ident = (keys %ident_name)[0];
}
my $totalsize = 0;
# Log entries extracted from journalctl command are of indetermined size
- if ( $journalctl_cmd && ($logf =~ m/\Q$journalctl_cmd\E/) ) {
+ if ( $journalctl_cmd && ($logf =~ m/\Q$journalctl_cmd\E/) )
+ {
$totalsize = -1;
+ }
+
# Same from stdin
- } elsif ($logf eq '-') {
+ elsif ($logf eq '-')
+ {
$totalsize = -1;
- # Regular local files can be "stated"
- } elsif (!$remote_host && !$http_download && !$ssh_download) {
+ }
+
+ # Regular local files can be "stated" if they are not compressed
+ elsif (!$remote_host && !$http_download && !$ssh_download && !$iscompressed)
+ {
eval {
$totalsize = (stat("$logf"))[7];
};
$totalsize = -1 if ($@);
- # For uncompressed files try to get the size following the protocol
- } elsif (!$iscompressed) {
+ }
+
+ # For uncompressed files try to get the size following the remote access protocol
+ elsif (!$iscompressed)
+ {
# Use curl to try to get remote file size if it is not compressed
if ($http_download) {
&logmsg('DEBUG', "Looking for file size using command: $curl_command --head $logf | grep \"Content-Length:\" | awk '{print \$2}'");
chomp($totalsize);
&logmsg('DEBUG', "Remote file size: $totalsize");
+ }
# Real size of the file is unknown with compressed file, try to find
# size using uncompress command (bz2 does not report real size)
- } elsif (!$http_download && $logf =~ /\.(gz|zip|xz)$/i) {
+ elsif (!$http_download && $logf =~ /\.(gz|zip|xz|bz2)$/i)
+ {
my $cmd_file_size = $gzip_uncompress_size;
if ($logf =~ /\.zip$/i) {
$cmd_file_size = $zip_uncompress_size;
} elsif ($logf =~ /\.xz$/i) {
$cmd_file_size = $xz_uncompress_size;
+ } elsif ($logf =~ /\.bz2$/i) {
+ $cmd_file_size = "ls -l %f | awk '{print \$5}'";
}
if (!$remote_host && !$http_download && !$ssh_download) {
$cmd_file_size =~ s/\%f/$logf/g;
$totalsize = `$remote_command \"$cmd_file_size\"`;
}
chomp($totalsize);
+ # For bz2 compressed files the real uncompressed size is unknown
+ if ($logf =~ /\.bz2$/i) {
+ # apply deflate estimation factor
+ $totalsize *= $BZ_FACTOR;
+ }
+
+ }
- } elsif ($http_download) {
+ # Bzip2 and remotely downloaded compressed files can't report their real size;
+ # get the compressed file size and estimate the real size using the bzip2, gzip and xz factors.
+ elsif ($http_download)
+ {
&logmsg('DEBUG', "Looking for file size using command: $curl_command --head $logf | grep \"Content-Length:\" | awk '{print \$2}'");
$totalsize = `$curl_command --head $logf | grep "Content-Length:" | awk '{print \$2}'`;
chomp($totalsize);
localdie("FATAL: can't get size of remote file, please check what's going wrong with command: $curl_command --head $logf | grep \"Content-Length:\"\n") if ($totalsize eq '');
&logmsg('DEBUG', "With http access size real size of a compressed file is unknown but use Content-Length wirth compressed side.");
+ # For all compressed files the real uncompressed size is unknown;
+ # apply the corresponding estimation factor
+ if ($logf =~ /\.bz2$/i)
+ {
+ # apply deflate estimation factor
+ $totalsize *= $BZ_FACTOR;
+ }
+ elsif ($logf =~ /\.(zip|gz)$/i)
+ {
+ $totalsize *= $GZ_FACTOR;
+ }
+ elsif ($logf =~ /\.xz$/i)
+ {
+ $totalsize *= $XZ_FACTOR;
+ }
}
return $totalsize;