From: Darold Gilles Date: Thu, 26 Nov 2015 15:54:46 +0000 (+0100) Subject: Add support for journalctl command output and add --journalctl_cmd command line option... X-Git-Tag: v7.2~9 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6ffd13ca26c4243ff897c6fa49d539a0e1689114;p=pgbadger Add support for journalctl command output and add the --journalctl_cmd command line option to enable this functionality and set the command. Typically: --journalctl_cmd "journalctl -u postgresql-9.4" --- diff --git a/README b/README index 8d92a39..d2f99de 100644 --- a/README +++ b/README @@ -11,8 +11,6 @@ SYNOPSIS logfile can be a single log file, a list of files, or a shell command returning a list of files. If you want to pass log content from stdin use - as filename. Note that input from stdin will not work with csvlog. - You can also use a file containing a list of log files to parse, see - the -L command line option. Options: @@ -84,7 +82,7 @@ SYNOPSIS -w | --watch-mode : only report errors just like logwatch could do. -x | --extension : output format. Values: text, html, bin, json or tsung. Default: html - -X | --extra-files : in incremetal mode allow pgBadger to write CSS and + -X | --extra-files : in incremetal mode allow pgbadger to write CSS and JS files in the output directory as separate files. -z | --zcat exec_path : set the full path to the zcat program. Use it if zcat or bzcat or unzip is not in your path. @@ -121,16 +119,19 @@ SYNOPSIS You can use this option multiple times. --exclude-appname name : exclude entries for the specified application name from report. Example: "pg_dump". - --exclude-line regex : pgBadger will start to exclude any log entry that + --exclude-line regex : pgbadger will start to exclude any log entry that will match the given regex. Can be used multiple time. --anonymize : obscure all literals in queries, useful to hide confidential data. - --noreport : prevent pgBadger to create reports in incremental + --noreport : prevent pgbadger to create reports in incremental mode. - --log-duration : force pgBadger to associate log entries generated + --log-duration : force pgbadger to associate log entries generated by both log_duration = on and log_statement = 'all' --enable-checksum : used to add a md5 sum under each query report. + --journalctl command : command to use to replace PostgreSQL logfile by + a call to journalctl. Basically it might be: + journalctl -u postgresql-9.5 pgBadger is able to parse a remote log file using a passwordless ssh connection. Use the -r or --remote-host to set the host ip address or @@ -157,11 +158,11 @@ SYNOPSIS /var/log/postgresql.log cat /var/log/postgres.log | pgbadger - # Log prefix with stderr log output - perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h' + perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21* perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log # Log line prefix with syslog log output - perl pgbadger --prefix 'user=%u,db=%d,client=%h,app=%a' + perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21* # Use my 8 CPUs to parse my 10GB file faster, much faster perl pgbadger -j 8 /pglog/postgresql-9.1-main.log @@ -189,29 +190,40 @@ SYNOPSIS will generate a report per day and per week.
- In incremental mode, you can also specify the number of weeks to keep in + In incremental mode, you can also specify the number of week to keep in the reports: /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1 -O /var/www/pg_reports/ - If you have a pg_dump at 23:00 and 13:00 each day lasting half an hour, - you can use pgBadger as follows to exclude those periods from the report: + If you have a pg_dump at 23:00 and 13:00 each day during half an hour, + you can use pgbadger as follow to exclude these period from the report: pgbadger --exclude-time "2013-09-.* (23|13):.*" postgresql.log - This will help avoid having COPY statements, as generated by pg_dump, at - the top of the list of slowest queries. You can also use --exclude-appname + This will help avoid having COPY statements, as generated by pg_dump, on + top of the list of slowest queries. You can also use --exclude-appname "pg_dump" to solve this problem in a simpler way. + You can also parse journalctl output just as if it was a log file: + + perl pgbadger --journalctl 'journalctl -u postgresql-9.5' + + or worst, call it from a remote host: + + perl pgbadger -r 192.168.1.159 --journalctl 'journalctl -u postgresql-9.5' + + you don't need to specify any log file at command line, but if you have + others PostgreSQL log files to parse, you can add them as usual. + DESCRIPTION - pgBadger is a PostgreSQL log analyzer built for speed with fully - detailed reports from your PostgreSQL log file. It's a single and small - Perl script that outperforms any other PostgreSQL log analyzer. + pgBadger is a PostgreSQL log analyzer built for speed with fully reports + from your PostgreSQL log file. It's a single and small Perl script Perl + script that outperforms any other PostgreSQL log analyzer. - It is written in pure Perl and uses a javascript library - (flotr2) to draw graphs so that you don't need to install any additional - Perl modules or other packages. Furthermore, this library gives us more + It is written in pure Perl and uses a javascript library (flotr2) to + draw graphs so that you don't need to install any additional Perl + modules or other packages. Furthermore, this library gives us more features such as zooming. pgBadger also uses the Bootstrap javascript library and the FontAwesome webfont for better design. Everything is embedded. @@ -219,8 +231,8 @@ DESCRIPTION pgBadger is able to autodetect your log file format (syslog, stderr or csvlog). It is designed to parse huge log files as well as gzip compressed files. See a complete list of features below. Supported - compressed formats are gzip, bzip2 and xz. For the xz format you must have - an xz version greater than 5.05 that supports the --robot option. + compressed format are gzip, bzip2 and xz. For the xz format you must + have an xz version upper than 5.05 that supports the --robot option. All charts are zoomable and can be saved as PNG images. @@ -228,14 +240,14 @@ DESCRIPTION the report using command line options. pgBadger supports any custom format set into the log_line_prefix - directive of your postgresql.conf file as long as it specifies at least + directive of your postgresql.conf file as long as it at least specify the %t and %p patterns. pgBadger allows parallel processing of a single log file or multiple files through the use of the -j option specifying the number of CPUs. 
If you want to save system performance you can also use log_duration - instead of log_min_duration_statement to have reports on the duration and + instead of log_min_duration_statement to have reports on duration and number of queries only. FEATURE @@ -267,7 +279,7 @@ FEATURE Cancelled queries. Error events (panic, fatal, error and warning). - There are also some pie charts about distributions of: + There are also some pie charts about distribution of: Locks statistics. Queries by type (select/insert/update/delete). @@ -283,22 +295,23 @@ FEATURE You can also have incremental reports with one report per day and a cumulative report per week. Two multiprocess modes are available to speed up log parsing, one using one core per log file, and the second - using multiple cores to parse a single file. These modes can be combined. + using multiple cores to parse a single file. These modes can be + combined. Histogram granularity can be adjusted using the -A command line option. - By default they will report the mean of each top queries/errors occurring + By default they will report the mean of each top queries/errors occuring per hour, but you can specify the granularity down to the minute. pgBadger can also be used in a central place to parse remote log files using a passwordless SSH connection. This mode can be used with - compressed files and in the multiprocess per file mode (-J) but can not be - used with the CSV log format. + compressed files and in the multiprocess per file mode (-J) but can not + be used with the CSV log format. REQUIREMENT pgBadger comes as a single Perl script - you do not need anything other than a modern Perl distribution. Charts are rendered using a Javascript - library so you don't need anything other than a web browser. Your browser - will do all the work. + library so you don't need anything other than a web browser. Your + browser will do all the work. If you planned to parse PostgreSQL CSV log files you might need some Perl Modules: @@ -368,10 +381,10 @@ POSTGRESQL CONFIGURATION log_min_duration_statement = 0 Here every statement will be logged, on a busy server you may want to - increase this value to only log queries with a longer duration. - Note that if you have log_statement set to 'all' nothing will be logged - through the log_min_duration_statement directive. See the next chapter for - more information. + increase this value to only log queries with a longer duration. Note + that if you have log_statement set to 'all' nothing will be logged + through the log_min_duration_statement directive. See the next chapter + for more information. With 'stderr' log format, log_line_prefix must be at least: @@ -462,10 +475,10 @@ PARALLEL PROCESSING When you have many small log files and many CPUs it is speedier to dedicate one core to one log file at a time. To enable this behavior you have to use option -J N instead. With 200 log files of 10MB each the use - of the -J option starts being really interesting with 8 Cores. Using this - method you will be sure not to lose any queries in the reports. + of the -J option starts being really interesting with 8 Cores. Using + this method you will be sure not to lose any queries in the reports. - Here are benchmarks done on a server with 8 CPUs and a single file of + He are a benchmarck done on a server with 8 CPUs and a single file of 9.5GB. 
Option | 1 CPU | 2 CPU | 4 CPU | 8 CPU @@ -481,8 +494,8 @@ PARALLEL PROCESSING -j | 20m15 | 9m56 | 5m20 | 4m20 -J | 20m15 | 9m49 | 5m00 | 2m40 - So it is recommended to use -j unless you have hundreds of small log files - and can use at least 8 CPUs. + So it is recommended to use -j unless you have hundreds of small log + files and can use at least 8 CPUs. IMPORTANT: when you are using parallel parsing pgBadger will generate a lot of temporary files in the /tmp directory and will remove them at the @@ -491,8 +504,8 @@ PARALLEL PROCESSING can be easily identified. INCREMENTAL REPORTS - pgBadger includes an automatic incremental report mode using option -I or - --incremental. When running in this mode, pgBadger will generate one + pgBadger includes an automatic incremental report mode using option -I + or --incremental. When running in this mode, pgBadger will generate one report per day and a cumulative report per week. Output is first done in binary format into the mandatory output directory (see option -O or --outdir), then in HTML format for daily and weekly reports with a main @@ -512,8 +525,8 @@ INCREMENTAL REPORTS In this mode pgBadger will create an automatic incremental file in the output directory, so you don't have to use the -l option unless you want to change the path of that file. This means that you can run pgBadger in - this mode each day on a log file rotated each week, and it will not count - the log entries twice. + this mode each day on a log file rotated each week, and it will not + count the log entries twice. To save disk space you may want to use the -X or --extra-files command line option to force pgBadger to write javascript and css to separate @@ -533,8 +546,8 @@ BINARY FORMAT pgbadger sunday/*.bin - Or as another example, if you generate one log file per hour and you want - reports to be rebuilt each time the log file is rotated, proceed as + Or as another example, if you generate one log file per hour and you + want reports to be rebuilt each time the log file is rotated, proceed as follows: pgbadger -o day1/hour01.bin /var/log/pgsql/pglog/postgresql-2012-03-23_10.log diff --git a/doc/pgBadger.pod b/doc/pgBadger.pod index 36532f1..79a904d 100644 --- a/doc/pgBadger.pod +++ b/doc/pgBadger.pod @@ -13,8 +13,6 @@ Arguments: logfile can be a single log file, a list of files, or a shell command returning a list of files. If you want to pass log content from stdin use - as filename. Note that input from stdin will not work with csvlog. - You can also use a file containing a list of log files to parse, see - the -L command line option. Options: @@ -67,7 +65,7 @@ Options: or application name. See examples below. -P | --no-prettify : disable SQL queries prettify formatter. -q | --quiet : don't print anything to stdout, not even a progress - bar. + bar. -r | --remote-host ip : set the host where to execute the cat command on remote logfile to parse localy the file. -R | --retention N : number of week to keep in incremental mode. Default @@ -86,7 +84,7 @@ Options: -w | --watch-mode : only report errors just like logwatch could do. -x | --extension : output format. Values: text, html, bin, json or tsung. Default: html - -X | --extra-files : in incremetal mode allow pgBadger to write CSS and + -X | --extra-files : in incremetal mode allow pgbadger to write CSS and JS files in the output directory as separate files. -z | --zcat exec_path : set the full path to the zcat program. Use it if zcat or bzcat or unzip is not in your path. 
@@ -123,16 +121,20 @@ Options: You can use this option multiple times. --exclude-appname name : exclude entries for the specified application name from report. Example: "pg_dump". - --exclude-line regex : pgBadger will start to exclude any log entry that + --exclude-line regex : pgbadger will start to exclude any log entry that will match the given regex. Can be used multiple time. --anonymize : obscure all literals in queries, useful to hide confidential data. --noreport : prevent pgbadger to create reports in incremental mode. - --log-duration : force pgBadger to associate log entries generated + --log-duration : force pgbadger to associate log entries generated by both log_duration = on and log_statement = 'all' --enable-checksum : used to add a md5 sum under each query report. + --journalctl command : command to use to replace PostgreSQL logfile by + a call to journalctl. Basically it might be: + journalctl -u postgresql-9.5 + pgBadger is able to parse a remote log file using a passwordless ssh connection. Use the -r or --remote-host to set the host ip address or hostname. There's also @@ -158,11 +160,11 @@ Examples: /var/log/postgresql.log cat /var/log/postgres.log | pgbadger - # Log prefix with stderr log output - perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,app=%a,client=%h' + perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21* perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log # Log line prefix with syslog log output - perl pgbadger --prefix 'user=%u,db=%d,client=%h,app=%a' + perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21* # Use my 8 CPUs to parse my 10GB file faster, much faster perl pgbadger -j 8 /pglog/postgresql-9.1-main.log @@ -190,21 +192,31 @@ Or better, use the auto-generated incremental reports: will generate a report per day and per week. -In incremental mode, you can also specify the number of weeks to keep in +In incremental mode, you can also specify the number of week to keep in the reports: /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1 -O /var/www/pg_reports/ -If you have a pg_dump at 23:00 and 13:00 each day lasting half an hour, -you can use pgBadger as follows to exclude those periods from the report: +If you have a pg_dump at 23:00 and 13:00 each day during half an hour, you can +use pgbadger as follow to exclude these period from the report: pgbadger --exclude-time "2013-09-.* (23|13):.*" postgresql.log -This will help avoid having COPY statements, as generated by pg_dump, at -the top of the list of slowest queries. You can also use --exclude-appname +This will help avoid having COPY statements, as generated by pg_dump, on top of +the list of slowest queries. You can also use --exclude-appname "pg_dump" to solve this problem in a simpler way. +You can also parse journalctl output just as if it was a log file: + + perl pgbadger --journalctl 'journalctl -u postgresql-9.5' + +or worst, call it from a remote host: + + perl pgbadger -r 192.168.1.159 --journalctl 'journalctl -u postgresql-9.5' + +you don't need to specify any log file at command line, but if you have others +PostgreSQL log files to parse, you can add them as usual. 
=head1 DESCRIPTION diff --git a/pgbadger b/pgbadger index 4aea653..6abac0a 100755 --- a/pgbadger +++ b/pgbadger @@ -188,6 +188,7 @@ sub spawn } # Command line options +my $journalctl_cmd = ''; my $zcat_cmd = 'gunzip -c'; my $zcat = $zcat_cmd; my $bzcat = 'bunzip2 -c'; @@ -418,6 +419,7 @@ my $result = GetOptions( 'noreport!' => \$noreport, 'log-duration!' => \$log_duration, 'enable-checksum!' => \$enable_checksum, + 'journalctl=s' => \$journalctl_cmd, ); die "FATAL: use pgbadger --help\n" if (not $result); @@ -456,12 +458,22 @@ if ($remote_host) { } } +# Add journalctl command to the file list if it is not already there +if ($journalctl_cmd) { + if (!grep(/^\Q$journalctl_cmd\E$/, @ARGV)) { + $journalctl_cmd .= " --output='short-iso'"; + push(@ARGV, $journalctl_cmd); + } +} + # Log files to be parsed are passed as command line arguments if ($#ARGV >= 0) { foreach my $file (@ARGV) { - if ($file ne '-') { + if ( $journalctl_cmd && ($file =~ m/\Q$journalctl_cmd\E/) ) { + push(@log_files, $file); + } elsif ($file ne '-') { if (!$remote_host) { - die "FATAL: logfile $file must exist!\n" if not -f $file; + die "FATAL: logfile \"$file\" must exist!\n" if not -f $file; if (-z $file) { print "WARNING: file $file is empty\n"; next; } @@ -501,7 +513,9 @@ if ($logfile_list) { if ($file eq '-') { die "FATAL: stdin input - can not be used with logfile list.\n"; } - if (!$remote_host) { + if ( $journalctl_cmd && ($file =~ m/\Q$journalctl_cmd\E/) ) { + push(@log_files, $file); + } elsif (!$remote_host) { die "FATAL: logfile $file must exist!\n" if not -f $file; if (-z $file) { print "WARNING: file $file is empty\n"; @@ -520,12 +534,17 @@ if ($logfile_list) { } -# Logfile is a mandatory parameter -if ($#log_files < 0) { +# Logfile is a mandatory parameter when journalctl command is not set. +if ( ($#log_files < 0) && !$journalctl_cmd) { print STDERR "FATAL: you must give a log file as command line parameter.\n\n"; &usage(); } +# Remove follow option from journalctl command to prevent infinite loop +if ($journalctl_cmd) { + $journalctl_cmd =~ s/(-f|--follow)\b//; +} + # Quiet mode is forced with progress bar $progress = 0 if ($quiet); @@ -556,13 +575,21 @@ my $other_syslog_line = my $orphan_syslog_line = qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:/; my $orphan_stderr_line = ''; -# Simply genreate a random string, thanks to Perlmonks -# Set default format +# Set default format; with multiple files the format will be autodetected each time. +# This default format will be used when the autodetection fails.
my $frmt = ''; if (!$remote_host) { - $frmt = &autodetect_format($log_files[0]); + if ($journalctl_cmd) { + $frmt = 'syslog2'; + } else { + $frmt = &autodetect_format($log_files[0]); + } } elsif (!$format) { - die "FATAL: you must give a log file format (-f or --format) when using remote connection.\n\n"; + if ($journalctl_cmd) { + $frmt = 'syslog2'; + } else { + die "FATAL: you must give a log file format (-f or --format) when using remote connection.\n\n"; + } } $format ||= $frmt; @@ -1179,6 +1206,8 @@ if ( $saved_last_line{datetime} && $outdir && $retention && ($saved_last_line{da my $global_totalsize = 0; my @given_log_files = ( @log_files ); chomp(@given_log_files); +# Append journalctl command at top of log file list +unshift(@given_log_files, $journalctl_cmd) if ($journalctl_cmd); # Verify that the file has not changed for incremental move if (!$remote_host) { @@ -1190,6 +1219,13 @@ if (!$remote_host) { &logmsg('DEBUG', "waiting for log entries from stdin."); $saved_last_line{current_pos} = 0; push(@tmpfilelist, $f); + } elsif ( $journalctl_cmd && ($f eq $journalctl_cmd) ) { + my $since = ''; + if ( ($journalctl_cmd !~ /--since|-S/) && ($saved_last_line{datetime} =~ /^(\d+)-(\d+)-(\d+).(\d+):(\d+):(\d+)/) ) { + $since = " --since='$1-$2-$3 $4:$5:$6'"; + } + &logmsg('DEBUG', "journalctl call will start since: $saved_last_line{datetime}"); + push(@tmpfilelist, "$f $since"); } elsif (!&check_file_changed($f, $saved_last_line{datetime})) { &logmsg('DEBUG', "this file has already been parsed: $f"); } else { @@ -1256,7 +1292,7 @@ if ( ($#given_log_files >= 0) && (($queue_size > 1) || ($job_per_file > 1)) ) { } # Do not use split method with compressed files - if ( ($queue_size > 1) && ($logfile !~ /\.(gz|bz2|zip|xz)$/i) ) { + if ( ($queue_size > 1) && ($logfile !~ /\.(gz|bz2|zip|xz)$/i) && ($logfile !~ /\Q$journalctl_cmd\E/) ) { # Create multiple processes to parse one log file by chunks of data my @chunks = &split_logfile($logfile); &logmsg('DEBUG', "The following boundaries will be used to parse file $logfile, " . join('|', @chunks)); @@ -1770,6 +1806,9 @@ Options: --log-duration : force pgbadger to associate log entries generated by both log_duration = on and log_statement = 'all' --enable-checksum : used to add a md5 sum under each query report. + --journalctl command : command to use to replace PostgreSQL logfile by + a call to journalctl. Basically it might be: + journalctl -u postgresql-9.5 pgBadger is able to parse a remote log file using a passwordless ssh connection. @@ -1843,6 +1882,17 @@ This will help avoid having COPY statements, as generated by pg_dump, on top of the list of slowest queries. You can also use --exclude-appname "pg_dump" to solve this problem in a simpler way. +You can also parse journalctl output just as if it was a log file: + + perl pgbadger --journalctl 'journalctl -u postgresql-9.5' + +or worst, call it from a remote host: + + perl pgbadger -r 192.168.1.159 --journalctl 'journalctl -u postgresql-9.5' + +you don't need to specify any log file at command line, but if you have other +PostgreSQL log file to parse, you can add them as usual. 
+ }; exit 0; @@ -11930,7 +11980,9 @@ sub get_log_file # get file size my $totalsize = 0; - if (!$remote_host) { + if ( $journalctl_cmd && ($logf =~ m/\Q$journalctl_cmd\E/) ) { + $totalsize = 0; + } elsif (!$remote_host) { $totalsize = (stat("$logf"))[7] || 0 if ($logf ne '-'); } elsif ($logf !~ /\.(gz|bz2|zip|xz)$/i) { &logmsg('DEBUG', "Looking for file size using command: $ssh_command \"ls -l $logf\" | awk '{print \$5}'"); @@ -11947,10 +11999,21 @@ sub get_log_file my $iscompressed = 1; # Open a file handle - if ($logf !~ /\.(gz|bz2|zip|xz)$/i) { + if ( $journalctl_cmd && ($logf =~ m/\Q$journalctl_cmd\E/) ) { + # For journalctl command we need to use a pipe as file handle + if (!$remote_host) { + open($lfile, "$logf |") || die "FATAL: cannot read output of command: $logf. $!\n"; + } else { + &logmsg('DEBUG', "Retrieving log entries using command: $ssh_command \"$logf\" |"); + # Open a pipe to remote journalctl program + open($lfile,"$ssh_command \"$logf\" |") || die "FATAL: cannot read from pipe to $ssh_command \"$logf\". $!\n"; + } + $iscompressed = 0; + } elsif ($logf !~ /\.(gz|bz2|zip|xz)$/i) { if (!$remote_host) { open($lfile, $logf) || die "FATAL: cannot read log file $logf. $!\n"; } else { + &logmsg('DEBUG', "Retrieving log entries using command: $ssh_command \" cat $logf\" |"); # Open a pipe to zcat program for compressed log open($lfile,"$ssh_command \"cat $logf\" |") || die "FATAL: cannot read from pipe to $ssh_command \"cat $logf\". $!\n"; } @@ -12006,6 +12069,7 @@ sub get_log_file } # In scalar context return size only close($lfile); + return $totalsize; } @@ -13814,7 +13878,7 @@ function create_bargraph (divid, title, ytitle, data, y2title) { tickRenderer: $.jqplot.CanvasAxisTickRenderer, tickOptions: { textColor: '#333', - formatter: function(format, value) { return pretty_print_number(value, 0, ytitle); }, + formatter: function(format, value) { return pretty_print_number(value, 1, ytitle); }, fontFamily:'Helvetica', fontSize: '8pt' }, @@ -13826,7 +13890,7 @@ function create_bargraph (divid, title, ytitle, data, y2title) { tickRenderer: $.jqplot.CanvasAxisTickRenderer, tickOptions: { textColor: '#8dbd0f', - formatter: function(format, value) { return pretty_print_number(value, 0, y2title); }, + formatter: function(format, value) { return pretty_print_number(value, 1, y2title); }, fontFamily:'Helvetica', fontSize: '8pt' },
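To see how the pieces of the new --journalctl handling fit together end to end, here is a minimal standalone Perl sketch (not code from the patch itself; the command string, the saved timestamp and the loop body are illustrative only). It mirrors what the patch does: force the short-iso output format, strip any follow flag, resume from the last parsed timestamp when possible, and read the command output through a pipe as if it were a log file:

    #!/usr/bin/perl
    # Illustrative sketch only, assuming a journalctl unit name and a
    # previously saved "last parsed" timestamp.
    use strict;
    use warnings;

    my $journalctl_cmd = "journalctl -u postgresql-9.5";   # as passed to --journalctl
    my $last_parsed    = '2015-11-26 15:54:46';            # hypothetical saved timestamp

    # Always request the syslog-like ISO timestamps that pgbadger parses as 'syslog2'
    $journalctl_cmd .= " --output='short-iso'";

    # Remove follow options, otherwise the pipe would never reach end of file
    $journalctl_cmd =~ s/(-f|--follow)\b//;

    # Resume from the last parsed entry when the user gave no --since of their own
    if ($journalctl_cmd !~ /--since|-S/
        && $last_parsed =~ /^(\d+)-(\d+)-(\d+).(\d+):(\d+):(\d+)/) {
        $journalctl_cmd .= " --since='$1-$2-$3 $4:$5:$6'";
    }

    # Read the command output through a pipe, just like a log file
    open(my $lfile, '-|', $journalctl_cmd)
        or die "FATAL: cannot read output of command: $journalctl_cmd. $!\n";
    while (my $line = <$lfile>) {
        # each line now looks like a syslog entry with an ISO-8601 timestamp
        print $line;
    }
    close($lfile);

The same command string can be prefixed with an ssh invocation for the remote case, which is why the patch treats the journalctl command exactly like another entry in the log file list.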