#------------------------------------------------------------------------------
use vars qw($VERSION);
-use strict;
+use strict qw(vars subs);
+
use Getopt::Long qw(:config no_ignore_case bundling);
use IO::File;
use Benchmark;
my $avg_minutes = 5;
my $last_parsed = '';
my $report_title = 'PgBadger: PostgreSQL log analyzer';
+my $log_line_prefix = '';
+my $compiled_prefix = '';
my $NUMPROGRESS = 10000;
my @DIMENSIONS = (800, 300);
my $RESRC_URL = '';
my $img_format = 'png';
my @log_files = ();
+my %prefix_vars = ();
# Do not display data in pie where percentage is lower than this value
# to avoid label overlaping.
"m|maxlength=i" => \$maxlength,
"n|nohighlight!" => \$nohighlight,
"o|outfile=s" => \$outfile,
+ "p|prefix=s" => \$log_line_prefix,
"P|no-prettify!" => \$noprettify,
"q|quiet!" => \$quiet,
"s|sample=i" => \$sample,
$avg_minutes = 60 if ($avg_minutes > 60);
$avg_minutes = 1 if ($avg_minutes < 1);
+
# Set default format
$format ||= &autodetect_format($log_files[0]);
&check_regex($regex_prefix_dbname, '--regex-db');
&check_regex($regex_prefix_dbuser, '--regex-user');
+# Compile custom log line prefie prefix
+my @prefix_params = ();
+if ($log_line_prefix) {
+ $log_line_prefix =~ s/([\[\]\|\/])/\\$1/g;
+ @prefix_params = &build_log_line_prefix_regex();
+ &check_regex($log_line_prefix, '--prefix');
+ if ($format eq 'syslog') {
+ $log_line_prefix = '(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*' . $log_line_prefix . '\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)';
+ $compiled_prefix = qr/$log_line_prefix/;
+ unshift(@prefix_params, 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line');
+ push(@prefix_params, 't_loglevel', 't_query');
+ } elsif ($format eq 'stderr') {
+ $log_line_prefix = $log_line_prefix . '\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)';
+ $compiled_prefix = qr/$log_line_prefix/;
+ push(@prefix_params, 't_loglevel', 't_query');
+ }
+} elsif ($format eq 'syslog') {
+ $compiled_prefix = qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/;
+ push(@prefix_params, 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line', 't_logprefix', 't_loglevel', 't_query');
+} elsif ($format eq 'stderr') {
+ $compiled_prefix = qr/^(\d+)-(\d+)-(\d+)\s(\d+):(\d+):(\d+)[\.\d]*\s[^\s]+\s\[(\d+)\]:\s\[([0-9\-]+)\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/;
+ push(@prefix_params, 't_year', 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_pid', 't_session_line', 't_logprefix', 't_loglevel', 't_query');
+}
+
sub check_regex
{
my ($pattern, $varname) = @_;
if ($row->[13] && ($row->[13] !~ m/duration: \d+\.\d+ ms/)) {
$row->[13] = "duration: $duration ms $row->[13]";
}
-
- # Process the log line
- &parse_query(
- @date,
- $row->[4], # connection from
- $row->[3], # pid
- $row->[5], # session
- 'user=' . $row->[1] . ',db=' . $row->[2], # logprefix
- $row->[11], # loglevel
- $row->[13] || $row->[14] || $row->[15], # message || detail || hint
- );
+ if (&parse_log_prefix($row->[3], 'user=' . $row->[1] . ',db=' . $row->[2])) {
+ # Process the log line
+ &parse_query(
+ @date,
+ $row->[4], # connection from
+ $row->[3], # pid
+ $row->[5], # session
+ $row->[11], # loglevel
+ $row->[13] || $row->[14] || $row->[15], # message || detail || hint
+ );
+ }
}
$csv->eof or die "FATAL: cannot use CSV, " . $csv->error_diag() . "\n";
close $io;
# Parse syslog lines
if ($format eq 'syslog') {
- if ($line =~
- /^(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/)
- {
+ my @matches = ($line =~ $compiled_prefix);
+ for (my $i = 0; $i <= $#prefix_params; $i++) {
+ $prefix_vars{$prefix_params[$i]} = $matches[$i];
+ }
+ if ($#matches >= 0) {
# skip non postgresql lines
- next if ($7 ne $ident);
+ next if ($prefix_vars{'t_ident'} ne $ident);
# Syslog do not have year information, so take care of year overlapping
my $tmp_year = $gyear;
- if ("$tmp_year$month_abbr{$1}$2" > $CURRENT_DATE) {
+ $prefix_vars{'t_day'} = sprintf("%02d", $prefix_vars{'t_day'});
+ if ("$tmp_year$month_abbr{$prefix_vars{'t_month'}}$prefix_vars{'t_day'}" > $CURRENT_DATE) {
$tmp_year = substr($CURRENT_DATE, 1, 4) - 1;
}
- # Fix single digit days
- my $day = sprintf("%02d", $2);
- my $cur_date = "$tmp_year$month_abbr{$1}$day$3$4$5";
+ my $cur_date = $tmp_year . $month_abbr{$prefix_vars{'t_month'}} . $prefix_vars{'t_day'} . $prefix_vars{'t_hour'} . $prefix_vars{'t_min'} . $prefix_vars{'t_sec'};
# Skip unwanted lines
next if ($from && ($from > $cur_date));
last if ($to && ($to < $cur_date));
- $cur_pid = $8;
+ $cur_pid = $prefix_vars{'t_pid'};
# Jump to the last line parsed if required
next if (!&check_incremental_position($cur_date, $line));
- # Process the log line
- &parse_query($tmp_year, $month_abbr{$1}, $day, $3, $4, $5, $6, $8, $9, $10, $11, $12);
+ # Extract information from log line prefix
+ if (!$log_line_prefix) {
+ &parse_log_prefix($prefix_vars{'t_pid'}, $prefix_vars{'t_logprefix'});
+ }
+ # Check if the log line shoud be exclude from the report
+ if (&validate_log_line($cur_pid)) {
+ # Process the log line
+ &parse_query($tmp_year, $month_abbr{$prefix_vars{'t_month'}}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}, $prefix_vars{'t_host'}, $prefix_vars{'t_pid'}, $prefix_vars{'t_session_line'}, $prefix_vars{'t_loglevel'}, $prefix_vars{'t_query'});
+ }
} elsif ($line =~ /^(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*(.*)/) {
} elsif ($format eq 'stderr') {
- # Parse stderr lines
- if ($line =~
- /(\d+)-(\d+)-(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s\[(\d+)\]:\s\[([0-9\-]+)\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/)
- {
+ my @matches = ($line =~ $compiled_prefix);
+ for (my $i = 0; $i <= $#prefix_params; $i++) {
+ $prefix_vars{$prefix_params[$i]} = $matches[$i];
+ }
+ if ($#matches >= 0) {
+ my $temp_timestamp = '';
+ if ($prefix_vars{'t_timestamp'}) {
+ $temp_timestamp = $prefix_vars{'t_timestamp'};
+ } elsif ($prefix_vars{'t_mtimestamp'}) {
+ $temp_timestamp = $prefix_vars{'t_mtimestamp'};
+ } elsif ($prefix_vars{'t_session_timestamp'}) {
+ $temp_timestamp = $prefix_vars{'t_session_timestamp'};
+ }
+ ($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = ($prefix_vars{'t_timestamp'} =~ /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/);
+ my $cur_date = $prefix_vars{'t_year'} . $prefix_vars{'t_month'} . $prefix_vars{'t_day'} . $prefix_vars{'t_hour'} . $prefix_vars{'t_min'} . $prefix_vars{'t_sec'};
# Skip unwanted lines
- my $cur_date = "$1$2$3$4$5$6";
next if ($from && ($from > $cur_date));
last if ($to && ($to < $cur_date));
- $cur_pid = $8;
+ $cur_pid = $prefix_vars{'t_pid'};
# Jump to the last line parsed if required
next if (!&check_incremental_position($cur_date, $line));
- # Process the log line
- &parse_query($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
+ # Extract information from log line prefix
+ if (!$log_line_prefix) {
+ &parse_log_prefix($prefix_vars{'t_pid'}, $prefix_vars{'t_logprefix'});
+ }
- # Collect orphans lines of multiline queries
- } elsif ($line !~ /^\d+-\d+-\d+\s\d+:\d+:\d+\s[^\s]+\s\[\d+\]:/) {
+ # Check if the log line shoud be exclude from the report
+ if (&validate_log_line($cur_pid)) {
+ # Process the log line
+ &parse_query($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}, 'stderr', $prefix_vars{'t_pid'}, $prefix_vars{'t_session_line'}, $prefix_vars{'t_loglevel'}, $prefix_vars{'t_query'});
+ }
+
+ # Collect orphans lines of multiline queries
+ } elsif ($line !~ /[^']*\d+-\d+-\d+\s\d+:\d+:\d+\s[^\s]+[^']*/) {
if ($cur_info{$cur_pid}{statement}) {
$cur_info{$cur_pid}{statement} .= "\n" . $line;
-o | --outfile filename: define the filename for the output. Default depends
of the output format: out.html or out.txt. To dump
output to stdout use - as filename.
+ -p | --prefix string : give here the value of your custom log_line_prefix
+ defined in your postgresql.conf. You may used it only
+ if don't have the default allowed formats ot to use
+ other custom variables like client ip or application
+ name. See examples below.
-P | --no-prettify : disable SQL queries prettify formatter.
-q | --quiet : don't print anything to stdout, even not a progress bar.
-s | --sample number : number of query sample to store/display. Default: 3
pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
cat /var/log/postgres.log | pgbadger -
+ perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
+ /pglog/postgresql-2012-08-21*
+
Reporting errors every week by cron job:
30 23 * * 1 /usr/bin/pgbadger -q -w /var/log/postgresql.log -o /var/reports/pg_errors.html
+Generate report every week using incremental behavior:
+
+ 0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
+ -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
+
This supposes that your log file and HTML report are also rotated every weeks.
};
{
my $time = shift;
+ return '0s' if (!$time);
+
my $days = int($time / 86400000);
$time -= ($days * 86400000);
my $hours = int($time / 3600000);
};
for (my $i = 0 ; $i <= $#top_slowest ; $i++) {
my $col = $i % 2;
+ my $ttl = $top_slowest[$i]->[1] || '';
print $fh "<tr class=\"row$col\"><td class=\"center top\">", $i + 1, "</td><td class=\"relevantInformation top center\">",
&convert_time($top_slowest[$i]->[0]),
- "</td><td title=\"$top_slowest[$i]->[1]\"><div class=\"sql\" onclick=\"sql_format(this)\">",
+ "</td><td title=\"$ttl\"><div class=\"sql\" onclick=\"sql_format(this)\">",
&highlight_code($top_slowest[$i]->[2]), "</div></td></tr>\n";
}
print $fh "</table>\n";
return $code;
}
+sub validate_log_line
+{
+ my ($t_pid) = @_;
+
+ # Check user and/or database if require
+ if ($dbname) {
+
+ # Log line do not match the required dbname
+ if (!$prefix_vars{'t_dbname'} || ($dbname ne $prefix_vars{'t_dbname'})) {
+ delete $cur_info{$t_pid};
+ return 0;
+ }
+ }
+ if ($dbuser) {
+
+ # Log line do not match the required dbuser
+ if (!$prefix_vars{'t_dbuser'} || ($dbuser ne $prefix_vars{'t_dbuser'})) {
+ delete $cur_info{$t_pid};
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+
+sub parse_log_prefix
+{
+ my ($t_pid, $t_logprefix) = @_;
+
+ # Extract user and database information from the logprefix part
+ if ($t_logprefix) {
+ # Remove timestamp in log_line_prefix and extra space character
+ $t_logprefix =~ s/(\d+)-(\d+)-(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)//;
+ $t_logprefix =~ s/\s+$//;
+ $t_logprefix =~ s/^\s+//;
+ # Search for database user
+ if ($t_logprefix =~ /$regex_prefix_dbuser/) {
+ $prefix_vars{'t_dbuser'} = $1;
+ }
+
+ # Search for database name
+ if ($t_logprefix =~ /$regex_prefix_dbname/) {
+ $prefix_vars{'t_dbname'} = $1;
+ }
+ }
+}
+
sub parse_query
{
- my ($t_year, $t_month, $t_day, $t_hour, $t_min, $t_sec, $t_host, $t_pid, $t_session, $t_logprefix, $t_loglevel, $t_query) = @_;
+ my ($t_year, $t_month, $t_day, $t_hour, $t_min, $t_sec, $t_host, $t_pid, $t_session, $t_loglevel, $t_query) = @_;
# Get log level and pass if not supported following the context
if ($t_loglevel !~ /^(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT)$/) {
return;
}
- # Extract user and database information from the logprefix part
- my $t_dbname = '';
- my $t_dbuser = '';
-
- # Remove timestamp in log_line_prefix and extra space character
- $t_logprefix =~ s/(\d+)-(\d+)-(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)//;
- $t_logprefix =~ s/\s+$//;
- $t_logprefix =~ s/^\s+//;
-
- if ($t_logprefix) {
-
- # Search for database user
- if ($t_logprefix =~ /$regex_prefix_dbuser/) {
- $t_dbuser = $1;
- }
-
- # Search for database name
- if ($t_logprefix =~ /$regex_prefix_dbname/) {
- $t_dbname = $1;
- }
-
- # Check user and/or database if require
- if ($dbname) {
-
- # Log line do not match the required dbname
- if (!$t_dbname || ($dbname ne $t_dbname)) {
- delete $cur_info{$t_pid};
- return;
- }
- }
- if ($dbuser) {
-
- # Log line do not match the required dbuser
- if (!$t_dbuser || ($dbuser ne $t_dbuser)) {
- delete $cur_info{$t_pid};
- return;
- }
- }
- }
-
# Store the current timestamp of the log line
$first_log_date = "$t_year-$t_month-$t_day $t_hour:$t_min:$t_sec" if (!$first_log_date);
$last_log_date = "$t_year-$t_month-$t_day $t_hour:$t_min:$t_sec";
return if ($t_action !~ /query|statement|execute/);
} elsif ($t_loglevel eq 'LOG') {
if ($t_query !~
-/incomplete startup packet|connection|receive|unexpected EOF|still waiting for [^\s]+Lock|checkpoint starting:|could not send data to client|parameter .*configuration file|autovacuum launcher/
+/incomplete startup packet|connection|receive|unexpected EOF|still waiting for [^\s]+Lock|checkpoint starting:|could not send data to client|parameter .*configuration file|autovacuum launcher|automatic (analyze|vacuum)/
)
{
- &logmsg('WARNING', "Unrecognized line: $t_loglevel: $t_query at line $nlines");
+ &logmsg('DEBUG', "Unrecognized line: $t_loglevel: $t_query at line $nlines");
}
if (exists $cur_info{$t_pid} && ($t_session != $cur_info{$t_pid}{session})) {
&store_queries($t_pid);
$cur_info{$t_pid}{loglevel} = $t_loglevel;
$cur_info{$t_pid}{query} = $t_query;
$cur_info{$t_pid}{duration} = $t_duration;
- $cur_info{$t_pid}{dbname} = $t_dbname;
- $cur_info{$t_pid}{dbuser} = $t_dbuser;
+ $cur_info{$t_pid}{dbname} = $prefix_vars{t_dbname};
+ $cur_info{$t_pid}{dbuser} = $prefix_vars{t_dbuser};
return 1;
}
$nline++;
# Is syslog lines ?
- if ($line =~ /^...\s+\d+\s\d+:\d+:\d+\s[^\s]+\s([^\[]+)\[\d+\]:\s\[[0-9\-]+\](.*?)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+$duration/) {
+ if ($line =~ /^[A-Z][a-z]{2}\s+\d+\s\d+:\d+:\d+\s[^\s]+\s([^\[]+)\[\d+\]:\s\[[0-9\-]+\](.*?)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+$duration/) {
$fmt = 'syslog';
$nfound++;
$ident_name{$1}++;
# Is stderr lines
- } elsif ($line =~ /\d+-\d+-\d+\s\d+:\d+:\d+\s[^\s]+\s\[\d+\]:\s\[[0-9\-]+\](.*?)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+$duration/) {
- $fmt = 'stderr';
- $nfound++;
- } elsif ( ($line =~ /,(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT),/) && ($line =~ tr/,/,/ >= 12) ) {
+ } elsif ( ($line =~ /^\d+-\d+-\d+ \d+:\d+:\d+\.\d+ [A-Z\d]{3,6},.*,(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT),/) && ($line =~ tr/,/,/ >= 12) ) {
$fmt = 'csv';
$nfound++;
+ } elsif ($line =~ /\d+-\d+-\d+ \d+:\d+:\d+[\.0-9]* [A-Z\d]{3,6}(.*?)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+$duration/) {
+ $fmt = 'stderr';
+ $nfound++;
}
last if (($nfound > 10) || ($nline > 5000));
}
}
+sub build_log_line_prefix_regex
+{
+ my %regex_map = (
+ '%a' => [ ('t_appname', '([0-9a-zA-Z\.\-\_\/\[\]]*)') ], # application name
+ '%u' => [ ('t_dbuser', '([0-9a-zA-Z\_\[\]]*)') ], # user name
+ '%d' => [ ('t_dbname', '([0-9a-zA-Z\_\[\]]*)') ], # database name
+ '%r' => [ ('t_hostport', '(\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):\d+') ], # remote host and port
+ '%h' => [ ('t_client', '(\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})?') ], # remote host
+ '%p' => [ ('t_pid', '(\d+)') ], # process ID
+ '%t' => [ ('t_timestamp', '(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) [A-Z\d]{3,6}') ], # timestamp without milliseconds
+ '%m' => [ ('t_mtimestamp', '(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.\d+ [A-Z\d]{3,6}') ], # timestamp with milliseconds
+ '%l' => [ ('t_session_line', '(\d+)') ], # session line number
+ '%s' => [ ('t_session_timestamp', '(\d{4}-\d{2}-\d{2} \d{2}):\d{2}:\d{2}) [A-Z\d]{3,6}') ], # session start timestamp
+ '%c' => [ ('t_session_id', '([0-9a-f\.]*)') ], # session ID
+ '%v' => [ ('t_virtual_xid', '([0-9a-f\.\/]*)') ], # virtual transaction ID
+ '%x' => [ ('t_xid', '([0-9a-f\.\/]*)') ], # transaction ID
+ '%i' => [ ('t_command', '([0-9a-zA-Z\.\-\_]*)') ], # command tag
+ '%e' => [ ('t_sqlstate', '([0-9a-zA-Z]+)') ], # SQL state
+ );
+ my @param_list = ();
+ $log_line_prefix =~ s/\%l([^\d])\d+/\%l$1\\d\+/;
+ while ($log_line_prefix =~ s/(\%[audrhptmlscvxie])/$regex_map{"$1"}->[1]/) {
+ push(@param_list, $regex_map{"$1"}->[0]);
+ }
+ return @param_list;
+}
+
+# Inclusion of package SQL::Beautify
{
package SQL::Beautify;