From: Darold Gilles Date: Fri, 19 Feb 2016 10:44:29 +0000 (+0100) Subject: Add reports on pgBouncer log file. X-Git-Tag: v8.0~5 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f9f19417e383270f1dbbc41926eac6cd51e26bbd;p=pgbadger Add reports on pgBouncer log file. --- diff --git a/pgbadger b/pgbadger index 819bb87..468375a 100755 --- a/pgbadger +++ b/pgbadger @@ -259,6 +259,7 @@ my $job_per_file = 0; my $charset = 'utf-8'; my $csv_sep_char = ','; my %current_sessions = (); +my %pgb_current_sessions = (); my $incr_date = ''; my $last_incr_date = ''; my $anonymize = 0; @@ -272,9 +273,9 @@ my $log_duration = 0; my $logfile_list = ''; my $enable_checksum = 0; my $timezone = 0; +my $pgbouncer_only = 0; my $rebuild = 0; - my $NUMPROGRESS = 10000; my @DIMENSIONS = (800, 300); my $RESRC_URL = ''; @@ -430,6 +431,7 @@ my $result = GetOptions( 'journalctl=s' => \$journalctl_cmd, 'pid-dir=s' => \$PID_DIR, 'rebuild!' => \$rebuild, + 'pgbouncer-only!' => \$pgbouncer_only, ); die "FATAL: use pgbadger --help\n" if (not $result); @@ -600,6 +602,14 @@ my $parse_regex = qr/^(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|HINT|STATEMENT| my $full_error_regex = qr/^(WARNING|ERROR|FATAL|PANIC|DETAIL|HINT|STATEMENT|CONTEXT)/; my $main_error_regex = qr/^(WARNING|ERROR|FATAL|PANIC)/; +my $pgbouncer_log_format = qr/^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.\d+(?: [A-Z\+\-\d]{3,6})? (\d+) ([^\s]+) (.\-0x[0-9a-f\.]*): ([0-9a-zA-Z\_\[\]\-\.]*)\/([0-9a-zA-Z\_\[\]\-\.]*)\@([a-zA-Z0-9\-\.]+|\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9a-fA-F:]+)?[:\d]* (.*)/; +my @pgb_prefix_params = ('t_timestamp', 't_pid', 't_loglevel', 't_session_id', 't_dbname', 't_dbuser', 't_client', 't_query'); + +my $pgbouncer_log_parse1 = qr/^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.\d+(?: [A-Z\+\-\d]{3,6})? (\d+) ([^\s]+) (.*)/; +my @pgb_prefix_parse1 = ('t_timestamp', 't_pid', 't_loglevel', 't_query'); +my $pgbouncer_log_parse2 = qr/^(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.\d+(?: [A-Z\+\-\d]{3,6})? 
(\d+) ([^\s]+) (.\-0x[0-9a-f\.]*): ([0-9a-zA-Z\_\[\]\-\.]*)\/([0-9a-zA-Z\_\[\]\-\.]*)\@([a-zA-Z0-9\-\.]+|\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|[0-9a-fA-F:]+)?[:\d]* (.*)/; +my @pgb_prefix_parse2 = ('t_timestamp', 't_pid', 't_loglevel', 't_session_id', 't_dbname', 't_dbuser', 't_client', 't_query'); + # Set syslog prefix regex my $other_syslog_line = qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*)/; @@ -637,6 +647,7 @@ if ($format eq 'syslog2') { $orphan_syslog_line = qr/^(\d+-\d+)-(\d+)T(\d+):(\d+):(\d+)(?:.[^\s]+)?\s([^\s]+)\s(?:[^\s]+\s)?(?:[^\s]+\s)?([^\s\[]+)\[(\d+)\]:/; } + # Set default top query $top ||= 20; @@ -1024,21 +1035,26 @@ sub get_hist_inbound # Where statistics are stored my %overall_stat = (); +my %pgb_overall_stat = (); my %overall_checkpoint = (); my @top_slowest = (); my %normalyzed_info = (); my %error_info = (); +my %pgb_error_info = (); my %logs_type = (); my %per_minute_info = (); +my %pgb_per_minute_info = (); my %lock_info = (); my %tempfile_info = (); my %cancelled_info = (); my %connection_info = (); +my %pgb_connection_info = (); my %database_info = (); my %application_info = (); my %user_info = (); my %host_info = (); my %session_info = (); +my %pgb_session_info = (); my %conn_received = (); my %checkpoint_info = (); my %autovacuum_info = (); @@ -1178,7 +1194,6 @@ if (!$noclean && $saved_last_line{datetime} && $outdir) { my $wn = &get_week_number($last_year, $last_month, $last_day); # Get the days of the current week where binary files must be preserved my @wdays = &get_wdays_per_month($wn - 1, "$last_year-$last_month"); - # Find obsolete dir days that shoud be cleaned unless(opendir(DIR, "$outdir")) { localdie("Error: can't opendir $outdir: $!"); @@ -1508,7 +1523,6 @@ my $t1 = Benchmark->new; my $td = timediff($t1, $t0); &logmsg('DEBUG', "the log statistics gathering took:" . timestr($td)); - if (!$incremental && ($#given_log_files >= 0) ) { &logmsg('LOG', "Ok, generating $extension report..."); @@ -1568,6 +1582,7 @@ if (!$incremental && ($#given_log_files >= 0) ) { &logmsg('HINT', "maybe there's no new entries in your log since last run."); } &build_incremental_reports(@build_directories); + } my $t2 = Benchmark->new; @@ -1724,6 +1739,7 @@ Options: the same time. --rebuild : used to rebuild all html reports in incremental output directories where there is binary data files. + --pgbouncer-only : only show pgbouncer related menu in the header. pgBadger is able to parse a remote log file using a passwordless ssh connection. Use the -r or --remote-host to set the host ip address or hostname. 
There's also @@ -1917,7 +1933,7 @@ sub build_incremental_reports } &logmsg('LOG', "Ok, generating global index to access incremental reports..."); - + $fh = new IO::File ">$outdir/index.html"; if (not defined $fh) { localdie("FATAL: can't write to $outdir/index.html, $!\n"); @@ -2107,6 +2123,7 @@ sub init_stats_vars # Empty where statistics are stored %overall_stat = (); + %pgb_overall_stat = (); %overall_checkpoint = (); @top_slowest = (); @top_tempfile_info = (); @@ -2114,15 +2131,19 @@ sub init_stats_vars @top_locked_info = (); %normalyzed_info = (); %error_info = (); + %pgb_error_info = (); %logs_type = (); %per_minute_info = (); + %pgb_per_minute_info = (); %lock_info = (); %tempfile_info = (); %cancelled_info = (); %connection_info = (); + %pgb_connection_info = (); %database_info = (); %application_info = (); %session_info = (); + %pgb_session_info = (); %conn_received = (); %checkpoint_info = (); %autovacuum_info = (); @@ -2243,7 +2264,146 @@ sub process_file &logmsg('DEBUG', "Starting reading file $remote_host:$logfile..."); } - if ($format eq 'csv') { + # Detect if we are parsing a pgbouncer file + my ($is_pgbouncer_format, $retcode, $msg) = &detect_pgbouncer_log($logfile, $saved_last_line{datetime}, 1); + + # Parse pgbouncer logfile + if ($is_pgbouncer_format) { + + my $time_pattern = qr/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/; + my $cur_pid = ''; + my @matches = (); + my $goon = 0; + my $has_exclusion = 0; + if ($#exclude_line >= 0) { + $has_exclusion = 1; + } + &logmsg('DEBUG', "Start parsing at offset $start_offset of file $logfile to " . ($stop_offset || $totalsize)); + if ($start_offset) { + # Move to the starting offset position in file + $lfile->seek($start_offset, 0); + } + while (my $line = <$lfile>) { + + # We received a signal + last if ($terminate); + + # Get current size/offset in the log file + $cursize += length($line); + $current_offset += length($line); + + # Replace CR/LF by LF + $line =~ s/\r//; + + # Start to exclude from parsing any desired lines + if ($has_exclusion >= 0) { + + # Log line matches the excluded regex + my $ef = 0; + map { $ef = 1, last if ($line =~ /$_/is); } @exclude_line; + next if ($ef); + } + + chomp($line); + $nlines++; + next if (!$line); + + if (!$tmpoutfile) { + if ($progress && (($nlines % $NUMPROGRESS) == 0)) { + if ($totalsize) { + print STDERR &progress_bar($cursize, $stop_offset || $totalsize, 25, '='); + } else { + print STDERR "."; + } + } + } else { + if ($progress && (($nlines % $NUMPROGRESS) == 0)) { + $pipe->print("$cursize\n"); + $cursize = 0; + } + } + + %prefix_vars = (); + + @matches = ($line =~ $pgbouncer_log_parse1); + if ($#matches >= 0) { + for (my $i = 0 ; $i <= $#pgb_prefix_parse1 ; $i++) { + $prefix_vars{$pgb_prefix_parse1[$i]} = $matches[$i]; + } + + # Get time detailed information + ($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, + $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = ($prefix_vars{'t_timestamp'} =~ $time_pattern); + + # Skip unwanted lines + if ($#exclude_time >= 0) { + foreach (@exclude_time) { + if ($prefix_vars{'t_timestamp'} =~ /$_/) { + return; + } + } + } + next if ($from && ($from gt $prefix_vars{'t_timestamp'})); + if ($to && ($to lt $prefix_vars{'t_timestamp'})) { + if (!$tmpoutfile) { + if ($totalsize) { + print STDERR &progress_bar($cursize, $stop_offset || $totalsize, 25, '='); + } else { + print STDERR "."; + } + } else { + $pipe->print("$cursize\n"); + $cursize = 0; + } + $getout = 2; + last; + } + + # Jump to the last line 
parsed if required + next if (!&check_incremental_position($prefix_vars{'t_timestamp'}, $line)); + + # Extract other information from the line + @matches = ($line =~ $pgbouncer_log_parse2); + if ($#matches >= 0) { + for (my $i = 0 ; $i <= $#pgb_prefix_parse2 ; $i++) { + $prefix_vars{$pgb_prefix_parse2[$i]} = $matches[$i]; + } + $prefix_vars{'t_client'} = _gethostbyaddr($prefix_vars{'t_client'}) if ($dns_resolv && $prefix_vars{'t_client'}); + } else { + # pgBouncer Statistics appears each minutes in the log + if ($prefix_vars{'t_query'} =~ /Stats: (\d+) req\/s, in (\d+) b\/s, out (\d+) b\/s,query (\d+) us/) { + $prefix_vars{'t_loglevel'} = 'STATS'; + $prefix_vars{'t_req/s'} = $1; + $prefix_vars{'t_inbytes/s'} = $2; + $prefix_vars{'t_outbytes/s'} = $3; + $prefix_vars{'t_avgduration'} = $4; + } + } + + # Store the current timestamp of the log line + &store_current_timestamp($prefix_vars{'t_timestamp'}); + + # Check if the log line should be excluded from the report + if (&validate_log_line($prefix_vars{'t_pid'})) { + $prefix_vars{'t_host'} = 'stderr'; # this unused variable is used to store format information when log format is not syslog + + # Process the log line + &parse_pgbouncer(); + } + + } else { + # unknown format + &logmsg('DEBUG', "Unknown pgbouncer line format: $line"); + } + + last if (($stop_offset > 0) && ($current_offset >= $stop_offset)); + } + if ($last_parsed) { + $last_line{current_pos} = $current_offset; + } + + # Parse PostgreSQL log file with CSV format + } elsif ($format eq 'csv') { require Text::CSV_XS; my $csv = Text::CSV_XS->new( @@ -2872,6 +3032,70 @@ sub store_current_timestamp } } +# Method used to check if the log file is produced by pgbouncer +sub detect_pgbouncer_log +{ + my ($file, $saved_date, $look_at_beginning) = @_; + + my ($lfile, $totalsize, $iscompressed) = &get_log_file($file); + + # Compressed files do not allow seeking + if ($iscompressed) { + close($lfile); + return (1, "log file is compressed"); + + } + + my ($gsec, $gmin, $ghour, $gmday, $gmon, $gyear, $gwday, $gyday, $gisdst) = localtime(time); + $gyear += 1900; + my $CURRENT_DATE = $gyear . sprintf("%02d", $gmon + 1) . 
sprintf("%02d", $gmday);
+
+	%prefix_vars = ();
+	my $startoffset = 0;
+	# If seeking is not explicitly disabled
+	if (!$look_at_beginning) {
+		# do not seek if filesize is smaller than the seek position
+		if ($saved_last_line{current_pos} < $totalsize) {
+			$lfile->seek($saved_last_line{current_pos} || 0, 0);
+			$startoffset = $saved_last_line{current_pos} || 0;
+		}
+	}
+
+	my $more_lines = 0;
+	my $ispgbouncerlog = 0;
+	while (my $line = <$lfile>) {
+
+		$more_lines++;
+
+		$line =~ s/\r//;
+
+		my @matches = ($line =~ $pgbouncer_log_format);
+		if ($#matches >= 0) {
+			$ispgbouncerlog++;
+			for (my $i = 0 ; $i <= $#pgb_prefix_params ; $i++) {
+				$prefix_vars{$pgb_prefix_params[$i]} = $matches[$i];
+			}
+		}
+		next if (!$prefix_vars{'t_timestamp'});
+		# This file has already been parsed
+		if ($saved_date gt $prefix_vars{'t_timestamp'}) {
+			close($lfile);
+			return ($ispgbouncerlog, 0, "timestamp $prefix_vars{'t_timestamp'} read at offset $startoffset is lower than saved timestamp: $saved_date");
+		} else {
+			last;
+		}
+	}
+	close($lfile);
+
+	if (!$more_lines) {
+		close($lfile);
+		return ($ispgbouncerlog, 0, "there are no new lines in this file");
+	}
+
+	return ($ispgbouncerlog, 1, "reached the end of detect_pgbouncer_log() with start date: $saved_date and file size: $totalsize") ;
+}
+
+
 # Method used to check if the file stores logs after the last incremental position or not
 # This position should have been saved in the incremental file and read in the $last_parsed at
 # start up. Here we just verify that the first date in file is before the last incremental date.
@@ -3115,7 +3339,7 @@ sub normalize_query
 	# Normalise alias with quote
 	$orig_query =~ s/AS\s+"([^"]+)"/'AS "' . remove_alias($1) . '"'/eigs;
-	
+
 	# Remove string content
 	$orig_query =~ s/\\'//gs;
 	$orig_query =~ s/'[^']*'/''/gs;
@@ -3382,6 +3606,21 @@ sub set_top_error_sample
 	}
 }
 
+# Stores top N error sample from pgbouncer log
+sub pgb_set_top_error_sample
+{
+	my ($q, $date, $real_error, $db, $user, $remote) = @_;
+
+	# Stop when we have our number of samples
+	if (!exists $pgb_error_info{$q}{date} || ($#{$pgb_error_info{$q}{date}} < $sample)) {
+		push(@{$pgb_error_info{$q}{date}}, $date);
+		push(@{$pgb_error_info{$q}{error}}, $real_error);
+		push(@{$pgb_error_info{$q}{db}}, $db);
+		push(@{$pgb_error_info{$q}{user}}, $user);
+		push(@{$pgb_error_info{$q}{remote}}, $remote);
+	}
+}
+
 sub dump_as_text
 {
 
@@ -3876,6 +4115,7 @@ Report not supported by text format
 			$idx++;
 		}
 	}
+
 	if (!$disable_query && (scalar keys %normalyzed_info > 0)) {
 		print $fh "\n- Most frequent queries (N) --------------------------------------------\n\n";
 		print $fh "Rank Times executed Total duration Min/Max/Avg duration (s) Query\n";
@@ -3955,6 +4195,84 @@ Report not supported by text format
 		&show_error_as_text();
 	}
 
+
+	# Show pgbouncer session per database statistics
+	if (exists $pgb_session_info{database}) {
+		print $fh "\n- pgBouncer sessions per database --------------------------------------------\n\n";
+		print $fh "Database Count Total Duration Avg duration (s)\n";
+		foreach my $d (sort keys %{$pgb_session_info{database}}) {
+			print $fh "$d - ", &comma_numbers($pgb_session_info{database}{$d}{count}), " ",
+				&convert_time($pgb_session_info{database}{$d}{duration}), " ",
+				&convert_time($pgb_session_info{database}{$d}{duration} / $pgb_session_info{database}{$d}{count}), "\n";
+		}
+	}
+
+	# Show pgbouncer session per user statistics
+	if (exists $pgb_session_info{user}) {
+		print $fh "\n- pgBouncer sessions per user ------------------------------------------------\n\n";
+		print $fh "User Count Total Duration Avg duration (s)\n";
+		foreach my $d (sort keys %{$pgb_session_info{user}}) {
+			print $fh "$d - ", &comma_numbers($pgb_session_info{user}{$d}{count}), " ", &convert_time($pgb_session_info{user}{$d}{duration}),
+				" ", &convert_time($pgb_session_info{user}{$d}{duration} / $pgb_session_info{user}{$d}{count}), "\n";
+		}
+	}
+
+	# Show pgbouncer session per host statistics
+	if (exists $pgb_session_info{host}) {
+		print $fh "\n- pgBouncer sessions per host ------------------------------------------------\n\n";
+		print $fh "Host Count Total Duration Avg duration (s)\n";
+		foreach my $d (sort keys %{$pgb_session_info{host}}) {
+			print $fh "$d - ", &comma_numbers($pgb_session_info{host}{$d}{count}), " ", &convert_time($pgb_session_info{host}{$d}{duration}),
+				" ", &convert_time($pgb_session_info{host}{$d}{duration} / $pgb_session_info{host}{$d}{count}), "\n";
+		}
+	}
+
+	# Show pgbouncer session per application statistics
+	if (exists $pgb_session_info{app}) {
+		print $fh "\n- pgBouncer sessions per application -----------------------------------------\n\n";
+		print $fh "Application Count Total Duration Avg duration (s)\n";
+		foreach my $d (sort keys %{$pgb_session_info{app}}) {
+			print $fh "$d - ", &comma_numbers($pgb_session_info{app}{$d}{count}), " ", &convert_time($pgb_session_info{app}{$d}{duration}),
+				" ", &convert_time($pgb_session_info{app}{$d}{duration} / $pgb_session_info{app}{$d}{count}), "\n";
+		}
+	}
+
+
+	# Show pgbouncer connection per database statistics
+	if (exists $pgb_connection_info{database}) {
+		print $fh "\n- pgBouncer connections per database -----------------------------------------\n\n";
+		print $fh "Database User Count\n";
+		foreach my $d (sort keys %{$pgb_connection_info{database}}) {
+			print $fh "$d - ", &comma_numbers($pgb_connection_info{database}{$d}), "\n";
+			foreach my $u (sort keys %{$pgb_connection_info{user}}) {
+				next if (!exists $pgb_connection_info{database_user}{$d}{$u});
+				print $fh "\t$u ", &comma_numbers($pgb_connection_info{database_user}{$d}{$u}), "\n";
+			}
+		}
+	}
+
+	# Show pgbouncer connection per user statistics
+	if (exists $pgb_connection_info{user}) {
+		print $fh "\n- pgBouncer connections per user ---------------------------------------------\n\n";
+		print $fh "User Count\n";
+		foreach my $d (sort keys %{$pgb_connection_info{user}}) {
+			print $fh "$d - ", &comma_numbers($pgb_connection_info{user}{$d}), "\n";
+		}
+	}
+
+	# Show pgbouncer connection per host statistics
+	if (exists $pgb_connection_info{host}) {
+		print $fh "\n- pgBouncer connections per host --------------------------------------------\n\n";
+		print $fh "Host Count\n";
+		foreach my $d (sort keys %{$pgb_connection_info{host}}) {
+			print $fh "$d - ", &comma_numbers($pgb_connection_info{host}{$d}), "\n";
+		}
+	}
+
+	if (!$disable_error) {
+		&show_pgb_error_as_text();
+	}
+
 	print $fh "\n\n";
 
 	print $fh "Report generated by pgBadger $VERSION ($project_url).\n";
@@ -4071,6 +4389,41 @@ sub show_error_as_text
 	}
 }
 
+sub show_pgb_error_as_text
+{
+	return if (scalar keys %pgb_error_info == 0);
+
+	print $fh "\n- Most frequent events (N) ---------------------------------------------\n\n";
+	my $idx = 1;
+	foreach my $k (sort {$pgb_error_info{$b}{count} <=> $pgb_error_info{$a}{count}} keys %pgb_error_info) {
+		next if (!$pgb_error_info{$k}{count});
+		last if ($idx > $top);
+		if ($pgb_error_info{$k}{count} > 1) {
+			my $msg = $k;
+			print $fh "$idx) " . &comma_numbers($pgb_error_info{$k}{count}) . 
" - $msg\n"; + print $fh "--\n"; + my $j = 1; + for (my $i = 0 ; $i <= $#{$pgb_error_info{$k}{date}} ; $i++) { + print $fh "\t- Example $j: $pgb_error_info{$k}{date}[$i] - $pgb_error_info{$k}{error}[$i]\n"; + print $fh "\t\tDatabase: $pgb_error_info{$k}{db}[$i]\n" if ($pgb_error_info{$k}{db}[$i]); + print $fh "\t\tUser: $pgb_error_info{$k}{user}[$i]\n" if ($pgb_error_info{$k}{user}[$i]); + print $fh "\t\tClient: $pgb_error_info{$k}{remote}[$i]\n" if ($pgb_error_info{$k}{remote}[$i]); + $j++; + } + } else { + print $fh "$idx) " . &comma_numbers($pgb_error_info{$k}{count}) . " - $pgb_error_info{$k}{error}[0]\n"; + print $fh "--\n"; + print $fh "\t- Date: $pgb_error_info{$k}{date}[0]\n"; + print $fh "\t\tDatabase: $pgb_error_info{$k}{db}[0]\n" if ($pgb_error_info{$k}{db}[0]); + print $fh "\t\tUser: $pgb_error_info{$k}{user}[0]\n" if ($pgb_error_info{$k}{user}[0]); + print $fh "\t\tClient: $pgb_error_info{$k}{remote}[0]\n" if ($pgb_error_info{$k}{remote}[0]); + } + $idx++; + } + +} + + sub html_header { my $uri = shift; @@ -4106,7 +4459,7 @@ sub html_header $pgbadger_logo $report_title