From: Darold Gilles
Date: Thu, 12 Apr 2012 16:05:23 +0000 (+0200)
Subject: Initial commit
X-Git-Tag: v3.2~274
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1b880c9f6af20694e3a2b9b5acf99b374310343f;p=pgbadger

Initial commit
---
1b880c9f6af20694e3a2b9b5acf99b374310343f
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f691b5e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,26 @@
+This collective work is Copyright (c)2011 by Gilles Darold
+Individual portions may be copyright by individual contributors, and
+are included in this collective work with permission of the copyright
+owners.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/README b/README
new file mode 100755
index 0000000..14634bf
--- /dev/null
+++ b/README
@@ -0,0 +1,57 @@
+PgBadger - Another PostgreSQL log analyzer that aims to replace and
+           outperform pgFouine
+
+This is still a work in progress.
+
+------------------------------------------------------------------------------
+
+You must first enable SQL query logging to have something to parse:
+
+        log_min_duration_statement = 0
+
+With the 'stderr' log format, the log line prefix must be at least:
+
+        log_line_prefix = '%t [%p]: [%l-1] '
+
+The log line prefix can also add the user and database name, as follows:
+
+        log_line_prefix = '%t [%p]: [%l-1] user=username,db=dbname '
+
+or, for the syslog log file format:
+
+        log_line_prefix = 'user=username,db=dbname '
+
+The log line prefix could also be:
+
+        log_line_prefix = '%t [%p]: [%l-1] db=dbname,user=username'
+
+Additional information can be collected by enabling these settings in postgresql.conf:
+
+        log_checkpoints = on
+        log_connections = on
+        log_disconnections = on
+        log_lock_waits = on
+        log_temp_files = 0
+
+-------------------------------------------------------------------------------
+
+Usage: pgbadger -l logfile ... > out.html
+
+        -l | --logfile filename: path to the PostgreSQL log file to parse. It can
+                                 be a plain text log or a gzip compressed file
+                                 with the .gz extension.
+        -f | --format logtype : the value can be: syslog or stderr. Default: stderr
+        -i | --ident name : program name used as syslog ident. Default: postgres
+        -h | --help : show this message and exit.
+        -d | --dbname database : only report what concerns the given database
+        -u | --dbuser username : only report what concerns the given user
+        -t | --top number : number of queries to store/display. Default: 20
+        -s | --sample number : number of query samples to store/display. Default: 3
+        -x | --extension : output format. Values: text or html. Default: html
+        -m | --maxlength size : maximum length of a query; it will be truncated
+                                beyond the given size. Default: no truncation
+        -g | --graph : generate graphs, requires the GD::Graph Perl module
+        -b | --begin datetime : start date/time for the data to be parsed in log.
+        -e | --end datetime : end date/time for the data to be parsed in log.
+
diff --git a/pgbadger b/pgbadger
new file mode 100755
index 0000000..1cdb9e6
--- /dev/null
+++ b/pgbadger
@@ -0,0 +1,1814 @@
+#!/usr/bin/perl
+#------------------------------------------------------------------------------
+#
+# PgBadger - Another PostgreSQL log analyzer that aims to replace and
+#            outperform pgFouine
+#
+# This program is open source, licensed under the simplified BSD license.
+# For license terms, see the LICENSE file.
+#------------------------------------------------------------------------------
+# You must enable SQL query logging: log_min_duration_statement = 0
+# Log line prefix should be: log_line_prefix = '%t [%p]: [%l-1] '
+# or: log_line_prefix = '%t [%p]: [%l-1] user=username,db=dbname'
+# or: log_line_prefix = '%t [%p]: [%l-1] db=dbname,user=username'
+#
+# Additional information that can be collected:
+#
+#	log_checkpoints = on
+#	log_connections = on
+#	log_disconnections = on
+#	log_lock_waits = on
+#	log_temp_files = 0
+#------------------------------------------------------------------------------
+use strict;
+use Getopt::Long;
+use IO::File;
+use Benchmark;
+
+$| = 1;
+
+# Global variables overridden during install
+my $ZCAT_PROG = '/bin/zcat';
+my $VERSION = '1.0';
+
+# Command line options
+my $logfile = '';
+my $format = '';
+my $outdir = '';
+my $help = '';
+my $dbname = '';
+my $dbuser = '';
+my $ident = '';
+my $top = 0;
+my $sample = 0;
+my $extension = '';
+my $maxlength = 0;
+my $graph = 0;
+my $debug = 1;
+my $nohighlight = 0;
+my $from = '';
+my $to = '';
+
+my $t0 = Benchmark->new;
+
+# Get the command line parameters
+my $result = GetOptions (
+	"l|logfile=s"	=> \$logfile,
+	"f|format=s"	=> \$format,
+	"o|outdir=s"	=> \$outdir,
+	"h|help!"	=> \$help,
+	"d|dbname=s"	=> \$dbname,
+	"u|dbuser=s"	=> \$dbuser,
+	"i|ident=s"	=> \$ident,
+	"t|top=i"	=> \$top,
+	"s|sample=i"	=> \$sample,
+	"x|extension=s"	=> \$extension,
+	"m|maxlength=i"	=> \$maxlength,
+	"g|graph!"	=> \$graph,
+	"n|nohighlight!"=> \$nohighlight,
+	"b|begin=s"	=> \$from,
+	"e|end=s"	=> \$to,
+);
+
+&usage() if ($help);
+if (!$logfile) {
+	print STDERR "FATAL: you must set a log file. See option -l.\n\n";
+	&usage();
+}
+
+# Set default format
+$format ||= 'stderr';
+# Set default syslog ident name
+$ident ||= 'postgres';
+# Set default top query
+$top ||= 20;
+# Set the default number of samples
+$sample ||= 3;
+# Set the default extension and output format
+$extension ||= 'html';
+$graph = 0 if ($extension ne 'html');
+
+my $end_top = $top - 1;
+
+# Check that the logfile exists and is not empty
+die "FATAL: logfile $logfile must exist!\n" if (!-e $logfile);
+die "FATAL: logfile $logfile must not be empty!\n" if (-z $logfile);
+
+# Check start/end date time
+if ($from) {
+	if ($from =~ /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/) {
+		$from = "$1$2$3$4$5$6";
+	} elsif ($from =~ /^(\d{4})-(\d{2})-(\d{2})$/) {
+		$from = "$1$2$3" . "000000";
+	} else {
+		die "FATAL: bad format for begin datetime, should be yyyy-mm-dd hh:mm:ss\n";
+	}
+}
+if ($to) {
+	if ($to =~ /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/) {
+		$to = "$1$2$3$4$5$6";
+	} elsif ($to =~ /^(\d{4})-(\d{2})-(\d{2})$/) {
+		$to = "$1$2$3" . "000000";
+	} else {
+		die "FATAL: bad format for ending datetime, should be yyyy-mm-dd hh:mm:ss\n";
+	}
+}
+
+# Stores the last parsed line from log file to allow incremental parsing
+my $LAST_LINE = '';
+# Set the level of the data aggregator: minute, hour or day, according to the
+# size of the log file.
+my $LEVEL = 'hour';
+
+# Month names
+my %month_abbr = (
+	'Jan'=>'01','Feb'=>'02','Mar'=>'03','Apr'=>'04','May'=>'05','Jun'=>'06',
+	'Jul'=>'07','Aug'=>'08','Sep'=>'09','Oct'=>'10','Nov'=>'11','Dec'=>'12'
+);
+my %abbr_month = (
+	'01'=>'Jan','02'=>'Feb','03'=>'Mar','04'=>'Apr','05'=>'May','06'=>'Jun',
+	'07'=>'Jul','08'=>'Aug','09'=>'Sep','10'=>'Oct','11'=>'Nov','12'=>'Dec'
+);
+# Highlight variables
+my @KEYWORDS1 = (
+	'ALL', 'ASC', 'AS', 'ALTER', 'AND', 'ADD', 'AUTO_INCREMENT', 'ANY', 'ANALYZE',
+	'BETWEEN', 'BINARY', 'BOTH', 'BY', 'BOOLEAN', 'BEGIN',
+	'CHANGE', 'CHECK', 'COLUMNS', 'COLUMN', 'CROSS','CREATE', 'CASE', 'COMMIT', 'COALESCE', 'CLUSTER', 'COPY',
+	'DATABASES', 'DATABASE', 'DATA', 'DELAYED', 'DESCRIBE', 'DESC', 'DISTINCT', 'DELETE', 'DROP', 'DEFAULT',
+	'ENCLOSED', 'ESCAPED', 'EXISTS', 'EXPLAIN', 'ELSE', 'END', 'EXCEPT',
+	'FIELDS', 'FIELD', 'FLUSH', 'FOR', 'FOREIGN', 'FUNCTION', 'FROM', 'FULL',
+	'GROUP', 'GRANT', 'GREATEST',
+	'HAVING',
+	'IGNORE', 'INDEX', 'INFILE', 'INSERT', 'INNER', 'INTO', 'IDENTIFIED', 'IN', 'IS', 'IF', 'INTERSECT', 'INHERIT',
+	'JOIN',
+	'KEYS', 'KILL','KEY',
+	'LEADING', 'LIKE', 'LIMIT', 'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOW_PRIORITY', 'LEFT', 'LANGUAGE', 'LEAST', 'LOGIN',
+	'MODIFY',
+	'NATURAL', 'NOT', 'NULL', 'NEXTVAL', 'NULLIF', 'NOSUPERUSER', 'NOCREATEDB', 'NOCREATEROLE',
+	'OPTIMIZE', 'OPTION', 'OPTIONALLY', 'ORDER', 'OUTFILE', 'OR', 'OUTER', 'ON', 'OVERLAPS', 'OWNER',
+	'PROCEDURE','PROCEDURAL', 'PRIMARY',
+	'READ', 'REFERENCES', 'REGEXP', 'RENAME', 'REPLACE', 'RETURN', 'REVOKE', 'RLIKE', 'RIGHT', 'ROLE', 'ROLLBACK',
+	'SHOW', 'SONAME', 'STATUS', 'STRAIGHT_JOIN', 'SELECT', 'SETVAL', 'SET', 'SOME', 'SEQUENCE',
+	'TABLES', 'TERMINATED', 'TO', 'TRAILING','TRUNCATE', 'TABLE', 'TEMPORARY', 'TRIGGER', 'TRUSTED', 'THEN',
+	'UNIQUE', 'UNLOCK', 'USE', 'USING', 'UPDATE', 'UNSIGNED', 'UNION',
+	'VALUES', 'VARIABLES', 'VIEW', 'VACUUM', 'VERBOSE',
+	'WITH', 'WRITE', 'WHERE', 'WHEN',
+	'ZEROFILL',
+	'XOR',
+);
+my @KEYWORDS2 = (
+	'ascii', 'age',
+	'bit_length', 'btrim',
+	'char_length', 'character_length', 'convert', 'chr', 'current_date', 'current_time', 'current_timestamp', 'count',
+	'decode', 'date_part', 'date_trunc',
+	'encode', 'extract',
+	'get_byte', 'get_bit',
+	'initcap', 'isfinite', 'interval',
+	'justify_hours', 'justify_days',
+	'lower', 'length', 'lpad', 'ltrim', 'localtime', 'localtimestamp',
+	'md5',
+	'now',
+	'octet_length', 'overlay',
+	'position', 'pg_client_encoding',
+	'quote_ident', 'quote_literal',
+	'repeat', 'replace', 'rpad', 'rtrim',
+	'substring', 'split_part', 'strpos', 'substr', 'set_byte', 'set_bit',
+	'trim', 'to_ascii', 'to_hex', 'translate', 'to_char', 'to_date', 'to_timestamp', 'to_number', 'timeofday',
+	'upper',
+);
+my @KEYWORDS3 = (
+	'STDIN', 'STDOUT'
+);
+my %SYMBOLS = ( '=' => '=', '<' => '<', '>' => '>', '\|' => '|', ',' => ',', '\.' => '.', '\+' => '+', '\-' => '-', '\*' => '*', '\/' => '/', '\!=' => '!=' );
+my @BRACKETS = ( '(', ')' );
+map { $_ = quotemeta($_) } @BRACKETS;
+
+
+# Where statistics are stored
+my %STATS = ();
+my $first_log_date = '';
+my $last_log_date = '';
+my %overall_stat = ();
+my @top_slowest = ();
+my %normalyzed_info = ();
+my %error_info = ();
+my %per_hour_info = ();
+my %per_minute_info = ();
+my %lock_info = ();
+my %tempfile_info = ();
+my %connection_info = ();
+my %session_info = ();
+my %conn_received = ();
+my %checkpoint_info = ();
+my @graph_values = ();
+
+# Open log file for reading
+my $nlines = 0;
+my $lfile = new IO::File;
+if ($logfile !~ /\.gz/) {
+	$lfile->open($logfile) || die "FATAL: cannot read logfile $logfile. $!\n";
+} else {
+	# Open a pipe to the zcat program for compressed logs
+	$lfile->open("$ZCAT_PROG $logfile |") || die "FATAL: cannot read from pipe to $ZCAT_PROG $logfile. $!\n";
+}
+my %cur_info = ();
+
+my $curdate = localtime(time);
+# Syslog lines have no year information, so take care of year overlap
+my ($gsec,$gmin,$ghour,$gmday,$gmon,$gyear,$gwday,$gyday,$gisdst) = localtime(time);
+$gyear += 1900;
+my $CURRENT_DATE = $gyear . sprintf("%02d", $gmon+1) . sprintf("%02d",$gmday);
+
+my $cur_td = $t0;
+
+while (my $line = <$lfile>) {
+	chomp($line);
+	$line =~ s/\r//;
+	$nlines++;
+	next if (!$line);
+
+	if ($debug && (($nlines % 100000) == 0)) {
+		my $t1 = Benchmark->new;
+		my $td = timediff($t1, $cur_td);
+		&logmsg('DEBUG', "Lines parsed $nlines in " . timestr($td) . "...");
+		$cur_td = $t1;
+	}
+
+	# Parse syslog lines
+	if ($format eq 'syslog') {
+		if ($line =~ /^(...)\s+(\d+)\s+(\d+):(\d+):(\d+)\s+([^\s]+)\s+([^\[]+)\[(\d+)\]:\s+\[([0-9\-]+)\]\s+([^:]+:)\s+(.*)/) {
+			# Skip non-PostgreSQL lines
+			next if ($7 ne $ident);
+			# Syslog lines have no year information, so take care of year overlap
+			my $tmp_year = $gyear;
+			if ("$tmp_year$month_abbr{$1}$2" > $CURRENT_DATE) {
+				$tmp_year = substr($CURRENT_DATE, 0, 4) - 1;
+			}
+			# Skip unwanted lines
+			my $cur_date = "$tmp_year$month_abbr{$1}$2$3$4$5";
+			next if ($from && ($from > $cur_date));
+			last if ($to && ($to < $cur_date));
+			# Process the log line
+			&parse_query($tmp_year, $month_abbr{$1}, sprintf("%02d", $2), $3, $4, $5, $6, $8, $9, $10, $11);
+		} elsif ($line =~ /^(...)\s+(\d+)\s+(\d+):(\d+):(\d+)\s+([^\s]+)\s+([^\[]+)\[(\d+)\]:\s+\[([0-9\-]+)\]\s+(#011)[\t\s]*(.*)/) {
+
+			$cur_info{query} .= "\n" . $11;
+		} else {
+			&logmsg('DEBUG', "Unknown syslog line format: $line");
+		}
+
+	} elsif ($format eq 'stderr') {
+
+		# Parse stderr lines
+		if ($line =~ /(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)\s+([^\s]+)\s+\[(\d+)\]:\s+\[([0-9\-]+)\]\s+([^:]+:)\s+(.*)/) {
+			# Skip unwanted lines
+			my $cur_date = "$1$2$3$4$5$6";
+			next if ($from && ($from > $cur_date));
+			last if ($to && ($to < $cur_date));
+			# Process the log line
+			&parse_query($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);
+		} else {
+			$cur_info{query} .= "\n" . $line if ($cur_info{query});
+		}
+
+	} else {
+		# Unknown format
+		&logmsg('DEBUG', "Unknown line format: $line");
+	}
+}
+
+$lfile->close();
+
+
+my $t1 = Benchmark->new;
+my $td = timediff($t1, $t0);
+
+&logmsg('DEBUG', "Ok, generating report...") if ($debug);
+
+if ( ($extension eq 'text') || ($extension eq 'txt') ) {
+	&dump_as_text();
+} else {
+	&dump_as_html();
+}
+
+exit 0;
+
+#-------------------------------------------------------------------------------
+
+# Show PgBadger command line usage
+sub usage
+{
+	print qq{
+Usage: $0 -l logfile ... > out.html
+
+	-l | --logfile filename: path to the PostgreSQL log file to parse. It can
+				 be a plain text log or a gzip compressed file
+				 with the .gz extension.
+	-f | --format logtype : the value can be: syslog or stderr. Default: stderr
+	-i | --ident name : program name used as syslog ident. Default: postgres
+	-h | --help : show this message and exit.
+	-d | --dbname database : only report what concerns the given database
+	-u | --dbuser username : only report what concerns the given user
+	-t | --top number : number of queries to store/display. Default: 20
+	-s | --sample number : number of query samples to store/display. Default: 3
+	-x | --extension : output format. Values: text or html. Default: html
+	-m | --maxlength size : maximum length of a query; it will be truncated
+				beyond the given size. Default: no truncation
+	-g | --graph : generate graphs, requires the GD::Graph Perl module
+	-b | --begin datetime : start date/time for the data to be parsed in log.
+	-e | --end datetime : end date/time for the data to be parsed in log.
+ +}; + + exit 0; +} + +# Display message following the log level +sub logmsg +{ + my ($level, $str) = @_; + + if ($level =~ /(\d+)/) { + print STDERR "\t"x$1; + } + + print STDERR "$level: $str\n"; +} + +# Normalyze SQL queries by removing parameters +sub normalize_query +{ + my $orig_query = shift; + + return if (!$orig_query); + + $orig_query = lc($orig_query); + + # Remove extra space, new line and tab caracters by a single space + $orig_query =~ s/[\t\s\r\n]+/ /gs; + # Remove string content + $orig_query =~ s/\\'//g; + $orig_query =~ s/'[^']*'/''/g; + $orig_query =~ s/''('')+/''/g; + # Remove NULL parameters + $orig_query =~ s/=\s*NULL/=''/g; + # Remove numbers + $orig_query =~ s/([^a-z_\$-])-?([0-9]+)/${1}0/g; + # Remove hexadecimal numbers + $orig_query =~ s/([^a-z_\$-])0x[0-9a-f]{1,10}/${1}0x/g; + # Remove IN values + $orig_query =~ s/in\s*\([\'0x,\s]*\)/in (...)/g; + + return $orig_query; +} + +# Format numbers with comma for better reading +sub comma_numbers +{ + return 0 if ($#_ < 0); + + my $text = reverse $_[0]; + + $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; + + return scalar reverse $text; +} + +# Format duration +sub convert_time +{ + my $time = shift; + + my $days = int($time / 86400000); + $time -= ($days * 86400000); + my $hours = int($time / 3600000); + $time -= ($hours * 3600000); + my $minutes = int($time / 60000); + $time -= ($minutes * 60000); + my $seconds = sprintf("%0.2f", $time / 1000); + + $days = $days < 1 ? '' : $days .'d'; + $hours = $hours < 1 ? '' : $hours .'h'; + $minutes = $minutes < 1 ? '' : $minutes . 'm'; + $time = $days . $hours . $minutes . $seconds . 's'; + + return $time; +} + +# Stores the top N slowest queries +sub set_top_slowest +{ + my ($q, $dt, $date) = @_; + + push(@top_slowest, [ ($dt,$date,$q) ]); + + @top_slowest = (sort {$b->[0] <=> $a->[0]} @top_slowest)[0..$end_top]; + +} + +# Stores top N slowest sample queries +sub set_top_sample +{ + my ($norm, $q, $dt, $date) = @_; + + $normalyzed_info{$norm}{samples}{$dt}{query} = $q; + $normalyzed_info{$norm}{samples}{$dt}{date} = $date; + + my $i = 1; + foreach my $k (sort {$b <=> $a} keys %{$normalyzed_info{$norm}{samples}}) { + if ($i > $sample) { + delete $normalyzed_info{$norm}{samples}{$k}; + } + $i++; + } +} + +# Stores top N error sample queries +sub set_top_error_sample +{ + my ($q, $date, $detail) = @_; + + + # always keep the last error messages + if (exists $error_info{$q}{date} && ($#{$error_info{$q}{date}} >= $sample)) { + shift(@{$error_info{$q}{date}}); + shift(@{$error_info{$q}{detail}}); + } + push(@{$error_info{$q}{date}}, $date); + push(@{$error_info{$q}{detail}}, $detail); +} + + +sub dump_as_text +{ + + # Global informations + my $curdate = localtime(time); + my $fmt_nlines = &comma_numbers($nlines); + my $total_time = timestr($td); + $total_time =~ s/^([\.0-9]+) wallclock.*/$1/; + $total_time = &convert_time($total_time*1000); + print qq{ + +- Global informations -------------------------------------------------- + +Generated on $curdate +Log file: $logfile +Parsed $fmt_nlines log entries in $total_time +Log start from $first_log_date to $last_log_date +}; + + # Overall statistics + my $fmt_unique = &comma_numbers(scalar keys %normalyzed_info) || 0; + my $fmt_queries = &comma_numbers($overall_stat{'queries_number'}) || 0; + my $fmt_duration = &convert_time($overall_stat{'queries_duration'}) || 0; + print qq{ + +- Overall statistics --------------------------------------------------- + +Number of unique normalized queries: $fmt_unique +Number of queries: $fmt_queries +Total 
query duration: $fmt_duration +First query: $overall_stat{'first_query'} +Last query: $overall_stat{'last_query'} +}; + foreach (sort { $overall_stat{'query_peak'}{$b} <=> $overall_stat{'query_peak'}{$a} } keys %{$overall_stat{'query_peak'}}) { + print "Query peak: ", &comma_numbers($overall_stat{'query_peak'}{$_}), " queries/s at $_"; + last; + } + my $fmt_errors = &comma_numbers($overall_stat{'errors_number'}) || 0; + my $fmt_unique_error = &comma_numbers(scalar keys %{$overall_stat{'unique_normalized_errors'}}) || 0; + print qq{ +Number of errors: $fmt_errors +Number of unique normalized errors: $fmt_unique_error +}; + if ($tempfile_info{count}) { + my $fmt_temp_maxsise = &comma_numbers($tempfile_info{maxsize}) || 0; + my $fmt_temp_avsize = &comma_numbers(sprintf("%.2f", ($tempfile_info{maxsize}/$tempfile_info{count}))); + print qq{ +Number temporary file: $tempfile_info{count} +Max size of temporary file: $fmt_temp_maxsise +Average size of temporary file: $fmt_temp_avsize +}; + } + print qq{ + +- Hourly statistics ---------------------------------------------------- + +Report not supported by text format + +}; + + # INSERT/DELETE/UPDATE/SELECT repartition + my $totala = $overall_stat{'SELECT'} + $overall_stat{'INSERT'} + $overall_stat{'UPDATE'} + $overall_stat{'DELETE'}; + my $total = $overall_stat{'queries_number'}; + print "\n- Queries by type ------------------------------------------------------\n\n"; + print "SELECT: ", &comma_numbers($overall_stat{'SELECT'}), " ", sprintf("%0.2f", ($overall_stat{'SELECT'}*100)/$total), "%\n" if ($overall_stat{'SELECT'}); + print "INSERT: ", &comma_numbers($overall_stat{'INSERT'}), " ", sprintf("%0.2f", ($overall_stat{'INSERT'}*100)/$total), "%\n" if ($overall_stat{'INSERT'}); + print "UPDATE: ", &comma_numbers($overall_stat{'UPDATE'}), " ", sprintf("%0.2f", ($overall_stat{'UPDATE'}*100)/$total), "%\n" if ($overall_stat{'UPDATE'}); + print "DELETE: ", &comma_numbers($overall_stat{'DELETE'}), " ", sprintf("%0.2f", ($overall_stat{'DELETE'}*100)/$total), "%\n" if ($overall_stat{'DELETE'}); + print "OTHERS: ", &comma_numbers($total - $totala), " ", sprintf("%0.2f", (($total - $totala)*100)/$total), "%\n" if (($total - $totala) > 0); + print "\n"; + + # Show top informations + print "\n- Slowest queries ------------------------------------------------------\n\n"; + for (my $i = 0; $i <= $#top_slowest; $i++) { + print $i+1, ") " . &convert_time($top_slowest[$i]->[0]) . " - $top_slowest[$i]->[2]\n"; + print "--\n"; + } + @top_slowest = (); + + print "\n- Queries that took up the most time (N) -------------------------------\n\n"; + my $idx = 1; + foreach my $k (sort {$normalyzed_info{$b}{duration} <=> $normalyzed_info{$a}{duration}} keys %normalyzed_info) { + next if (!$normalyzed_info{$k}{count}); + last if ($idx > $top); + my $q = $k; + if ($normalyzed_info{$k}{count} == 1) { + foreach (keys %{$normalyzed_info{$k}{samples}}) { + $q = $normalyzed_info{$k}{samples}{$_}{query}; + last; + } + } + $normalyzed_info{$k}{average} = $normalyzed_info{$k}{duration}/$normalyzed_info{$k}{count}; + print "$idx) " . &convert_time($normalyzed_info{$k}{duration}) . " - " . &comma_numbers($normalyzed_info{$k}{count}) . " - " . &convert_time($normalyzed_info{$k}{average}) . 
" - $q\n"; + print "--\n"; + $idx++; + } + + print "\n- Most frequent queries (N) --------------------------------------------\n\n"; + $idx = 1; + foreach my $k (sort {$normalyzed_info{$b}{count} <=> $normalyzed_info{$a}{count}} keys %normalyzed_info) { + next if (!$normalyzed_info{$k}{count}); + last if ($idx > $top); + my $q = $k; + if ($normalyzed_info{$k}{count} == 1) { + foreach (keys %{$normalyzed_info{$k}{samples}}) { + $q = $normalyzed_info{$k}{samples}{$_}{query}; + last; + } + } + print "$idx) " . &comma_numbers($normalyzed_info{$k}{count}) . " - " . &convert_time($normalyzed_info{$k}{duration}) . " - " . &convert_time($normalyzed_info{$k}{duration}/$normalyzed_info{$k}{count}) . " - $q\n"; + print "--\n"; + $idx++; + } + + print "\n- Slowest queries (N) --------------------------------------------------\n\n"; + $idx = 1; + foreach my $k (sort {$normalyzed_info{$b}{average} <=> $normalyzed_info{$a}{average}} keys %normalyzed_info) { + next if (!$normalyzed_info{$k}{count}); + last if ($idx > $top); + my $q = $k; + if ($normalyzed_info{$k}{count} == 1) { + foreach (keys %{$normalyzed_info{$k}{samples}}) { + $q = $normalyzed_info{$k}{samples}{$_}{query}; + last; + } + } + print "$idx) " . &convert_time($normalyzed_info{$k}{average}) . " - " . &comma_numbers($normalyzed_info{$k}{count}) . " - " . &convert_time($normalyzed_info{$k}{duration}) . " - $q\n"; + print "--\n"; + $idx++; + } + + print "\n- Most frequent errors (N) ---------------------------------------------\n\n"; + $idx = 1; + foreach my $k (sort {$error_info{$b}{count} <=> $error_info{$a}{count}} keys %error_info) { + next if (!$error_info{$k}{count}); + last if ($idx > $top); + print "$idx) " . &comma_numbers($error_info{$k}{count}) . " - $k\n"; + print "--\n"; + $idx++; + } + print "\n\n"; + print "Report generated by PgBadger $VERSION. License: GPL v3.\n"; + +} + +sub html_header +{ + print qq{ + + +PgBadger: PostgreSQL log analyzer + + + + +
+ +

PgBadger: PostgreSQL log analyzer report

+ + +

Normalized reports are marked with a "(N)".

+}; + +} + + +sub html_footer +{ + print qq{ + +
+
+ +
Table of contents
+
+ + +}; + +} + +sub dump_as_html +{ + + # Dump the html header + &html_header(); + + # Global informations + my $curdate = localtime(time); + my $fmt_nlines = &comma_numbers($nlines); + my $total_time = timestr($td); + $total_time =~ s/^([\.0-9]+) wallclock.*/$1/; + $total_time = &convert_time($total_time*1000); + print qq{ +
+ +
+}; + + # Overall statistics + my $fmt_unique = &comma_numbers(scalar keys %normalyzed_info) || 0; + my $fmt_queries = &comma_numbers($overall_stat{'queries_number'}) || 0; + my $fmt_duration = &convert_time($overall_stat{'queries_duration'}) || 0; + print qq{ +
+

Overall statistics ^

+ +}; + + + print qq{ +

Hourly statistics ^

+ + + + + + + + +}; + if ($tempfile_info{count}) { + print qq{ + +}; + } + if (scalar keys %{$checkpoint_info{chronos}} > 0) { + print qq{ + +}; + } + + + print qq{ + + + + + + + + + + + +}; + if ($tempfile_info{count}) { + print qq{ + + +}; + } + if (scalar keys %{$checkpoint_info{chronos}} > 0) { + print qq{ + + + + + + + + }; + } + + print qq{ + +}; + + foreach my $d (sort keys %per_hour_info) { + my $c = 1; + $d =~ /^\d{4}(\d{2})(\d{2})$/; + my $zday = "$abbr_month{$1} $2"; + foreach my $h (sort keys %{$per_hour_info{$d}}) { + my $colb = $c % 2; + $zday = " " if ($c > 1); + $per_hour_info{$d}{$h}{average} = $per_hour_info{$d}{$h}{duration} / ($per_hour_info{$d}{$h}{count} || 1); + $per_hour_info{$d}{$h}{'SELECT'}{average} = $per_hour_info{$d}{$h}{'SELECT'}{duration} / ($per_hour_info{$d}{$h}{'SELECT'}{count} || 1); + my $write_average = (($per_hour_info{$d}{$h}{'INSERT'}{duration}+$per_hour_info{$d}{$h}{'UPDATE'}{duration}+$per_hour_info{$d}{$h}{'DELETE'}{duration})||0)/(($per_hour_info{$d}{$h}{'INSERT'}{count}+$per_hour_info{$d}{$h}{'UPDATE'}{count}+$per_hour_info{$d}{$h}{'DELETE'}{count})||1); + print ""; + if ($tempfile_info{count}) { + my $temp_average = '0.00'; + if ($tempfile_info{chronos}{$d}{$h}{count}) { + $temp_average = &comma_numbers(sprintf("%.2f", $tempfile_info{chronos}{$d}{$h}{size}/$tempfile_info{chronos}{$d}{$h}{count})); + } + print ""; + } + if (exists $checkpoint_info{chronos}{$d}{$h}) { + print ""; + } elsif (exists $checkpoint_info{chronos}) { + print ""; + } + print "\n"; + $c++; + } + } + print "
DayTimeQueriesSELECT queriesWrite queriesTemporary filesCheckpoints
CountAv. duration (s)CountAv. duration (s)INSERTUPDATEDELETEAv. duration (s)CountAv. sizeWrote buffersAddedRemovedRecycledWrite time (sec)Sync time (sec)Total time (sec)
$zday$h", &comma_numbers($per_hour_info{$d}{$h}{count}), "", &convert_time($per_hour_info{$d}{$h}{average}), "",&comma_numbers($per_hour_info{$d}{$h}{'SELECT'}{count}||0), "", &convert_time($per_hour_info{$d}{$h}{'SELECT'}{average}||0), "", &comma_numbers($per_hour_info{$d}{$h}{'INSERT'}{count}||0), "", &comma_numbers($per_hour_info{$d}{$h}{'UPDATE'}{count}||0), "", &comma_numbers($per_hour_info{$d}{$h}{'DELETE'}{count}||0), "", &convert_time($write_average), "", &comma_numbers($tempfile_info{chronos}{$d}{$h}{count} || 0), "$temp_average", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{wbuffer}), "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{file_added}), "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{file_removed}), "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{file_recycled}), "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{write}), "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{sync}), "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{total}), "       
\n"; + + if ($graph) { + my @labels = (); + my @data1 = (); + my @data2 = (); + my @data3 = (); + foreach my $tm (keys %{$per_minute_info{query}}) { + $tm =~ /(\d{4})(\d{2})(\d{2})/; + my $prefix = "$1-$2-$3 "; + foreach my $h ("00" .. "23") { + my %dataavg = (); + foreach my $m ("00" .. "59") { + my $rd = &average_five_minutes($m); + if (exists $per_minute_info{query}{$tm}{$h}{$m}) { + # Average per minute + $dataavg{average}{"$rd"} += $per_minute_info{query}{$tm}{$h}{$m}{count}; + # Search minimum and maximum during this minute + foreach my $s (keys %{$per_minute_info{query}{$tm}{$h}{$m}{second}}) { + $dataavg{max}{"$rd"} = $per_minute_info{query}{$tm}{$h}{$m}{second}{$s} if ($per_minute_info{query}{$tm}{$h}{$m}{second}{$s} > $dataavg{max}{"$rd"}); + $dataavg{min}{"$rd"} = $per_minute_info{query}{$tm}{$h}{$m}{second}{$s} if (!$dataavg{min}{"$rd"} || ($per_minute_info{query}{$tm}{$h}{$m}{second}{$s} < $dataavg{min}{"$rd"})); + } + } + } + foreach my $rd ('00','05','10','15','20','25','30','35','40','45','50','55') { + push(@data2, int(($dataavg{average}{"$rd"} || 0) / 300)); + # Maxi per minute + push(@data1, $dataavg{max}{"$rd"} || 0); + # Mini per minute + push(@data3, $dataavg{min}{"$rd"} || 0); + push(@labels, "$prefix $h:$rd"); + } + } + } + delete $per_minute_info{query}; + push(@graph_values, [ @labels ] ); @labels = (); + push(@graph_values, [ @data1 ] ); @data1 = (); + push(@graph_values, [ @data2 ] ); @data2 = (); + push(@graph_values, [ @data3 ] ); @data3 = (); + &create_graph('queriespersecond', 'Queries per second (5 minutes average)', 'Hours', 'Queries per second', 'Maximum', 'Average', 'Minimum'); + @graph_values = (); + print qq{

Queries per second

}; + # All queries + foreach my $tm (keys %per_hour_info) { + $tm =~ /(\d{4})(\d{2})(\d{2})/; + my $prefix = "$1-$2-$3 "; + foreach my $h ("00" .. "23") { + push(@labels, "$prefix $h:00"); + push(@data1, $per_hour_info{$tm}{$h}{count} || 0); + push(@data2, sprintf("%.2f", (($per_hour_info{$tm}{$h}{duration} || 0) / ($per_hour_info{$tm}{$h}{count} || 1)) /1000)); + } + } + push(@graph_values, [ @labels ] ); @labels = (); + push(@graph_values, [ @data1 ] ); @data1 = (); + push(@graph_values, [ @data2 ] ); @data2 = (); + &create_graph_twoaxes('allqueries', 'All queries', 'Hours', 'Queries', 'Duration', 'Number of queries','Average duration (s)'); + print qq{

All queries

}; + @graph_values = (); + # Select queries + foreach my $tm (keys %per_hour_info) { + $tm =~ /(\d{4})(\d{2})(\d{2})/; + my $prefix = "$1-$2-$3 "; + foreach my $h ("00" .. "23") { + push(@labels, "$prefix $h:00"); + push(@data1, $per_hour_info{$tm}{$h}{'SELECT'}{count} || 0); + push(@data2, sprintf("%.2f", (($per_hour_info{$tm}{$h}{'SELECT'}{duration} || 0) / ($per_hour_info{$tm}{$h}{'SELECT'}{count} || 1)) /1000)); + } + } + push(@graph_values, [ @labels ] ); @labels = (); + push(@graph_values, [ @data1 ] ); @data1 = (); + push(@graph_values, [ @data2 ] ); @data2 = (); + &create_graph_twoaxes('selectqueries', 'SELECT queries', 'Hours', 'Queries', 'Duration', 'Number of queries','Average duration (s)'); + print qq{

SELECT queries

}; + @graph_values = (); + + # Write queries + my @data4 = (); + foreach my $tm (keys %per_hour_info) { + $tm =~ /(\d{4})(\d{2})(\d{2})/; + my $prefix = "$1-$2-$3 "; + foreach my $h ("00" .. "23") { + push(@labels, "$prefix $h:00"); + my $wcount = $per_hour_info{$tm}{$h}{'UPDATE'}{count} + $per_hour_info{$tm}{$h}{'DELETE'}{count} + $per_hour_info{$tm}{$h}{'INSERT'}{count}; + my $wduration = $per_hour_info{$tm}{$h}{'UPDATE'}{duration} + $per_hour_info{$tm}{$h}{'DELETE'}{duration} + $per_hour_info{$tm}{$h}{'INSERT'}{duration}; + push(@data1, $per_hour_info{$tm}{$h}{'DELETE'}{count} || 0); + push(@data2, $per_hour_info{$tm}{$h}{'INSERT'}{count} || 0); + push(@data3, $per_hour_info{$tm}{$h}{'UPDATE'}{count} || 0); + push(@data4, sprintf("%.2f", (($wduration || 0) / ($wcount || 1)) /1000)); + } + } + push(@graph_values, [ @labels ] ); @labels = (); + push(@graph_values, [ @data1 ] ); @data1 = (); + push(@graph_values, [ @data2 ] ); @data2 = (); + push(@graph_values, [ @data3 ] ); @data2 = (); + push(@graph_values, [ @data4 ] ); @data4 = (); + &create_graph_twoaxes('writequeries', 'Write queries', 'Hours', 'Queries', 'Duration', 'DELETE queries', 'INSERT queries', 'UPDATE queries', 'Average duration (s)'); + print qq{

Write queries

}; + @graph_values = (); + + + } + + # INSERT/DELETE/UPDATE/SELECT repartition + print qq{ +

Queries by type ^

+ + + + + + + +}; + $overall_stat{'SELECT'} ||= 0; $overall_stat{'INSERT'} ||= 0; $overall_stat{'UPDATE'} ||= 0; $overall_stat{'DELETE'} ||= 0; + my $totala = $overall_stat{'SELECT'} + $overall_stat{'INSERT'} + $overall_stat{'UPDATE'} + $overall_stat{'DELETE'}; + my $total = $overall_stat{'queries_number'} || 1; + + print "\n"; + print "\n"; + print "\n"; + print "\n"; + print "\n" if (($total - $totala) > 0); + print "
TypeCountPercentage
SELECT", &comma_numbers($overall_stat{'SELECT'}), "", sprintf("%0.2f", ($overall_stat{'SELECT'}*100)/$total), "%
INSERT", &comma_numbers($overall_stat{'INSERT'}), "", sprintf("%0.2f", ($overall_stat{'INSERT'}*100)/$total), "%
UPDATE", &comma_numbers($overall_stat{'UPDATE'}), "", sprintf("%0.2f", ($overall_stat{'UPDATE'}*100)/$total), "%
DELETE", &comma_numbers($overall_stat{'DELETE'}), "", sprintf("%0.2f", ($overall_stat{'DELETE'}*100)/$total), "%
OTHERS", &comma_numbers($total - $totala), "", sprintf("%0.2f", (($total - $totala)*100)/$total), "%
\n"; + + # Lock stats per type + print qq{ +

Locks by type ^

+ + + + + + + + +}; + my $total_count = 0; + my $total_duration = 0; + foreach my $t (sort keys %lock_info) { + print "\n"; + foreach my $o (sort keys %{$lock_info{$t}}) { + next if (($o eq 'count') || ($o eq 'duration') || ($o eq 'chronos')); + print "\n"; + } + $total_count += $lock_info{$t}{count}; + $total_duration += $lock_info{$t}{duration}; + } + print "\n"; + print "
TypeObjectCountTotal DurationAv. duration (s)
$t", &comma_numbers($lock_info{$t}{count}), "", &convert_time($lock_info{$t}{duration}), "", &convert_time($lock_info{$t}{duration}/$lock_info{$t}{count}), "
$o", &comma_numbers($lock_info{$t}{$o}{count}), "", &convert_time($lock_info{$t}{$o}{duration}), "", &convert_time($lock_info{$t}{$o}{duration}/$lock_info{$t}{$o}{count}), "
Total", &comma_numbers($total_count), "", &convert_time($total_duration), "", &convert_time($total_duration/($total_count||1)), "
\n"; + + # Show session per database statistics + print qq{ +

Sessions per database ^

+ + + + + + + +}; + + foreach my $d (sort keys %{$session_info{database}}) { + print "\n"; + } + print "
DatabaseCountTotal DurationAv. duration (s)
$d", &comma_numbers($session_info{database}{$d}{count}), "", &convert_time($session_info{database}{$d}{duration}), "", &convert_time($session_info{database}{$d}{duration}/$session_info{database}{$d}{count}), "
\n"; + + # Show session per user statistics + print qq{ +

Sessions per user ^

+ + + + + + + +}; + + foreach my $d (sort keys %{$session_info{user}}) { + print "\n"; + } + print "
UserCountTotal DurationAv. duration (s)
$d", &comma_numbers($session_info{user}{$d}{count}), "", &convert_time($session_info{user}{$d}{duration}), "", &convert_time($session_info{user}{$d}{duration}/$session_info{user}{$d}{count}), "
\n"; + + # Show session per host statistics + print qq{ +

Sessions per host ^

+ + + + + + + +}; + + foreach my $d (sort keys %{$session_info{host}}) { + print "\n"; + } + print "
HostCountTotal DurationAv. duration (s)
$d", &comma_numbers($session_info{host}{$d}{count}), "", &convert_time($session_info{host}{$d}{duration}), "", &convert_time($session_info{host}{$d}{duration}/$session_info{host}{$d}{count}), "
\n"; + + $connection_info{count}++; + + # Show connection per database statistics + print qq{ +

Connections per database ^

+ + + + + + +}; + + foreach my $d (sort keys %{$connection_info{database}}) { + print "\n"; + foreach my $u (sort keys %{$connection_info{user}}) { + next if (!exists $connection_info{database_user}{$d}{$u}); + print "\n"; + } + } + print "
DatabaseUserCount
$d", &comma_numbers($connection_info{database}{$d}), "
$u", &comma_numbers($connection_info{database_user}{$d}{$u}), "
\n"; + + # Show connection per user statistics + print qq{ +

Connections per user ^

+ + + + + +}; + + foreach my $u (sort keys %{$connection_info{user}}) { + print "\n"; + } + print "
UserCount
$u", &comma_numbers($connection_info{user}{$u}), "
\n"; + + # Show connection per host statistics + print qq{ +

Connections per host ^

+ + + + + +}; + + foreach my $h (sort keys %{$connection_info{host}}) { + print "\n"; + } + print "
HostCount
$h", &comma_numbers($connection_info{host}{$h}), "
\n"; + + + # Show top informations + print qq{ +

Slowest queries ^

+ + + + + + + +}; + for (my $i = 0; $i <= $#top_slowest; $i++) { + my $col = $i % 2; + print "\n"; + } + print "
RankDuration (s)Query
", $i+1, "", &convert_time($top_slowest[$i]->[0]), "[1]\">
", &highlight_code($top_slowest[$i]->[2]), "
\n"; + @top_slowest = (); + + print qq{ +

Queries that took up the most time (N) ^

+ + + + + + + + + +}; + my $idx = 1; + foreach my $k (sort {$normalyzed_info{$b}{duration} <=> $normalyzed_info{$a}{duration}} keys %normalyzed_info) { + next if (!$normalyzed_info{$k}{count}); + last if ($idx > $top); + my $q = $k; + if ($normalyzed_info{$k}{count} == 1) { + foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { + $q = $normalyzed_info{$k}{samples}{$d}{query}; + last; + } + } + $normalyzed_info{$k}{average} = $normalyzed_info{$k}{duration}/$normalyzed_info{$k}{count}; + my $col = $idx % 2; + print ""; + print "\n"; + $idx++; + } + print "
RankTotal durationTimes executedAv. duration (s)Query
$idx", &convert_time($normalyzed_info{$k}{duration}), "
", &comma_numbers($normalyzed_info{$k}{count}), "
"; + foreach my $d (sort keys %{$normalyzed_info{$k}{chronos}}) { + my $c = 1; + $d =~ /^\d{4}(\d{2})(\d{2})$/; + my $zday = "$abbr_month{$1} $2"; + foreach my $h (sort keys %{$normalyzed_info{$k}{chronos}{$d}}) { + $normalyzed_info{$k}{chronos}{$d}{$h}{average} = $normalyzed_info{$k}{chronos}{$d}{$h}{duration}/$normalyzed_info{$k}{chronos}{$d}{$h}{count}; + my $colb = $c % 2; + $zday = " " if ($c > 1); + print ""; + $c++; + } + } + print "
DayTimeCountDurationAv. Duration
$zday$h", &comma_numbers($normalyzed_info{$k}{chronos}{$d}{$h}{count}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{duration}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{average}), "
", &convert_time($normalyzed_info{$k}{average}), "
", &highlight_code($q), "
"; + + if ($normalyzed_info{$k}{count} > 1) { + print "
"; + my $i = 0; + foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { + print "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; + $i++; + } + print "
"; + } + print "
\n"; + + print qq{ +

Most frequent queries (N) ^

+ + + + + + + + +}; + $idx = 1; + foreach my $k (sort {$normalyzed_info{$b}{count} <=> $normalyzed_info{$a}{count}} keys %normalyzed_info) { + next if (!$normalyzed_info{$k}{count}); + last if ($idx > $top); + my $q = $k; + if ($normalyzed_info{$k}{count} == 1) { + foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { + $q = $normalyzed_info{$k}{samples}{$d}{query}; + last; + } + } + my $col = $idx % 2; + print ""; + print "\n"; + $idx++; + } + print "
RankTimes executedTotal durationAv. duration (s)Query
$idx
", &comma_numbers($normalyzed_info{$k}{count}), "
"; + foreach my $d (sort keys %{$normalyzed_info{$k}{chronos}}) { + my $c = 1; + $d =~ /^\d{4}(\d{2})(\d{2})$/; + my $zday = "$abbr_month{$1} $2"; + foreach my $h (sort keys %{$normalyzed_info{$k}{chronos}{$d}}) { + $normalyzed_info{$k}{chronos}{$d}{$h}{average} = $normalyzed_info{$k}{chronos}{$d}{$h}{duration}/$normalyzed_info{$k}{chronos}{$d}{$h}{count}; + my $colb = $c % 2; + $zday = " " if ($c > 1); + print ""; + $c++; + } + } + print "
DayTimeCountDurationAv. Duration
$zday$h", &comma_numbers($normalyzed_info{$k}{chronos}{$d}{$h}{count}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{duration}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{average}), "
", &convert_time($normalyzed_info{$k}{duration}), "", &convert_time($normalyzed_info{$k}{average}), "
", &highlight_code($q), "
"; + + if ($normalyzed_info{$k}{count} > 1) { + print "
"; + my $i = 0; + foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { + print "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; + $i++; + } + print "
"; + } + print "
\n"; + + print qq{ +

Slowest queries (N) ^

+ + + + + + + + + +}; + $idx = 1; + foreach my $k (sort {$normalyzed_info{$b}{average} <=> $normalyzed_info{$a}{average}} keys %normalyzed_info) { + next if (!$k || !$normalyzed_info{$k}{count}); + last if ($idx > $top); + my $q = $k; + if ($normalyzed_info{$k}{count} == 1) { + foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { + $q = $normalyzed_info{$k}{samples}{$d}{query}; + last; + } + } + my $col = $idx % 2; + print ""; + print "\n"; + $idx++; + } + print "
RankAv. duration (s)Times executedTotal durationQuery
$idx", &convert_time($normalyzed_info{$k}{average}), "
", &comma_numbers($normalyzed_info{$k}{count}), "
"; + foreach my $d (sort keys %{$normalyzed_info{$k}{chronos}}) { + my $c = 1; + $d =~ /^\d{4}(\d{2})(\d{2})$/; + my $zday = "$abbr_month{$1} $2"; + foreach my $h (sort keys %{$normalyzed_info{$k}{chronos}{$d}}) { + $normalyzed_info{$k}{chronos}{$d}{$h}{average} = $normalyzed_info{$k}{chronos}{$d}{$h}{duration}/$normalyzed_info{$k}{chronos}{$d}{$h}{count}; + my $colb = $c % 2; + $zday = " " if ($c > 1); + print ""; + $c++; + } + } + print "
DayTimeCountDurationAv. Duration
$zday$h", &comma_numbers($normalyzed_info{$k}{chronos}{$d}{$h}{count}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{duration}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{average}), "
", &convert_time($normalyzed_info{$k}{duration}), "
", &highlight_code($q), "
"; + if ($normalyzed_info{$k}{count} > 1) { + print "
"; + my $i = 0; + foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { + print "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; + $i++; + } + print "
"; + } + print "
\n"; + + print qq{ +

Most frequent errors (N) ^

+ + + + + + + +}; + $idx = 1; + foreach my $k (sort {$error_info{$b}{count} <=> $error_info{$a}{count}} keys %error_info) { + next if (!$error_info{$k}{count}); + last if ($idx > $top); + my $col = $idx % 2; + print "\n"; + if ($error_info{$k}{count} > 1) { + print "\n"; + $idx++; + } + print "
RankTimes reportedError
$idx
", &comma_numbers($error_info{$k}{count}), ""; + print "
"; + foreach my $d (sort keys %{$error_info{$k}{chronos}}) { + my $c = 1; + $d =~ /^\d{4}(\d{2})(\d{2})$/; + my $zday = "$abbr_month{$1} $2"; + foreach my $h (sort keys %{$error_info{$k}{chronos}{$d}}) { + my $colb = $c % 2; + $zday = " " if ($c > 1); + print ""; + $c++; + } + } + print "
DayTimeCount
$zday$h", &comma_numbers($error_info{$k}{chronos}{$d}{$h}{count}), "
$k
"; + print "
"; + for (my $i = 0; $i <= $#{$error_info{$k}{date}}; $i++) { + print "
$k
\n"; + print "
Detail: $error_info{$k}{detail}[$i]
\n" if ($error_info{$k}{detail}[$i]); + } + print "
"; + } elsif ($error_info{$k}{detail}[0]) { + print "
$k
"; + print "
Detail: $error_info{$k}{detail}[0]
\n"; + } else { + print "
$k
"; + } + print "
\n"; + + # Dump the html footer + &html_footer(); + +} + +# Highlight SQL code +sub highlight_code +{ + my $code = shift; + + return $code if ($nohighlight); + + foreach my $x (keys %SYMBOLS) { + $code =~ s/$x/\$\$CLASSSY0A\$\$$SYMBOLS{$x}\$\$CLASSSY0B\$\$/igs; + } + $code =~ s/("[^"]*")/$1<\/span>/igs; + $code =~ s/('[^']*')/$1<\/span>/igs; + $code =~ s/(`[^`]*`)/$1<\/span>/igs; + + for (my $x = 0; $x <= $#KEYWORDS1; $x++) { + #$code =~ s/\b($KEYWORDS1[$x])\b/$1<\/span>/igs; + $code =~ s/\b$KEYWORDS1[$x]\b/$KEYWORDS1[$x]<\/span>/igs; + } + for (my $x = 0; $x <= $#KEYWORDS2; $x++) { + #$code =~ s/\b($KEYWORDS2[$x])\b/$1<\/span>/igs; + $code =~ s/\b$KEYWORDS2[$x]\b/$KEYWORDS2[$x]<\/span>/igs; + } + for (my $x = 0; $x <= $#KEYWORDS3; $x++) { + #$code =~ s/\b($KEYWORDS3[$x])\b/$1<\/span>/igs; + $code =~ s/\b$KEYWORDS3[$x]\b/$KEYWORDS3[$x]<\/span>/igs; + } + for (my $x = 0; $x <= $#BRACKETS; $x++) { + $code =~ s/($BRACKETS[$x])/$1<\/span>/igs; + } + $code =~ s/\$\$CLASSSY0A\$\$([^\$]+)\$\$CLASSSY0B\$\$/$1<\/span>/igs; + + $code =~ s/\b(\d+)\b/$1<\/span>/igs; + + return $code; +} + +sub parse_query +{ + my ($t_year, $t_month, $t_day, $t_hour, $t_min, $t_sec, $t_host, $t_pid, $t_session, $t_loglevel, $t_query) = @_; + + # Extract user and database information from the loglevel part + my $t_dbname = ''; + my $t_dbuser = ''; + if ($t_loglevel =~ /^user=([^,]*),\s*db=([^\s]*)\s+([^:]*)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT):/) { + $t_dbuser = $1; + $t_dbname = $2; + $t_loglevel = $4; + } elsif ($t_loglevel =~ /^db=([^,]*),\s*user=([^\s]*)\s+([^:]*)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT):/) { + $t_dbname = $1; + $t_dbuser = $2; + $t_loglevel = $4; + } elsif ($t_loglevel =~ /(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT):/) { + $t_loglevel = $1; + } elsif ($t_loglevel =~ /(CONTEXT):/) { + next; + } else { + # Unrecognized loglevel format + &logmsg('DEBUG', "Unrecognized loglevel format: $t_query"); + return; + } + + # Check user and/or database if require + if ($dbname) { + # Log line do not match the required dbname + if (!$cur_info{dbname} || ($dbname ne $cur_info{dbname})) { + %cur_info = (); + return; + } + } + if ($dbuser) { + # Log line do not match the required dbuser + if (!$cur_info{dbuser} || ($dbuser ne $cur_info{dbuser})) { + %cur_info = (); + return; + } + } + + # Store the current timestamp of the log line + $first_log_date = "$t_year-$t_month-$t_day $t_hour:$t_min:$t_sec" if (!$first_log_date); + $last_log_date = "$t_year-$t_month-$t_day $t_hour:$t_min:$t_sec"; + + # Stores lock activity + if (($t_loglevel eq 'LOG') && ($t_query =~ /acquired ([^\s]+) on ([^\s]+) .* after ([0-9\.]+) ms/)) { + $lock_info{$1}{count}++; + $lock_info{$1}{duration} += $3; + $lock_info{$1}{$2}{count}++; + $lock_info{$1}{$2}{duration} += $3; + $lock_info{$1}{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{count}++; + $lock_info{$1}{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{duration}++; + return; + } + + # Stores temporary files activity + if (($t_loglevel eq 'LOG') && ($t_query =~ /temporary file: path .*, size (\d+)/)) { + $tempfile_info{count}++; + $tempfile_info{size} += $1; + $tempfile_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{count}++; + $tempfile_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{size} += $1; + $tempfile_info{maxsize} = $1 if ($tempfile_info{maxsize} < $1); + return; + } + + # Stores pre connection activity + if (($t_loglevel eq 'LOG') && ($t_query =~ /connection received: host=([^\s]+) port=(\d+)/)) { + $conn_received{$t_pid} = $1; + return; + } 
+ # Stores connection activity + if (($t_loglevel eq 'LOG') && ($t_query =~ /connection authorized: user=([^\s]+) database=([^\s]+)/)) { + my $usr = $1; + my $db = $2; + $connection_info{count}++; + $connection_info{user}{$usr}++; + $connection_info{database}{$db}++; + $connection_info{database_user}{$db}{$usr}++; + $connection_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{count}++; + $connection_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{user}{$usr}++; + $connection_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{database}{$db}++; + $connection_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{database_user}{$db}{$usr}++; + if (exists $conn_received{$t_pid}) { + $connection_info{host}{$conn_received{$t_pid}}++; + $connection_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{host}{$conn_received{$t_pid}}++; + delete $conn_received{$t_pid}; + } + return; + } + + # Stores session duration + if (($t_loglevel eq 'LOG') && ($t_query =~ /disconnection: session time: ([^\s]+) user=([^\s]+) database=([^\s]+) host=([^\s]+) port=(\d+)/)) { + my $time = $1; + my $usr = $2; + my $db = $3; + my $host = $4; + # Store time in millisecond + $time =~ /(\d+):(\d+):(\d+\.\d+)/; + $time = ($3*1000)+($2*60*1000)+($1*60*60*1000); + $session_info{count}++; + $session_info{duration} += $time; + $session_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{count}++; + $session_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{duration} += $time; + $session_info{database}{$db}{count}++; + $session_info{database}{$db}{duration} += $time; + $session_info{user}{$usr}{count}++; + $session_info{user}{$usr}{duration} += $time; + $session_info{host}{$host}{count}++; + $session_info{host}{$host}{duration} += $time; + return; + } + + # Store checkpoint information + if (($t_loglevel eq 'LOG') && ($t_query =~ /checkpoint complete: wrote (\d+) buffers \(([^\)]+)\); (\d+) transaction log file\(s\) added, (\d+) removed, (\d+) recycled; write=([0-9\.]+) s, sync=([0-9\.]+) s, total=([0-9\.]+) s/)) { + $checkpoint_info{wbuffer} += $1; + #$checkpoint_info{percent_wbuffer} += $2; + $checkpoint_info{file_added} += $3; + $checkpoint_info{file_removed} += $4; + $checkpoint_info{file_recycled} += $5; + $checkpoint_info{write} += $6; + $checkpoint_info{sync} += $7; + $checkpoint_info{total} += $8; + + $checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{wbuffer} += $1; + #$checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{percent_wbuffer} += $2; + $checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{file_added} += $3; + $checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{file_removed} += $4; + $checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{file_recycled} += $5; + $checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{write} += $6; + $checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{sync} += $7; + $checkpoint_info{chronos}{"$t_year$t_month$t_day"}{"$t_hour"}{total} += $8; + return; + } + + # Store errors statistics + if ($cur_info{query}) { + delete $conn_received{$cur_info{pid}}; + if ($cur_info{loglevel} =~ /WARNING|ERROR|FATAL|PANIC|HINT/) { + $cur_info{query} = substr($cur_info{query}, 0, $maxlength) . '[...]' if (($maxlength > 0) && (length($cur_info{query}) > $maxlength)); + $cur_info{query} = $cur_info{loglevel} . ": " . 
$cur_info{query}; + $overall_stat{'errors_number'}++; + my $normalized_error = &normalize_error($cur_info{query}); + $overall_stat{'unique_normalized_errors'}{"$normalized_error"}++; + # Store normalyzed error count + $error_info{$normalized_error}{count}++; + # Store normalyzed error count per time + $error_info{$normalized_error}{chronos}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{count}++; + # Store normalyzed query samples + my $cur_last_log_date = "$cur_info{year}-$cur_info{month}-$cur_info{day} $cur_info{hour}:$cur_info{min}:$cur_info{sec}"; + if ($t_loglevel =~ /DETAIL|STATEMENT/) { + &set_top_error_sample($normalized_error, $cur_last_log_date, $t_query); + } else { + &set_top_error_sample($normalized_error, $cur_last_log_date); + } + %cur_info = (); + return; + } + + # Process complete query but not debug/info logs + if ($cur_info{loglevel} eq 'LOG') { + # Apply bind parameters if any + if ( ($t_loglevel eq 'DETAIL') && ($t_query =~ /parameters: (.*)/)) { + my @t_res = split(/[,\s]*\$(\d+)\s=\s/, $1); + shift(@t_res); + for (my $i = 0; $i < $#t_res; $i+=2) { + $cur_info{query} =~ s/\$$t_res[$i]\b/$t_res[$i+1]/s; + } + } + + # Cleanup and normalize the current query + $cur_info{query} =~ s/^[\t\s]+//s; + $cur_info{query} =~ s/[\t\s]+$//s; + $cur_info{query} = substr($cur_info{query}, 0, $maxlength) . '[...]' if (($maxlength > 0) && (length($cur_info{query}) > $maxlength)); + $cur_info{query} .= ';' if (substr($cur_info{query}, -1, 1) ne ';'); + my $normalized = &normalize_query($cur_info{query}); + + # Stores global statistics + my $cur_last_log_date = "$cur_info{year}-$cur_info{month}-$cur_info{day} $cur_info{hour}:$cur_info{min}:$cur_info{sec}"; + $overall_stat{'queries_number'}++; + $overall_stat{'queries_duration'} += $cur_info{duration}; + $overall_stat{'first_query'} = $cur_last_log_date if (!$overall_stat{'first_query'}); + $overall_stat{'last_query'} = $cur_last_log_date; + $overall_stat{'query_peak'}{$cur_last_log_date}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{count}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{duration} += $cur_info{duration}; + if ($graph) { + $per_minute_info{query}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{$cur_info{hour}}{$cur_info{min}}{count}++; + $per_minute_info{query}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{$cur_info{hour}}{$cur_info{min}}{second}{$cur_info{sec}}++; + $per_minute_info{query}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{$cur_info{hour}}{$cur_info{min}}{duration} += $cur_info{duration}; + } + if ($normalized =~ /delete from/) { + $overall_stat{'DELETE'}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'DELETE'}{count}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'DELETE'}{duration} += $cur_info{duration}; + if ($graph) { + $per_minute_info{delete}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{$cur_info{hour}}{$cur_info{min}}{count}++; + $per_minute_info{delete}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{$cur_info{hour}}{$cur_info{min}}{duration} += $cur_info{duration}; + } + } elsif ($normalized =~ /insert into/) { + $overall_stat{'INSERT'}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'INSERT'}{count}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'INSERT'}{duration} += $cur_info{duration}; + if ($graph) { + 
$per_minute_info{insert}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{"$cur_info{min}"}{count}++; + $per_minute_info{insert}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{"$cur_info{min}"}{duration} += $cur_info{duration}; + } + } elsif ($normalized =~ /update.*set\b/) { + $overall_stat{'UPDATE'}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'UPDATE'}{count}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'UPDATE'}{duration} += $cur_info{duration}; + if ($graph) { + $per_minute_info{update}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{"$cur_info{min}"}{count}++; + $per_minute_info{update}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{"$cur_info{min}"}{duration} += $cur_info{duration}; + } + } elsif ($normalized =~ /\bselect\b/is) { + $overall_stat{'SELECT'}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'SELECT'}{count}++; + $per_hour_info{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{'SELECT'}{duration} += $cur_info{duration}; + if ($graph) { + $per_minute_info{select}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{"$cur_info{min}"}{count}++; + $per_minute_info{select}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{"$cur_info{min}"}{duration} += $cur_info{duration}; + } + } + &set_top_slowest($cur_info{query}, $cur_info{duration}, $cur_last_log_date); + + # Store normalyzed query count + $normalyzed_info{$normalized}{count}++; + # Store normalyzed query total duration + $normalyzed_info{$normalized}{duration} += $cur_info{duration}; + # Store normalyzed query count and duration per time + $normalyzed_info{$normalized}{chronos}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{count}++; + $normalyzed_info{$normalized}{chronos}{"$cur_info{year}$cur_info{month}$cur_info{day}"}{"$cur_info{hour}"}{duration} += $cur_info{duration}; + # Store normalyzed query samples + &set_top_sample($normalized, $cur_info{query}, $cur_info{duration}, $last_log_date); + %cur_info = (); + } + } + + # Extract the duration part from the query + my $t_action = ''; + my $t_duration = ''; + if ($t_query =~ s/duration:\s+([0-9\.]+)\s+ms\s+(query|statement):\s*//is) { + $t_duration = $1; + $t_action = $2; + } elsif ($t_query =~ s/duration:\s+([0-9\.]+)\s+ms\s+(prepare|parse|bind|execute|execute from fetch)\s+[^:]+://is) { + $t_duration = $1; + $t_action = $2; + } elsif ($t_loglevel eq 'LOG') { + if ($t_query !~ /incomplete startup packet|connection|receive|unexpected EOF|still waiting for [^\s]+Lock|checkpoint starting:|could not send data to client|parameter .*configuration file/) { + &logmsg('WARNING', "Unrecognized line: $t_loglevel: $t_query at line $nlines"); + } + return; + } + + # Skiping parse and bind logs + return if ( $t_action && ($t_action !~ /query|statement|execute/) ); + + + $cur_info{year} = $t_year; + $cur_info{month} = $t_month; + $cur_info{day} = $t_day; + $cur_info{hour} = $t_hour; + $cur_info{min} = $t_min; + $cur_info{sec} = $t_sec; + $cur_info{host} = $t_host; + $cur_info{pid} = $t_pid; + $cur_info{session} = $t_session; + $cur_info{loglevel} = $t_loglevel; + $cur_info{query} = $t_query; + $cur_info{duration} = $t_duration; + $cur_info{dbname} = $t_dbname; + $cur_info{dbuser} = $t_dbuser; + + return 1; +} + +# Normalyze error messages +sub normalize_error +{ + my $orig_query = shift; + + return if 
(!$orig_query); + + # Remove character position + $orig_query =~ s/ at character \d+//; + + return $orig_query; +} + + +sub create_graph +{ + my ($filename, $title, $x_label, $y_label, @legends) = @_; + + use GD::Graph::lines; + my $graf = new GD::Graph::lines(800, 400); + $graf->set( + x_label => $x_label || '', + y_label => $y_label || '', + title => $title || '', + bgclr => '#ffffff', + fgclr => '#dddddd', + legendclr => '#993300', + legend_placement=> 'RC', + dclrs => [ qw(red orange green) ], + x_labels_vertical => 1, + transparent => 1, + long_ticks => 1, + shadow_depth => 5, + box_axis => 0, + show_values => 0, + x_label_skip => 12, + ) or die "FATAL: error creating graph, " . $graf->error . "\n"; + $graf->set_text_clr('#993300'); + $graf->set_legend(@legends) if ($#legends >= 0); + my $gd = $graf->plot(\@graph_values) or die $graf->error; + open(IMG, ">$filename.png") or die $!; + binmode IMG; + print IMG $gd->png; + close IMG; + @graph_values = (); + +} + +sub average_five_minutes +{ + my $val = shift; + + my @avgs = ('00','05','10','15','20','25','30','35','40','45','50','55','59'); + for (my $i = 0; $i <= $#avgs; $i++) { + if ($val == $avgs[$i]) { + return "$avgs[$i]"; + } elsif ($i == 12) { + return "$avgs[$i-1]"; + } elsif ( ($val > $avgs[$i]) && ($val < $avgs[$i+1])) { + return "$avgs[$i]"; + } + } + return $val; +} + +sub create_graph_twoaxes +{ + my ($filename, $title, $x_label, $y1_label, $y2_label, @legends) = @_; + + use GD::Graph::lines; + my $len = $#graph_values; + my @use_axis = (); + for (my $i = 0; $i <= $len; $i++) { + if ($i < $len) { + push(@use_axis, '1'); + } else { + push(@use_axis, '2'); + } + } + my $graf = new GD::Graph::lines(800, 400); + $graf->set( + x_label => $x_label || '', + y1_label => $y1_label || '', + y2_label => $y2_label || '', + title => $title || '', + two_axes => 1, + use_axis => \@use_axis, + bgclr => '#ffffff', + fgclr => '#dddddd', + legendclr => '#993300', + legend_placement=> 'RC', + dclrs => [ qw(red orange green blue) ], + x_labels_vertical => 1, + transparent => 1, + long_ticks => 1, + shadow_depth => 5, + box_axis => 0, + show_values => 0, + ) or die "FATAL: error creating graph, " . $graf->error . "\n"; + $graf->set_text_clr('#993300'); + $graf->set_legend(@legends) if ($#legends >= 0); + my $gd = $graf->plot(\@graph_values) or die $graf->error; + open(IMG, ">$filename.png") or die $!; + binmode IMG; + print IMG $gd->png; + close IMG; + @graph_values = (); + +} + +
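------------------------------------------------------------------------------

For experimenting with the normalization rules outside of the script, a
minimal, self-contained Perl sketch of the normalize_query() idea is shown
below. The sample queries and the simplified IN-list rule are illustrative
assumptions, not code from the commit above.

    #!/usr/bin/perl
    # Minimal sketch of the pgbadger normalization idea: strip string and
    # numeric literals so queries that differ only in their parameters
    # aggregate under a single normalized key.
    use strict;
    use warnings;

    sub normalize
    {
        my $q = lc(shift);

        # Collapse whitespace, as normalize_query() does
        $q =~ s/[\t\s\r\n]+/ /gs;
        # Empty out string literals
        $q =~ s/\\'//g;
        $q =~ s/'[^']*'/''/g;
        # Replace numeric constants with 0
        $q =~ s/([^a-z_\$-])-?[0-9]+/${1}0/g;
        # Simplified rule (assumption): fold any IN list; the committed code
        # only folds lists whose members are already-normalized literals
        $q =~ s/\bin\s*\([^\)]*\)/in (...)/g;

        return $q;
    }

    # Hypothetical sample queries: both collapse to the same key
    my %count;
    $count{normalize($_)}++ foreach (
        "SELECT * FROM users WHERE id IN (1, 2, 3)",
        "select *  from users where id in (42)",
    );
    print "$count{$_}\t$_\n" foreach (keys %count);
    # Prints: 2	select * from users where id in (...)

Because numeric and string literals are rewritten before the IN lists are
folded, variants such as IN (1, 2, 3) and IN (42) land on the same key, which
is what lets the report count "Times executed" per query shape.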