From: Darold Gilles
Date: Thu, 7 Aug 2014 22:37:51 +0000 (+0200)
Subject: Add -R | --retention option to set the maximum number of weekly reports to preserve...
X-Git-Tag: v6.0~6
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0c124cfe86a0d04b5755bb8ab41110482d64d830;p=pgbadger

Add -R | --retention option to set the maximum number of weekly reports
to preserve in the output directory for incremental mode. Thanks to
Kong Man for the feature request.
---

diff --git a/README b/README
index dab9588..03b06bc 100644
--- a/README
+++ b/README
@@ -2,9 +2,9 @@ NAME
     pgBadger - a fast PostgreSQL log analysis report

 SYNOPSIS
-    pgbadger [options] logfile [...]
+    Usage: pgbadger [options] logfile [...]

-    PostgreSQL log analyzer with fully detailed reports and charts.
+    PostgreSQL log analyzer with fully detailed reports and graphs.

     Arguments:

@@ -44,6 +44,8 @@ SYNOPSIS
     -o | --outfile filename: define the filename for the output. Default depends
                              on the output format: out.html, out.txt, out.bin,
                              out.json or out.tsung.
+                             With the JSON::XS module installed, you can also
+                             output the file in JSON format.
                              To dump output to stdout use - as filename.
     -O | --outdir path     : directory where out file must be saved.
     -p | --prefix string   : the value of your custom log_line_prefix as
@@ -56,6 +58,10 @@ SYNOPSIS
     -q | --quiet           : don't print anything to stdout, not even a progress bar.
     -r | --remote-host ip  : set the host where to execute the cat command on
                              remote logfile to parse localy the file.
+    -R | --retention N     : number of weeks to keep in incremental mode. Default 0,
+                             disabled. Used to set the number of weeks to keep in
+                             the output directory. Older week and day directories
+                             are automatically removed.
     -s | --sample number   : number of query samples to store/display. Default: 3
     -S | --select-only     : only report SELECT queries.
     -t | --top number      : number of queries to store/display. Default: 20
@@ -65,7 +71,7 @@ SYNOPSIS
     -v | --verbose         : enable verbose or debug mode. Disabled by default.
     -V | --version         : show pgBadger version and exit.
     -w | --watch-mode      : only report errors just like logwatch could do.
-    -x | --extension       : output format. Values: text, html, binary, json or
+    -x | --extension       : output format. Values: text, html, bin, json or
                              tsung. Default: html
     -X | --extra-files     : in incremetal mode allow pgbadger to write CSS and JS
                              files in the output directory as separate files.
@@ -102,6 +108,7 @@ SYNOPSIS
                              from report. Example: "pg_dump".
     --exclude-line regex   : pgbadger will start to exclude any log entry that
                              will match the given regex. Can be used multiple time.
+    --anonymize            : obscure all literals in queries to hide confidential data.

     pgBadger is able to parse a remote log file using a passwordless ssh connection.
     Use the -r or --remote-host to set the host ip address or
@@ -120,21 +127,21 @@ SYNOPSIS
     Examples:

         pgbadger /var/log/postgresql.log
-        pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
+        pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz
+                 /var/log/postgres.log
         pgbadger /var/log/postgresql/postgresql-2012-05-*
         pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
-        pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
+        pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11"
+                 /var/log/postgresql.log
         cat /var/log/postgres.log | pgbadger -
-        # log prefix with stderr log output
-        perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
+        # Log prefix with stderr log output
+        perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h'
                 /pglog/postgresql-2012-08-21*
         perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
         # Log line prefix with syslog log output
-        perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
+        perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a'
                 /pglog/postgresql-2012-08-21*
-
-    Use my 8 CPUs to parse my 10GB file faster, really faster
-
+        # Use my 8 CPUs to parse my 10GB file faster, much faster
         perl pgbadger -j 8 /pglog/postgresql-9.1-main.log

     Generate Tsung sessions XML file with select queries only:
@@ -147,7 +154,7 @@ SYNOPSIS
     Generate report every week using incremental behavior:

-        0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
+        0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"`
            -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat

     This supposes that your log file and HTML report are also rotated every
@@ -155,20 +162,25 @@ SYNOPSIS
     Or better, use the auto-generated incremental reports:

-        0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1 \
+        0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1
            -O /var/www/pg_reports/

-    will generate a report per day and per week in the given output
-    directory.
+    will generate a report per day and per week.
+
+    In incremental mode, you can also specify the number of weeks to keep in
+    the reports:
+
+        /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1
+           -O /var/www/pg_reports/

     If you have a pg_dump at 23:00 and 13:00 each day during half an hour,
-    you can use pgbadger as follow to exclude these periods from the report:
+    you can use pgbadger as follows to exclude these periods from the report:

         pgbadger --exclude-time "2013-09-.* (23|13):.*" postgresql.log

-    This will help to not have all COPY order on top of slowest queries. You
-    can also use --exclude-appname "pg_dump" to solve this problem in a more
-    simple way.
+    This will help avoid having COPY statements, as generated by pg_dump, on
+    top of the list of slowest queries. You can also use --exclude-appname
+    "pg_dump" to solve this problem in a simpler way.

 DESCRIPTION
     pgBadger is a PostgreSQL log analyzer build for speed with fully
diff --git a/doc/pgBadger.pod b/doc/pgBadger.pod
index d98e2a2..21d8c09 100644
--- a/doc/pgBadger.pod
+++ b/doc/pgBadger.pod
@@ -4,9 +4,9 @@ pgBadger - a fast PostgreSQL log analysis report

 =head1 SYNOPSIS

-pgbadger [options] logfile [...]
+Usage: pgbadger [options] logfile [...]

-    PostgreSQL log analyzer with fully detailed reports and charts.
+    PostgreSQL log analyzer with fully detailed reports and graphs.
 Arguments:

@@ -46,6 +46,8 @@ Options:
     -o | --outfile filename: define the filename for the output. Default depends
                              on the output format: out.html, out.txt, out.bin,
                              out.json or out.tsung.
+                             With the JSON::XS module installed, you can also
+                             output the file in JSON format.
                              To dump output to stdout use - as filename.
     -O | --outdir path     : directory where out file must be saved.
     -p | --prefix string   : the value of your custom log_line_prefix as
@@ -58,6 +60,10 @@ Options:
     -q | --quiet           : don't print anything to stdout, not even a progress bar.
     -r | --remote-host ip  : set the host where to execute the cat command on
                              remote logfile to parse localy the file.
+    -R | --retention N     : number of weeks to keep in incremental mode. Default 0,
+                             disabled. Used to set the number of weeks to keep in
+                             the output directory. Older week and day directories
+                             are automatically removed.
     -s | --sample number   : number of query samples to store/display. Default: 3
     -S | --select-only     : only report SELECT queries.
     -t | --top number      : number of queries to store/display. Default: 20
@@ -67,10 +73,10 @@ Options:
     -v | --verbose         : enable verbose or debug mode. Disabled by default.
     -V | --version         : show pgBadger version and exit.
     -w | --watch-mode      : only report errors just like logwatch could do.
-    -x | --extension       : output format. Values: text, html, binary, json or
+    -x | --extension       : output format. Values: text, html, bin, json or
                              tsung. Default: html
     -X | --extra-files     : in incremetal mode allow pgbadger to write CSS and JS
-                            files in the output directory as separate files.
+                             files in the output directory as separate files.
     -z | --zcat exec_path  : set the full path to the zcat program. Use it if
                              zcat or bzcat or unzip is not in your path.
     --pie-limit num        : pie data lower than num% will show a sum instead.
@@ -121,24 +127,25 @@ some additional options to fully control the ssh connection.
 Examples:

-    pgbadger /var/log/postgresql.log
-    pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
-    pgbadger /var/log/postgresql/postgresql-2012-05-*
-    pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
-    pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
-    cat /var/log/postgres.log | pgbadger -
-    # log prefix with stderr log output
-    perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
-                    /pglog/postgresql-2012-08-21*
-    perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
-    # Log line prefix with syslog log output
-    perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
-                    /pglog/postgresql-2012-08-21*
-
-Use my 8 CPUs to parse my 10GB file faster, really faster
-
+    pgbadger /var/log/postgresql.log
+    pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz
+             /var/log/postgres.log
+    pgbadger /var/log/postgresql/postgresql-2012-05-*
+    pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
+    pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11"
+             /var/log/postgresql.log
+    cat /var/log/postgres.log | pgbadger -
+    # Log prefix with stderr log output
+    perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h'
+             /pglog/postgresql-2012-08-21*
+    perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
+    # Log line prefix with syslog log output
+    perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a'
+             /pglog/postgresql-2012-08-21*
+    # Use my 8 CPUs to parse my 10GB file faster, much faster
     perl pgbadger -j 8 /pglog/postgresql-9.1-main.log
+
 Generate Tsung sessions XML file with select queries only:

     perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log

@@ -149,25 +156,31 @@ Reporting errors every week by cron job:
 Generate report every week using incremental behavior:

-    0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
-       -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
+    0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"`
+       -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat

 This supposes that your log file and HTML report are also rotated every
 week.

 Or better, use the auto-generated incremental reports:

-    0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1 \
-       -O /var/www/pg_reports/
+    0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1
+       -O /var/www/pg_reports/
+
+will generate a report per day and per week.
+
+In incremental mode, you can also specify the number of weeks to keep in the reports:

-will generate a report per day and per week in the given output directory.
+    /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1
+       -O /var/www/pg_reports/

 If you have a pg_dump at 23:00 and 13:00 each day during half an hour, you can
-use pgbadger as follow to exclude these periods from the report:
+use pgbadger as follows to exclude these periods from the report:

     pgbadger --exclude-time "2013-09-.* (23|13):.*" postgresql.log

-This will help to not have all COPY order on top of slowest queries. You can
-also use --exclude-appname "pg_dump" to solve this problem in a more simple way.
+This will help avoid having COPY statements, as generated by pg_dump, on top of
+the list of slowest queries. You can also use --exclude-appname "pg_dump" to
+solve this problem in a simpler way.
 =head1 DESCRIPTION
diff --git a/pgbadger b/pgbadger
index 0fedc9d..68e48ad 100755
--- a/pgbadger
+++ b/pgbadger
@@ -167,6 +167,7 @@ my $incr_date = '';
 my $last_incr_date = '';
 my $anonymize = 0;
 my $noclean = 0;
+my $retention = 0;

 my $NUMPROGRESS = 10000;
 my @DIMENSIONS = (800, 300);
@@ -267,6 +268,7 @@ my $result = GetOptions(
 	"P|no-prettify!"  => \$noprettify,
 	"q|quiet!"        => \$quiet,
 	"r|remote-host=s" => \$remote_host,
+	'R|retention=i'   => \$retention,
 	"s|sample=i"      => \$sample,
 	"S|select-only!"  => \$select_only,
 	"t|top=i"         => \$top,
@@ -877,7 +879,7 @@ $tmp_last_parsed = 'tmp_' . basename($last_parsed) if ($last_parsed);
 $tmp_last_parsed = "$TMP_DIR/$tmp_last_parsed";

 # Clean the incremental directory if the feature is not disabled
-if (!$noclean && $saved_last_line{datetime}) {
+if (!$noclean && $saved_last_line{datetime} && $outdir) {

 	# Search the current week following the last parse date
 	$saved_last_line{datetime} =~ /^(\d+)\-(\d+)\-(\d+) /;
@@ -886,7 +888,7 @@ if (!$noclean && $saved_last_line{datetime}) {
 	my $last_day = $3;
 	# Get the week number following the date
 	my $wn = &get_week_number($last_year, $last_month, $last_day);
-	# Get the deys of the current week where binary files must be preserved
+	# Get the days of the current week where binary files must be preserved
 	my @wdays = &get_wdays_per_month($wn - 1, "$last_year-$last_month");
 	# Find obsolete dir days that shoud be cleaned
 	unless(opendir(DIR, "$outdir")) {
@@ -930,6 +932,83 @@ if (!$noclean && $saved_last_line{datetime}) {
 	}
 }

+# Clear storage when a retention is specified in incremental mode
+if ( $saved_last_line{datetime} && $outdir && $retention) {
+
+	# Search the current week following the last parse date
+	$saved_last_line{datetime} =~ /^(\d+)\-(\d+)\-(\d+) /;
+	my $last_year = $1;
+	my $last_month = $2;
+	my $last_day = $3;
+	# Get the current week number
+	my $wn = &get_week_number($last_year, $last_month, $last_day);
+	my $limit = $last_year;
+	if (($wn - $retention) < 1) {
+		$limit--;
+		$limit .= "52";
+	} else {
+		$limit .= sprintf("%02d", $wn - $retention);
+	}
+
+	# Find obsolete week directories that should be cleaned
+	unless(opendir(DIR, "$outdir")) {
+		die "Error: can't opendir $outdir: $!";
+	}
+	my @dyears = grep { $_ =~ /^\d+$/ } readdir(DIR);
+	closedir DIR;
+	my @obsolete_weeks = ();
+	foreach my $y (sort { $a <=> $b } @dyears) {
+		unless(opendir(DIR, "$outdir/$y")) {
+			die "Error: can't opendir $outdir/$y: $!";
+		}
+		my @weeks = grep { $_ =~ /^week-\d+$/ } readdir(DIR);
+		closedir DIR;
+		foreach my $w (sort { $a <=> $b } @weeks) {
+			$w =~ /^week-(\d+)$/;
+			if ("$y$1" lt $limit) {
+				&logmsg('DEBUG', "Removing obsolete week directory $outdir/$y/week-$1");
+				&cleanup_directory("$outdir/$y/week-$1", 1);
+				push(@obsolete_weeks, "$y$1");
+			}
+		}
+	}
+	# Now remove the corresponding days
+	foreach my $y (sort { $a <=> $b } @dyears) {
+		unless(opendir(DIR, "$outdir/$y")) {
+			die "Error: can't opendir $outdir/$y: $!";
+		}
+		my @dmonths = grep { $_ =~ /^\d+$/ } readdir(DIR);
+		closedir DIR;
+		my @rmmonths = ();
+		foreach my $m (sort { $a <=> $b } @dmonths) {
+			unless(opendir(DIR, "$outdir/$y/$m")) {
+				die "Error: can't opendir $outdir/$y/$m: $!";
+			}
+			my @rmdays = ();
+			my @ddays = grep { $_ =~ /^\d+$/ } readdir(DIR);
+			closedir DIR;
+			foreach my $d (sort { $a <=> $b } @ddays) {
+				my $weekNumber = sprintf("%02d", POSIX::strftime("%U", 1, 1, 1, $d, $m - 1, $y - 1900)+1);
+				if (grep(/^$y$weekNumber$/, @obsolete_weeks)) {
+					&logmsg('DEBUG', "Removing obsolete directory $outdir/$y/$m/$d");
+					&cleanup_directory("$outdir/$y/$m/$d", 1);
+					push(@rmdays, $d);
+				}
+			}
+			if ($#ddays == $#rmdays) {
+				&logmsg('DEBUG', "Removing obsolete empty directory $outdir/$y/$m");
+				rmdir("$outdir/$y/$m");
+				push(@rmmonths, $m);
+			}
+		}
+		if ($#dmonths == $#rmmonths) {
+			&logmsg('DEBUG', "Removing obsolete empty directory $outdir/$y");
+			rmdir("$outdir/$y");
+		}
+	}
+
+}
+
 # Main loop reading log files
 my $global_totalsize = 0;
 my @given_log_files = ( @log_files );
@@ -1442,6 +1521,10 @@ Options:
 	-q | --quiet           : don't print anything to stdout, not even a progress bar.
 	-r | --remote-host ip  : set the host where to execute the cat command on
 	                         remote logfile to parse localy the file.
+	-R | --retention N     : number of weeks to keep in incremental mode. Default 0,
+	                         disabled. Used to set the number of weeks to keep in
+	                         the output directory. Older week and day directories
+	                         are automatically removed.
 	-s | --sample number   : number of query samples to store/display. Default: 3
 	-S | --select-only     : only report SELECT queries.
 	-t | --top number      : number of queries to store/display. Default: 20
@@ -1546,6 +1629,11 @@ Or better, use the auto-generated incremental reports:

 will generate a report per day and per week.

+In incremental mode, you can also specify the number of weeks to keep in the reports:
+
+    /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1 \
+       -O /var/www/pg_reports/
+
 If you have a pg_dump at 23:00 and 13:00 each day during half an hour, you can
 use pgbadger as follow to exclude these period from the report:
@@ -1560,6 +1648,20 @@ solve this problem in a simpler way.

 exit 0;
 }

+sub cleanup_directory
+{
+	my ($dir, $remove_dir) = @_;
+
+	unless(opendir(DIR, "$dir")) {
+		die "Error: can't opendir $dir: $!";
+	}
+	my @todel = grep { !/^\./ } readdir(DIR);
+	closedir DIR;
+	map { unlink("$dir/$_"); } @todel;
+	rmdir("$dir") if ($remove_dir);
+}
+
+
 sub write_resources
 {
 	# Write resource file to report directory or return resources in and array of lines
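
Note on the retention logic above: the cleanup code encodes the cutoff as a
fixed-width "YYYYWW" string (year plus zero-padded week number) and compares
directory names lexicographically with "lt", which works because both sides
have the same width. When the subtraction crosses the year boundary, the
cutoff is pinned at week 52 of the previous year, however large the retention
is. The following standalone Perl sketch is not part of the patch; the year,
week, retention and directory values are made up for illustration:

    #!/usr/bin/perl
    # Illustrative sketch of the -R | --retention cutoff (not part of the
    # patch). All input values below are invented for the example.
    use strict;
    use warnings;
    use POSIX ();

    # Week number and year of the last parsed log entry, and the
    # --retention value in weeks.
    my ($last_year, $wn, $retention) = (2014, 2, 3);

    # Build the cutoff the same way the patch does: "YYYYWW", clamped to
    # week 52 of the previous year when the subtraction goes below week 1.
    my $limit = $last_year;
    if (($wn - $retention) < 1) {
        $limit--;                 # previous year
        $limit .= "52";           # string concatenation: "201352"
    } else {
        $limit .= sprintf("%02d", $wn - $retention);
    }

    # Any week directory whose "YYYYWW" sorts strictly below the cutoff
    # is obsolete and would be removed.
    foreach my $dir (qw(2013/week-50 2013/week-52 2014/week-01 2014/week-02)) {
        my ($y, $w) = $dir =~ m{^(\d+)/week-(\d+)$};
        print "$dir => ", ("$y$w" lt $limit ? "remove" : "keep"), "\n";
    }

    # The matching day directories <year>/<month>/<day> are found by
    # recomputing each day's week number exactly as the patch does, with
    # POSIX::strftime("%U") plus one:
    my ($y, $m, $d) = (2013, 12, 15);
    my $weekNumber = sprintf("%02d",
        POSIX::strftime("%U", 1, 1, 1, $d, $m - 1, $y - 1900) + 1);
    print "day $y/$m/$d belongs to week $y$weekNumber\n";

With these sample values the cutoff is "201352", so 2013/week-50 is removed
while 2013/week-52 and both 2014 weeks are kept.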