Add -R | --retention option to set the maximum number of week reports to preserve...

author Darold Gilles <gilles@darold.net>

Thu, 7 Aug 2014 22:37:51 +0000 (00:37 +0200)

committer Darold Gilles <gilles@darold.net>

Thu, 7 Aug 2014 22:37:51 +0000 (00:37 +0200)
author Darold Gilles <gilles@darold.net>
Thu, 7 Aug 2014 22:37:51 +0000 (00:37 +0200)
committer Darold Gilles <gilles@darold.net>
Thu, 7 Aug 2014 22:37:51 +0000 (00:37 +0200)
diff --git a/README b/README

index dab9588a77231832b1106d66a3130b8a51a1af29..03b06bc6321b9371d9816110dbaaa75cc5582986 100644 (file)
--- a/README
+++ b/README
@@ -2,9 +2,9 @@ NAME
      pgBadger - a fast PostgreSQL log analysis report
  
  SYNOPSIS
-    pgbadger [options] logfile [...]
+    Usage: pgbadger [options] logfile [...]
  
-            PostgreSQL log analyzer with fully detailed reports and charts.
+            PostgreSQL log analyzer with fully detailed reports and graphs.
  
      Arguments:
  
@@ -44,6 +44,8 @@ SYNOPSIS
          -o | --outfile filename: define the filename for the output. Default depends
                                   on the output format: out.html, out.txt, out.bin,
                                   out.json or out.tsung.
+                                 With module JSON::XS installed, you can also output
+                                 the file in JSON format.
                                   To dump output to stdout use - as filename.
          -O | --outdir path     : directory where out file must be saved.
          -p | --prefix string   : the value of your custom log_line_prefix as
@@ -56,6 +58,10 @@ SYNOPSIS
          -q | --quiet           : don't print anything to stdout, not even a progress bar.
          -r | --remote-host ip  : set the host where to execute the cat command on remote
                                   logfile to parse localy the file.
+        -R | --retention N     : number of weeks to keep in incremental mode. Default 0,
+                                 disabled. Used to set the number of weeks to keep in
+                                 output directory. Older week and day directories are
+                                 automatically removed.
          -s | --sample number   : number of query samples to store/display. Default: 3
          -S | --select-only     : only report SELECT queries.
          -t | --top number      : number of queries to store/display. Default: 20
@@ -65,7 +71,7 @@ SYNOPSIS
          -v | --verbose         : enable verbose or debug mode. Disabled by default.
          -V | --version         : show pgBadger version and exit.
          -w | --watch-mode      : only report errors just like logwatch could do.
-        -x | --extension       : output format. Values: text, html, binary, json or
+        -x | --extension       : output format. Values: text, html, bin, json or
                                   tsung. Default: html
          -X | --extra-files     : in incremetal mode allow pgbadger to write CSS and JS
                                   files in the output directory as separate files.
@@ -102,6 +108,7 @@ SYNOPSIS
                                   from report. Example: "pg_dump".
          --exclude-line regex   : pgbadger will start to exclude any log entry that
                                   will match the given regex. Can be used multiple time.
+        --anonymize            : obscure all literals in queries to hide confidential data.
  
      pgBadger is able to parse a remote log file using a passwordless ssh
      connection. Use the -r or --remote-host to set the host ip address or
@@ -120,21 +127,21 @@ SYNOPSIS
      Examples:
  
              pgbadger /var/log/postgresql.log
-            pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
+            pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz \
+                           /var/log/postgres.log
              pgbadger /var/log/postgresql/postgresql-2012-05-*
              pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
-            pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
+            pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" \
+                           /var/log/postgresql.log
              cat /var/log/postgres.log | pgbadger -
-            # log prefix with stderr log output
-            perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
+            # Log prefix with stderr log output
+            perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
                              /pglog/postgresql-2012-08-21*
              perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
              # Log line prefix with syslog log output
-            perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
+            perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
                              /pglog/postgresql-2012-08-21*
-
-    Use my 8 CPUs to parse my 10GB file faster, really faster
-
+            # Use my 8 CPUs to parse my 10GB file faster, much faster
              perl pgbadger -j 8 /pglog/postgresql-9.1-main.log
  
      Generate Tsung sessions XML file with select queries only:
@@ -147,7 +154,7 @@ SYNOPSIS
  
      Generate report every week using incremental behavior:
  
-        0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
+        0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` 
              -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
  
      This supposes that your log file and HTML report are also rotated every
@@ -155,20 +162,25 @@ SYNOPSIS
  
      Or better, use the auto-generated incremental reports:
  
-        0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1 \
+        0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1 
              -O /var/www/pg_reports/
  
-    will generate a report per day and per week in the given output
-    directory.
+    will generate a report per day and per week.
+
+    In incremental mode, you can also specify the number of weeks to keep in
+    the reports:
+
+        /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1 \
+            -O /var/www/pg_reports/
  
      If you have a pg_dump at 23:00 and 13:00 each day during half an hour,
-    you can use pgbadger as follow to exclude these periods from the report:
+    you can use pgbadger as follows to exclude these periods from the report:
  
          pgbadger --exclude-time "2013-09-.* (23|13):.*" postgresql.log
  
-    This will help to not have all COPY order on top of slowest queries. You
-    can also use --exclude-appname "pg_dump" to solve this problem in a more
-    simple way.
+    This will help avoid having COPY statements, as generated by pg_dump, on
+    top of the list of slowest queries. You can also use --exclude-appname
+    "pg_dump" to solve this problem in a simpler way.
  
  DESCRIPTION
      pgBadger is a PostgreSQL log analyzer build for speed with fully
diff --git a/doc/pgBadger.pod b/doc/pgBadger.pod

index d98e2a2b1837c92c9fa49a8029e3236314353fff..21d8c093672f98250dd4e4f2ee1e933b6727f6ee 100644 (file)
--- a/doc/pgBadger.pod
+++ b/doc/pgBadger.pod
@@ -4,9 +4,9 @@ pgBadger - a fast PostgreSQL log analysis report
  
  =head1 SYNOPSIS
  
-pgbadger [options] logfile [...]
+Usage: pgbadger [options] logfile [...]
  
-       PostgreSQL log analyzer with fully detailed reports and charts.
+       PostgreSQL log analyzer with fully detailed reports and graphs.
  
  Arguments:
  
@@ -46,6 +46,8 @@ Options:
      -o | --outfile filename: define the filename for the output. Default depends
                               on the output format: out.html, out.txt, out.bin,
                               out.json or out.tsung.
+                             With module JSON::XS installed, you can also output
+                             the file in JSON format.
                               To dump output to stdout use - as filename.
      -O | --outdir path     : directory where out file must be saved.
      -p | --prefix string   : the value of your custom log_line_prefix as
@@ -58,6 +60,10 @@ Options:
      -q | --quiet           : don't print anything to stdout, not even a progress bar.
      -r | --remote-host ip  : set the host where to execute the cat command on remote
                               logfile to parse localy the file.
+    -R | --retention N     : number of weeks to keep in incremental mode. Default 0,
+                             disabled. Used to set the number of weeks to keep in
+                             output directory. Older week and day directories are
+                             automatically removed.
      -s | --sample number   : number of query samples to store/display. Default: 3
      -S | --select-only     : only report SELECT queries.
      -t | --top number      : number of queries to store/display. Default: 20
@@ -67,10 +73,10 @@ Options:
      -v | --verbose         : enable verbose or debug mode. Disabled by default.
      -V | --version         : show pgBadger version and exit.
      -w | --watch-mode      : only report errors just like logwatch could do.
-    -x | --extension       : output format. Values: text, html, binary, json or
+    -x | --extension       : output format. Values: text, html, bin, json or
                               tsung. Default: html
      -X | --extra-files     : in incremetal mode allow pgbadger to write CSS and JS
-                             files in the output directory as separate files.
+                            files in the output directory as separate files.
      -z | --zcat exec_path  : set the full path to the zcat program. Use it if
                               zcat or bzcat or unzip is not in your path.
      --pie-limit num        : pie data lower than num% will show a sum instead.
@@ -121,24 +127,25 @@ some additional options to fully control the ssh connection.
  
  Examples:
  
-        pgbadger /var/log/postgresql.log
-        pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
-        pgbadger /var/log/postgresql/postgresql-2012-05-*
-        pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
-        pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
-        cat /var/log/postgres.log | pgbadger -
-        # log prefix with stderr log output
-        perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
-                        /pglog/postgresql-2012-08-21*
-        perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
-        # Log line prefix with syslog log output
-        perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
-                        /pglog/postgresql-2012-08-21*
-
-Use my 8 CPUs to parse my 10GB file faster, really faster
-
+       pgbadger /var/log/postgresql.log
+       pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz \
+                      /var/log/postgres.log
+       pgbadger /var/log/postgresql/postgresql-2012-05-*
+       pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
+       pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" \
+                      /var/log/postgresql.log
+       cat /var/log/postgres.log | pgbadger -
+       # Log prefix with stderr log output
+       perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
+                       /pglog/postgresql-2012-08-21*
+       perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
+       # Log line prefix with syslog log output
+       perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
+                       /pglog/postgresql-2012-08-21*
+       # Use my 8 CPUs to parse my 10GB file faster, much faster
         perl pgbadger -j 8 /pglog/postgresql-9.1-main.log
  
+
  Generate Tsung sessions XML file with select queries only:
  
      perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log
@@ -149,25 +156,31 @@ Reporting errors every week by cron job:
  
  Generate report every week using incremental behavior:
  
-    0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
-        -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
+    0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` 
+       -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
  
  This supposes that your log file and HTML report are also rotated every week.
  
  Or better, use the auto-generated incremental reports:
  
-    0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1 \
-        -O /var/www/pg_reports/
+    0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1 
+       -O /var/www/pg_reports/
+
+will generate a report per day and per week.
+
+In incremental mode, you can also specify the number of weeks to keep in the reports:
  
-will generate a report per day and per week in the given output directory.
+    /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1 \
+       -O /var/www/pg_reports/
  
  If you have a pg_dump at 23:00 and 13:00 each day during half an hour, you can
-use pgbadger as follow to exclude these periods from the report:
+use pgbadger as follows to exclude these periods from the report:
  
      pgbadger --exclude-time "2013-09-.* (23|13):.*" postgresql.log 
  
-This will help to not have all COPY order on top of slowest queries. You can
-also use --exclude-appname "pg_dump" to solve this problem in a more simple way.
+This will help avoid having COPY statements, as generated by pg_dump, on top of
+the list of slowest queries. You can also use --exclude-appname "pg_dump" to
+solve this problem in a simpler way.
  
  =head1 DESCRIPTION
  
diff --git a/pgbadger b/pgbadger

index 0fedc9d523f3ab877fa348adcab6d1112a50cb5d..68e48add7767408098b8a5253e7b78b6c0665707 100755 (executable)
--- a/pgbadger
+++ b/pgbadger
@@ -167,6 +167,7 @@ my $incr_date               = '';
  my $last_incr_date          = '';
  my $anonymize               = 0;
  my $noclean                 = 0;
+my $retention               = 0;
  
  my $NUMPROGRESS = 10000;
  my @DIMENSIONS  = (800, 300);
@@ -267,6 +268,7 @@ my $result = GetOptions(
         "P|no-prettify!"           => \$noprettify,
         "q|quiet!"                 => \$quiet,
         "r|remote-host=s"          => \$remote_host,
+        'R|retention=i'            => \$retention,
         "s|sample=i"               => \$sample,
         "S|select-only!"           => \$select_only,
         "t|top=i"                  => \$top,
@@ -877,7 +879,7 @@ $tmp_last_parsed = 'tmp_' . basename($last_parsed) if ($last_parsed);
  $tmp_last_parsed = "$TMP_DIR/$tmp_last_parsed";
  
  # Clean the incremental directory if the feature is not disabled
-if (!$noclean && $saved_last_line{datetime}) {
+if (!$noclean && $saved_last_line{datetime} && $outdir) {
  
         # Search the current week following the last parse date
         $saved_last_line{datetime} =~ /^(\d+)\-(\d+)\-(\d+) /;
@@ -886,7 +888,7 @@ if (!$noclean && $saved_last_line{datetime}) {
         my $last_day   = $3;
         # Get the week number following the date
         my $wn = &get_week_number($last_year, $last_month, $last_day);
-       # Get the deys of the current week where binary files must be preserved
+       # Get the days of the current week where binary files must be preserved
         my @wdays = &get_wdays_per_month($wn - 1, "$last_year-$last_month");
         # Find obsolete dir days that shoud be cleaned
         unless(opendir(DIR, "$outdir")) {
@@ -930,6 +932,83 @@ if (!$noclean && $saved_last_line{datetime}) {
         }
  }
  
+# Clear storage when a retention is specified in incremental mode.
+# Week directories older than the retention limit are removed first, then the
+# day directories belonging to those weeks, and finally any month or year
+# directory in which every day or month directory has been removed.
+# A zero or negative retention disables the cleanup, and nothing is done when
+# the last parsed datetime does not start with a YYYY-MM-DD date.
+if ($saved_last_line{datetime} && $outdir && ($retention > 0)
+       && ($saved_last_line{datetime} =~ /^(\d+)\-(\d+)\-(\d+) /)) {
+
+       # Date of the last parsed line, used as the reference date
+       my $last_year  = $1;
+       my $last_month = $2;
+       my $last_day   = $3;
+       # Get the current week number
+       my $wn = &get_week_number($last_year, $last_month, $last_day);
+
+       # Compute the oldest week to keep as a "YYYYWW" key. When the retention
+       # reaches back before week 1, rewind year by year and carry the
+       # remaining weeks over instead of always stopping at week 52, which
+       # would drop weeks that are still inside the retention window.
+       my $limit_year = $last_year;
+       my $limit_week = $wn - $retention;
+       while ($limit_week < 1) {
+               $limit_year--;
+               # Last week number of that year; assumes get_week_number() uses
+               # the same numbering as the week-NN directories (TODO confirm)
+               my $year_weeks = &get_week_number($limit_year, 12, 31);
+               # Guard against an unexpected value so that the loop always ends
+               $year_weeks = 52 if ($year_weeks < 52);
+               $limit_week += $year_weeks;
+       }
+       my $limit = $limit_year . sprintf("%02d", $limit_week);
+
+       # Find obsolete weeks dir that should be cleaned
+       opendir(my $outdh, "$outdir") or die "Error: can't opendir $outdir: $!";
+       # Only keep numeric entries that are directories, a stray file must
+       # not abort the whole run
+       my @dyears = grep { /^\d+$/ && -d "$outdir/$_" } readdir($outdh);
+       closedir($outdh);
+       # "YYYYWW" keys of the week directories removed below
+       my %obsolete_weeks = ();
+       foreach my $y (sort { $a <=> $b } @dyears) {
+               opendir(my $ydh, "$outdir/$y") or die "Error: can't opendir $outdir/$y: $!";
+               my @weeks = grep { /^week-\d+$/ } readdir($ydh);
+               closedir($ydh);
+               # Names are "week-NN" strings, so they are sorted as strings
+               foreach my $w (sort @weeks) {
+                       next if ($w !~ /^week-(\d+)$/);
+                       my $week = $1;
+                       # String comparison of "YYYYWW" keys, as both sides
+                       # are expected to have the same width
+                       if ("$y$week" lt $limit) {
+                               &logmsg('DEBUG', "Removing obsolete week directory $outdir/$y/week-$week");
+                               &cleanup_directory("$outdir/$y/week-$week", 1);
+                               $obsolete_weeks{"$y$week"} = 1;
+                       }
+               }
+       }
+       # Now remove the corresponding days
+       foreach my $y (sort { $a <=> $b } @dyears) {
+               opendir(my $ydh, "$outdir/$y") or die "Error: can't opendir $outdir/$y: $!";
+               my @dmonths = grep { /^\d+$/ && -d "$outdir/$y/$_" } readdir($ydh);
+               closedir($ydh);
+               my @rmmonths = ();
+               foreach my $m (sort { $a <=> $b } @dmonths) {
+                       opendir(my $mdh, "$outdir/$y/$m") or die "Error: can't opendir $outdir/$y/$m: $!";
+                       my @ddays = grep { /^\d+$/ && -d "$outdir/$y/$m/$_" } readdir($mdh);
+                       closedir($mdh);
+                       my @rmdays = ();
+                       foreach my $d (sort { $a <=> $b } @ddays) {
+                               # Week of this day, formatted like the "YYYYWW" keys;
+                               # presumably the same numbering as get_week_number()
+                               my $weekNumber = sprintf("%02d", POSIX::strftime("%U", 1, 1, 1, $d, $m - 1, $y - 1900)+1);
+                               if ($obsolete_weeks{"$y$weekNumber"}) {
+                                       &logmsg('DEBUG', "Removing obsolete directory $outdir/$y/$m/$d");
+                                       &cleanup_directory("$outdir/$y/$m/$d", 1);
+                                       push(@rmdays, $d);
+                               }
+                       }
+                       # Every day directory of the month is gone, try to drop
+                       # the month too (rmdir only succeeds on an empty directory)
+                       if ($#ddays == $#rmdays) {
+                               &logmsg('DEBUG', "Removing obsolete empty directory $outdir/$y/$m");
+                               rmdir("$outdir/$y/$m");
+                               push(@rmmonths, $m);
+                       }
+               }
+               # Same for the year once all its month directories are gone
+               if ($#dmonths == $#rmmonths) {
+                       &logmsg('DEBUG', "Removing obsolete empty directory $outdir/$y");
+                       rmdir("$outdir/$y");
+               }
+       }
+
+}
+
  # Main loop reading log files
  my $global_totalsize = 0;
  my @given_log_files = ( @log_files );
@@ -1442,6 +1521,10 @@ Options:
      -q | --quiet           : don't print anything to stdout, not even a progress bar.
      -r | --remote-host ip  : set the host where to execute the cat command on remote
                               logfile to parse localy the file.
+    -R | --retention N     : number of weeks to keep in incremental mode. Default 0,
+                             disabled. Used to set the number of weeks to keep in
+                             output directory. Older week and day directories are
+                             automatically removed.
      -s | --sample number   : number of query samples to store/display. Default: 3
      -S | --select-only     : only report SELECT queries.
      -t | --top number      : number of queries to store/display. Default: 20
@@ -1546,6 +1629,11 @@ Or better, use the auto-generated incremental reports:
  
  will generate a report per day and per week.
  
+In incremental mode, you can also specify the number of weeks to keep in the reports:
+
+    /usr/bin/pgbadger --retention 2 -I -q /var/log/postgresql/postgresql.log.1 \
+       -O /var/www/pg_reports/
+
  If you have a pg_dump at 23:00 and 13:00 each day during half an hour, you can
  use pgbadger as follow to exclude these period from the report:
  
@@ -1560,6 +1648,20 @@ solve this problem in a simpler way.
         exit 0;
  }
  
+# Remove every entry of $dir whose name does not start with a dot and, when
+# $remove_dir is true, the directory itself.
+#   $dir        : path of the directory to empty
+#   $remove_dir : when true, rmdir $dir after its entries have been unlinked
+# Dies when the directory can not be opened. The cleanup is not recursive:
+# entries are only unlinked, so a sub-directory is left in place and the final
+# rmdir then fails. Such failures are reported at DEBUG level only, the
+# cleanup stays best-effort.
+sub cleanup_directory
+{
+       my ($dir, $remove_dir) = @_;
+
+       # Lexical handle, so that a caller's own directory handle is never clobbered
+       opendir(my $dh, "$dir") or die "Error: can't opendir $dir: $!";
+       # Skip ".", ".." and any other dot entry
+       my @todel = grep { !/^\./ } readdir($dh);
+       closedir($dh);
+       foreach my $f (@todel) {
+               unlink("$dir/$f") or &logmsg('DEBUG', "Can't remove $dir/$f: $!");
+       }
+       if ($remove_dir) {
+               rmdir("$dir") or &logmsg('DEBUG', "Can't remove directory $dir: $!");
+       }
+}
+
+
  sub write_resources
  {
         # Write resource file to report directory or return resources in and array of lines
author	Darold Gilles <gilles@darold.net>
	Thu, 7 Aug 2014 22:37:51 +0000 (00:37 +0200)
committer	Darold Gilles <gilles@darold.net>
	Thu, 7 Aug 2014 22:37:51 +0000 (00:37 +0200)
README		patch \| blob \| history
doc/pgBadger.pod		patch \| blob \| history
pgbadger		patch \| blob \| history