Add --exclude-line option for excluding log entries with a regex based on the full...

author Darold Gilles <gilles@darold.net>

Thu, 1 May 2014 14:24:53 +0000 (16:24 +0200)

committer Darold Gilles <gilles@darold.net>

Thu, 1 May 2014 14:24:53 +0000 (16:24 +0200)
author Darold Gilles <gilles@darold.net>
Thu, 1 May 2014 14:24:53 +0000 (16:24 +0200)
committer Darold Gilles <gilles@darold.net>
Thu, 1 May 2014 14:24:53 +0000 (16:24 +0200)
diff --git a/README b/README

index 705490017bb02fece958d6dbb406f4b2a0bee103..d1493b80376856bd1f8e5be5bfe51b889293bb40 100644 (file)
--- a/README
+++ b/README
@@ -24,14 +24,14 @@ SYNOPSIS
          -d | --dbname database : only report on entries for the given database.
          -e | --end datetime    : end date/time for the data to be parsed in log.
          -f | --format logtype  : possible values: syslog,stderr,csv. Default: stderr
-        -G | --nograph         : disable graphs on HTML output. Enabled by default.
+        -G | --nograph         : disable graphs on HTML output. Enabled by default.
          -h | --help            : show this message and exit.
-        -i | --ident name      : program name used as syslog ident. Default: postgres
+        -i | --ident name      : program name used as syslog ident. Default: postgres
          -I | --incremental     : use incremental mode, reports will be generated by
                                   days in a separate directory, --outdir must be set.
-        -j | --jobs number     : number of jobs to run in parallel on each log file.
+        -j | --jobs number     : number of jobs to run in parallel on each log file.
                                   Default is 1, run as single process.
-        -J | --Jobs number     : number of log files to parse in parallel. Default
+        -J | --Jobs number     : number of log files to parse in parallel. Default
                                   is 1, run as single process.
          -l | --last-parsed file: allow incremental log parsing by registering the
                                   last datetime and line parsed. Useful if you want
@@ -94,6 +94,8 @@ SYNOPSIS
                                   You can use this option multiple times.
          --exclude-appname name : exclude entries for the specified application name
                                   from report. Example: "pg_dump".
+        --exclude-line regex   : exclude from parsing any log entry that matches
+                                 the given regex. Can be used multiple times.
  
      Examples:
  
@@ -111,7 +113,7 @@ SYNOPSIS
              perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
                              /pglog/postgresql-2012-08-21*
  
-    Use my 8 CPUs to parse my 10GB file faster, much faster
+    Use my 8 CPUs to parse my 10GB file faster, really faster
  
              perl pgbadger -j 8 /pglog/postgresql-9.1-main.log
  
@@ -139,22 +141,21 @@ SYNOPSIS
      will generate a report per day and per week in the given output
      directory.
  
-    If you have pg_dumps scheduled at 23:00 and 13:00 every day, taking
-    less than an hour each, you can use pgBadger as follows to exclude these
-    periods from the report:
+    If you have a pg_dump at 23:00 and 13:00 each day during half an hour,
+    you can use pgbadger as follow to exclude these periods from the report:
  
          pgbadger --exclude-time "2013-09-.* (23|13):.*" postgresql.log
  
-    This will avoid having these COPY queries crowd out your other slowest
-    queries. You can also use --exclude-appname "pg_dump" to solve this
-    problem more simply.
+    This will help to not have all COPY order on top of slowest queries. You
+    can also use --exclude-appname "pg_dump" to solve this problem in a more
+    simple way.
  
  DESCRIPTION
-    pgBadger is a PostgreSQL log analyzer built for speed with fully
+    pgBadger is a PostgreSQL log analyzer built for speed with fully
      detailed reports from your PostgreSQL log file. It's a single and small
-    Perl script that outperforms any other PostgreSQL log analyzer.
+    Perl script that outperforms any other PostgreSQL log analyzer.
  
-    It is written in pure Perl and uses a javascript library
+    It is written in pure Perl language and uses a javascript library
      (flotr2) to draw graphs so that you don't need to install any additional
      Perl modules or other packages. Furthermore, this library gives us more
      features such as zooming. pgBadger also uses the Bootstrap javascript
@@ -170,11 +171,11 @@ DESCRIPTION
      You can also limit pgBadger to only report errors or remove any part of
      the report using command line options.
  
-    pgBadger supports any custom format set into the log_line_prefix of your
-    postgresql.conf file provided that you use the %t, %p and %l patterns.
+    pgBadger supports any custom format set into log_line_prefix of your
+    postgresql.conf file provided that you use the %t, %p and %l patterns.
  
-    pgBadger allows parallel processing of a single or multiple
-    log files via the -j option with the number of CPUs.
+    pgBadger allows parallel processing on a single log file and multiple
+    files through the use of the -j option and the number of CPUs as value.
  
      If you want to save system performance you can also use log_duration
      instead of log_min_duration_statement to have reports on duration and
@@ -194,7 +195,7 @@ FEATURE
              The most frequent errors.
              Histogram of query times.
  
-    The following reports are also available with hourly charts divided into
+    The following reports are also available with hourly charts divide by
      periods of five minutes:
  
              SQL queries statistics.
@@ -202,11 +203,11 @@ FEATURE
              Checkpoints statistics.
              Autovacuum and autoanalyze statistics.
  
-    There are also some pie charts showing the distribution of:
+    There's also some pie reports of distribution about:
  
              Locks statistics.
-            Queries by type (select/insert/update/delete).
-            Distribution of query types per database/application
+            Queries by type (select/insert/update/delete).
+            Distribution of queries type per database/application
              Sessions per database/user/client.
              Connections per database/user/client.
              Autovacuum and autoanalyze per table.
@@ -218,7 +219,7 @@ FEATURE
      cumulative report per week.
  
      Histogram granularity can be adjusted using the -A command line option.
-    By default they will report the mean of each top queries/errors occurring
+    By default they will report the mean of each top queries/errors occurring
      per hour, but you can specify the granularity down to the minute.
  
  REQUIREMENT
@@ -235,7 +236,7 @@ REQUIREMENT
      format you don't need to install it.
  
      Compressed log file format is autodetected from the file extension. If
-    pgBadger finds a gz extension it will use the zcat utility, with a bz2
+    pgBadger finds a gz extension it will use the zcat utility, with a bz2
      extension it will use bzcat and if the file extension is zip then the
      unzip utility will be used.
  
@@ -246,16 +247,16 @@ REQUIREMENT
              --zcat="C:\tools\unzip -p"
  
      By default pgBadger will use the zcat, bzcat and unzip utilities
-    based on the file extension. If you use the default autodetection
-    of compression format you can mix gz, bz2 and zip files. Specifying a custom
-    value for the --zcat option will remove this feature of detecting mixed
-    compression formats.
+    following the file extension. If you use the default autodetection
+    compress format you can mixed gz, bz2 or zip files. Specifying a custom
+    value to --zcat option will remove this feature of mixed compressed
+    format.
  
-    Note that multiprocessing can not be used with compressed files, CSV
-    files, or on the Windows platform.
+    Note that multiprocessing can not be used with compressed files or CSV
+    files as well as under Windows platform.
  
  INSTALLATION
-    Download the tarball from github and unpack the archive as follows:
+    Download the tarball from github and unpack the archive as follow:
  
              tar xzf pgbadger-4.x.tar.gz
              cd pgbadger-4.x/
@@ -350,33 +351,33 @@ PARALLEL PROCESSING
      To enable parallel processing you just have to use the -j N option where
      N is the number of cores you want to use.
  
-    pgBadger will then proceed as follows:
+    pgbadger will then proceed as follow:
  
              for each log file
                  chunk size = int(file size / N)
                  look at start/end offsets of these chunks
                  fork N processes and seek to the start offset of each chunk
-                    each process will terminate when the parser reaches the end offset
+                    each process will terminate when the parser reach the end offset
                      of its chunk
-                    each process writes stats into a binary temporary file
-               wait for all child processes to terminate
+                    each process write stats into a binary temporary file
+               wait for all children has terminated
              All binary temporary files generated will then be read and loaded into
              memory to build the html output.
  
-    With this method, at the start/end of chunks pgBadger may truncate or omit a
-    maximum of N queries per log file which is an insignificant gap if you
+    With that method, at start/end of chunks pgbadger may truncate or omit a
+    maximum of N queries per log file which is an insignificant gap if you
      have millions of queries in your log file. The chance that the query
-    that you were looking for is missing is near 0, this is why I think this
+    that you were looking for is lost is near 0, this is why I think this
      gap is livable. Most of the time the query is counted twice but
      truncated.
  
-    When you have many small log files and many CPUs it is faster to
+    When you have lot of small log files and lot of CPUs it is speedier to
      dedicate one core to one log file at a time. To enable this behavior you
      have to use option -J N instead. With 200 log files of 10MB each the use
-    of the -J option starts being really interesting with 8 Cores. Using this
-    method you will be sure to not lose any queries in the reports.
+    of the -J option starts being really interesting with 8 Cores. Using this
+    method you will be sure to not lose any queries in the reports.
  
-    He are benchmarks performed on a server with 8 CPUs and a single file of
+    Here is a benchmark done on a server with 8 CPUs and a single file of
      9.5GB.
  
               Option |  1 CPU  | 2 CPU | 4 CPU | 8 CPU
@@ -384,7 +385,7 @@ PARALLEL PROCESSING
                 -j   | 1h41m18 | 50m25 | 25m39 | 15m58
                 -J   | 1h41m18 | 54m28 | 41m16 | 34m45
  
-    With 200 log files of 10MB each and a total of 2GB the results are
+    With 200 log files of 10MB each and a total of 2GB the results are
      slightly different:
  
               Option | 1 CPU | 2 CPU | 4 CPU | 8 CPU
@@ -392,17 +393,17 @@ PARALLEL PROCESSING
                 -j   | 20m15 |  9m56 |  5m20 | 4m20
                 -J   | 20m15 |  9m49 |  5m00 | 2m40
  
-    So it is recommended to use -j unless you have hundreds of small log files
+    So it is recommended to use -j unless you have hundreds of small log files
      and can use at least 8 CPUs.
  
-    IMPORTANT: when you are using parallel parsing pgBadger will generate a
+    IMPORTANT: when you are using parallel parsing pgbadger will generate a
      lot of temporary files in the /tmp directory and will remove them at
-    end, so do not remove those files unless pgBadger is not running. They
+    end, so do not remove those files unless pgbadger is not running. They
      are all named with the following template tmp_pgbadgerXXXX.bin so they
      can be easily identified.
  
  INCREMENTAL REPORTS
-    pgBadger includes an automatic incremental report mode using option -I or
+    pgBadger includes an automatic incremental report mode using option -I or
      --incremental. When running in this mode, pgBadger will generate one
      report per day and a cumulative report per week. Output is first done in
      binary format into the mandatory output directory (see option -O or
@@ -410,9 +411,9 @@ INCREMENTAL REPORTS
      index file.
  
      The main index file will show a dropdown menu per week with a link to
-    the week's report and links to daily reports of the week.
+    the week report and links to daily reports of this week.
  
-    For example, if you run pgBadger as follows based on a daily rotated
+    For example, if you run pgBadger as follow based on a daily rotated
      file:
  
          0 4 * * * /usr/bin/pgbadger -I -q /var/log/postgresql/postgresql.log.1 \
@@ -422,26 +423,26 @@ INCREMENTAL REPORTS
  
      In this mode pgBadger will create an automatic incremental file into the
      output directory, so you don't have to use the -l option unless you want
-    to change the path of that file. This means that you can run pgBadger in
-    this mode each day on a log file rotated each week, and it will not count
+    to change the path of that file. This means that you can run pgBadger in
+    this mode each day on a log file rotated each week, it will not count
      the log entries twice.
  
  BINARY FORMAT
      Using the binary format it is possible to create custom incremental and
-    cumulative reports. For example, if you want to refresh a pgBadger
-    report each hour from a daily PostgreSQL log file, you can
-    run the following command every hour:
+    cumulative reports. For example, if you want to refresh a pgbadger
+    report each hour from a daily PostgreSQL log file, you can proceed by
+    running each hour the following commands:
  
-        pgbadger --last-parsed .pgbadger_last_state_file -o sunday/hourX.bin /var/log/pgsql/postgresql-Sun.log
+        pgbadger --last-parsed .pgbadger_last_state_file -o sunday/hourX.bin /var/log/pgsql/postgresql-Sun.log
  
      to generate the incremental data files in binary format. And to generate
-    a fresh HTML report from that binary file:
+    the fresh HTML report from that binary file:
  
-        pgbadger sunday/*.bin
+        pgbadger sunday/*.bin
  
-    Or as another example, if you have one log file per hour and you want
-    reports to be rebuilt each time the log file is switched. Proceed as
-    follows:
+    Or an other example, if you have one log file per hour and you want a
+    reports to be rebuild each time the log file is switched. Proceed as
+    follow:
  
              pgbadger -o day1/hour01.bin /var/log/pgsql/pglog/postgresql-2012-03-23_10.log
              pgbadger -o day1/hour02.bin /var/log/pgsql/pglog/postgresql-2012-03-23_11.log
@@ -453,7 +454,7 @@ BINARY FORMAT
  
              pgbadger -o day1_report.html day1/*.bin
  
-    Adjust the commands to suit your needs.
+    Adjust the commands following your needs.
  
  AUTHORS
      pgBadger is an original work from Gilles Darold.
diff --git a/doc/pgBadger.pod b/doc/pgBadger.pod

index 85ac27d017d5857fa234cdc79441ffaac6a9cffa..5907af15c804706d25cee14cf92475e161020aad 100644 (file)
--- a/doc/pgBadger.pod
+++ b/doc/pgBadger.pod
@@ -96,6 +96,8 @@ Options:
                               You can use this option multiple times.
      --exclude-appname name : exclude entries for the specified application name
                               from report. Example: "pg_dump".
+    --exclude-line regex   : exclude from parsing any log entry that matches
+                             the given regex. Can be used multiple times.
  
  Examples:
  
diff --git a/pgbadger b/pgbadger

index 6bd4b27237ead30bd18e40109477a2fa9ad05a98..7e2f09310309e9a6fe68ef1621eb004dd3477a49 100644 (file)
--- a/pgbadger
+++ b/pgbadger
@@ -112,6 +112,7 @@ my @dbclient                = ();
  my @dbappname               = ();
  my @exclude_user            = ();
  my @exclude_appname         = ();
+my @exclude_line            = ();
  my $ident                   = '';
  my $top                     = 0;
  my $sample                  = 3;
@@ -272,6 +273,7 @@ my $result = GetOptions(
         "exclude-file=s"           => \$exclude_file,
         "exclude-appname=s"        => \@exclude_appname,
         "include-query=s"          => \@include_query,
+       "exclude-line=s"           => \@exclude_line,
         "include-file=s"           => \$include_file,
         "disable-error!"           => \$disable_error,
         "disable-hourly!"          => \$disable_hourly,
@@ -1364,6 +1366,8 @@ Options:
                               You can use this option multiple times.
      --exclude-appname name : exclude entries for the specified application name
                              from report. Example: "pg_dump".
+    --exclude-line regex   : pgbadger will start to exclude any log entry that
+                            will match the given regex. Can be used multiple time.
  
  pgBadger is able to parse a remote log file using a passwordless ssh connection.
  Use the -r or --remote-host to set the host ip address or hostname. There's also
@@ -1661,6 +1665,10 @@ sub process_file
                 my $cur_pid = '';
                 my @matches = ();
                 my $goon = 0;
+               my $has_exclusion = 0;
+               if ($#exclude_line >= 0) {
+                       $has_exclusion = 1;
+               }
                 $start_offset ||= 0;
                 &logmsg('DEBUG', "Start parsing at offset $start_offset of file $logfile");
                 if ($start_offset) {
@@ -1671,6 +1679,15 @@ sub process_file
                         # We received a signal
                         last if ($terminate);
  
+                       # Start to exclude from parsing any desired lines
+                       if ($has_exclusion >= 0) {
+
+                               # Log line matches the excluded regex
+                               my $ef = 0;
+                               map { $ef = 1, last if ($line =~ /$_/is); } @exclude_line;
+                               next if ($ef);
+                       }
+
                         $cursize += length($line);
                         $current_offset += length($line);
  
@@ -8092,6 +8109,14 @@ sub compute_arg_list
         @exclude_appname = ();
         push(@exclude_appname, @tmp);
  
+       @tmp = ();
+       foreach my $v (@exclude_line) {
+               push(@tmp, split(/,/, $v));
+       }
+       @exclude_line = ();
+       push(@exclude_line, @tmp);
+
+
  }
  
  sub validate_log_line
author	Darold Gilles <gilles@darold.net>
	Thu, 1 May 2014 14:24:53 +0000 (16:24 +0200)
committer	Darold Gilles <gilles@darold.net>
	Thu, 1 May 2014 14:24:53 +0000 (16:24 +0200)
README		patch \| blob \| history
doc/pgBadger.pod		patch \| blob \| history
pgbadger		patch \| blob \| history