]> granicus.if.org Git - pgbadger/commitdiff
CSV log parser have been entirely rewritten to handle csv with multiline, it also...
authorDarold Gilles <gilles@darold.net>
Mon, 13 Aug 2012 19:51:20 +0000 (21:51 +0200)
committerDarold Gilles <gilles@darold.net>
Mon, 13 Aug 2012 19:51:20 +0000 (21:51 +0200)
pgbadger

index 96627182cdbf05ebf225419a6b6d2bca34d0cef2..e002f8987e4fb11627cd094138e2d9c88ec2d80c 100755 (executable)
--- a/pgbadger
+++ b/pgbadger
@@ -380,187 +380,213 @@ if ($last_parsed && -e $last_parsed) {
 foreach my $logfile (@log_files) {
        &logmsg('DEBUG', "Starting to parse log file: $logfile");
 
-       # Open log file for reading
-       my $totalsize = (stat("$logfile"))[7] || 0;
-       my $cursize   = 0;
-       my $lfile     = new IO::File;
-       if ($logfile !~ /\.gz/) {
-               $lfile->open($logfile) || die "FATAL: cannot read logfile $logfile. $!\n";
-       } else {
-
-               # Open a pipe to zcat program for compressed log
-               $lfile->open("$zcat $logfile |") || die "FATAL: cannot read from pipe to $zcat $logfile. $!\n";
-
-               # Real size of the file is unknow
-               $totalsize = 0;
-       }
-
        my $curdate = localtime(time);
 
        # Syslog do not have year information, so take care of year overlapping
        my ($gsec, $gmin, $ghour, $gmday, $gmon, $gyear, $gwday, $gyday, $gisdst) = localtime(time);
        $gyear += 1900;
        my $CURRENT_DATE = $gyear . sprintf("%02d", $gmon + 1) . sprintf("%02d", $gmday);
+       # Get size of the file
+       my $totalsize = (stat("$logfile"))[7] || 0;
+       my $cursize   = 0;
 
-       my $cur_pid = '';
-       my $csv_obj = undef;
        if ($format eq 'csv') {
                require Text::CSV;
-               $csv_obj = Text::CSV->new({'binary' => 1});
-       }
-
-       while (my $line = <$lfile>) {
-               $cursize += length($line);
-               chomp($line);
-               $line =~ s/\r//;
-               $nlines++;
-               next if (!$line);
-
-               if ($progress && (($nlines % $NUMPROGRESS) == 0)) {
-                       if ($totalsize) {
-                               print progress_bar($cursize, $totalsize, 25, '=');
-                       } else {
-                               print ".";
+               my $csv = Text::CSV->new ({ binary => 1, eol => $/ });
+               open(my $io, "<", $logfile) or die "FATAL: cannot read csvlog file $logfile. $!\n";
+               # Parse csvlog lines
+               while (my $row = $csv->getline($io)) {
+
+                       # Set progress statistics
+                       $cursize += length(join(',', @$row));
+                       $nlines++;
+                       if ($progress && (($nlines % $NUMPROGRESS) == 0)) {
+                               if ($totalsize) {
+                                       print progress_bar($cursize, $totalsize, 25, '=');
+                               } else {
+                                       print ".";
+                               }
                        }
+
+                       # Extract the date
+                       $row->[0] =~ m/^(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)\.(\d+)/;
+                       my @date = ($1, $2, $3, $4, $5, $6);
+                       my $cur_date = join('', @date);
+                       my $end_time = timegm_nocheck($6, $5, $4, $3, $2, $1);
+                       my $milli = $7 || 0;
+                       $row->[8] =~ m/^(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)/;
+                       my $start_time = timegm_nocheck($6, $5, $4, $3, $2, $1);
+                       my $duration = (($end_time - $start_time) * 1000) - $milli;
+                       $duration = 0 if ($duration < 0);
+
+                       # Skip unwanted lines
+                       next if ($from && ($from > $cur_date));
+                       last if ($to   && ($to < $cur_date));
+
+                       # Jump to the last line parsed if required
+                       next if (!&check_incremental_position($cur_date, join(',', @$row)));
+
+                       # Set approximative session duration
+                       if ($row->[13] && ($row->[13] !~ m/duration: \d+\.\d+ ms/)) {
+                               $row->[13] = "duration: $duration ms $row->[13]";
+                       }
+
+                       # Process the log line
+                       &parse_query(
+                               @date,
+                               $row->[4],    # connection from
+                               $row->[3],    # pid
+                               $row->[5],    # session
+                               'user=' . $row->[1] . ',db=' . $row->[2], # logprefix
+                               $row->[11],   # loglevel
+                               $row->[13] || $row->[14] || $row->[15],   #  message || detail || hint
+                       );
                }
+               $csv->eof or die "FATAL: cannot use CSV, " . $csv->error_diag() . "\n";
+               close $io;
+       } else {
 
-               # Parse syslog lines
-               if ($format eq 'syslog') {
+               # Open log file for reading
+               my $lfile     = new IO::File;
+               if ($logfile !~ /\.gz/) {
+                       $lfile->open($logfile) || die "FATAL: cannot read log file $logfile. $!\n";
+               } else {
 
-                       if ($line =~
-                               /^(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*([^:]*?)\s*([A-Z]+?):\s+(.*)/)
-                       {
+                       # Open a pipe to zcat program for compressed log
+                       $lfile->open("$zcat $logfile |") || die "FATAL: cannot read from pipe to $zcat $logfile. $!\n";
 
-                               # skip non postgresql lines
-                               next if ($7 ne $ident);
+                       # Real size of the file is unknow
+                       $totalsize = 0;
+               }
 
-                               # Syslog do not have year information, so take care of year overlapping
-                               my $tmp_year = $gyear;
-                               if ("$tmp_year$month_abbr{$1}$2" > $CURRENT_DATE) {
-                                       $tmp_year = substr($CURRENT_DATE, 1, 4) - 1;
-                               }
+               my $cur_pid = '';
+               while (my $line = <$lfile>) {
+                       $cursize += length($line);
+                       chomp($line);
+                       $line =~ s/\r//;
+                       $nlines++;
+                       next if (!$line);
 
-                               # Fix single digit days
-                               my $day = sprintf("%02d", $2);
-                               my $cur_date = "$tmp_year$month_abbr{$1}$day$3$4$5";
+                       if ($progress && (($nlines % $NUMPROGRESS) == 0)) {
+                               if ($totalsize) {
+                                       print progress_bar($cursize, $totalsize, 25, '=');
+                               } else {
+                                       print ".";
+                               }
+                       }
 
-                               # Skip unwanted lines
-                               next if ($from && ($from > $cur_date));
-                               last if ($to   && ($to < $cur_date));
-                               $cur_pid = $8;
+                       # Parse syslog lines
+                       if ($format eq 'syslog') {
 
-                               # Jump to the last line parsed if required
-                               next if (!&check_incremental_position($cur_date, $line));
+                               if ($line =~
+                                       /^(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*([^:]*?)\s*([A-Z]+?):\s+(.*)/)
+                               {
 
-                               # Process the log line
-                               &parse_query($tmp_year, $month_abbr{$1}, $day, $3, $4, $5, $6, $8, $9, $10, $11, $12);
+                                       # skip non postgresql lines
+                                       next if ($7 ne $ident);
 
-                       } elsif ($line =~ /^(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*(.*)/) {
+                                       # Syslog do not have year information, so take care of year overlapping
+                                       my $tmp_year = $gyear;
+                                       if ("$tmp_year$month_abbr{$1}$2" > $CURRENT_DATE) {
+                                               $tmp_year = substr($CURRENT_DATE, 1, 4) - 1;
+                                       }
 
-                               $cur_pid = $8;
-                               my $t_query = $10;
-                               $t_query =~ s/#011/\t/g;
-                               if ($cur_info{$cur_pid}{statement}) {
-                                       $cur_info{$cur_pid}{statement} .= "\n" . $t_query;
-                               } elsif ($cur_info{$cur_pid}{context}) {
-                                       $cur_info{$cur_pid}{context} .= "\n" . $t_query;
-                               } elsif ($cur_info{$cur_pid}{detail}) {
-                                       $cur_info{$cur_pid}{detail} .= "\n" . $t_query;
-                               } else {
-                                       $cur_info{$cur_pid}{query} .= "\n" . $t_query;
-                               }
+                                       # Fix single digit days
+                                       my $day = sprintf("%02d", $2);
+                                       my $cur_date = "$tmp_year$month_abbr{$1}$day$3$4$5";
+
+                                       # Skip unwanted lines
+                                       next if ($from && ($from > $cur_date));
+                                       last if ($to   && ($to < $cur_date));
+                                       $cur_pid = $8;
+
+                                       # Jump to the last line parsed if required
+                                       next if (!&check_incremental_position($cur_date, $line));
+
+                                       # Process the log line
+                                       &parse_query($tmp_year, $month_abbr{$1}, $day, $3, $4, $5, $6, $8, $9, $10, $11, $12);
+
+                               } elsif ($line =~ /^(...)\s+(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s([^\[]+)\[(\d+)\]:\s\[([0-9\-]+)\]\s*(.*)/) {
+
+                                       $cur_pid = $8;
+                                       my $t_query = $10;
+                                       $t_query =~ s/#011/\t/g;
+                                       if ($cur_info{$cur_pid}{statement}) {
+                                               $cur_info{$cur_pid}{statement} .= "\n" . $t_query;
+                                       } elsif ($cur_info{$cur_pid}{context}) {
+                                               $cur_info{$cur_pid}{context} .= "\n" . $t_query;
+                                       } elsif ($cur_info{$cur_pid}{detail}) {
+                                               $cur_info{$cur_pid}{detail} .= "\n" . $t_query;
+                                       } else {
+                                               $cur_info{$cur_pid}{query} .= "\n" . $t_query;
+                                       }
 
-                               # Collect orphans lines of multiline queries
-                       } elsif ($line !~ /^...\s+\d+\s\d+:\d+:\d+\s[^\s]+\s[^\[]+\[\d+\]:/) {
+                                       # Collect orphans lines of multiline queries
+                               } elsif ($line !~ /^...\s+\d+\s\d+:\d+:\d+\s[^\s]+\s[^\[]+\[\d+\]:/) {
+
+                                       if ($cur_info{$cur_pid}{statement}) {
+                                               $cur_info{$cur_pid}{statement} .= "\n" . $line;
+                                       } elsif ($cur_info{$cur_pid}{context}) {
+                                               $cur_info{$cur_pid}{context} .= "\n" . $line;
+                                       } elsif ($cur_info{$cur_pid}{detail}) {
+                                               $cur_info{$cur_pid}{detail} .= "\n" . $line;
+                                       } else {
+                                               $cur_info{$cur_pid}{query} .= "\n" . $line;
+                                       }
 
-                               if ($cur_info{$cur_pid}{statement}) {
-                                       $cur_info{$cur_pid}{statement} .= "\n" . $line;
-                               } elsif ($cur_info{$cur_pid}{context}) {
-                                       $cur_info{$cur_pid}{context} .= "\n" . $line;
-                               } elsif ($cur_info{$cur_pid}{detail}) {
-                                       $cur_info{$cur_pid}{detail} .= "\n" . $line;
                                } else {
-                                       $cur_info{$cur_pid}{query} .= "\n" . $line;
+                                       &logmsg('DEBUG', "Unknown syslog line format: $line");
                                }
 
-                       } else {
-                               &logmsg('DEBUG', "Unknown syslog line format: $line");
-                       }
+                       } elsif ($format eq 'stderr') {
 
-               } elsif ($format eq 'stderr') {
+                               # Parse stderr lines
+                               if ($line =~ /(\d+)-(\d+)-(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s\[(\d+)\]:\s\[([0-9\-]+)\]\s*([^:]*?)\s*([A-Z]+?):\s+(.*)/)
+                               {
 
-                       # Parse stderr lines
-                       if ($line =~ /(\d+)-(\d+)-(\d+)\s(\d+):(\d+):(\d+)\s([^\s]+)\s\[(\d+)\]:\s\[([0-9\-]+)\]\s*([^:]*?)\s*([A-Z]+?):\s+(.*)/)
-                       {
+                                       # Skip unwanted lines
+                                       my $cur_date = "$1$2$3$4$5$6";
+                                       next if ($from && ($from > $cur_date));
+                                       last if ($to   && ($to < $cur_date));
+                                       $cur_pid = $8;
 
-                               # Skip unwanted lines
-                               my $cur_date = "$1$2$3$4$5$6";
-                               next if ($from && ($from > $cur_date));
-                               last if ($to   && ($to < $cur_date));
-                               $cur_pid = $8;
+                                       # Jump to the last line parsed if required
+                                       next if (!&check_incremental_position($cur_date, $line));
 
-                               # Jump to the last line parsed if required
-                               next if (!&check_incremental_position($cur_date, $line));
+                                       # Process the log line
+                                       &parse_query($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
 
-                               # Process the log line
-                               &parse_query($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12);
+                                       # Collect orphans lines of multiline queries
+                               } elsif ($line !~ /^\d+-\d+-\d+\s\d+:\d+:\d+\s[^\s]+\s\[\d+\]:/) {
 
-                               # Collect orphans lines of multiline queries
-                       } elsif ($line !~ /^\d+-\d+-\d+\s\d+:\d+:\d+\s[^\s]+\s\[\d+\]:/) {
+                                       if ($cur_info{$cur_pid}{statement}) {
+                                               $cur_info{$cur_pid}{statement} .= "\n" . $line;
+                                       } elsif ($cur_info{$cur_pid}{context}) {
+                                               $cur_info{$cur_pid}{context} .= "\n" . $line;
+                                       } elsif ($cur_info{$cur_pid}{detail}) {
+                                               $cur_info{$cur_pid}{detail} .= "\n" . $line;
+                                       } else {
+                                               $cur_info{$cur_pid}{query} .= "\n" . $line;
+                                       }
 
-                               if ($cur_info{$cur_pid}{statement}) {
-                                       $cur_info{$cur_pid}{statement} .= "\n" . $line;
-                               } elsif ($cur_info{$cur_pid}{context}) {
-                                       $cur_info{$cur_pid}{context} .= "\n" . $line;
-                               } elsif ($cur_info{$cur_pid}{detail}) {
-                                       $cur_info{$cur_pid}{detail} .= "\n" . $line;
                                } else {
-                                       $cur_info{$cur_pid}{query} .= "\n" . $line;
+                                       $cur_info{$cur_pid}{query} .= "\n" . $line if ($cur_info{$cur_pid}{query});
                                }
 
                        } else {
-                               $cur_info{$cur_pid}{query} .= "\n" . $line if ($cur_info{$cur_pid}{query});
-                       }
-
-               } elsif ($format eq 'csv') {
 
-                       # Parse csvlog lines
-                       if ($csv_obj->parse($line)) {
-                               my @cols = $csv_obj->fields();
-
-                               # Extract the date
-                               $cols[0] =~ m/(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)/;
-                               my @date = ($1, $2, $3, $4, $5, $6);
-                               my $cur_date = join('', @date);
-
-                               # Skip unwanted lines
-                               next if ($from && ($from > $cur_date));
-                               last if ($to   && ($to < $cur_date));
-
-                               # Jump to the last line parsed if required
-                               next if (!&check_incremental_position($cur_date, $line));
-
-                               # Process the log line
-                               &parse_query(
-                                       @date,
-                                       $cols[4],    # connection from
-                                       $cols[3],    # pid
-                                       $cols[5],    # session
-                                                    # logprefix
-                                       'user=' . $cols[1] . ',db=' . $cols[2],
-                                       $cols[11] . ':',    # loglevel
-                                       $cols[13],          # query
-                               );
-                       } else {
-                               &logmsg('DEBUG', "Unknown csv line format: $line, error: " . $csv_obj->error_input());
+                               # unknown format
+                               &logmsg('DEBUG', "Unknown line format: $line");
                        }
-
-               } else {
-
-                       # unknown format
-                       &logmsg('DEBUG', "Unknown line format: $line");
                }
+               $lfile->close();
+       }
+
+       # Get stats from all pending temporary storage
+       foreach my $pid (sort {$cur_info{$a}{date} <=> $cur_info{$b}{date}} %cur_info) {
+               &store_queries($pid);
        }
+       %cur_info = ();
 
        if ($progress) {
                if ($totalsize) {
@@ -569,14 +595,6 @@ foreach my $logfile (@log_files) {
                print STDERR "\n";
        }
 
-       $lfile->close();
-
-       # Get stats from all pending temporary storage
-       foreach my $pid (sort {$cur_info{$a}{date} <=> $cur_info{$b}{date}} %cur_info) {
-               &store_queries($pid);
-       }
-       %cur_info = ();
-
 }
 
 # Save last line parsed
@@ -3128,7 +3146,7 @@ sub parse_query
                # When we are ready to overwrite the last storage, add it to the global stats
                if (   ($t_loglevel =~ /LOG|FATAL|PANIC|ERROR|WARNING|HINT/)
                        && exists $cur_info{$t_pid}
-                       && ($t_session != $cur_info{$t_pid}{session}))
+                       && (($format eq 'csv') || ($t_session != $cur_info{$t_pid}{session})))
                {
                        &store_queries($t_pid);
                        delete $cur_info{$t_pid};