From 54e4971042dda0b69e0c1a70d2222ca759d1b2a5 Mon Sep 17 00:00:00 2001
From: Gilles Darold
Date: Mon, 10 Jun 2019 09:17:57 +0200
Subject: [PATCH] Update usage and documentation.

---
 README           |  70 +++++++++++++----------
 doc/pgBadger.pod | 144 ++++++++++++++++++++++++-----------------------
 pgbadger         |  36 ++++++------
 3 files changed, 132 insertions(+), 118 deletions(-)

diff --git a/README b/README
index 85395a3..7f64b38 100644
--- a/README
+++ b/README
@@ -18,18 +18,26 @@ SYNOPSIS
                              queries and connections. Default 5 minutes.
     -A | --histo-average min: number of minutes to build the histogram graphs
                              of queries. Default 60 minutes.
-    -b | --begin datetime  : start date/time for the data to be parsed in log.
+    -b | --begin datetime  : start date/time for the data to be parsed in log
+                             (either a timestamp or a time)
     -c | --dbclient host   : only report on entries for the given client host.
     -C | --nocomment       : remove comments like /* ... */ from queries.
     -d | --dbname database : only report on entries for the given database.
     -D | --dns-resolv      : client ip addresses are replaced by their DNS name.
                              Be warned that this can really slow down pgBadger.
-    -e | --end datetime    : end date/time for the data to be parsed in log.
+    -e | --end datetime    : end date/time for the data to be parsed in log
+                             (either a timestamp or a time)
+    -E | --explode         : explode the main report by generating one report
+                             per database. Global information not related to a
+                             database is added to the postgres database report.
     -f | --format logtype  : possible values: syslog, syslog2, stderr, jsonlog,
                              csv, pgbouncer and logplex. Use this option when
                              pgBadger is not able to auto-detect the log format.
     -G | --nograph         : disable graphs on HTML output. Enabled by default.
     -h | --help            : show this message and exit.
+    -H | --html-dir path   : path to the directory where the HTML report must
+                             be written in incremental mode; binary files stay
+                             in the directory defined with the -O, --outdir
+                             option.
     -i | --ident name      : programname used as syslog ident. Default: postgres
     -I | --incremental     : use incremental mode, reports will be generated by
                              days in a separate directory, --outdir must be set.
@@ -153,16 +161,16 @@ SYNOPSIS
     --normalized-only      : only dump all normalized query to out.txt
     --log-timezone +/-XX   : Set the number of hours from GMT of the timezone
                              that must be used to adjust date/time read from
-                             log file before being parsed. Using this option
+                             log file before being parsed. Using this option
                              makes log search with a date/time more difficult.
     --prettify-json        : use it if you want json output to be prettified.

 pgBadger is able to parse a remote log file using a passwordless ssh
 connection. Use the -r or --remote-host to set the host ip address or
-hostname. There are also some additional options to fully control the ssh
-connection:
+hostname. There are also some additional options to fully control the ssh
+connection:

-    --ssh-program ssh        path to the ssh program to use. Default: ssh.
+    --ssh-program ssh        path to the ssh program to use. Default: ssh.
     --ssh-user username      connection login name. Default to running user.
     --ssh-identity file      path to the identity file to use.
     --ssh-timeout second     timeout to ssh connection failure. Default 10 secs.
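For illustration only, the new -E and -H options added above might be combined
with the existing incremental mode roughly as follows; every path in this
sketch is hypothetical:

    # One HTML report per database (-E); HTML goes to a web-served directory
    # (-H) while the binary data files stay under --outdir (-O).
    pgbadger -I -E -O /var/lib/pgbadger -H /var/www/pgbadger \
             /var/log/postgresql/postgresql-10.1-main.log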
@@ -179,34 +187,34 @@ SYNOPSIS
     Examples:

-        pgbadger /var/log/postgresql.log
-        pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
-        pgbadger /var/log/postgresql/postgresql-2012-05-*
-        pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
-        pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
-        cat /var/log/postgres.log | pgbadger -
-        # Log prefix with stderr log output
-        pgbadger --prefix '%t [%p]: user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21*
-        pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
-        # Log line prefix with syslog log output
-        pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21*
-        # Use my 8 CPUs to parse my 10GB file faster, much faster
-        pgbadger -j 8 /pglog/postgresql-10.1-main.log
+        pgbadger /var/log/postgresql.log
+        pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
+        pgbadger /var/log/postgresql/postgresql-2012-05-*
+        pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
+        pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
+        cat /var/log/postgres.log | pgbadger -
+        # Log prefix with stderr log output
+        pgbadger --prefix '%t [%p]: user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21*
+        pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
+        # Log line prefix with syslog log output
+        pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21*
+        # Use my 8 CPUs to parse my 10GB file faster, much faster
+        pgbadger -j 8 /pglog/postgresql-10.1-main.log

     Use URI notation for remote log file:

-        pgbadger http://172.12.110.1//var/log/postgresql/postgresql-10.1-main.log
-        pgbadger ftp://username@172.12.110.14/postgresql-10.1-main.log
-        pgbadger ssh://username@172.12.110.14//var/log/postgresql/postgresql-10.1-main.log*
+        pgbadger http://172.12.110.1//var/log/postgresql/postgresql-10.1-main.log
+        pgbadger ftp://username@172.12.110.14/postgresql-10.1-main.log
+        pgbadger ssh://username@172.12.110.14//var/log/postgresql/postgresql-10.1-main.log*

     You can use a local PostgreSQL log together with a remote pgbouncer log
     file and parse both:

-        pgbadger /var/log/postgresql/postgresql-10.1-main.log ssh://username.12.110.14/pgbouncer.log
+        pgbadger /var/log/postgresql/postgresql-10.1-main.log ssh://username@172.12.110.14/pgbouncer.log

     Generate Tsung sessions XML file with select queries only:

-        pgbadger -S -o sessions.tsung --prefix '%t [%p]: user=%u,db=%d ' /pglog/postgresql-10.1.log
+        pgbadger -S -o sessions.tsung --prefix '%t [%p]: user=%u,db=%d ' /pglog/postgresql-10.1.log

     Reporting errors every week by cron job:

@@ -382,8 +390,8 @@ REQUIREMENT
     library so you don't need anything other than a web browser. Your
     browser will do all the work.

-    If you planned to parse PostgreSQL CSV log files you need a
-    Perl module:
+    If you plan to parse PostgreSQL CSV log files you might need some
+    Perl modules:

         Text::CSV_XS - to parse PostgreSQL CSV log files.

@@ -393,7 +401,7 @@ REQUIREMENT
     If you want to export statistics as JSON file you need an additional
     Perl module:

-        JSON::XS - JSON serializing/deserializing, done correctly and fast
+        JSON::XS - JSON serialising/deserialising, done correctly and fast

     This module is optional; if you don't select the json output format you
     don't need to install it.
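As an illustrative sketch of the JSON export just mentioned (the output
filename is arbitrary):

        # Requires the optional JSON::XS Perl module; the json format is
        # selected here with -x (a .json extension on -o would also do).
        pgbadger -x json -o stats.json /var/log/postgresql/postgresql-10.1-main.log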
     You can install it on a Debian like system

@@ -459,7 +467,7 @@ POSTGRESQL CONFIGURATION
     Here every statement will be logged, on a busy server you may want to
     increase this value to only log queries with a longer duration. Note
     that if you have log_statement set to 'all' nothing will be logged
-    through the log_min_duration_statement directive. See the next section
+    through the log_min_duration_statement directive. See the next chapter
     for more information.

     pgBadger supports any custom format set into the log_line_prefix

@@ -559,8 +567,8 @@ PARALLEL PROCESSING
     of the -J option starts being really interesting with 8 Cores. Using
     this method you will be sure not to lose any queries in the reports.

-    The following is a benchmark done on a server with 8 CPUs and a single
-    file of 9.5GB:
+    Here is a benchmark done on a server with 8 CPUs and a single file of
+    9.5GB:

         Option  |  1 CPU  | 2 CPU | 4 CPU | 8 CPU
         --------+---------+-------+-------+------

@@ -663,7 +671,7 @@ AUTHORS
     Changelog file.

 LICENSE
-    pgBadger is free software distributed under the PostgreSQL License.
+    pgBadger is free software distributed under the PostgreSQL License.

     Copyright (c) 2012-2019, Gilles Darold

diff --git a/doc/pgBadger.pod b/doc/pgBadger.pod
index 06d9630..e573b2f 100644
--- a/doc/pgBadger.pod
+++ b/doc/pgBadger.pod
@@ -6,7 +6,7 @@ pgBadger - a fast PostgreSQL log analysis report

 Usage: pgbadger [options] logfile [...]

-    PostgreSQL log analyzer with fully detailed reports and graphs.
+    PostgreSQL log analyzer with fully detailed reports and graphs.

 Arguments:

@@ -17,56 +17,62 @@ Arguments:
 Options:

     -a | --average minutes : number of minutes to build the average graphs of
-                             queries and connections. Default 5 minutes.
+                             queries and connections. Default 5 minutes.
     -A | --histo-average min: number of minutes to build the histogram graphs
                              of queries. Default 60 minutes.
     -b | --begin datetime  : start date/time for the data to be parsed in log
-                             (either a timestamp or a time).
+                             (either a timestamp or a time)
     -c | --dbclient host   : only report on entries for the given client host.
     -C | --nocomment       : remove comments like /* ... */ from queries.
     -d | --dbname database : only report on entries for the given database.
     -D | --dns-resolv      : client ip addresses are replaced by their DNS name.
                              Be warned that this can really slow down pgBadger.
     -e | --end datetime    : end date/time for the data to be parsed in log
-                             (either a timestamp or a time).
+                             (either a timestamp or a time)
+    -E | --explode         : explode the main report by generating one report
+                             per database. Global information not related to a
+                             database is added to the postgres database report.
     -f | --format logtype  : possible values: syslog, syslog2, stderr, jsonlog,
-                             csv, pgbouncer and logplex. Use this option when
-                             pgBadger is not able to auto-detect the log format.
-    -G | --nograph         : disable graphs on HTML output. Enabled by default.
-    -h | --help            : show this message and exit.
+                             csv, pgbouncer and logplex. Use this option when
+                             pgBadger is not able to auto-detect the log format.
+    -G | --nograph         : disable graphs on HTML output. Enabled by default.
+    -h | --help            : show this message and exit.
+    -H | --html-dir path   : path to the directory where the HTML report must
+                             be written in incremental mode; binary files stay
+                             in the directory defined with the -O, --outdir
+                             option.
     -i | --ident name      : programname used as syslog ident. Default: postgres
     -I | --incremental     : use incremental mode, reports will be generated by
-                             days in a separate directory, --outdir must be set.
+                             days in a separate directory, --outdir must be set.
     -j | --jobs number     : number of jobs to run at same time. Run as single
                              by default or when working with csvlog.
     -J | --Jobs number     : number of log file to parse in parallel. Process one
-                             file at a time by default or when csvlog is used.
+                             file at a time by default or when csvlog is used.
     -l | --last-parsed file: allow incremental log parsing by registering the
-                             last datetime and line parsed. Useful if you want
-                             to watch errors since last run or if you want one
-                             report per day with a log rotated each week.
+                             last datetime and line parsed. Useful if you want
+                             to watch errors since last run or if you want one
+                             report per day with a log rotated each week.
     -L | --logfile-list file:file containing a list of log file to parse.
     -m | --maxlength size  : maximum length of a query, it will be restricted to
-                             the given size. Default: no truncate
+                             the given size. Default: no truncate
     -M | --no-multiline    : do not collect multiline statement to avoid garbage
-                             especially on errors that generate a huge report.
+                             especially on errors that generate a huge report.
     -n | --nohighlight     : disable SQL code highlighting.
     -N | --appname name    : only report on entries for given application name
     -o | --outfile filename: define the filename for the output. Default depends
-                             on the output format: out.html, out.txt, out.bin,
-                             out.json or out.tsung. This option can be used
+                             on the output format: out.html, out.txt, out.bin,
+                             out.json or out.tsung. This option can be used
                              multiple times to output several formats. To use
                              json output the Perl module JSON::XS must be
                              installed.
-                             To dump output to stdout use - as filename.
+                             To dump output to stdout use - as filename.
     -O | --outdir path     : directory where out file must be saved.
     -p | --prefix string   : the value of your custom log_line_prefix as
-                             defined in your postgresql.conf. Only use it if you
-                             aren't using one of the standard prefixes specified
-                             in the pgBadger documentation, such as if your
+                             defined in your postgresql.conf. Only use it if you
+                             aren't using one of the standard prefixes specified
+                             in the pgBadger documentation, such as if your
                              prefix includes additional variables like client ip
                              or application name. See examples below.
     -P | --no-prettify     : disable SQL queries prettify formatter.
-    -q | --quiet           : don't print anything to stdout, not even a progress
+    -q | --quiet           : don't print anything to stdout, not even a progress
                              bar.
     -r | --remote-host ip  : set the host where to execute the cat command on
                              remote logfile to parse locally the file.
@@ -85,17 +91,17 @@ Options:
     -V | --version         : show pgBadger version and exit.
     -w | --watch-mode      : only report errors just like logwatch could do.
     -x | --extension       : output format. Values: text, html, bin, json or
-                             tsung. Default: html
+                             tsung. Default: html
     -X | --extra-files     : in incremental mode allow pgBadger to write CSS and
                              JS files in the output directory as separate files.
     -z | --zcat exec_path  : set the full path to the zcat program. Use it if
-                             zcat or bzcat or unzip is not in your path.
+                             zcat or bzcat or unzip is not in your path.
     -Z | --timezone +/-XX  : Set the number of hours from GMT of the timezone.
-                             Use this to adjust date/time in JavaScript graphs.
-    --pie-limit num        : pie data lower than num% will show a sum instead.
+                             Use this to adjust date/time in JavaScript graphs.
+    --pie-limit num        : pie data lower than num% will show a sum instead.
     --exclude-query regex  : any query matching the given regex will be excluded
-                             from the report. For example: "^(VACUUM|COMMIT)"
-                             You can use this option multiple times.
+                             from the report. For example: "^(VACUUM|COMMIT)"
+                             You can use this option multiple times.
     --exclude-file filename: path of the file which contains all the regex to
                              use to exclude queries from the report. One regex
                              per line.
@@ -105,27 +111,27 @@ Options:
     --include-file filename: path of the file which contains all the regex of
                              the queries to include from the report. One regex
                              per line.
-    --disable-error        : do not generate error report.
+    --disable-error        : do not generate error report.
     --disable-hourly       : do not generate hourly report.
-    --disable-type         : do not generate report of queries by type, database
+    --disable-type         : do not generate report of queries by type, database
                              or user.
-    --disable-query        : do not generate query reports (slowest, most
-                             frequent, queries by users, by database, ...).
+    --disable-query        : do not generate query reports (slowest, most
+                             frequent, queries by users, by database, ...).
     --disable-session      : do not generate session report.
     --disable-connection   : do not generate connection report.
-    --disable-lock         : do not generate lock report.
+    --disable-lock         : do not generate lock report.
     --disable-temporary    : do not generate temporary report.
     --disable-checkpoint   : do not generate checkpoint/restartpoint report.
     --disable-autovacuum   : do not generate autovacuum report.
-    --charset              : used to set the HTML charset to be used.
+    --charset              : used to set the HTML charset to be used.
                              Default: utf-8.
-    --csv-separator        : used to set the CSV field separator, default: ,
+    --csv-separator        : used to set the CSV field separator, default: ,
     --exclude-time regex   : any timestamp matching the given regex will be
                              excluded from the report. Example: "2013-04-12 .*"
-                             You can use this option multiple times.
+                             You can use this option multiple times.
     --include-time regex   : only timestamps matching the given regex will be
                              included in the report. Example: "2013-04-12 .*"
-                             You can use this option multiple times.
+                             You can use this option multiple times.
     --exclude-db name      : exclude entries for the specified database from
                              report. Example: "pg_dump". Can be used multiple
                              times.
@@ -135,12 +141,12 @@ Options:
     --exclude-appname name : exclude entries for the specified application name
                              from report. Example: "pg_dump". Can be used
                              multiple times.
     --exclude-line regex   : pgBadger will start to exclude any log entry that
                              will match the given regex. Can be used multiple
                              times.
-    --anonymize            : obscure all literals in queries, useful to hide
-                             confidential data.
+    --anonymize            : obscure all literals in queries, useful to hide
+                             confidential data.
-    --noreport             : prevent pgBadger from creating reports in
+    --noreport             : prevent pgBadger from creating reports in
                              incremental mode.
-    --log-duration         : force pgBadger to associate log entries generated
-                             by both log_duration = on and log_statement = 'all'
+    --log-duration         : force pgBadger to associate log entries generated
+                             by both log_duration = on and log_statement = 'all'
     --enable-checksum      : used to add a md5 sum under each query report.
     --journalctl command   : command to use to replace PostgreSQL logfile by
                              a call to journalctl. Basically it might be:
                              journalctl -u postgresql-9.5
@@ -152,28 +158,28 @@ Options:
     --rebuild              : used to rebuild all html reports in incremental
                              output directories where there's binary data files.
     --pgbouncer-only       : only show PgBouncer related menu in the header.
-    --start-monday         : in incremental mode, calendar's weeks start on
-                             sunday. Use this option to start on monday.
+    --start-monday         : in incremental mode, calendar's weeks start on
+                             sunday. Use this option to start on monday.
     --normalized-only      : only dump all normalized query to out.txt
     --log-timezone +/-XX   : Set the number of hours from GMT of the timezone
-                             that must be used to adjust date/time read from
-                             log file before beeing parsed. Using this option
-                             make more difficult log search with a date/time.
-    --prettify-json        : use it if you want json output to be prettified.
+                             that must be used to adjust date/time read from
+                             log file before being parsed. Using this option
+                             makes log search with a date/time more difficult.
+    --prettify-json        : use it if you want json output to be prettified.

 pgBadger is able to parse a remote log file using a passwordless ssh
 connection. Use the -r or --remote-host to set the host ip address or
 hostname. There are also some additional options to fully control the ssh
 connection:

-    --ssh-program ssh        path to the ssh program to use. Default: ssh.
+    --ssh-program ssh        path to the ssh program to use. Default: ssh.
     --ssh-user username      connection login name. Default to running user.
     --ssh-identity file      path to the identity file to use.
     --ssh-timeout second     timeout to ssh connection failure. Default 10 secs.
     --ssh-option options     list of -o options to use for the ssh connection.
                              Options always used:
-                                 -o ConnectTimeout=$ssh_timeout
-                                 -o PreferredAuthentications=hostbased,publickey
+                                 -o ConnectTimeout=$ssh_timeout
+                                 -o PreferredAuthentications=hostbased,publickey

 Log file to parse can also be specified using a URI; supported protocols are
 http[s] and [s]ftp. The curl command will be used to download the file and the
@@ -182,33 +188,33 @@ use the ssh command like with the remote host use. See examples bellow.

 Examples:

-    pgbadger /var/log/postgresql.log
-    pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
-    pgbadger /var/log/postgresql/postgresql-2012-05-*
-    pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
-    pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
-    cat /var/log/postgres.log | pgbadger -
-    # Log prefix with stderr log output
-    perl pgbadger --prefix '%t [%p]: user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21*
-    perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
-    # Log line prefix with syslog log output
-    perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21*
-    # Use my 8 CPUs to parse my 10GB file faster, much faster
-    perl pgbadger -j 8 /pglog/postgresql-10.1-main.log
+    pgbadger /var/log/postgresql.log
+    pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
+    pgbadger /var/log/postgresql/postgresql-2012-05-*
+    pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
+    pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
+    cat /var/log/postgres.log | pgbadger -
+    # Log prefix with stderr log output
+    pgbadger --prefix '%t [%p]: user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21*
+    pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
+    # Log line prefix with syslog log output
+    pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21*
+    # Use my 8 CPUs to parse my 10GB file faster, much faster
+    pgbadger -j 8 /pglog/postgresql-10.1-main.log

 Use URI notation for remote log file:

-    perl pgbadger http://172.12.110.1//var/log/postgresql/postgresql-10.1-main.log
-    perl pgbadger ftp://username@172.12.110.14/postgresql-10.1-main.log
-    perl pgbadger ssh://username@172.12.110.14//var/log/postgresql/postgresql-10.1-main.log*
+    pgbadger http://172.12.110.1//var/log/postgresql/postgresql-10.1-main.log
+    pgbadger ftp://username@172.12.110.14/postgresql-10.1-main.log
+    pgbadger ssh://username@172.12.110.14//var/log/postgresql/postgresql-10.1-main.log*

 You can use a local PostgreSQL log together with a remote pgbouncer log file
 and parse both:

-    perl pgbadger /var/log/postgresql/postgresql-10.1-main.log ssh://username.12.110.14/pgbouncer.log
+    pgbadger /var/log/postgresql/postgresql-10.1-main.log ssh://username@172.12.110.14/pgbouncer.log

 Generate Tsung sessions XML file with select queries only:

-    perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: user=%u,db=%d ' /pglog/postgresql-10.1.log
+    pgbadger -S -o sessions.tsung --prefix '%t [%p]: user=%u,db=%d ' /pglog/postgresql-10.1.log

 Reporting errors every week by cron job:

diff --git a/pgbadger b/pgbadger
index 1b3f2cb..e0ce5c7 100755
--- a/pgbadger
+++ b/pgbadger
@@ -1809,7 +1809,7 @@ sub usage
     print qq{
 Usage: pgbadger [options] logfile [...]

-    PostgreSQL log analyzer with fully detailed reports and graphs.
+    PostgreSQL log analyzer with fully detailed reports and graphs.

 Arguments:

@@ -1991,29 +1991,29 @@ use the ssh command like with the remote host use. See examples bellow.

 Examples:

-    pgbadger /var/log/postgresql.log
-    pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
-    pgbadger /var/log/postgresql/postgresql-2012-05-*
-    pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
-    pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
-    cat /var/log/postgres.log | pgbadger -
-    # Log prefix with stderr log output
-    pgbadger --prefix '%t [%p]: user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21*
-    pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
-    # Log line prefix with syslog log output
-    pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21*
-    # Use my 8 CPUs to parse my 10GB file faster, much faster
-    pgbadger -j 8 /pglog/postgresql-10.1-main.log
+    pgbadger /var/log/postgresql.log
+    pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
+    pgbadger /var/log/postgresql/postgresql-2012-05-*
+    pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
+    pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
+    cat /var/log/postgres.log | pgbadger -
+    # Log prefix with stderr log output
+    pgbadger --prefix '%t [%p]: user=%u,db=%d,client=%h' /pglog/postgresql-2012-08-21*
+    pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
+    # Log line prefix with syslog log output
+    pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' /pglog/postgresql-2012-08-21*
+    # Use my 8 CPUs to parse my 10GB file faster, much faster
+    pgbadger -j 8 /pglog/postgresql-10.1-main.log

 Use URI notation for remote log file:

-    pgbadger http://172.12.110.1//var/log/postgresql/postgresql-10.1-main.log
-    pgbadger ftp://username\@172.12.110.14/postgresql-10.1-main.log
-    pgbadger ssh://username\@172.12.110.14//var/log/postgresql/postgresql-10.1-main.log*
+    pgbadger http://172.12.110.1//var/log/postgresql/postgresql-10.1-main.log
+    pgbadger ftp://username\@172.12.110.14/postgresql-10.1-main.log
+    pgbadger ssh://username\@172.12.110.14//var/log/postgresql/postgresql-10.1-main.log*

 You can use a local PostgreSQL log together with a remote pgbouncer log file
 and parse both:

-    pgbadger /var/log/postgresql/postgresql-10.1-main.log ssh://username@172.12.110.14/pgbouncer.log
+    pgbadger /var/log/postgresql/postgresql-10.1-main.log ssh://username\@172.12.110.14/pgbouncer.log

 Generate Tsung sessions XML file with select queries only:

--
2.40.0