]> granicus.if.org Git - postgresql/commitdiff
Expose track_iotiming data via the statistics collector.
authorRobert Haas <rhaas@postgresql.org>
Thu, 5 Apr 2012 15:37:31 +0000 (11:37 -0400)
committerRobert Haas <rhaas@postgresql.org>
Thu, 5 Apr 2012 15:40:24 +0000 (11:40 -0400)
Ants Aasma's original patch to add timing information for buffer I/O
requests exposed this data at the relation level, which was judged too
costly.  I've here exposed it at the database level instead.

doc/src/sgml/config.sgml
doc/src/sgml/monitoring.sgml
src/backend/catalog/system_views.sql
src/backend/postmaster/pgstat.c
src/backend/storage/buffer/bufmgr.c
src/backend/utils/adt/pgstatfuncs.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/include/pgstat.h
src/test/regress/expected/rules.out

index 0a5d519214da042a6ad53a2e92e77b42221e9513..81392997f8848466c3c61fc0e611e406ae1ae3e9 100644 (file)
@@ -4295,8 +4295,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
         default, because it will repeatedly query the operating system for
         the current time, which may cause significant overhead on some
         platforms.  You can use the <xref linkend="pgtesttiming"> tool to
-        measure the overhead of timing on your system.  Only superusers can
-        change this setting.
+        measure the overhead of timing on your system.  Timing information is
+        displayed in <xref linkend="pg-stat-database-view">, in the output of
+        <xref linkend="sql-explain"> when the <literal>BUFFERS</> option is
+        used, and by <xref linkend="pgstatstatements">.  Only superusers can
+        change this setting.  
        </para>
       </listitem>
      </varlistentry>
index 840e54aef8a55232bf0dc94c6429b88c7537f00c..013dd335a4a73317a75c0d98785f29c9d4e38801 100644 (file)
@@ -144,6 +144,11 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
    statistics are collected about table and index accesses.
   </para>
 
+  <para>
+   The parameter <xref linkend="guc-track-iotiming"> enables monitoring
+   of block read and write times.
+  </para>
+
   <para>
    The parameter <xref linkend="guc-track-functions"> enables tracking of
    usage of user-defined functions.
@@ -961,6 +966,20 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
       This value can also be returned by directly calling
       the <function>pg_stat_get_db_deadlocks</function> function.</entry>
     </row>
+    <row>
+     <entry>block_read_time</entry>
+     <entry><type>bigint</></entry>
+     <entry>Time spent by backends reading data file blocks, in milliseconds.
+      The same value can be returned in microseconds by directly calling
+      the <function>pg_stat_get_db_block_time_read</function> function.</entry>
+    </row>
+    <row>
+     <entry>block_write_time</entry>
+     <entry><type>bigint</></entry>
+     <entry>Time spent by backends writing data file blocks, in milliseconds.
+      The same value can be returned in microseconds by directly calling
+      the <function>pg_stat_get_db_block_time_write</function> function.</entry>
+    </row>
     <row>
      <entry>stats_reset</entry>
      <entry><type>timestamptz</></entry>
index ab594eba9bcc114778a42f7ab9d3a590c6e2155d..9daebdf54784fe70dac350e0fb92bc0ffaa93161 100644 (file)
@@ -604,6 +604,8 @@ CREATE VIEW pg_stat_database AS
             pg_stat_get_db_temp_files(D.oid) AS temp_files,
             pg_stat_get_db_temp_bytes(D.oid) AS temp_bytes,
             pg_stat_get_db_deadlocks(D.oid) AS deadlocks,
+            pg_stat_get_db_block_time_read(D.oid) / 1000 AS block_read_time,
+            pg_stat_get_db_block_time_write(D.oid) / 1000 AS block_write_time,
             pg_stat_get_db_stat_reset_time(D.oid) AS stats_reset
     FROM pg_database D;
 
index a53fc52f4701f1eeeae80988529c9c6cc1d961a1..a7cb036cac01f61e9679875fd39c027d928d028d 100644 (file)
@@ -197,6 +197,8 @@ static PgStat_SubXactStatus *pgStatXactStack = NULL;
 
 static int     pgStatXactCommit = 0;
 static int     pgStatXactRollback = 0;
+PgStat_Counter pgStatBlockTimeRead = 0;
+PgStat_Counter pgStatBlockTimeWrite = 0;
 
 /* Record that's written to 2PC state file when pgstat state is persisted */
 typedef struct TwoPhasePgStatRecord
@@ -782,13 +784,15 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
                return;
 
        /*
-        * Report accumulated xact commit/rollback whenever we send a normal
-        * tabstat message
+        * Report accumulated xact commit/rollback and I/O timings whenever we send
+        * a normal tabstat message
         */
        if (OidIsValid(tsmsg->m_databaseid))
        {
                tsmsg->m_xact_commit = pgStatXactCommit;
                tsmsg->m_xact_rollback = pgStatXactRollback;
+               tsmsg->m_block_time_read = pgStatBlockTimeRead;
+               tsmsg->m_block_time_write = pgStatBlockTimeWrite;
                pgStatXactCommit = 0;
                pgStatXactRollback = 0;
        }
@@ -796,6 +800,8 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
        {
                tsmsg->m_xact_commit = 0;
                tsmsg->m_xact_rollback = 0;
+               tsmsg->m_block_time_read = 0;
+               tsmsg->m_block_time_write = 0;
        }
 
        n = tsmsg->m_nentries;
@@ -3352,6 +3358,8 @@ pgstat_get_db_entry(Oid databaseid, bool create)
                result->n_temp_files = 0;
                result->n_temp_bytes = 0;
                result->n_deadlocks = 0;
+               result->n_block_time_read = 0;
+               result->n_block_time_write = 0;
 
                result->stat_reset_timestamp = GetCurrentTimestamp();
 
@@ -4070,6 +4078,8 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
         */
        dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
        dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
+       dbentry->n_block_time_read += msg->m_block_time_read;
+       dbentry->n_block_time_write += msg->m_block_time_write;
 
        /*
         * Process all table entries in the message.
@@ -4266,6 +4276,8 @@ pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
        dbentry->n_temp_bytes = 0;
        dbentry->n_temp_files = 0;
        dbentry->n_deadlocks = 0;
+       dbentry->n_block_time_read = 0;
+       dbentry->n_block_time_write = 0;
 
        dbentry->stat_reset_timestamp = GetCurrentTimestamp();
 
index 4de6a7212ccce2482006b1340ad1de21882fe677..613d7544c6356e0bdcf9978ac4cc41d97cef2262 100644 (file)
@@ -450,6 +450,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
                        {
                                INSTR_TIME_SET_CURRENT(io_time);
                                INSTR_TIME_SUBTRACT(io_time, io_start);
+                               pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
                                INSTR_TIME_ADD(pgBufferUsage.time_read, io_time);
                        }
 
@@ -1888,7 +1889,8 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 {
        XLogRecPtr      recptr;
        ErrorContextCallback errcontext;
-       instr_time io_start, io_end;
+       instr_time      io_start,
+                               io_time;
 
        /*
         * Acquire the buffer's io_in_progress lock.  If StartBufferIO returns
@@ -1947,8 +1949,10 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 
        if (track_iotiming)
        {
-               INSTR_TIME_SET_CURRENT(io_end);
-               INSTR_TIME_ACCUM_DIFF(pgBufferUsage.time_write, io_end, io_start);
+               INSTR_TIME_SET_CURRENT(io_time);
+               INSTR_TIME_SUBTRACT(io_time, io_start);
+               pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
+               INSTR_TIME_ADD(pgBufferUsage.time_write, io_time);
        }
 
        pgBufferUsage.shared_blks_written++;
index 68b25274e38195c83486d4f1f9492f7da720bf81..20ee13db6cf545623570bc56d2b1976edaccae23 100644 (file)
@@ -82,6 +82,8 @@ extern Datum pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_stat_reset_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_temp_files(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_temp_bytes(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_db_block_time_read(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_db_block_time_write(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS);
@@ -1357,6 +1359,36 @@ pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS)
        PG_RETURN_INT64(result);
 }
 
+Datum
+pg_stat_get_db_block_time_read(PG_FUNCTION_ARGS)
+{
+       Oid                     dbid = PG_GETARG_OID(0);
+       int64           result;
+       PgStat_StatDBEntry *dbentry;
+
+       if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+               result = 0;
+       else
+               result = (int64) (dbentry->n_block_time_read);
+
+       PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_block_time_write(PG_FUNCTION_ARGS)
+{
+       Oid                     dbid = PG_GETARG_OID(0);
+       int64           result;
+       PgStat_StatDBEntry *dbentry;
+
+       if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+               result = 0;
+       else
+               result = (int64) (dbentry->n_block_time_write);
+
+       PG_RETURN_INT64(result);
+}
+
 Datum
 pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
 {
index 5d896bd11ae019aff70867379e16d06fddbe7d7d..2c21a958f55cfbf1548aae9d3df916d53f729986 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201203191
+#define CATALOG_VERSION_NO     201204051
 
 #endif
index 49b0754a2ca540d7c513ce4a706c1d2b155af2a5..597b9f5fe6a476bf1633fdafceed6617f0b58e5e 100644 (file)
@@ -2662,6 +2662,10 @@ DATA(insert OID = 3150 (  pg_stat_get_db_temp_files PGNSP PGUID 12 1 0 0 0 f f f
 DESCR("statistics: number of temporary files written");
 DATA(insert OID = 3151 (  pg_stat_get_db_temp_bytes PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_temp_bytes _null_ _null_ _null_ ));
 DESCR("statistics: number of bytes in temporary files written");
+DATA(insert OID = 2844 (  pg_stat_get_db_block_time_read       PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_block_time_read _null_ _null_ _null_ ));
+DESCR("statistics: block read time in microseconds");
+DATA(insert OID = 2845 (  pg_stat_get_db_block_time_write      PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_block_time_write _null_ _null_ _null_ ));
+DESCR("statistics: block write time in microseconds");
 DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ ));
 DESCR("statistics: number of timed checkpoints started by the bgwriter");
 DATA(insert OID = 2770 ( pg_stat_get_bgwriter_requested_checkpoints PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_requested_checkpoints _null_ _null_ _null_ ));
index 1281bd8b69f2ef433bcd2b1f87a6e63f1d09dd07..b3618f28d01b8a08c82cf2d91d139e5a39f0c646 100644 (file)
@@ -233,6 +233,8 @@ typedef struct PgStat_MsgTabstat
        int                     m_nentries;
        int                     m_xact_commit;
        int                     m_xact_rollback;
+       PgStat_Counter    m_block_time_read;
+       PgStat_Counter    m_block_time_write;
        PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
 } PgStat_MsgTabstat;
 
@@ -536,6 +538,8 @@ typedef struct PgStat_StatDBEntry
        PgStat_Counter n_temp_files;
        PgStat_Counter n_temp_bytes;
        PgStat_Counter n_deadlocks;
+       PgStat_Counter n_block_time_read;               /* times in microseconds */
+       PgStat_Counter n_block_time_write;
 
        TimestampTz stat_reset_timestamp;
 
@@ -721,6 +725,12 @@ extern char *pgstat_stat_filename;
  */
 extern PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * Updated by pgstat_count_time_* macros.
+ */
+extern PgStat_Counter pgStatBlockTimeRead;
+extern PgStat_Counter pgStatBlockTimeWrite;
+
 /* ----------
  * Functions called from postmaster
  * ----------
@@ -816,6 +826,10 @@ extern void pgstat_initstats(Relation rel);
                if ((rel)->pgstat_info != NULL)                                                         \
                        (rel)->pgstat_info->t_counts.t_blocks_hit++;                    \
        } while (0)
+#define pgstat_count_buffer_read_time(n)                                                       \
+       pgStatBlockTimeRead += (n);
+#define pgstat_count_buffer_write_time(n)                                                      \
+       pgStatBlockTimeWrite += (n);
 
 extern void pgstat_count_heap_insert(Relation rel, int n);
 extern void pgstat_count_heap_update(Relation rel, bool hot);
index aaf0cca02609191dbd20550860217bad7ab91b71..67ebb951ebf363f53a1b440114ea8755f0e30c08 100644 (file)
@@ -1296,7 +1296,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
  pg_stat_all_indexes             | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"]));
  pg_stat_all_tables              | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_tuples_hot_updated(c.oid) AS n_tup_hot_upd, pg_stat_get_live_tuples(c.oid) AS n_live_tup, pg_stat_get_dead_tuples(c.oid) AS n_dead_tup, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze, pg_stat_get_vacuum_count(c.oid) AS vacuum_count, pg_stat_get_autovacuum_count(c.oid) AS autovacuum_count, pg_stat_get_analyze_count(c.oid) AS analyze_count, pg_stat_get_autoanalyze_count(c.oid) AS autoanalyze_count FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname;
  pg_stat_bgwriter                | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, pg_stat_get_buf_written_backend() AS buffers_backend, pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync, pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
- pg_stat_database                | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted, pg_stat_get_db_conflict_all(d.oid) AS conflicts, pg_stat_get_db_temp_files(d.oid) AS temp_files, pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes, pg_stat_get_db_deadlocks(d.oid) AS deadlocks, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset FROM pg_database d;
+ pg_stat_database                | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit, pg_stat_get_db_tuples_returned(d.oid) AS tup_returned, pg_stat_get_db_tuples_fetched(d.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(d.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(d.oid) AS tup_updated, pg_stat_get_db_tuples_deleted(d.oid) AS tup_deleted, pg_stat_get_db_conflict_all(d.oid) AS conflicts, pg_stat_get_db_temp_files(d.oid) AS temp_files, pg_stat_get_db_temp_bytes(d.oid) AS temp_bytes, pg_stat_get_db_deadlocks(d.oid) AS deadlocks, (pg_stat_get_db_block_time_read(d.oid) / 1000) AS block_read_time, (pg_stat_get_db_block_time_write(d.oid) / 1000) AS block_write_time, pg_stat_get_db_stat_reset_time(d.oid) AS stats_reset FROM pg_database d;
  pg_stat_database_conflicts      | SELECT d.oid AS datid, d.datname, pg_stat_get_db_conflict_tablespace(d.oid) AS confl_tablespace, pg_stat_get_db_conflict_lock(d.oid) AS confl_lock, pg_stat_get_db_conflict_snapshot(d.oid) AS confl_snapshot, pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin, pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock FROM pg_database d;
  pg_stat_replication             | SELECT s.pid, s.usesysid, u.rolname AS usename, s.application_name, s.client_addr, s.client_hostname, s.client_port, s.backend_start, w.state, w.sent_location, w.write_location, w.flush_location, w.replay_location, w.sync_priority, w.sync_state FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, waiting, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port), pg_authid u, pg_stat_get_wal_senders() w(pid, state, sent_location, write_location, flush_location, replay_location, sync_priority, sync_state) WHERE ((s.usesysid = u.oid) AND (s.pid = w.pid));
  pg_stat_sys_indexes             | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE ((pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'information_schema'::name])) OR (pg_stat_all_indexes.schemaname ~ '^pg_toast'::text));