From 40b9b957694cf7749c420c6c51a7e1d3c9b1fec1 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Tue, 27 Mar 2012 14:52:37 -0400 Subject: [PATCH] New GUC, track_iotiming, to track I/O timings. Currently, the only way to see the numbers this gathers is via EXPLAIN (ANALYZE, BUFFERS), but the plan is to add visibility through the stats collector and pg_stat_statements in subsequent patches. Ants Aasma, reviewed by Greg Smith, with some further changes by me. --- doc/src/sgml/config.sgml | 15 ++++++++++++ src/backend/commands/explain.c | 18 ++++++++++++++ src/backend/executor/instrument.c | 2 ++ src/backend/storage/buffer/bufmgr.c | 24 +++++++++++++++++++ src/backend/utils/misc/guc.c | 9 +++++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/executor/instrument.h | 2 ++ src/include/storage/bufmgr.h | 1 + 8 files changed, 72 insertions(+) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 3e178759a2..9baaa3fb36 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4284,6 +4284,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + track_iotiming (boolean) + + track_iotiming configuration parameter + + + + Enables timing of database I/O calls. This parameter is off by + default, because it will repeatedly query the operating system for + the current time, which may cause significant overhead on some + platforms. Only superusers can change this setting. + + + + track_functions (enum) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index a14cae1442..81d63f87f1 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -1236,6 +1236,8 @@ ExplainNode(PlanState *planstate, List *ancestors, usage->local_blks_written > 0); bool has_temp = (usage->temp_blks_read > 0 || usage->temp_blks_written > 0); + bool has_timing = (!INSTR_TIME_IS_ZERO(usage->time_read) || + !INSTR_TIME_IS_ZERO(usage->time_write)); /* Show only positive counter values. */ if (has_shared || has_local || has_temp) @@ -1291,6 +1293,20 @@ ExplainNode(PlanState *planstate, List *ancestors, } appendStringInfoChar(es->str, '\n'); } + + /* As above, show only positive counter values. */ + if (has_timing) + { + appendStringInfoSpaces(es->str, es->indent * 2); + appendStringInfoString(es->str, "I/O Timings:"); + if (!INSTR_TIME_IS_ZERO(usage->time_read)) + appendStringInfo(es->str, " read=%0.2f", + INSTR_TIME_GET_MILLISEC(usage->time_read)); + if (!INSTR_TIME_IS_ZERO(usage->time_write)) + appendStringInfo(es->str, " write=%0.2f", + INSTR_TIME_GET_MILLISEC(usage->time_write)); + appendStringInfoChar(es->str, '\n'); + } } else { @@ -1304,6 +1320,8 @@ ExplainNode(PlanState *planstate, List *ancestors, ExplainPropertyLong("Local Written Blocks", usage->local_blks_written, es); ExplainPropertyLong("Temp Read Blocks", usage->temp_blks_read, es); ExplainPropertyLong("Temp Written Blocks", usage->temp_blks_written, es); + ExplainPropertyFloat("I/O Read Time", INSTR_TIME_GET_MILLISEC(usage->time_read), 3, es); + ExplainPropertyFloat("I/O Write Time", INSTR_TIME_GET_MILLISEC(usage->time_write), 3, es); } } diff --git a/src/backend/executor/instrument.c b/src/backend/executor/instrument.c index 6e9f450d68..92e56d13ae 100644 --- a/src/backend/executor/instrument.c +++ b/src/backend/executor/instrument.c @@ -145,4 +145,6 @@ BufferUsageAccumDiff(BufferUsage *dst, dst->local_blks_written += add->local_blks_written - sub->local_blks_written; dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read; dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written; + INSTR_TIME_ACCUM_DIFF(dst->time_read, add->time_read, sub->time_read); + INSTR_TIME_ACCUM_DIFF(dst->time_write, add->time_write, sub->time_write); } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 3924a51c0c..4de6a7212c 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -67,6 +67,7 @@ bool zero_damaged_pages = false; int bgwriter_lru_maxpages = 100; double bgwriter_lru_multiplier = 2.0; +bool track_iotiming = false; /* * How many buffers PrefetchBuffer callers should try to stay ahead of their @@ -437,8 +438,21 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, MemSet((char *) bufBlock, 0, BLCKSZ); else { + instr_time io_start, + io_time; + + if (track_iotiming) + INSTR_TIME_SET_CURRENT(io_start); + smgrread(smgr, forkNum, blockNum, (char *) bufBlock); + if (track_iotiming) + { + INSTR_TIME_SET_CURRENT(io_time); + INSTR_TIME_SUBTRACT(io_time, io_start); + INSTR_TIME_ADD(pgBufferUsage.time_read, io_time); + } + /* check for garbage data */ if (!PageHeaderIsValid((PageHeader) bufBlock)) { @@ -1874,6 +1888,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) { XLogRecPtr recptr; ErrorContextCallback errcontext; + instr_time io_start, io_end; /* * Acquire the buffer's io_in_progress lock. If StartBufferIO returns @@ -1921,12 +1936,21 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) buf->flags &= ~BM_JUST_DIRTIED; UnlockBufHdr(buf); + if (track_iotiming) + INSTR_TIME_SET_CURRENT(io_start); + smgrwrite(reln, buf->tag.forkNum, buf->tag.blockNum, (char *) BufHdrGetBlock(buf), false); + if (track_iotiming) + { + INSTR_TIME_SET_CURRENT(io_end); + INSTR_TIME_ACCUM_DIFF(pgBufferUsage.time_write, io_end, io_start); + } + pgBufferUsage.shared_blks_written++; /* diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 3d2fe3ebc8..8ea391a8e6 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1018,6 +1018,15 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"track_iotiming", PGC_SUSET, STATS_COLLECTOR, + gettext_noop("Collects timing information for database IO activity."), + NULL + }, + &track_iotiming, + false, + NULL, NULL, NULL + }, { {"update_process_title", PGC_SUSET, STATS_COLLECTOR, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 96da086b0f..cbae3bfb22 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -424,6 +424,7 @@ #track_activities = on #track_counts = on +#track_iotiming = off #track_functions = none # none, pl, all #track_activity_query_size = 1024 # (change requires restart) #update_process_title = on diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h index 066f684f33..d29ab9b9f3 100644 --- a/src/include/executor/instrument.h +++ b/src/include/executor/instrument.h @@ -28,6 +28,8 @@ typedef struct BufferUsage long local_blks_written; /* # of local disk blocks written */ long temp_blks_read; /* # of temp blocks read */ long temp_blks_written; /* # of temp blocks written */ + instr_time time_read; /* time spent reading */ + instr_time time_write; /* time spent writing */ } BufferUsage; /* Flag bits included in InstrAlloc's instrument_options bitmask */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index de1bbd01d8..d72bad9f56 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -48,6 +48,7 @@ extern PGDLLIMPORT int NBuffers; extern bool zero_damaged_pages; extern int bgwriter_lru_maxpages; extern double bgwriter_lru_multiplier; +extern bool track_iotiming; extern int target_prefetch_pages; /* in buf_init.c */ -- 2.40.0