4 * Definitions for the PostgreSQL statistics collector daemon.
6 * Copyright (c) 2001-2017, PostgreSQL Global Development Group
14 #include "datatype/timestamp.h"
16 #include "libpq/pqcomm.h"
17 #include "port/atomics.h"
18 #include "portability/instr_time.h"
19 #include "postmaster/pgarch.h"
20 #include "storage/proc.h"
21 #include "utils/hsearch.h"
22 #include "utils/relcache.h"
26 * Paths for the statistics files (relative to installation's $PGDATA).
/*
 * Permanent statistics location: the directory, the global stats file kept
 * inside it, and a sibling ".tmp" name for that same file.
 * NOTE(review): presumably the ".tmp" file is written first and then renamed
 * over the ".stat" file -- confirm against the collector's write path.
 */
29 #define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
30 #define PGSTAT_STAT_PERMANENT_FILENAME "pg_stat/global.stat"
31 #define PGSTAT_STAT_PERMANENT_TMPFILE "pg_stat/global.tmp"
33 /* Default directory to store temporary statistics data in */
34 #define PG_STAT_TMP_DIR "pg_stat_tmp"
36 /* Values for track_functions GUC variable --- order is significant! */
37 typedef enum TrackFunctionsLevel
42 } TrackFunctionsLevel;
45 * The types of backend -> collector messages
48 typedef enum StatMsgType
53 PGSTAT_MTYPE_TABPURGE,
55 PGSTAT_MTYPE_RESETCOUNTER,
56 PGSTAT_MTYPE_RESETSHAREDCOUNTER,
57 PGSTAT_MTYPE_RESETSINGLECOUNTER,
58 PGSTAT_MTYPE_AUTOVAC_START,
61 PGSTAT_MTYPE_ARCHIVER,
62 PGSTAT_MTYPE_BGWRITER,
63 PGSTAT_MTYPE_FUNCSTAT,
64 PGSTAT_MTYPE_FUNCPURGE,
65 PGSTAT_MTYPE_RECOVERYCONFLICT,
66 PGSTAT_MTYPE_TEMPFILE,
71 * The data type used for counters.
/*
 * Every statistics counter in this header uses this int64-based type.
 * Delta-style fields (e.g. t_delta_live_tuples) are documented as possibly
 * negative, so the type must stay signed.
 */
74 typedef int64 PgStat_Counter;
77 * PgStat_TableCounts The actual per-table counts kept by a backend
79 * This struct should contain only actual event counters, because we memcmp
80 * it against zeroes to detect whether there are any counts to transmit.
81 * It is a component of PgStat_TableStatus (within-backend state) and
82 * PgStat_TableEntry (the transmitted message format).
84 * Note: for a table, tuples_returned is the number of tuples successfully
85 * fetched by heap_getnext, while tuples_fetched is the number of tuples
86 * successfully fetched by heap_fetch under the control of bitmap indexscans.
87 * For an index, tuples_returned is the number of index entries returned by
88 * the index AM, while tuples_fetched is the number of tuples successfully
89 * fetched by heap_fetch under the control of simple indexscans for this index.
91 * tuples_inserted/updated/deleted/hot_updated count attempted actions,
92 * regardless of whether the transaction committed. delta_live_tuples,
93 * delta_dead_tuples, and changed_tuples are set depending on commit or abort.
94 * Note that delta_live_tuples and delta_dead_tuples can be negative!
97 typedef struct PgStat_TableCounts
99 PgStat_Counter t_numscans;
101 PgStat_Counter t_tuples_returned;
102 PgStat_Counter t_tuples_fetched;
104 PgStat_Counter t_tuples_inserted;
105 PgStat_Counter t_tuples_updated;
106 PgStat_Counter t_tuples_deleted;
107 PgStat_Counter t_tuples_hot_updated;
110 PgStat_Counter t_delta_live_tuples;
111 PgStat_Counter t_delta_dead_tuples;
112 PgStat_Counter t_changed_tuples;
114 PgStat_Counter t_blocks_fetched;
115 PgStat_Counter t_blocks_hit;
116 } PgStat_TableCounts;
118 /* Possible targets for resetting cluster-wide shared values */
119 typedef enum PgStat_Shared_Reset_Target
123 } PgStat_Shared_Reset_Target;
125 /* Possible object types for resetting single counters */
126 typedef enum PgStat_Single_Reset_Type
130 } PgStat_Single_Reset_Type;
132 /* ------------------------------------------------------------
133 * Structures kept in backend local memory while accumulating counts
134 * ------------------------------------------------------------
139 * PgStat_TableStatus Per-table status within a backend
141 * Many of the event counters are nontransactional, ie, we count events
142 * in committed and aborted transactions alike. For these, we just count
143 * directly in the PgStat_TableStatus. However, delta_live_tuples,
144 * delta_dead_tuples, and changed_tuples must be derived from event counts
145 * with awareness of whether the transaction or subtransaction committed or
146 * aborted. Hence, we also keep a stack of per-(sub)transaction status
147 * records for every table modified in the current transaction. At commit
148 * or abort, we propagate tuples_inserted/updated/deleted up to the
149 * parent subtransaction level, or out to the parent PgStat_TableStatus,
153 typedef struct PgStat_TableStatus
155 Oid t_id; /* table's OID */
156 bool t_shared; /* is it a shared catalog? */
157 struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */
158 PgStat_TableCounts t_counts; /* event counts to be sent */
159 } PgStat_TableStatus;
162 * PgStat_TableXactStatus Per-table, per-subtransaction status
165 typedef struct PgStat_TableXactStatus
167 PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */
168 PgStat_Counter tuples_updated; /* tuples updated in (sub)xact */
169 PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */
170 bool truncated; /* relation truncated in this (sub)xact */
171 PgStat_Counter inserted_pre_trunc; /* tuples inserted prior to truncate */
172 PgStat_Counter updated_pre_trunc; /* tuples updated prior to truncate */
173 PgStat_Counter deleted_pre_trunc; /* tuples deleted prior to truncate */
174 int nest_level; /* subtransaction nest level */
175 /* links to other structs for same relation: */
176 struct PgStat_TableXactStatus *upper; /* next higher subxact if any */
177 PgStat_TableStatus *parent; /* per-table status */
178 /* structs of same subxact level are linked here: */
179 struct PgStat_TableXactStatus *next; /* next of same subxact */
180 } PgStat_TableXactStatus;
183 /* ------------------------------------------------------------
184 * Message formats follow
185 * ------------------------------------------------------------
190 * PgStat_MsgHdr The common message header
193 typedef struct PgStat_MsgHdr
200 * Space available in a message. This will keep the UDP packets below 1K,
201 * which should fit unfragmented into the MTU of the loopback interface.
202 * (Larger values of PGSTAT_MAX_MSG_SIZE would work for that on most
203 * platforms, but we're being conservative here.)
/*
 * PGSTAT_MAX_MSG_SIZE is the size budget for one whole message, in bytes.
 * PGSTAT_MSG_PAYLOAD is what remains for message-specific data after the
 * common PgStat_MsgHdr is subtracted; the per-message PGSTAT_NUM_* array
 * sizes are derived from it.
 */
206 #define PGSTAT_MAX_MSG_SIZE 1000
207 #define PGSTAT_MSG_PAYLOAD (PGSTAT_MAX_MSG_SIZE - sizeof(PgStat_MsgHdr))
211 * PgStat_MsgDummy A dummy message, ignored by the collector
214 typedef struct PgStat_MsgDummy
221 * PgStat_MsgInquiry Sent by a backend to ask the collector
222 * to write the stats file(s).
224 * Ordinarily, an inquiry message prompts writing of the global stats file,
225 * the stats file for shared catalogs, and the stats file for the specified
226 * database. If databaseid is InvalidOid, only the first two are written.
228 * New file(s) will be written only if the existing file has a timestamp
229 * older than the specified cutoff_time; this prevents duplicated effort
230 * when multiple requests arrive at nearly the same time, assuming that
231 * backends send requests with cutoff_times a little bit in the past.
233 * clock_time should be the requestor's current local time; the collector
234 * uses this to check for the system clock going backward, but it has no
235 * effect unless that occurs. We assume clock_time >= cutoff_time, though.
239 typedef struct PgStat_MsgInquiry
242 TimestampTz clock_time; /* observed local clock time */
243 TimestampTz cutoff_time; /* minimum acceptable file timestamp */
244 Oid databaseid; /* requested DB (InvalidOid => shared only) */
249 * PgStat_TableEntry Per-table info in a MsgTabstat
252 typedef struct PgStat_TableEntry
255 PgStat_TableCounts t_counts;
259 * PgStat_MsgTabstat Sent by the backend to report table
260 * and buffer access statistics.
/*
 * Number of PgStat_TableEntry slots in one tabstat message: the payload
 * minus the message's fixed fields (one Oid, three ints, two
 * PgStat_Counters), divided by the entry size.  Integer division rounds
 * down, so leftover bytes simply go unused.
 * NOTE(review): the subtracted sizes must be kept in step with the fixed
 * members of PgStat_MsgTabstat.  The two counters appear to be
 * m_block_read_time and m_block_write_time -- confirm the Oid and the three
 * ints against the struct definition.
 */
263 #define PGSTAT_NUM_TABENTRIES \
264 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 2 * sizeof(PgStat_Counter)) \
265 / sizeof(PgStat_TableEntry))
267 typedef struct PgStat_MsgTabstat
274 PgStat_Counter m_block_read_time; /* times in microseconds */
275 PgStat_Counter m_block_write_time;
276 PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
281 * PgStat_MsgTabpurge Sent by the backend to tell the collector
285 #define PGSTAT_NUM_TABPURGE \
286 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
289 typedef struct PgStat_MsgTabpurge
294 Oid m_tableid[PGSTAT_NUM_TABPURGE];
295 } PgStat_MsgTabpurge;
299 * PgStat_MsgDropdb Sent by the backend to tell the collector
300 * about a dropped database
303 typedef struct PgStat_MsgDropdb
311 * PgStat_MsgResetcounter Sent by the backend to tell the collector
315 typedef struct PgStat_MsgResetcounter
319 } PgStat_MsgResetcounter;
322 * PgStat_MsgResetsharedcounter Sent by the backend to tell the collector
323 * to reset a shared counter
326 typedef struct PgStat_MsgResetsharedcounter
329 PgStat_Shared_Reset_Target m_resettarget;
330 } PgStat_MsgResetsharedcounter;
333 * PgStat_MsgResetsinglecounter Sent by the backend to tell the collector
334 * to reset a single counter
337 typedef struct PgStat_MsgResetsinglecounter
341 PgStat_Single_Reset_Type m_resettype;
343 } PgStat_MsgResetsinglecounter;
346 * PgStat_MsgAutovacStart Sent by the autovacuum daemon to signal
347 * that a database is going to be processed
350 typedef struct PgStat_MsgAutovacStart
354 TimestampTz m_start_time;
355 } PgStat_MsgAutovacStart;
359 * PgStat_MsgVacuum Sent by the backend or autovacuum daemon
363 typedef struct PgStat_MsgVacuum
369 TimestampTz m_vacuumtime;
370 PgStat_Counter m_live_tuples;
371 PgStat_Counter m_dead_tuples;
376 * PgStat_MsgAnalyze Sent by the backend or autovacuum daemon
380 typedef struct PgStat_MsgAnalyze
387 TimestampTz m_analyzetime;
388 PgStat_Counter m_live_tuples;
389 PgStat_Counter m_dead_tuples;
394 * PgStat_MsgArchiver Sent by the archiver to update statistics.
397 typedef struct PgStat_MsgArchiver
400 bool m_failed; /* Failed attempt */
401 char m_xlog[MAX_XFN_CHARS + 1];
402 TimestampTz m_timestamp;
403 } PgStat_MsgArchiver;
406 * PgStat_MsgBgWriter Sent by the bgwriter to update statistics.
409 typedef struct PgStat_MsgBgWriter
413 PgStat_Counter m_timed_checkpoints;
414 PgStat_Counter m_requested_checkpoints;
415 PgStat_Counter m_buf_written_checkpoints;
416 PgStat_Counter m_buf_written_clean;
417 PgStat_Counter m_maxwritten_clean;
418 PgStat_Counter m_buf_written_backend;
419 PgStat_Counter m_buf_fsync_backend;
420 PgStat_Counter m_buf_alloc;
421 PgStat_Counter m_checkpoint_write_time; /* times in milliseconds */
422 PgStat_Counter m_checkpoint_sync_time;
423 } PgStat_MsgBgWriter;
426 * PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict
429 typedef struct PgStat_MsgRecoveryConflict
435 } PgStat_MsgRecoveryConflict;
438 * PgStat_MsgTempFile Sent by the backend upon creating a temp file
441 typedef struct PgStat_MsgTempFile
447 } PgStat_MsgTempFile;
450 * PgStat_FunctionCounts The actual per-function counts kept by a backend
452 * This struct should contain only actual event counters, because we memcmp
453 * it against zeroes to detect whether there are any counts to transmit.
455 * Note that the time counters are in instr_time format here. We convert to
456 * microseconds in PgStat_Counter format when transmitting to the collector.
459 typedef struct PgStat_FunctionCounts
461 PgStat_Counter f_numcalls;
462 instr_time f_total_time;
463 instr_time f_self_time;
464 } PgStat_FunctionCounts;
467 * PgStat_BackendFunctionEntry Entry in backend's per-function hash table
470 typedef struct PgStat_BackendFunctionEntry
473 PgStat_FunctionCounts f_counts;
474 } PgStat_BackendFunctionEntry;
477 * PgStat_FunctionEntry Per-function info in a MsgFuncstat
480 typedef struct PgStat_FunctionEntry
483 PgStat_Counter f_numcalls;
484 PgStat_Counter f_total_time; /* times in microseconds */
485 PgStat_Counter f_self_time;
486 } PgStat_FunctionEntry;
489 * PgStat_MsgFuncstat Sent by the backend to report function
/*
 * Number of PgStat_FunctionEntry slots in one funcstat message: the payload
 * minus one Oid and one int of fixed fields, divided by the entry size
 * (rounded down).  This bounds the m_entry[] array of PgStat_MsgFuncstat.
 * NOTE(review): the Oid and int presumably correspond to the database id
 * and the entry count of PgStat_MsgFuncstat -- confirm against the struct.
 */
493 #define PGSTAT_NUM_FUNCENTRIES \
494 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
495 / sizeof(PgStat_FunctionEntry))
497 typedef struct PgStat_MsgFuncstat
502 PgStat_FunctionEntry m_entry[PGSTAT_NUM_FUNCENTRIES];
503 } PgStat_MsgFuncstat;
506 * PgStat_MsgFuncpurge Sent by the backend to tell the collector
507 * about dead functions.
510 #define PGSTAT_NUM_FUNCPURGE \
511 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
514 typedef struct PgStat_MsgFuncpurge
519 Oid m_functionid[PGSTAT_NUM_FUNCPURGE];
520 } PgStat_MsgFuncpurge;
523 * PgStat_MsgDeadlock Sent by the backend to tell the collector
524 * about a deadlock that occurred.
527 typedef struct PgStat_MsgDeadlock
531 } PgStat_MsgDeadlock;
535 * PgStat_Msg Union over all possible messages.
538 typedef union PgStat_Msg
540 PgStat_MsgHdr msg_hdr;
541 PgStat_MsgDummy msg_dummy;
542 PgStat_MsgInquiry msg_inquiry;
543 PgStat_MsgTabstat msg_tabstat;
544 PgStat_MsgTabpurge msg_tabpurge;
545 PgStat_MsgDropdb msg_dropdb;
546 PgStat_MsgResetcounter msg_resetcounter;
547 PgStat_MsgResetsharedcounter msg_resetsharedcounter;
548 PgStat_MsgResetsinglecounter msg_resetsinglecounter;
549 PgStat_MsgAutovacStart msg_autovacuum;
550 PgStat_MsgVacuum msg_vacuum;
551 PgStat_MsgAnalyze msg_analyze;
552 PgStat_MsgArchiver msg_archiver;
553 PgStat_MsgBgWriter msg_bgwriter;
554 PgStat_MsgFuncstat msg_funcstat;
555 PgStat_MsgFuncpurge msg_funcpurge;
556 PgStat_MsgRecoveryConflict msg_recoveryconflict;
557 PgStat_MsgDeadlock msg_deadlock;
561 /* ------------------------------------------------------------
562 * Statistic collector data structures follow
564 * PGSTAT_FILE_FORMAT_ID should be changed whenever any of these
565 * data structures change.
566 * ------------------------------------------------------------
569 #define PGSTAT_FILE_FORMAT_ID 0x01A5BC9D
572 * PgStat_StatDBEntry The collector's data per database
575 typedef struct PgStat_StatDBEntry
578 PgStat_Counter n_xact_commit;
579 PgStat_Counter n_xact_rollback;
580 PgStat_Counter n_blocks_fetched;
581 PgStat_Counter n_blocks_hit;
582 PgStat_Counter n_tuples_returned;
583 PgStat_Counter n_tuples_fetched;
584 PgStat_Counter n_tuples_inserted;
585 PgStat_Counter n_tuples_updated;
586 PgStat_Counter n_tuples_deleted;
587 TimestampTz last_autovac_time;
588 PgStat_Counter n_conflict_tablespace;
589 PgStat_Counter n_conflict_lock;
590 PgStat_Counter n_conflict_snapshot;
591 PgStat_Counter n_conflict_bufferpin;
592 PgStat_Counter n_conflict_startup_deadlock;
593 PgStat_Counter n_temp_files;
594 PgStat_Counter n_temp_bytes;
595 PgStat_Counter n_deadlocks;
596 PgStat_Counter n_block_read_time; /* times in microseconds */
597 PgStat_Counter n_block_write_time;
599 TimestampTz stat_reset_timestamp;
600 TimestampTz stats_timestamp; /* time of db stats file update */
603 * tables and functions must be last in the struct, because we don't write
604 * the pointers out to the stats file.
608 } PgStat_StatDBEntry;
612 * PgStat_StatTabEntry The collector's data per table (or index)
615 typedef struct PgStat_StatTabEntry
619 PgStat_Counter numscans;
621 PgStat_Counter tuples_returned;
622 PgStat_Counter tuples_fetched;
624 PgStat_Counter tuples_inserted;
625 PgStat_Counter tuples_updated;
626 PgStat_Counter tuples_deleted;
627 PgStat_Counter tuples_hot_updated;
629 PgStat_Counter n_live_tuples;
630 PgStat_Counter n_dead_tuples;
631 PgStat_Counter changes_since_analyze;
633 PgStat_Counter blocks_fetched;
634 PgStat_Counter blocks_hit;
636 TimestampTz vacuum_timestamp; /* user initiated vacuum */
637 PgStat_Counter vacuum_count;
638 TimestampTz autovac_vacuum_timestamp; /* autovacuum initiated */
639 PgStat_Counter autovac_vacuum_count;
640 TimestampTz analyze_timestamp; /* user initiated */
641 PgStat_Counter analyze_count;
642 TimestampTz autovac_analyze_timestamp; /* autovacuum initiated */
643 PgStat_Counter autovac_analyze_count;
644 } PgStat_StatTabEntry;
648 * PgStat_StatFuncEntry The collector's data per function
651 typedef struct PgStat_StatFuncEntry
655 PgStat_Counter f_numcalls;
657 PgStat_Counter f_total_time; /* times in microseconds */
658 PgStat_Counter f_self_time;
659 } PgStat_StatFuncEntry;
663 * Archiver statistics kept in the stats collector
665 typedef struct PgStat_ArchiverStats
667 PgStat_Counter archived_count; /* archival successes */
668 char last_archived_wal[MAX_XFN_CHARS + 1]; /* last WAL file
670 TimestampTz last_archived_timestamp; /* last archival success time */
671 PgStat_Counter failed_count; /* failed archival attempts */
672 char last_failed_wal[MAX_XFN_CHARS + 1]; /* WAL file involved in
674 TimestampTz last_failed_timestamp; /* last archival failure time */
675 TimestampTz stat_reset_timestamp;
676 } PgStat_ArchiverStats;
679 * Global statistics kept in the stats collector
681 typedef struct PgStat_GlobalStats
683 TimestampTz stats_timestamp; /* time of stats file update */
684 PgStat_Counter timed_checkpoints;
685 PgStat_Counter requested_checkpoints;
686 PgStat_Counter checkpoint_write_time; /* times in milliseconds */
687 PgStat_Counter checkpoint_sync_time;
688 PgStat_Counter buf_written_checkpoints;
689 PgStat_Counter buf_written_clean;
690 PgStat_Counter maxwritten_clean;
691 PgStat_Counter buf_written_backend;
692 PgStat_Counter buf_fsync_backend;
693 PgStat_Counter buf_alloc;
694 TimestampTz stat_reset_timestamp;
695 } PgStat_GlobalStats;
702 typedef enum BackendType
721 typedef enum BackendState
726 STATE_IDLEINTRANSACTION,
728 STATE_IDLEINTRANSACTION_ABORTED,
/*
 * Wait event class identifiers.  Each value occupies only the most
 * significant byte of a 32-bit word.  The wait event enums below start
 * their numbering at one of these constants (e.g. WAIT_EVENT_CLIENT_READ =
 * PG_WAIT_CLIENT), so the class of a wait_event_info value is its top byte.
 * Note that no class is assigned the value 0x02000000U here.
 */
737 #define PG_WAIT_LWLOCK 0x01000000U
738 #define PG_WAIT_LOCK 0x03000000U
739 #define PG_WAIT_BUFFER_PIN 0x04000000U
740 #define PG_WAIT_ACTIVITY 0x05000000U
741 #define PG_WAIT_CLIENT 0x06000000U
742 #define PG_WAIT_EXTENSION 0x07000000U
743 #define PG_WAIT_IPC 0x08000000U
744 #define PG_WAIT_TIMEOUT 0x09000000U
745 #define PG_WAIT_IO 0x0A000000U
748 * Wait Events - Activity
750 * Use this category when a process is waiting because it has no work to do,
751 * unless the "Client" or "Timeout" category describes the situation better.
752 * Typically, this should only be used for background processes.
757 WAIT_EVENT_ARCHIVER_MAIN = PG_WAIT_ACTIVITY,
758 WAIT_EVENT_AUTOVACUUM_MAIN,
759 WAIT_EVENT_BGWRITER_HIBERNATE,
760 WAIT_EVENT_BGWRITER_MAIN,
761 WAIT_EVENT_CHECKPOINTER_MAIN,
762 WAIT_EVENT_LOGICAL_LAUNCHER_MAIN,
763 WAIT_EVENT_LOGICAL_APPLY_MAIN,
764 WAIT_EVENT_PGSTAT_MAIN,
765 WAIT_EVENT_RECOVERY_WAL_ALL,
766 WAIT_EVENT_RECOVERY_WAL_STREAM,
767 WAIT_EVENT_SYSLOGGER_MAIN,
768 WAIT_EVENT_WAL_RECEIVER_MAIN,
769 WAIT_EVENT_WAL_SENDER_MAIN,
770 WAIT_EVENT_WAL_WRITER_MAIN
774 * Wait Events - Client
776 * Use this category when a process is waiting to send data to or receive data
777 * from the frontend process to which it is connected. This is never used for
778 * a background process, which has no client connection.
783 WAIT_EVENT_CLIENT_READ = PG_WAIT_CLIENT,
784 WAIT_EVENT_CLIENT_WRITE,
785 WAIT_EVENT_LIBPQWALRECEIVER_CONNECT,
786 WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE,
787 WAIT_EVENT_SSL_OPEN_SERVER,
788 WAIT_EVENT_WAL_RECEIVER_WAIT_START,
789 WAIT_EVENT_WAL_SENDER_WAIT_WAL,
790 WAIT_EVENT_WAL_SENDER_WRITE_DATA
796 * Use this category when a process cannot complete the work it is doing because
797 * it is waiting for a notification from another process.
802 WAIT_EVENT_BGWORKER_SHUTDOWN = PG_WAIT_IPC,
803 WAIT_EVENT_BGWORKER_STARTUP,
804 WAIT_EVENT_BTREE_PAGE,
805 WAIT_EVENT_EXECUTE_GATHER,
806 WAIT_EVENT_LOGICAL_SYNC_DATA,
807 WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE,
808 WAIT_EVENT_MQ_INTERNAL,
809 WAIT_EVENT_MQ_PUT_MESSAGE,
810 WAIT_EVENT_MQ_RECEIVE,
812 WAIT_EVENT_PARALLEL_FINISH,
813 WAIT_EVENT_PARALLEL_BITMAP_SCAN,
814 WAIT_EVENT_PROCARRAY_GROUP_UPDATE,
815 WAIT_EVENT_CLOG_GROUP_UPDATE,
816 WAIT_EVENT_REPLICATION_ORIGIN_DROP,
817 WAIT_EVENT_REPLICATION_SLOT_DROP,
818 WAIT_EVENT_SAFE_SNAPSHOT,
823 * Wait Events - Timeout
825 * Use this category when a process is waiting for a timeout to expire.
830 WAIT_EVENT_BASE_BACKUP_THROTTLE = PG_WAIT_TIMEOUT,
832 WAIT_EVENT_RECOVERY_APPLY_DELAY
838 * Use this category when a process is waiting for an IO.
843 WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
844 WAIT_EVENT_BUFFILE_WRITE,
845 WAIT_EVENT_CONTROL_FILE_READ,
846 WAIT_EVENT_CONTROL_FILE_SYNC,
847 WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE,
848 WAIT_EVENT_CONTROL_FILE_WRITE,
849 WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE,
850 WAIT_EVENT_COPY_FILE_READ,
851 WAIT_EVENT_COPY_FILE_WRITE,
852 WAIT_EVENT_DATA_FILE_EXTEND,
853 WAIT_EVENT_DATA_FILE_FLUSH,
854 WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC,
855 WAIT_EVENT_DATA_FILE_PREFETCH,
856 WAIT_EVENT_DATA_FILE_READ,
857 WAIT_EVENT_DATA_FILE_SYNC,
858 WAIT_EVENT_DATA_FILE_TRUNCATE,
859 WAIT_EVENT_DATA_FILE_WRITE,
860 WAIT_EVENT_DSM_FILL_ZERO_WRITE,
861 WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ,
862 WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC,
863 WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE,
864 WAIT_EVENT_LOCK_FILE_CREATE_READ,
865 WAIT_EVENT_LOCK_FILE_CREATE_SYNC,
866 WAIT_EVENT_LOCK_FILE_CREATE_WRITE,
867 WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ,
868 WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC,
869 WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC,
870 WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE,
871 WAIT_EVENT_LOGICAL_REWRITE_SYNC,
872 WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE,
873 WAIT_EVENT_LOGICAL_REWRITE_WRITE,
874 WAIT_EVENT_RELATION_MAP_READ,
875 WAIT_EVENT_RELATION_MAP_SYNC,
876 WAIT_EVENT_RELATION_MAP_WRITE,
877 WAIT_EVENT_REORDER_BUFFER_READ,
878 WAIT_EVENT_REORDER_BUFFER_WRITE,
879 WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ,
880 WAIT_EVENT_REPLICATION_SLOT_READ,
881 WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC,
882 WAIT_EVENT_REPLICATION_SLOT_SYNC,
883 WAIT_EVENT_REPLICATION_SLOT_WRITE,
884 WAIT_EVENT_SLRU_FLUSH_SYNC,
885 WAIT_EVENT_SLRU_READ,
886 WAIT_EVENT_SLRU_SYNC,
887 WAIT_EVENT_SLRU_WRITE,
888 WAIT_EVENT_SNAPBUILD_READ,
889 WAIT_EVENT_SNAPBUILD_SYNC,
890 WAIT_EVENT_SNAPBUILD_WRITE,
891 WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC,
892 WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE,
893 WAIT_EVENT_TIMELINE_HISTORY_READ,
894 WAIT_EVENT_TIMELINE_HISTORY_SYNC,
895 WAIT_EVENT_TIMELINE_HISTORY_WRITE,
896 WAIT_EVENT_TWOPHASE_FILE_READ,
897 WAIT_EVENT_TWOPHASE_FILE_SYNC,
898 WAIT_EVENT_TWOPHASE_FILE_WRITE,
899 WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ,
900 WAIT_EVENT_WAL_BOOTSTRAP_SYNC,
901 WAIT_EVENT_WAL_BOOTSTRAP_WRITE,
902 WAIT_EVENT_WAL_COPY_READ,
903 WAIT_EVENT_WAL_COPY_SYNC,
904 WAIT_EVENT_WAL_COPY_WRITE,
905 WAIT_EVENT_WAL_INIT_SYNC,
906 WAIT_EVENT_WAL_INIT_WRITE,
908 WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN,
913 * Command type for progress reporting purposes
916 typedef enum ProgressCommandType
918 PROGRESS_COMMAND_INVALID,
919 PROGRESS_COMMAND_VACUUM
920 } ProgressCommandType;
/* Number of slots in PgBackendStatus.st_progress_param[]. */
922 #define PGSTAT_NUM_PROGRESS_PARAM 10
925 * Shared-memory data structures
933 * For each backend, we keep the SSL status in a separate struct, that
934 * is only filled in if SSL is enabled.
936 typedef struct PgBackendSSLStatus
938 /* Information about SSL connection */
940 bool ssl_compression;
941 char ssl_version[NAMEDATALEN]; /* MUST be null-terminated */
942 char ssl_cipher[NAMEDATALEN]; /* MUST be null-terminated */
943 char ssl_clientdn[NAMEDATALEN]; /* MUST be null-terminated */
944 } PgBackendSSLStatus;
950 * Each live backend maintains a PgBackendStatus struct in shared memory
951 * showing its current activity. (The structs are allocated according to
952 * BackendId, but that is not critical.) Note that the collector process
953 * has no involvement in, or even access to, these structs.
955 * Each auxiliary process also maintains a PgBackendStatus struct in shared
959 typedef struct PgBackendStatus
962 * To avoid locking overhead, we use the following protocol: a backend
963 * increments st_changecount before modifying its entry, and again after
964 * finishing a modification. A would-be reader should note the value of
965 * st_changecount, copy the entry into private memory, then check
966 * st_changecount again. If the value hasn't changed, and if it's even,
967 * the copy is valid; otherwise start over. This makes updates cheap
968 * while reads are potentially expensive, but that's the tradeoff we want.
970 * The above protocol needs memory barriers to ensure that the
971 * apparent order of execution is as intended. Otherwise, for example,
972 * the CPU might rearrange the code so that st_changecount is incremented
973 * twice before the modification on a machine with weak memory ordering.
974 * This surprising result can lead to bugs.
978 /* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
981 /* Type of backend */
982 BackendType st_backendType;
984 /* Times when current backend, transaction, and activity started */
985 TimestampTz st_proc_start_timestamp;
986 TimestampTz st_xact_start_timestamp;
987 TimestampTz st_activity_start_timestamp;
988 TimestampTz st_state_start_timestamp;
990 /* Database OID, owning user's OID, connection client address */
993 SockAddr st_clientaddr;
994 char *st_clienthostname; /* MUST be null-terminated */
996 /* Information about SSL connection */
998 PgBackendSSLStatus *st_sslstatus;
1001 BackendState st_state;
1003 /* application name; MUST be null-terminated */
1007 * Current command string; MUST be null-terminated. Note that this string
1008 * may be truncated in the middle of a multi-byte character. As
1009 * activity strings are stored more frequently than read, that allows us to
1010 * move the cost of correct truncation to the display side. Use
1011 * pgstat_clip_activity() to truncate correctly.
1013 char *st_activity_raw;
1016 * Command progress reporting. Any command which wishes can advertise
1017 * that it is running by setting st_progress_command,
1018 * st_progress_command_target, and st_progress_param[].
1019 * st_progress_command_target should be the OID of the relation which the
1020 * command targets (we assume there's just one, as this is meant for
1021 * utility commands), but the meaning of each element in the
1022 * st_progress_param array is command-specific.
1024 ProgressCommandType st_progress_command;
1025 Oid st_progress_command_target;
1026 int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
1030 * Macros to load and store st_changecount with the memory barriers.
1032 * pgstat_increment_changecount_before() and
1033 * pgstat_increment_changecount_after() need to be called before and after
1034 * PgBackendStatus entries are modified, respectively. This makes sure that
1035 * st_changecount is incremented around the modification.
1037 * Also pgstat_save_changecount_before() and pgstat_save_changecount_after()
1038 * need to be called before and after PgBackendStatus entries are copied into
1039 * private memory, respectively.
1041 #define pgstat_increment_changecount_before(beentry) \
1043 beentry->st_changecount++; \
1044 pg_write_barrier(); \
1047 #define pgstat_increment_changecount_after(beentry) \
1049 pg_write_barrier(); \
1050 beentry->st_changecount++; \
1051 Assert((beentry->st_changecount & 1) == 0); \
1054 #define pgstat_save_changecount_before(beentry, save_changecount) \
1056 save_changecount = beentry->st_changecount; \
1057 pg_read_barrier(); \
1060 #define pgstat_save_changecount_after(beentry, save_changecount) \
1062 pg_read_barrier(); \
1063 save_changecount = beentry->st_changecount; \
1067 * LocalPgBackendStatus
1069 * When we build the backend status array, we use LocalPgBackendStatus to be
1070 * able to add new values to the struct when needed without adding new fields
1071 * to the shared memory. It contains the backend status as a first member.
1074 typedef struct LocalPgBackendStatus
1077 * Local version of the backend status entry.
1079 PgBackendStatus backendStatus;
1082 * The xid of the current transaction if available, InvalidTransactionId
1085 TransactionId backend_xid;
1088 * The xmin of the current session if available, InvalidTransactionId if
1091 TransactionId backend_xmin;
1092 } LocalPgBackendStatus;
1095 * Working state needed to accumulate per-function-call timing statistics.
1097 typedef struct PgStat_FunctionCallUsage
1099 /* Link to function's hashtable entry (must still be there at exit!) */
1100 /* NULL means we are not tracking the current function call */
1101 PgStat_FunctionCounts *fs;
1102 /* Total time previously charged to function, as of function start */
1103 instr_time save_f_total_time;
1104 /* Backend-wide total time as of function start */
1105 instr_time save_total;
1106 /* system clock as of function start */
1108 } PgStat_FunctionCallUsage;
/*
 * Settings and path variables defined outside this header.
 * pgstat_track_activities is tested by pgstat_report_wait_start() and
 * pgstat_report_wait_end() before they touch MyProc.
 * NOTE(review): pgstat_track_functions presumably holds a
 * TrackFunctionsLevel value (it is declared int here), and the three char *
 * variables presumably hold the active stats directory and file names --
 * confirm where they are defined.
 */
1115 extern bool pgstat_track_activities;
1116 extern bool pgstat_track_counts;
1117 extern int pgstat_track_functions;
1118 extern PGDLLIMPORT int pgstat_track_activity_query_size;
1119 extern char *pgstat_stat_directory;
1120 extern char *pgstat_stat_tmpname;
1121 extern char *pgstat_stat_filename;
1124 * BgWriter statistics counters are updated directly by bgwriter and bufmgr
1126 extern PgStat_MsgBgWriter BgWriterStats;
1129 * Updated by pgstat_count_buffer_*_time macros
1131 extern PgStat_Counter pgStatBlockReadTime;
1132 extern PgStat_Counter pgStatBlockWriteTime;
1135 * Functions called from postmaster
/*
 * Postmaster-side entry points.
 * NOTE(review): judging by names and signatures only, the first two size
 * and create the shared backend-status area, and the rest manage the
 * collector process.  What pgstat_start() returns (declared int) is not
 * visible here -- confirm in pgstat.c.
 */
1138 extern Size BackendStatusShmemSize(void);
1139 extern void CreateSharedBackendStatus(void);
1141 extern void pgstat_init(void);
1142 extern int pgstat_start(void);
1143 extern void pgstat_reset_all(void);
1144 extern void allow_immediate_pgstat_restart(void);
/* Declared pg_attribute_noreturn: control never comes back to the caller. */
1147 extern void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
1152 * Functions called from backends
/*
 * Backend-side reporting and reset requests.
 * NOTE(review): each of these presumably results in one of the PgStat_Msg*
 * messages defined earlier being sent to the collector -- confirm in
 * pgstat.c.
 */
1155 extern void pgstat_ping(void);
1157 extern void pgstat_report_stat(bool force);
1158 extern void pgstat_vacuum_stat(void);
1159 extern void pgstat_drop_database(Oid databaseid);
1161 extern void pgstat_clear_snapshot(void);
1162 extern void pgstat_reset_counters(void);
/*
 * NOTE(review): the parameter below is unnamed, so the prototype does not
 * say what string is expected.  It presumably names a
 * PgStat_Shared_Reset_Target; consider naming the parameter.
 */
1163 extern void pgstat_reset_shared_counters(const char *);
1164 extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
1166 extern void pgstat_report_autovac(Oid dboid);
1167 extern void pgstat_report_vacuum(Oid tableoid, bool shared,
1168 PgStat_Counter livetuples, PgStat_Counter deadtuples);
1169 extern void pgstat_report_analyze(Relation rel,
1170 PgStat_Counter livetuples, PgStat_Counter deadtuples,
/*
 * Further backend-side entry points: event reports, backend status setup
 * and updates, and accessors that return strings for a wait_event_info
 * value or a backend's activity.
 * NOTE(review): the storage and lifetime of the returned const char *
 * strings are not visible in this header -- confirm before holding on to
 * them.
 */
1173 extern void pgstat_report_recovery_conflict(int reason);
1174 extern void pgstat_report_deadlock(void);
1176 extern void pgstat_initialize(void);
1177 extern void pgstat_bestart(void);
1179 extern void pgstat_report_activity(BackendState state, const char *cmd_str);
1180 extern void pgstat_report_tempfile(size_t filesize);
1181 extern void pgstat_report_appname(const char *appname);
1182 extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
/* Both take the same packed 32-bit value stored in PGPROC.wait_event_info. */
1183 extern const char *pgstat_get_wait_event(uint32 wait_event_info);
1184 extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
1185 extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
1186 extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
1188 extern const char *pgstat_get_backend_desc(BackendType backendType);
1190 extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
1192 extern void pgstat_progress_update_param(int index, int64 val);
1193 extern void pgstat_progress_update_multi_param(int nparam, const int *index,
1195 extern void pgstat_progress_end_command(void);
1197 extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
1198 extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
1200 extern void pgstat_initstats(Relation rel);
1202 extern char *pgstat_clip_activity(const char *raw_activity);
1205 * pgstat_report_wait_start() -
1207 * Called from places where server process needs to wait. This is called
1208 * to report wait event information. The wait information is stored
1209 * as 4 bytes, where the first byte represents the wait event class (type of
1210 * wait; for the different types of wait, refer to WaitClass) and the next
1211 * 3 bytes represent the actual wait event. Currently 2 bytes are used
1212 * for the wait event, which is sufficient for current usage; 1 byte is
1213 * reserved for future usage.
1215 * NB: this *must* be able to survive being called before MyProc has been
1220 pgstat_report_wait_start(uint32 wait_event_info)
1222 volatile PGPROC *proc = MyProc;
1224 if (!pgstat_track_activities || !proc)
1228 * Since this is a four-byte field which is always read and written as
1229 * four bytes, updates are atomic.
1231 proc->wait_event_info = wait_event_info;
1235 * pgstat_report_wait_end() -
1237 * Called to report end of a wait.
1239 * NB: this *must* be able to survive being called before MyProc has been
1244 pgstat_report_wait_end(void)
1246 volatile PGPROC *proc = MyProc;
1248 if (!pgstat_track_activities || !proc)
1252 * Since this is a four-byte field which is always read and written as
1253 * four bytes, updates are atomic.
1255 proc->wait_event_info = 0;
1258 /* nontransactional event counts are simple enough to inline */
1260 #define pgstat_count_heap_scan(rel) \
1262 if ((rel)->pgstat_info != NULL) \
1263 (rel)->pgstat_info->t_counts.t_numscans++; \
1265 #define pgstat_count_heap_getnext(rel) \
1267 if ((rel)->pgstat_info != NULL) \
1268 (rel)->pgstat_info->t_counts.t_tuples_returned++; \
1270 #define pgstat_count_heap_fetch(rel) \
1272 if ((rel)->pgstat_info != NULL) \
1273 (rel)->pgstat_info->t_counts.t_tuples_fetched++; \
1275 #define pgstat_count_index_scan(rel) \
1277 if ((rel)->pgstat_info != NULL) \
1278 (rel)->pgstat_info->t_counts.t_numscans++; \
1280 #define pgstat_count_index_tuples(rel, n) \
1282 if ((rel)->pgstat_info != NULL) \
1283 (rel)->pgstat_info->t_counts.t_tuples_returned += (n); \
1285 #define pgstat_count_buffer_read(rel) \
1287 if ((rel)->pgstat_info != NULL) \
1288 (rel)->pgstat_info->t_counts.t_blocks_fetched++; \
1290 #define pgstat_count_buffer_hit(rel) \
1292 if ((rel)->pgstat_info != NULL) \
1293 (rel)->pgstat_info->t_counts.t_blocks_hit++; \
/*
 * Add n to the backend-wide accumulators pgStatBlockReadTime and
 * pgStatBlockWriteTime.  Each macro expands to one parenthesized +=
 * expression, so n is evaluated exactly once.
 * NOTE(review): the corresponding message fields (m_block_read_time,
 * m_block_write_time) are commented as microseconds, so callers presumably
 * pass microseconds -- confirm.
 */
1295 #define pgstat_count_buffer_read_time(n) \
1296 (pgStatBlockReadTime += (n))
1297 #define pgstat_count_buffer_write_time(n) \
1298 (pgStatBlockWriteTime += (n))
/*
 * Out-of-line counting functions for heap insert/update/delete/truncate.
 * NOTE(review): presumably these are functions rather than inline macros
 * because the counts they touch are tracked per (sub)transaction (see
 * PgStat_TableXactStatus) -- confirm in pgstat.c.
 */
1300 extern void pgstat_count_heap_insert(Relation rel, PgStat_Counter n);
1301 extern void pgstat_count_heap_update(Relation rel, bool hot);
1302 extern void pgstat_count_heap_delete(Relation rel);
1303 extern void pgstat_count_truncate(Relation rel);
1304 extern void pgstat_update_heap_dead_tuples(Relation rel, int delta);
/*
 * Begins per-call function timing; the caller supplies the
 * PgStat_FunctionCallUsage working state that is filled in.
 */
1306 extern void pgstat_init_function_usage(FunctionCallInfoData *fcinfo,
1307 PgStat_FunctionCallUsage *fcu);
1308 extern void pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu,
/*
 * Transaction-lifecycle hooks.  isCommit distinguishes commit from abort;
 * nestDepth is the subtransaction nesting level being ended.
 * NOTE(review): presumably these fold the PgStat_TableXactStatus stacks
 * into their parents as described for PgStat_TableStatus -- confirm.
 */
1311 extern void AtEOXact_PgStat(bool isCommit);
1312 extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);
1314 extern void AtPrepare_PgStat(void);
1315 extern void PostPrepare_PgStat(void);
/*
 * Two-phase callbacks: both share one signature and receive an opaque
 * record (recdata, len bytes) for the prepared transaction xid.
 */
1317 extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
1318 void *recdata, uint32 len);
1319 extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
1320 void *recdata, uint32 len);
/*
 * Senders for the archiver and bgwriter statistics messages
 * (PgStat_MsgArchiver / PgStat_MsgBgWriter).
 */
1322 extern void pgstat_send_archiver(const char *xlog, bool failed);
1323 extern void pgstat_send_bgwriter(void);
1326 * Support functions for the SQL-callable functions to
1327 * generate the pgstat* views.
/*
 * Lookup functions returning pointers to the statistics structs declared in
 * this header, keyed by database, relation or function OID, or by backend
 * index (beid).
 * NOTE(review): whether NULL is returned for a missing entry, and who owns
 * the returned memory, is not visible in this header -- confirm in pgstat.c
 * before relying on either.
 */
1330 extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
1331 extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
1332 extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
1333 extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid);
1334 extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
1335 extern int pgstat_fetch_stat_numbackends(void);
1336 extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
1337 extern PgStat_GlobalStats *pgstat_fetch_global(void);
1339 #endif /* PGSTAT_H */