4 * Definitions for the PostgreSQL statistics collector daemon.
6 * Copyright (c) 2001-2011, PostgreSQL Global Development Group
14 #include "libpq/pqcomm.h"
15 #include "portability/instr_time.h"
16 #include "utils/hsearch.h"
17 #include "utils/relcache.h"
18 #include "utils/timestamp.h"
21 /* Values for track_functions GUC variable --- order is significant! */
22 typedef enum TrackFunctionsLevel
27 } TrackFunctionsLevel;
30 * The types of backend -> collector messages
33 typedef enum StatMsgType
38 PGSTAT_MTYPE_TABPURGE,
40 PGSTAT_MTYPE_RESETCOUNTER,
41 PGSTAT_MTYPE_RESETSHAREDCOUNTER,
42 PGSTAT_MTYPE_RESETSINGLECOUNTER,
43 PGSTAT_MTYPE_AUTOVAC_START,
46 PGSTAT_MTYPE_BGWRITER,
47 PGSTAT_MTYPE_FUNCSTAT,
48 PGSTAT_MTYPE_FUNCPURGE,
49 PGSTAT_MTYPE_RECOVERYCONFLICT
53 * The data type used for counters.
56 typedef int64 PgStat_Counter;
59 * PgStat_TableCounts The actual per-table counts kept by a backend
61 * This struct should contain only actual event counters, because we memcmp
62 * it against zeroes to detect whether there are any counts to transmit.
63 * It is a component of PgStat_TableStatus (within-backend state) and
64 * PgStat_TableEntry (the transmitted message format).
66 * Note: for a table, tuples_returned is the number of tuples successfully
67 * fetched by heap_getnext, while tuples_fetched is the number of tuples
68 * successfully fetched by heap_fetch under the control of bitmap indexscans.
69 * For an index, tuples_returned is the number of index entries returned by
70 * the index AM, while tuples_fetched is the number of tuples successfully
71 * fetched by heap_fetch under the control of simple indexscans for this index.
73 * tuples_inserted/updated/deleted/hot_updated count attempted actions,
74 * regardless of whether the transaction committed. delta_live_tuples,
75 * delta_dead_tuples, and changed_tuples are set depending on commit or abort.
76 * Note that delta_live_tuples and delta_dead_tuples can be negative!
79 typedef struct PgStat_TableCounts
81 PgStat_Counter t_numscans;
83 PgStat_Counter t_tuples_returned;
84 PgStat_Counter t_tuples_fetched;
86 PgStat_Counter t_tuples_inserted;
87 PgStat_Counter t_tuples_updated;
88 PgStat_Counter t_tuples_deleted;
89 PgStat_Counter t_tuples_hot_updated;
91 PgStat_Counter t_delta_live_tuples;
92 PgStat_Counter t_delta_dead_tuples;
93 PgStat_Counter t_changed_tuples;
95 PgStat_Counter t_blocks_fetched;
96 PgStat_Counter t_blocks_hit;
99 /* Possible targets for resetting cluster-wide shared values */
100 typedef enum PgStat_Shared_Reset_Target
103 } PgStat_Shared_Reset_Target;
105 /* Possible object types for resetting single counters */
106 typedef enum PgStat_Single_Reset_Type
110 } PgStat_Single_Reset_Type;
112 /* ------------------------------------------------------------
113 * Structures kept in backend local memory while accumulating counts
114 * ------------------------------------------------------------
119 * PgStat_TableStatus Per-table status within a backend
121 * Many of the event counters are nontransactional, i.e., we count events
122 * in committed and aborted transactions alike. For these, we just count
123 * directly in the PgStat_TableStatus. However, delta_live_tuples,
124 * delta_dead_tuples, and changed_tuples must be derived from event counts
125 * with awareness of whether the transaction or subtransaction committed or
126 * aborted. Hence, we also keep a stack of per-(sub)transaction status
127 * records for every table modified in the current transaction. At commit
128 * or abort, we propagate tuples_inserted/updated/deleted up to the
129 * parent subtransaction level, or out to the parent PgStat_TableStatus,
133 typedef struct PgStat_TableStatus
135 Oid t_id; /* table's OID */
136 bool t_shared; /* is it a shared catalog? */
137 struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */
138 PgStat_TableCounts t_counts; /* event counts to be sent */
139 } PgStat_TableStatus;
142 * PgStat_TableXactStatus Per-table, per-subtransaction status
145 typedef struct PgStat_TableXactStatus
147 PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */
148 PgStat_Counter tuples_updated; /* tuples updated in (sub)xact */
149 PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */
150 int nest_level; /* subtransaction nest level */
151 /* links to other structs for same relation: */
152 struct PgStat_TableXactStatus *upper; /* next higher subxact if any */
153 PgStat_TableStatus *parent; /* per-table status */
154 /* structs of same subxact level are linked here: */
155 struct PgStat_TableXactStatus *next; /* next of same subxact */
156 } PgStat_TableXactStatus;
159 /* ------------------------------------------------------------
160 * Message formats follow
161 * ------------------------------------------------------------
166 * PgStat_MsgHdr The common message header
169 typedef struct PgStat_MsgHdr
176 * Space available in a message. This will keep the UDP packets below 1K,
177 * which should fit unfragmented into the MTU of the lo interface on most
178 * platforms. Does anybody care for platforms where it doesn't?
181 #define PGSTAT_MSG_PAYLOAD (1000 - sizeof(PgStat_MsgHdr))
185 * PgStat_MsgDummy A dummy message, ignored by the collector
188 typedef struct PgStat_MsgDummy
195 * PgStat_MsgInquiry Sent by a backend to ask the collector
196 * to write the stats file.
200 typedef struct PgStat_MsgInquiry
203 TimestampTz inquiry_time; /* minimum acceptable file timestamp */
208 * PgStat_TableEntry Per-table info in a MsgTabstat
211 typedef struct PgStat_TableEntry
214 PgStat_TableCounts t_counts;
218 * PgStat_MsgTabstat Sent by the backend to report table
219 * and buffer access statistics.
222 #define PGSTAT_NUM_TABENTRIES \
223 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int)) \
224 / sizeof(PgStat_TableEntry))
226 typedef struct PgStat_MsgTabstat
233 PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
238 * PgStat_MsgTabpurge Sent by the backend to tell the collector
242 #define PGSTAT_NUM_TABPURGE \
243 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
246 typedef struct PgStat_MsgTabpurge
251 Oid m_tableid[PGSTAT_NUM_TABPURGE];
252 } PgStat_MsgTabpurge;
256 * PgStat_MsgDropdb Sent by the backend to tell the collector
257 * about a dropped database
260 typedef struct PgStat_MsgDropdb
268 * PgStat_MsgResetcounter Sent by the backend to tell the collector
272 typedef struct PgStat_MsgResetcounter
276 } PgStat_MsgResetcounter;
279 * PgStat_MsgResetsharedcounter Sent by the backend to tell the collector
280 * to reset a shared counter
283 typedef struct PgStat_MsgResetsharedcounter
286 PgStat_Shared_Reset_Target m_resettarget;
287 } PgStat_MsgResetsharedcounter;
290 * PgStat_MsgResetsinglecounter Sent by the backend to tell the collector
291 * to reset a single counter
294 typedef struct PgStat_MsgResetsinglecounter
298 PgStat_Single_Reset_Type m_resettype;
300 } PgStat_MsgResetsinglecounter;
303 * PgStat_MsgAutovacStart Sent by the autovacuum daemon to signal
304 * that a database is going to be processed
307 typedef struct PgStat_MsgAutovacStart
311 TimestampTz m_start_time;
312 } PgStat_MsgAutovacStart;
316 * PgStat_MsgVacuum Sent by the backend or autovacuum daemon
320 typedef struct PgStat_MsgVacuum
326 TimestampTz m_vacuumtime;
327 PgStat_Counter m_tuples;
332 * PgStat_MsgAnalyze Sent by the backend or autovacuum daemon
336 typedef struct PgStat_MsgAnalyze
342 TimestampTz m_analyzetime;
343 PgStat_Counter m_live_tuples;
344 PgStat_Counter m_dead_tuples;
349 * PgStat_MsgBgWriter Sent by the bgwriter to update statistics.
352 typedef struct PgStat_MsgBgWriter
356 PgStat_Counter m_timed_checkpoints;
357 PgStat_Counter m_requested_checkpoints;
358 PgStat_Counter m_buf_written_checkpoints;
359 PgStat_Counter m_buf_written_clean;
360 PgStat_Counter m_maxwritten_clean;
361 PgStat_Counter m_buf_written_backend;
362 PgStat_Counter m_buf_fsync_backend;
363 PgStat_Counter m_buf_alloc;
364 } PgStat_MsgBgWriter;
367 * PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict
370 typedef struct PgStat_MsgRecoveryConflict
376 } PgStat_MsgRecoveryConflict;
379 * PgStat_FunctionCounts The actual per-function counts kept by a backend
381 * This struct should contain only actual event counters, because we memcmp
382 * it against zeroes to detect whether there are any counts to transmit.
384 * Note that the time counters are in instr_time format here. We convert to
385 * microseconds in PgStat_Counter format when transmitting to the collector.
388 typedef struct PgStat_FunctionCounts
390 PgStat_Counter f_numcalls;
392 instr_time f_time_self;
393 } PgStat_FunctionCounts;
396 * PgStat_BackendFunctionEntry Entry in backend's per-function hash table
399 typedef struct PgStat_BackendFunctionEntry
402 PgStat_FunctionCounts f_counts;
403 } PgStat_BackendFunctionEntry;
406 * PgStat_FunctionEntry Per-function info in a MsgFuncstat
409 typedef struct PgStat_FunctionEntry
412 PgStat_Counter f_numcalls;
413 PgStat_Counter f_time; /* times in microseconds */
414 PgStat_Counter f_time_self;
415 } PgStat_FunctionEntry;
418 * PgStat_MsgFuncstat Sent by the backend to report function
422 #define PGSTAT_NUM_FUNCENTRIES \
423 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
424 / sizeof(PgStat_FunctionEntry))
426 typedef struct PgStat_MsgFuncstat
431 PgStat_FunctionEntry m_entry[PGSTAT_NUM_FUNCENTRIES];
432 } PgStat_MsgFuncstat;
435 * PgStat_MsgFuncpurge Sent by the backend to tell the collector
436 * about dead functions.
439 #define PGSTAT_NUM_FUNCPURGE \
440 ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int)) \
443 typedef struct PgStat_MsgFuncpurge
448 Oid m_functionid[PGSTAT_NUM_FUNCPURGE];
449 } PgStat_MsgFuncpurge;
453 * PgStat_Msg Union over all possible messages.
456 typedef union PgStat_Msg
458 PgStat_MsgHdr msg_hdr;
459 PgStat_MsgDummy msg_dummy;
460 PgStat_MsgInquiry msg_inquiry;
461 PgStat_MsgTabstat msg_tabstat;
462 PgStat_MsgTabpurge msg_tabpurge;
463 PgStat_MsgDropdb msg_dropdb;
464 PgStat_MsgResetcounter msg_resetcounter;
465 PgStat_MsgResetsharedcounter msg_resetsharedcounter;
466 PgStat_MsgResetsinglecounter msg_resetsinglecounter;
467 PgStat_MsgAutovacStart msg_autovacuum;
468 PgStat_MsgVacuum msg_vacuum;
469 PgStat_MsgAnalyze msg_analyze;
470 PgStat_MsgBgWriter msg_bgwriter;
471 PgStat_MsgFuncstat msg_funcstat;
472 PgStat_MsgFuncpurge msg_funcpurge;
473 PgStat_MsgRecoveryConflict msg_recoveryconflict;
477 /* ------------------------------------------------------------
478 * Statistic collector data structures follow
480 * PGSTAT_FILE_FORMAT_ID should be changed whenever any of these
481 * data structures change.
482 * ------------------------------------------------------------
485 #define PGSTAT_FILE_FORMAT_ID 0x01A5BC99
488 * PgStat_StatDBEntry The collector's data per database
491 typedef struct PgStat_StatDBEntry
494 PgStat_Counter n_xact_commit;
495 PgStat_Counter n_xact_rollback;
496 PgStat_Counter n_blocks_fetched;
497 PgStat_Counter n_blocks_hit;
498 PgStat_Counter n_tuples_returned;
499 PgStat_Counter n_tuples_fetched;
500 PgStat_Counter n_tuples_inserted;
501 PgStat_Counter n_tuples_updated;
502 PgStat_Counter n_tuples_deleted;
503 TimestampTz last_autovac_time;
504 PgStat_Counter n_conflict_tablespace;
505 PgStat_Counter n_conflict_lock;
506 PgStat_Counter n_conflict_snapshot;
507 PgStat_Counter n_conflict_bufferpin;
508 PgStat_Counter n_conflict_startup_deadlock;
509 TimestampTz stat_reset_timestamp;
513 * tables and functions must be last in the struct, because we don't write
514 * the pointers out to the stats file.
518 } PgStat_StatDBEntry;
522 * PgStat_StatTabEntry The collector's data per table (or index)
525 typedef struct PgStat_StatTabEntry
529 PgStat_Counter numscans;
531 PgStat_Counter tuples_returned;
532 PgStat_Counter tuples_fetched;
534 PgStat_Counter tuples_inserted;
535 PgStat_Counter tuples_updated;
536 PgStat_Counter tuples_deleted;
537 PgStat_Counter tuples_hot_updated;
539 PgStat_Counter n_live_tuples;
540 PgStat_Counter n_dead_tuples;
541 PgStat_Counter changes_since_analyze;
543 PgStat_Counter blocks_fetched;
544 PgStat_Counter blocks_hit;
546 TimestampTz vacuum_timestamp; /* user initiated vacuum */
547 PgStat_Counter vacuum_count;
548 TimestampTz autovac_vacuum_timestamp; /* autovacuum initiated */
549 PgStat_Counter autovac_vacuum_count;
550 TimestampTz analyze_timestamp; /* user initiated */
551 PgStat_Counter analyze_count;
552 TimestampTz autovac_analyze_timestamp; /* autovacuum initiated */
553 PgStat_Counter autovac_analyze_count;
554 } PgStat_StatTabEntry;
558 * PgStat_StatFuncEntry The collector's data per function
561 typedef struct PgStat_StatFuncEntry
565 PgStat_Counter f_numcalls;
567 PgStat_Counter f_time; /* times in microseconds */
568 PgStat_Counter f_time_self;
569 } PgStat_StatFuncEntry;
573 * Global statistics kept in the stats collector
575 typedef struct PgStat_GlobalStats
577 TimestampTz stats_timestamp; /* time of stats file update */
578 PgStat_Counter timed_checkpoints;
579 PgStat_Counter requested_checkpoints;
580 PgStat_Counter buf_written_checkpoints;
581 PgStat_Counter buf_written_clean;
582 PgStat_Counter maxwritten_clean;
583 PgStat_Counter buf_written_backend;
584 PgStat_Counter buf_fsync_backend;
585 PgStat_Counter buf_alloc;
586 TimestampTz stat_reset_timestamp;
587 } PgStat_GlobalStats;
591 * Shared-memory data structures
598 * Each live backend maintains a PgBackendStatus struct in shared memory
599 * showing its current activity. (The structs are allocated according to
600 * BackendId, but that is not critical.) Note that the collector process
601 * has no involvement in, or even access to, these structs.
604 typedef struct PgBackendStatus
607 * To avoid locking overhead, we use the following protocol: a backend
608 * increments st_changecount before modifying its entry, and again after
609 * finishing a modification. A would-be reader should note the value of
610 * st_changecount, copy the entry into private memory, then check
611 * st_changecount again. If the value hasn't changed, and if it's even,
612 * the copy is valid; otherwise start over. This makes updates cheap
613 * while reads are potentially expensive, but that's the tradeoff we want.
617 /* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
620 /* Times when current backend, transaction, and activity started */
621 TimestampTz st_proc_start_timestamp;
622 TimestampTz st_xact_start_timestamp;
623 TimestampTz st_activity_start_timestamp;
625 /* Database OID, owning user's OID, connection client address */
628 SockAddr st_clientaddr;
629 char *st_clienthostname; /* MUST be null-terminated */
631 /* Is backend currently waiting on an lmgr lock? */
634 /* application name; MUST be null-terminated */
637 /* current command string; MUST be null-terminated */
642 * Working state needed to accumulate per-function-call timing statistics.
644 typedef struct PgStat_FunctionCallUsage
646 /* Link to function's hashtable entry (must still be there at exit!) */
647 /* NULL means we are not tracking the current function call */
648 PgStat_FunctionCounts *fs;
649 /* Total time previously charged to function, as of function start */
650 instr_time save_f_time;
651 /* Backend-wide total time as of function start */
652 instr_time save_total;
653 /* system clock as of function start */
655 } PgStat_FunctionCallUsage;
662 extern bool pgstat_track_activities;
663 extern bool pgstat_track_counts;
664 extern int pgstat_track_functions;
665 extern PGDLLIMPORT int pgstat_track_activity_query_size;
666 extern char *pgstat_stat_tmpname;
667 extern char *pgstat_stat_filename;
670 * BgWriter statistics counters are updated directly by bgwriter and bufmgr
672 extern PgStat_MsgBgWriter BgWriterStats;
675 * Functions called from postmaster
678 extern Size BackendStatusShmemSize(void);
679 extern void CreateSharedBackendStatus(void);
681 extern void pgstat_init(void);
682 extern int pgstat_start(void);
683 extern void pgstat_reset_all(void);
684 extern void allow_immediate_pgstat_restart(void);
687 extern void PgstatCollectorMain(int argc, char *argv[]);
692 * Functions called from backends
695 extern void pgstat_ping(void);
697 extern void pgstat_report_stat(bool force);
698 extern void pgstat_vacuum_stat(void);
699 extern void pgstat_drop_database(Oid databaseid);
701 extern void pgstat_clear_snapshot(void);
702 extern void pgstat_reset_counters(void);
/*
 * Ask for a reset of cluster-wide shared counters.  "target" names what to
 * reset (presumably translated to a PgStat_Shared_Reset_Target by the
 * implementation; confirm there).
 */
extern void pgstat_reset_shared_counters(const char *target);
704 extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
706 extern void pgstat_report_autovac(Oid dboid);
707 extern void pgstat_report_vacuum(Oid tableoid, bool shared,
708 PgStat_Counter tuples);
709 extern void pgstat_report_analyze(Relation rel,
710 PgStat_Counter livetuples, PgStat_Counter deadtuples);
712 extern void pgstat_report_recovery_conflict(int reason);
714 extern void pgstat_initialize(void);
715 extern void pgstat_bestart(void);
717 extern void pgstat_report_activity(const char *cmd_str);
718 extern void pgstat_report_appname(const char *appname);
719 extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
720 extern void pgstat_report_waiting(bool waiting);
721 extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
723 extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
724 extern PgStat_BackendFunctionEntry *find_funcstat_entry(Oid func_id);
726 extern void pgstat_initstats(Relation rel);
/*
 * Nontransactional event counts are simple enough to inline.
 *
 * Each macro updates one counter in (rel)->pgstat_info->t_counts and does
 * nothing when (rel)->pgstat_info is NULL.  The bodies are wrapped in
 * do { ... } while (0) so that an invocation followed by a semicolon is
 * exactly one statement, even as the unbraced arm of an if/else.
 *
 * "rel" is evaluated more than once, so it must not have side effects.
 */
#define pgstat_count_heap_scan(rel)									\
	do {															\
		if ((rel)->pgstat_info != NULL)								\
			(rel)->pgstat_info->t_counts.t_numscans++;				\
	} while (0)

#define pgstat_count_heap_getnext(rel)								\
	do {															\
		if ((rel)->pgstat_info != NULL)								\
			(rel)->pgstat_info->t_counts.t_tuples_returned++;		\
	} while (0)

#define pgstat_count_heap_fetch(rel)								\
	do {															\
		if ((rel)->pgstat_info != NULL)								\
			(rel)->pgstat_info->t_counts.t_tuples_fetched++;		\
	} while (0)

/* index scans are counted in the same t_numscans field as heap scans */
#define pgstat_count_index_scan(rel)								\
	do {															\
		if ((rel)->pgstat_info != NULL)								\
			(rel)->pgstat_info->t_counts.t_numscans++;				\
	} while (0)

/* adds n, rather than 1, to t_tuples_returned */
#define pgstat_count_index_tuples(rel, n)							\
	do {															\
		if ((rel)->pgstat_info != NULL)								\
			(rel)->pgstat_info->t_counts.t_tuples_returned += (n);	\
	} while (0)

#define pgstat_count_buffer_read(rel)								\
	do {															\
		if ((rel)->pgstat_info != NULL)								\
			(rel)->pgstat_info->t_counts.t_blocks_fetched++;		\
	} while (0)

#define pgstat_count_buffer_hit(rel)								\
	do {															\
		if ((rel)->pgstat_info != NULL)								\
			(rel)->pgstat_info->t_counts.t_blocks_hit++;			\
	} while (0)
766 extern void pgstat_count_heap_insert(Relation rel);
767 extern void pgstat_count_heap_update(Relation rel, bool hot);
768 extern void pgstat_count_heap_delete(Relation rel);
769 extern void pgstat_update_heap_dead_tuples(Relation rel, int delta);
771 extern void pgstat_init_function_usage(FunctionCallInfoData *fcinfo,
772 PgStat_FunctionCallUsage *fcu);
773 extern void pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu,
776 extern void AtEOXact_PgStat(bool isCommit);
777 extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);
779 extern void AtPrepare_PgStat(void);
780 extern void PostPrepare_PgStat(void);
782 extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
783 void *recdata, uint32 len);
784 extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
785 void *recdata, uint32 len);
787 extern void pgstat_send_bgwriter(void);
790 * Support functions for the SQL-callable functions to
791 * generate the pgstat* views.
794 extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
795 extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
796 extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);
797 extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
798 extern int pgstat_fetch_stat_numbackends(void);
799 extern PgStat_GlobalStats *pgstat_fetch_global(void);
801 #endif /* PGSTAT_H */