]> granicus.if.org Git - postgresql/blob - src/include/pgstat.h
Fix up pgstats counting of live and dead tuples to recognize that committed
[postgresql] / src / include / pgstat.h
1 /* ----------
2  *      pgstat.h
3  *
4  *      Definitions for the PostgreSQL statistics collector daemon.
5  *
6  *      Copyright (c) 2001-2007, PostgreSQL Global Development Group
7  *
8  *      $PostgreSQL: pgsql/src/include/pgstat.h,v 1.59 2007/05/27 03:50:39 tgl Exp $
9  * ----------
10  */
11 #ifndef PGSTAT_H
12 #define PGSTAT_H
13
14 #include "libpq/pqcomm.h"
15 #include "utils/hsearch.h"
16 #include "utils/rel.h"
17 #include "utils/timestamp.h"
18
19
20 /* ----------
21  * StatMsgType: the types of backend -> collector messages (PgStat_MsgHdr.m_type)
22  * ----------
23  */
24 typedef enum StatMsgType
25 {
26         PGSTAT_MTYPE_DUMMY,             /* see PgStat_MsgDummy */
27         PGSTAT_MTYPE_TABSTAT,           /* see PgStat_MsgTabstat */
28         PGSTAT_MTYPE_TABPURGE,          /* see PgStat_MsgTabpurge */
29         PGSTAT_MTYPE_DROPDB,            /* see PgStat_MsgDropdb */
30         PGSTAT_MTYPE_RESETCOUNTER,      /* see PgStat_MsgResetcounter */
31         PGSTAT_MTYPE_AUTOVAC_START,     /* see PgStat_MsgAutovacStart */
32         PGSTAT_MTYPE_VACUUM,            /* see PgStat_MsgVacuum */
33         PGSTAT_MTYPE_ANALYZE,           /* see PgStat_MsgAnalyze */
34         PGSTAT_MTYPE_BGWRITER           /* see PgStat_MsgBgWriter */
35 } StatMsgType;
36
37 /* ----------
38  * PgStat_Counter: the data type (an int64) used for every statistics counter.
39  * ----------
40  */
41 typedef int64 PgStat_Counter;
42
43 /* ----------
44  * PgStat_TableCounts                   The actual per-table counts kept by a backend
45  *
46  * This struct should contain only actual event counters, because we memcmp
47  * it against zeroes to detect whether there are any counts to transmit.
48  * It is a component of PgStat_TableStatus (within-backend state) and
49  * PgStat_TableEntry (the transmitted message format).
50  *
51  * Note: for a table, tuples_returned is the number of tuples successfully
52  * fetched by heap_getnext, while tuples_fetched is the number of tuples
53  * successfully fetched by heap_fetch under the control of bitmap indexscans.
54  * For an index, tuples_returned is the number of index entries returned by
55  * the index AM, while tuples_fetched is the number of tuples successfully
56  * fetched by heap_fetch under the control of simple indexscans for this index.
57  *
58  * tuples_inserted/tuples_updated/tuples_deleted count attempted actions,
59  * regardless of whether the transaction committed.  new_live_tuples and
60  * new_dead_tuples are properly adjusted depending on commit or abort.
61  * ----------
62  */
63 typedef struct PgStat_TableCounts
64 {
65         PgStat_Counter t_numscans;      /* bumped by pgstat_count_heap_scan/index_scan */
66
67         PgStat_Counter t_tuples_returned;       /* pgstat_count_heap_getnext/index_tuples */
68         PgStat_Counter t_tuples_fetched;        /* pgstat_count_heap_fetch */
69
70         PgStat_Counter t_tuples_inserted;       /* attempted inserts, committed or not */
71         PgStat_Counter t_tuples_updated;        /* attempted updates, committed or not */
72         PgStat_Counter t_tuples_deleted;        /* attempted deletes, committed or not */
73
74         PgStat_Counter t_new_live_tuples;       /* adjusted for commit/abort, see above */
75         PgStat_Counter t_new_dead_tuples;       /* adjusted for commit/abort, see above */
76
77         PgStat_Counter t_blocks_fetched;        /* pgstat_count_buffer_read */
78         PgStat_Counter t_blocks_hit;            /* pgstat_count_buffer_hit */
79 } PgStat_TableCounts;
80
81
82 /* ------------------------------------------------------------
83  * Structures kept in backend local memory while accumulating counts
84  * ------------------------------------------------------------
85  */
86
87
88 /* ----------
89  * PgStat_TableStatus                   Per-table status within a backend
90  *
91  * Most of the event counters are nontransactional, ie, we count events
92  * in committed and aborted transactions alike.  For these, we just count
93  * directly in the PgStat_TableStatus.  However, new_live_tuples and
94  * new_dead_tuples must be derived from tuple insertion and deletion counts
95  * with awareness of whether the transaction or subtransaction committed or
96  * aborted.  Hence, we also keep a stack of per-(sub)transaction status
97  * records (PgStat_TableXactStatus) for every table modified in the current
98  * transaction.  At commit or abort, we propagate tuples_inserted and
99  * tuples_deleted up to the parent subtransaction level, or out to the parent
100  * PgStat_TableStatus, as appropriate.
101  * ----------
102  */
103 typedef struct PgStat_TableStatus
104 {
105         Oid                     t_id;                           /* table's OID */
106         bool            t_shared;                       /* is it a shared catalog? */
107         struct PgStat_TableXactStatus *trans;   /* lowest subxact's counts; ->upper leads to higher levels */
108         PgStat_TableCounts t_counts;    /* event counts to be sent (as PgStat_TableEntry.t_counts) */
109 } PgStat_TableStatus;
110
111 /* ----------
112  * PgStat_TableXactStatus               Per-table, per-subtransaction status: the insert/delete counts propagated upward at (sub)xact end
113  * ----------
114  */
115 typedef struct PgStat_TableXactStatus
116 {
117         PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */
118         PgStat_Counter tuples_deleted;  /* tuples deleted in (sub)xact */
119         int                     nest_level;                     /* subtransaction nest level */
120         /* links to other structs for same relation: */
121         struct PgStat_TableXactStatus *upper;   /* next higher subxact if any */
122         PgStat_TableStatus *parent;                             /* per-table status this record belongs to */
123         /* structs of same subxact level are linked here: */
124         struct PgStat_TableXactStatus *next;    /* next record at the same subxact level */
125 } PgStat_TableXactStatus;
126
127
128 /* ------------------------------------------------------------
129  * Message formats follow
130  * ------------------------------------------------------------
131  */
132
133
134 /* ----------
135  * PgStat_MsgHdr                                The common message header; first member of every PgStat_Msg* struct
136  * ----------
137  */
138 typedef struct PgStat_MsgHdr
139 {
140         StatMsgType m_type;             /* which kind of message this is */
141         int                     m_size;         /* message size; presumably total bytes incl. header (confirm in pgstat.c) */
142 } PgStat_MsgHdr;
143
144 /* ----------
145  * Space available in a message after the common header.  The 1000-byte total
146  * keeps the UDP packets below 1K, which should fit unfragmented into the MTU of
147  * the lo interface on most platforms. Does anybody care for platforms where it doesn't?
148  * ----------
149  */
150 #define PGSTAT_MSG_PAYLOAD      (1000 - sizeof(PgStat_MsgHdr))
151
152
153 /* ----------
154  * PgStat_MsgDummy                              A dummy message, ignored by the collector
155  * ----------
156  */
157 typedef struct PgStat_MsgDummy
158 {
159         PgStat_MsgHdr m_hdr;            /* header only; there is no payload */
160 } PgStat_MsgDummy;
161
162
163 /* ----------
164  * PgStat_TableEntry                    Per-table info in a MsgTabstat (one element of m_entry[])
165  * ----------
166  */
167 typedef struct PgStat_TableEntry
168 {
169         Oid                     t_id;                   /* OID of the table the counts belong to */
170         PgStat_TableCounts t_counts;    /* the event counts being reported */
171 } PgStat_TableEntry;
172
173 /* ----------
174  * PgStat_MsgTabstat                    Sent by the backend to report table and buffer access statistics.
175  *              PGSTAT_NUM_TABENTRIES is how many entries fit in the payload after the Oid and three int fields.
176  * ----------
177  */
178 #define PGSTAT_NUM_TABENTRIES  \
179         ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int))  \
180          / sizeof(PgStat_TableEntry))
181
182 typedef struct PgStat_MsgTabstat
183 {
184         PgStat_MsgHdr m_hdr;
185         Oid                     m_databaseid;           /* database the report is for */
186         int                     m_nentries;                     /* presumably number of m_entry[] slots in use (confirm) */
187         int                     m_xact_commit;          /* committed-transaction count being reported */
188         int                     m_xact_rollback;        /* rolled-back-transaction count being reported */
189         PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
190 } PgStat_MsgTabstat;
191
192
193 /* ----------
194  * PgStat_MsgTabpurge                   Sent by the backend to tell the collector about dead tables.
195  *              PGSTAT_NUM_TABPURGE is how many table OIDs fit in the payload after the Oid and int fields.
196  * ----------
197  */
198 #define PGSTAT_NUM_TABPURGE  \
199         ((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - sizeof(int))  \
200          / sizeof(Oid))
201
202 typedef struct PgStat_MsgTabpurge
203 {
204         PgStat_MsgHdr m_hdr;
205         Oid                     m_databaseid;           /* database the tables belong to */
206         int                     m_nentries;                     /* presumably number of m_tableid[] slots in use (confirm) */
207         Oid                     m_tableid[PGSTAT_NUM_TABPURGE]; /* OIDs of the dead tables */
208 } PgStat_MsgTabpurge;
209
210
211 /* ----------
212  * PgStat_MsgDropdb                             Sent by the backend to tell the collector
213  *                                                              about a dropped database (see pgstat_drop_database)
214  * ----------
215  */
216 typedef struct PgStat_MsgDropdb
217 {
218         PgStat_MsgHdr m_hdr;
219         Oid                     m_databaseid;           /* the dropped database */
220 } PgStat_MsgDropdb;
221
222
223 /* ----------
224  * PgStat_MsgResetcounter               Sent by the backend to tell the collector
225  *                                                              to reset counters (see pgstat_reset_counters)
226  * ----------
227  */
228 typedef struct PgStat_MsgResetcounter
229 {
230         PgStat_MsgHdr m_hdr;
231         Oid                     m_databaseid;           /* presumably the database whose counters are reset (confirm) */
232 } PgStat_MsgResetcounter;
233
234
235 /* ----------
236  * PgStat_MsgAutovacStart               Sent by the autovacuum daemon to signal
237  *                                                              that a database is going to be processed (see pgstat_report_autovac)
238  * ----------
239  */
240 typedef struct PgStat_MsgAutovacStart
241 {
242         PgStat_MsgHdr m_hdr;
243         Oid                     m_databaseid;           /* database about to be processed */
244         TimestampTz m_start_time;       /* presumably when processing starts; cf. PgStat_StatDBEntry.last_autovac_time */
245 } PgStat_MsgAutovacStart;
246
247
248 /* ----------
249  * PgStat_MsgVacuum                             Sent by the backend or autovacuum daemon
250  *                                                              after VACUUM or VACUUM ANALYZE (see pgstat_report_vacuum)
251  * ----------
252  */
253 typedef struct PgStat_MsgVacuum
254 {
255         PgStat_MsgHdr m_hdr;
256         Oid                     m_databaseid;
257         Oid                     m_tableoid;                     /* the vacuumed table */
258         bool            m_analyze;                      /* was it VACUUM ANALYZE? */
259         bool            m_autovacuum;           /* sent by the autovacuum daemon rather than a backend? */
260         TimestampTz m_vacuumtime;
261         PgStat_Counter m_tuples;        /* presumably the "tuples" argument of pgstat_report_vacuum (confirm) */
262 } PgStat_MsgVacuum;
263
264
265 /* ----------
266  * PgStat_MsgAnalyze                    Sent by the backend or autovacuum daemon
267  *                                                              after ANALYZE (see pgstat_report_analyze)
268  * ----------
269  */
270 typedef struct PgStat_MsgAnalyze
271 {
272         PgStat_MsgHdr m_hdr;
273         Oid                     m_databaseid;
274         Oid                     m_tableoid;                     /* the analyzed table */
275         bool            m_autovacuum;           /* sent by the autovacuum daemon rather than a backend? */
276         TimestampTz m_analyzetime;
277         PgStat_Counter m_live_tuples;   /* presumably the "livetuples" argument of pgstat_report_analyze */
278         PgStat_Counter m_dead_tuples;   /* presumably the "deadtuples" argument of pgstat_report_analyze */
279 } PgStat_MsgAnalyze;
280
281
282 /* ----------
283  * PgStat_MsgBgWriter           Sent by the bgwriter to update statistics; counters parallel PgStat_GlobalStats.
284  * ----------
285  */
286 typedef struct PgStat_MsgBgWriter
287 {
288         PgStat_MsgHdr m_hdr;
289
290         PgStat_Counter  m_timed_checkpoints;            /* cf. PgStat_GlobalStats.timed_checkpoints */
291         PgStat_Counter  m_requested_checkpoints;        /* cf. requested_checkpoints */
292         PgStat_Counter  m_buf_written_checkpoints;      /* cf. buf_written_checkpoints */
293         PgStat_Counter  m_buf_written_lru;              /* cf. buf_written_lru */
294         PgStat_Counter  m_buf_written_all;              /* cf. buf_written_all */
295         PgStat_Counter  m_maxwritten_lru;               /* cf. maxwritten_lru */
296         PgStat_Counter  m_maxwritten_all;               /* cf. maxwritten_all */
297 } PgStat_MsgBgWriter;
298
299
300 /* ----------
301  * PgStat_Msg                                   Union over all possible messages; every member begins with a PgStat_MsgHdr.
302  * ----------
303  */
304 typedef union PgStat_Msg
305 {
306         PgStat_MsgHdr msg_hdr;          /* header view, valid for any message */
307         PgStat_MsgDummy msg_dummy;
308         PgStat_MsgTabstat msg_tabstat;
309         PgStat_MsgTabpurge msg_tabpurge;
310         PgStat_MsgDropdb msg_dropdb;
311         PgStat_MsgResetcounter msg_resetcounter;
312         PgStat_MsgAutovacStart msg_autovacuum;
313         PgStat_MsgVacuum msg_vacuum;
314         PgStat_MsgAnalyze msg_analyze;
315         PgStat_MsgBgWriter msg_bgwriter;
316 } PgStat_Msg;
317
318
319 /* ------------------------------------------------------------
320  * Statistics collector data structures follow
321  *
322  * PGSTAT_FILE_FORMAT_ID should be changed whenever any of these
323  * data structures change (the structs are written out to the stats file).
324  * ------------------------------------------------------------
325  */
326
327 #define PGSTAT_FILE_FORMAT_ID   0x01A5BC96
328
329 /* ----------
330  * PgStat_StatDBEntry                   The collector's data per database (see pgstat_fetch_stat_dbentry)
331  * ----------
332  */
333 typedef struct PgStat_StatDBEntry
334 {
335         Oid                     databaseid;
336         PgStat_Counter n_xact_commit;
337         PgStat_Counter n_xact_rollback;
338         PgStat_Counter n_blocks_fetched;
339         PgStat_Counter n_blocks_hit;
340         PgStat_Counter n_tuples_returned;
341         PgStat_Counter n_tuples_fetched;
342         PgStat_Counter n_tuples_inserted;
343         PgStat_Counter n_tuples_updated;
344         PgStat_Counter n_tuples_deleted;
345         TimestampTz last_autovac_time;  /* presumably taken from PgStat_MsgAutovacStart.m_start_time (confirm) */
346
347         /*
348          * tables must be last in the struct, because we don't write the pointer
349          * out to the stats file.
350          */
351         HTAB       *tables;     /* presumably a hash of PgStat_StatTabEntry for this database (confirm) */
352 } PgStat_StatDBEntry;
353
354
355 /* ----------
356  * PgStat_StatTabEntry                  The collector's data per table (or index); see pgstat_fetch_stat_tabentry
357  * ----------
358  */
359 typedef struct PgStat_StatTabEntry
360 {
361         Oid                     tableid;
362
363         PgStat_Counter numscans;
364
365         PgStat_Counter tuples_returned;
366         PgStat_Counter tuples_fetched;
367
368         PgStat_Counter tuples_inserted;
369         PgStat_Counter tuples_updated;
370         PgStat_Counter tuples_deleted;
371
372         PgStat_Counter n_live_tuples;
373         PgStat_Counter n_dead_tuples;
374         PgStat_Counter last_anl_tuples; /* presumably the tuple count as of the last ANALYZE (confirm) */
375
376         PgStat_Counter blocks_fetched;
377         PgStat_Counter blocks_hit;
378
379         TimestampTz vacuum_timestamp;           /* user initiated vacuum */
380         TimestampTz autovac_vacuum_timestamp;           /* autovacuum initiated vacuum */
381         TimestampTz analyze_timestamp;          /* user initiated analyze */
382         TimestampTz autovac_analyze_timestamp;          /* autovacuum initiated analyze */
383 } PgStat_StatTabEntry;
384
385
386 /*
387  * Global statistics kept in the stats collector (returned by pgstat_fetch_global); fields parallel PgStat_MsgBgWriter
388  */
389 typedef struct PgStat_GlobalStats
390 {
391         PgStat_Counter  timed_checkpoints;
392         PgStat_Counter  requested_checkpoints;
393         PgStat_Counter  buf_written_checkpoints;
394         PgStat_Counter  buf_written_lru;
395         PgStat_Counter  buf_written_all;
396         PgStat_Counter  maxwritten_lru;
397         PgStat_Counter  maxwritten_all;
398 } PgStat_GlobalStats;
399
400
401 /* ----------
402  * Shared-memory data structures
403  * ----------
404  */
405
406 /* Size of the st_activity buffer, terminating null included ... perhaps replace with a GUC var? */
407 #define PGBE_ACTIVITY_SIZE      1024
408
409 /* ----------
410  * PgBackendStatus
411  *
412  * Each live backend maintains a PgBackendStatus struct in shared memory
413  * showing its current activity.  (The structs are allocated according to
414  * BackendId, but that is not critical.)  Note that the collector process
415  * has no involvement in, or even access to, these structs.
416  * ----------
417  */
418 typedef struct PgBackendStatus
419 {
420         /*
421          * To avoid locking overhead, we use the following protocol: a backend
422          * increments st_changecount before modifying its entry, and again after
423          * finishing a modification.  A would-be reader should note the value of
424          * st_changecount, copy the entry into private memory, then check
425          * st_changecount again.  If the value hasn't changed, and if it's even,
426          * the copy is valid; otherwise start over.  This makes updates cheap
427          * while reads are potentially expensive, but that's the tradeoff we want.
428          */
429         int                     st_changecount;         /* odd while a modification is in progress */
430
431         /* The entry is valid iff st_procpid > 0, unused if st_procpid == 0 */
432         int                     st_procpid;
433
434         /* Times when current backend, transaction, and activity started */
435         TimestampTz st_proc_start_timestamp;
436         TimestampTz st_txn_start_timestamp;             /* cf. pgstat_report_txn_timestamp */
437         TimestampTz st_activity_start_timestamp;
438
439         /* Database OID, owning user's OID, connection client address */
440         Oid                     st_databaseid;
441         Oid                     st_userid;
442         SockAddr        st_clientaddr;
443
444         /* Is backend currently waiting on an lmgr lock? (cf. pgstat_report_waiting) */
445         bool            st_waiting;
446
447         /* current command string; MUST be null-terminated (cf. pgstat_report_activity) */
448         char            st_activity[PGBE_ACTIVITY_SIZE];
449 } PgBackendStatus;
450
451
452 /* ----------
453  * GUC parameters (defined elsewhere; only declared here)
454  * ----------
455  */
456 extern bool pgstat_collect_startcollector;
457 extern bool pgstat_collect_resetonpmstart;
458 extern bool pgstat_collect_tuplelevel;  /* gates the scan/tuple counting macros in this file */
459 extern bool pgstat_collect_blocklevel;  /* gates the buffer read/hit counting macros in this file */
460 extern bool pgstat_collect_querystring;
461
462 /*
463  * BgWriter statistics counters are updated directly by bgwriter and bufmgr
464  */
465 extern PgStat_MsgBgWriter BgWriterStats;        /* cf. pgstat_send_bgwriter */
466
467 /* ----------
468  * Functions called from postmaster
469  * ----------
470  */
471 extern Size BackendStatusShmemSize(void);       /* presumably shared memory needed for the PgBackendStatus array (confirm) */
472 extern void CreateSharedBackendStatus(void);
473
474 extern void pgstat_init(void);
475 extern int      pgstat_start(void);     /* return value semantics not visible here; see pgstat.c */
476 extern void pgstat_reset_all(void);
477 extern void allow_immediate_pgstat_restart(void);
478 #ifdef EXEC_BACKEND
479 extern void PgstatCollectorMain(int argc, char *argv[]);        /* exported only in EXEC_BACKEND builds */
480 #endif   /* EXEC_BACKEND */
481
482
483 /* ----------
484  * Functions called from backends
485  * ----------
486  */
487 extern void pgstat_ping(void);
488
489 extern void pgstat_report_tabstat(bool force);  /* cf. PgStat_MsgTabstat */
490 extern void pgstat_vacuum_tabstat(void);
491 extern void pgstat_drop_database(Oid databaseid);       /* cf. PgStat_MsgDropdb */
492 extern void pgstat_drop_relation(Oid relid);
493
494 extern void pgstat_clear_snapshot(void);
495 extern void pgstat_reset_counters(void);        /* cf. PgStat_MsgResetcounter */
496
497 extern void pgstat_report_autovac(Oid dboid);   /* cf. PgStat_MsgAutovacStart */
498 extern void pgstat_report_vacuum(Oid tableoid, bool shared,
499                                          bool analyze, PgStat_Counter tuples);  /* cf. PgStat_MsgVacuum */
500 extern void pgstat_report_analyze(Oid tableoid, bool shared,
501                                           PgStat_Counter livetuples,
502                                           PgStat_Counter deadtuples);   /* cf. PgStat_MsgAnalyze */
503
504 extern void pgstat_bestart(void);
505 extern void pgstat_report_activity(const char *what);   /* cf. PgBackendStatus.st_activity */
506 extern void pgstat_report_txn_timestamp(TimestampTz tstamp);    /* cf. st_txn_start_timestamp */
507 extern void pgstat_report_waiting(bool waiting);        /* cf. PgBackendStatus.st_waiting */
508
509 extern void pgstat_initstats(Relation rel);     /* presumably sets up rel->pgstat_info for the counting macros (confirm) */
510
511 /* nontransactional event counts are simple enough to inline */
512 /* Each macro does nothing unless its GUC is on and rel has a pgstat_info; note rel is evaluated more than once. */
513 #define pgstat_count_heap_scan(rel)                                                                             \
514         do {                                                                                                                            \
515                 if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL)    \
516                         (rel)->pgstat_info->t_counts.t_numscans++;                                      \
517         } while (0)
518 /* kluge for bitmap scans: takes back one count added by pgstat_count_heap_scan */
519 #define pgstat_discount_heap_scan(rel)                                                                  \
520         do {                                                                                                                            \
521                 if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL)    \
522                         (rel)->pgstat_info->t_counts.t_numscans--;                                      \
523         } while (0)
524 #define pgstat_count_heap_getnext(rel)                                                                  \
525         do {                                                                                                                            \
526                 if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL)    \
527                         (rel)->pgstat_info->t_counts.t_tuples_returned++;                       \
528         } while (0)
529 #define pgstat_count_heap_fetch(rel)                                                                    \
530         do {                                                                                                                            \
531                 if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL)    \
532                         (rel)->pgstat_info->t_counts.t_tuples_fetched++;                        \
533         } while (0)
534 #define pgstat_count_index_scan(rel)                                                                    \
535         do {                                                                                                                            \
536                 if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL)    \
537                         (rel)->pgstat_info->t_counts.t_numscans++;                                      \
538         } while (0)
539 #define pgstat_count_index_tuples(rel, n)                                                               \
540         do {                                                                                                                            \
541                 if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL)    \
542                         (rel)->pgstat_info->t_counts.t_tuples_returned += (n);          \
543         } while (0)
544 #define pgstat_count_buffer_read(rel)                                                                   \
545         do {                                                                                                                            \
546                 if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL)    \
547                         (rel)->pgstat_info->t_counts.t_blocks_fetched++;                        \
548         } while (0)
549 #define pgstat_count_buffer_hit(rel)                                                                    \
550         do {                                                                                                                            \
551                 if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL)    \
552                         (rel)->pgstat_info->t_counts.t_blocks_hit++;                            \
553         } while (0)
554 /* insert/update/delete counts are out of line; presumably they feed the per-(sub)xact records (PgStat_TableXactStatus); confirm in pgstat.c */
555 extern void pgstat_count_heap_insert(Relation rel);
556 extern void pgstat_count_heap_update(Relation rel);
557 extern void pgstat_count_heap_delete(Relation rel);
558
559 extern void AtEOXact_PgStat(bool isCommit);     /* end of top-level transaction */
560 extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);   /* end of subtransaction */
561
562 extern void AtPrepare_PgStat(void);
563 extern void PostPrepare_PgStat(void);
564
565 extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
566                                                                            void *recdata, uint32 len);
567 extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
568                                                                           void *recdata, uint32 len);
569
570 extern void pgstat_send_bgwriter(void); /* cf. BgWriterStats and PgStat_MsgBgWriter */
571
572 /* ----------
573  * Support functions for the SQL-callable functions to
574  * generate the pgstat* views.  Not-found behavior is not visible here; see pgstat.c.
575  * ----------
576  */
577 extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);        /* collector's per-database data */
578 extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);     /* collector's per-table (or index) data */
579 extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid);   /* a backend's shared-memory status entry */
580 extern int      pgstat_fetch_stat_numbackends(void);
581 extern PgStat_GlobalStats *pgstat_fetch_global(void);  /* collector's global statistics */
582
583 #endif   /* PGSTAT_H */