4 * All the statistics collector stuff hacked up in one big, ugly file.
6 * TODO: - Separate collector, postmaster and backend stuff
7 * into different files.
9 * - Add some automatic call for pgstat vacuuming.
11 * - Add a pgstat config column to pg_database, so this
12 * entire thing can be enabled/disabled on a per db basis.
14 * Copyright (c) 2001-2003, PostgreSQL Global Development Group
16 * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.73 2004/05/29 22:48:19 tgl Exp $
23 #include <sys/param.h>
25 #include <sys/types.h>
26 #include <sys/socket.h>
28 #include <netinet/in.h>
29 #include <arpa/inet.h>
35 #include "access/heapam.h"
36 #include "access/xact.h"
37 #include "catalog/catname.h"
38 #include "catalog/pg_database.h"
39 #include "catalog/pg_shadow.h"
40 #include "libpq/libpq.h"
41 #include "libpq/pqsignal.h"
42 #include "mb/pg_wchar.h"
43 #include "miscadmin.h"
44 #include "postmaster/postmaster.h"
45 #include "storage/backendid.h"
46 #include "storage/ipc.h"
47 #include "storage/pg_shmem.h"
48 #include "storage/pmsignal.h"
49 #include "tcop/tcopprot.h"
50 #include "utils/hsearch.h"
51 #include "utils/memutils.h"
52 #include "utils/ps_status.h"
53 #include "utils/rel.h"
54 #include "utils/syscache.h"
/*
 * Collection switches (a later comment in this file calls them GUC
 * variables).  The startup code forces pgstat_collect_startcollector on
 * when any of the querystring/tuplelevel/blocklevel switches is set, and
 * clears all four of those when no working socket could be set up.
 */
61 bool pgstat_collect_startcollector = true;
62 bool pgstat_collect_resetonpmstart = true;
63 bool pgstat_collect_querystring = false;
64 bool pgstat_collect_tuplelevel = false;
65 bool pgstat_collect_blocklevel = false;
68 * Other global variables
/* Set to true by the collector start code; cleared by pgstat_ispgstat(). */
71 bool pgstat_is_running = false;
/* Statistics socket; code in this file tests "pgStatSock < 0" for "none". */
77 NON_EXEC_STATIC int pgStatSock = -1;
/* Pipe created with pgpipe() in PgstatBufferMain; the collector reads [0]. */
78 static int pgStatPipe[2];
/* Address of pgStatSock as filled in by getsockname(); used for connect(). */
79 static struct sockaddr_storage pgStatAddr;
/* Time of the last collector start attempt, used to throttle restarts. */
82 static time_t last_pgstat_start_time;
84 static long pgStatNumMessages = 0;
/* Set to TRUE in PgstatCollectorMain just before it reads the stats file. */
86 static bool pgStatRunningInCollector = FALSE;
/*
 * Table-access message buffers: pgStatTabstatMessages is an array of
 * pgStatTabstatAlloc pointers, of which the first pgStatTabstatUsed are
 * currently in use.  The array grows by TABSTAT_QUANTUM entries at a time.
 */
88 static int pgStatTabstatAlloc = 0;
89 static int pgStatTabstatUsed = 0;
90 static PgStat_MsgTabstat **pgStatTabstatMessages = NULL;
92 #define TABSTAT_QUANTUM 4 /* we alloc this many at a time */
/* Counters copied into tabstat messages and reset by pgstat_report_tabstat(). */
94 static int pgStatXactCommit = 0;
95 static int pgStatXactRollback = 0;
/*
 * Stats-file snapshot: pgStatDBHash, pgStatBeTable and pgStatNumBackends
 * are passed to pgstat_read_statsfile(); pgStatDBHashXact records the
 * transaction id for which that read was last done.
 */
97 static TransactionId pgStatDBHashXact = InvalidTransactionId;
98 static HTAB *pgStatDBHash = NULL;
/* "Dead Backends" hash table, created in PgstatCollectorMain. */
99 static HTAB *pgStatBeDead = NULL;
100 static PgStat_StatBeEntry *pgStatBeTable = NULL;
101 static int pgStatNumBackends = 0;
/* Stats file names, built from PGSTAT_STAT_FILENAME / PGSTAT_STAT_TMPFILE. */
103 static char pgStat_fname[MAXPGPATH];
104 static char pgStat_tmpfname[MAXPGPATH];
108 * Local function forward declarations
/*
 * Selects which statistics subprocess pgstat_forkexec() launches; the
 * values used in this file are STAT_PROC_BUFFER and STAT_PROC_COLLECTOR.
 * NOTE(review): the enumerator list itself is not visible in this excerpt.
 */
113 typedef enum STATS_PROCESS_TYPE
117 } STATS_PROCESS_TYPE;
/* Helpers for launching the subprocesses via fork+exec. */
119 static pid_t pgstat_forkexec(STATS_PROCESS_TYPE procType);
120 static void pgstat_parseArgs(int argc, char *argv[]);
/* Entry points of the buffer process and the collector process. */
124 NON_EXEC_STATIC void PgstatBufferMain(int argc, char *argv[]);
125 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
126 static void pgstat_recvbuffer(void);
/* Installed as the SIGCHLD handler in PgstatBufferMain. */
127 static void pgstat_die(SIGNAL_ARGS);
129 static int pgstat_add_backend(PgStat_MsgHdr *msg);
130 static void pgstat_sub_backend(int procpid);
131 static void pgstat_drop_database(Oid databaseid);
/* Stats file output and input. */
132 static void pgstat_write_statsfile(void);
133 static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
134 PgStat_StatBeEntry **betab,
/* Message construction and transmission. */
137 static void pgstat_setheader(PgStat_MsgHdr *hdr, int mtype);
138 static void pgstat_send(void *msg, int len);
/* One receive routine per message type; each takes the message and its length. */
140 static void pgstat_recv_bestart(PgStat_MsgBestart *msg, int len);
141 static void pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len);
142 static void pgstat_recv_activity(PgStat_MsgActivity *msg, int len);
143 static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
144 static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
145 static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
146 static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
149 /* ------------------------------------------------------------
150 * Public functions called from postmaster follow
151 * ------------------------------------------------------------
157 * Called from postmaster at startup. Create the resources required
158 * by the statistics collector process. If unable to do so, do not
159 * fail --- better to let the postmaster start with stats collection
/*
 * Body of the postmaster-startup initialisation routine.
 * NOTE(review): the signature line, braces and several local declarations
 * are not part of this excerpt; a later comment in the file refers to this
 * routine as pgstat_init.
 *
 * Visible steps:
 *   1. Force pgstat_collect_startcollector on if any collection switch is set.
 *   2. Build the stats file name and unlink it when no collector is wanted
 *      or a reset on postmaster start was requested.
 *   3. Resolve "localhost" and, for each returned address, try to create,
 *      bind, connect and self-test a datagram socket (one test byte sent
 *      and received back).
 *   4. Put the working socket into non-blocking mode.
 *   5. On failure, close the socket and clear the collection switches.
 */
166 ACCEPT_TYPE_ARG3 alen;
167 struct addrinfo *addrs = NULL,
/* Value of the single byte sent through the socket as a self-test. */
176 #define TESTBYTEVAL ((char) 199)
179 * Force start of collector daemon if something to collect
181 if (pgstat_collect_querystring ||
182 pgstat_collect_tuplelevel ||
183 pgstat_collect_blocklevel)
184 pgstat_collect_startcollector = true;
187 * Initialize the filename for the status reports. (In the EXEC_BACKEND
188 * case, this only sets the value in the postmaster. The collector
189 * subprocess will recompute the value for itself, and individual
190 * backends must do so also if they want to access the file.)
192 snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
195 * If we don't have to start a collector or should reset the collected
196 * statistics on postmaster start, simply remove the file.
198 if (!pgstat_collect_startcollector || pgstat_collect_resetonpmstart)
199 unlink(pgStat_fname);
202 * Nothing else required if collector will not get started
204 if (!pgstat_collect_startcollector)
208 * Create the UDP socket for sending and receiving statistic messages
/* Every field of hints is assigned explicitly before the lookup. */
210 hints.ai_flags = AI_PASSIVE;
211 hints.ai_family = PF_UNSPEC;
212 hints.ai_socktype = SOCK_DGRAM;
213 hints.ai_protocol = 0;
214 hints.ai_addrlen = 0;
215 hints.ai_addr = NULL;
216 hints.ai_canonname = NULL;
217 hints.ai_next = NULL;
218 ret = getaddrinfo_all("localhost", NULL, &hints, &addrs);
222 (errmsg("could not resolve \"localhost\": %s",
223 gai_strerror(ret))));
228 * On some platforms, getaddrinfo_all() may return multiple addresses
229 * only one of which will actually work (eg, both IPv6 and IPv4 addresses
230 * when kernel will reject IPv6). Worse, the failure may occur at the
231 * bind() or perhaps even connect() stage. So we must loop through the
232 * results till we find a working combination. We will generate LOG
233 * messages, but no error, for bogus combinations.
235 for (addr = addrs; addr; addr = addr->ai_next)
237 #ifdef HAVE_UNIX_SOCKETS
238 /* Ignore AF_UNIX sockets, if any are returned. */
239 if (addr->ai_family == AF_UNIX)
245 if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) < 0)
248 (errcode_for_socket_access(),
249 errmsg("could not create socket for statistics collector: %m")));
254 * Bind it to a kernel assigned port on localhost and get the assigned
255 * port via getsockname().
257 if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
260 (errcode_for_socket_access(),
261 errmsg("could not bind socket for statistics collector: %m")));
262 closesocket(pgStatSock);
267 alen = sizeof(pgStatAddr);
268 if (getsockname(pgStatSock, (struct sockaddr *) &pgStatAddr, &alen) < 0)
271 (errcode_for_socket_access(),
272 errmsg("could not get address of socket for statistics collector: %m")));
273 closesocket(pgStatSock);
279 * Connect the socket to its own address. This saves a few cycles by
280 * not having to respecify the target address on every send. This also
281 * provides a kernel-level check that only packets from this same
282 * address will be received.
284 if (connect(pgStatSock, (struct sockaddr *) &pgStatAddr, alen) < 0)
287 (errcode_for_socket_access(),
288 errmsg("could not connect socket for statistics collector: %m")));
289 closesocket(pgStatSock);
295 * Try to send and receive a one-byte test message on the socket.
296 * This is to catch situations where the socket can be created but
297 * will not actually pass data (for instance, because kernel packet
298 * filtering rules prevent it).
300 test_byte = TESTBYTEVAL;
301 if (send(pgStatSock, &test_byte, 1, 0) != 1)
304 (errcode_for_socket_access(),
305 errmsg("could not send test message on socket for statistics collector: %m")));
306 closesocket(pgStatSock);
312 * There could possibly be a little delay before the message can be
313 * received. We arbitrarily allow up to half a second before deciding
316 for (;;) /* need a loop to handle EINTR */
319 FD_SET(pgStatSock, &rset);
322 sel_res = select(pgStatSock+1, &rset, NULL, NULL, &tv);
/* Leave the retry loop on success, timeout, or any error other than EINTR. */
323 if (sel_res >= 0 || errno != EINTR)
329 (errcode_for_socket_access(),
330 errmsg("select() failed in statistics collector: %m")));
331 closesocket(pgStatSock);
335 if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
338 * This is the case we actually think is likely, so take pains to
339 * give a specific message for it.
341 * errno will not be set meaningfully here, so don't use it.
344 (ERRCODE_CONNECTION_FAILURE,
345 errmsg("test message did not get through on socket for statistics collector")));
346 closesocket(pgStatSock);
351 test_byte++; /* just make sure variable is changed */
353 if (recv(pgStatSock, &test_byte, 1, 0) != 1)
356 (errcode_for_socket_access(),
357 errmsg("could not receive test message on socket for statistics collector: %m")));
358 closesocket(pgStatSock);
363 if (test_byte != TESTBYTEVAL) /* strictly paranoia ... */
366 (ERRCODE_INTERNAL_ERROR,
367 errmsg("incorrect test message transmission on socket for statistics collector")));
368 closesocket(pgStatSock);
373 /* If we get here, we have a working socket */
377 /* Did we find a working address? */
378 if (!addr || pgStatSock < 0)
381 (errcode_for_socket_access(),
382 errmsg("disabling statistics collector for lack of working socket")));
387 * Set the socket to non-blocking IO. This ensures that if the
388 * collector falls behind (despite the buffering process), statistics
389 * messages will be discarded; backends won't block waiting to send
390 * messages to the collector.
392 if (!set_noblock(pgStatSock))
395 (errcode_for_socket_access(),
396 errmsg("could not set statistics collector socket to nonblocking mode: %m")));
/*
 * NOTE(review): the two freeaddrinfo_all() calls below presumably belong to
 * the success path and the failure path respectively; the lines separating
 * them are not visible here.
 */
400 freeaddrinfo_all(hints.ai_family, addrs);
406 freeaddrinfo_all(hints.ai_family, addrs);
409 closesocket(pgStatSock);
412 /* Adjust GUC variables to suppress useless activity */
413 pgstat_collect_startcollector = false;
414 pgstat_collect_querystring = false;
415 pgstat_collect_tuplelevel = false;
416 pgstat_collect_blocklevel = false;
423 * pgstat_forkexec() -
425 * Format up the arglist for, then fork and exec, statistics
426 * (buffer and collector) processes
/*
 * Build the argument vector for a statistics subprocess and hand it to
 * postmaster_forkexec(), whose result is returned.
 *
 * procType selects the second argument: "-forkbuf" for STAT_PROC_BUFFER,
 * "-forkcol" for STAT_PROC_COLLECTOR.
 *
 * Argument layout as built below:
 *   "postgres", the -fork switch, a NULL placeholder, postgres_exec_path,
 *   then pgStatPipe[0] and pgStatPipe[1] formatted as decimal strings.
 * pgstat_parseArgs() reads the last three entries back in the same order.
 * NOTE(review): the declaration of av[] is not visible in this excerpt.
 */
429 pgstat_forkexec(STATS_PROCESS_TYPE procType)
432 int ac = 0, bufc = 0, i;
/* Scratch strings for the two pipe descriptors; 32 bytes each. */
433 char pgstatBuf[2][32];
435 av[ac++] = "postgres";
439 case STAT_PROC_BUFFER:
440 av[ac++] = "-forkbuf";
443 case STAT_PROC_COLLECTOR:
444 av[ac++] = "-forkcol";
451 av[ac++] = NULL; /* filled in by postmaster_forkexec */
453 /* postgres_exec_path is not passed by write_backend_variables */
454 av[ac++] = postgres_exec_path;
456 /* Pipe file ids (those not passed by write_backend_variables) */
457 snprintf(pgstatBuf[bufc++],32,"%d",pgStatPipe[0]);
458 snprintf(pgstatBuf[bufc++],32,"%d",pgStatPipe[1]);
460 /* Add to the arg list */
461 Assert(bufc <= lengthof(pgstatBuf));
462 for (i = 0; i < bufc; i++)
463 av[ac++] = pgstatBuf[i];
/* Strict "<" leaves at least one unused slot at the end of av[]. */
466 Assert(ac < lengthof(av));
468 return postmaster_forkexec(ac, av);
473 * pgstat_parseArgs() -
475 * Extract data from the arglist for exec'ed statistics
476 * (buffer and collector) processes
/*
 * Recover the values that pgstat_forkexec() appended to the argument list:
 * postgres_exec_path followed by the two pipe descriptors.
 *
 * argc is reused as a running index into argv[], advancing by one per
 * value read.
 * NOTE(review): the statement that positions argc at the first of these
 * arguments is not visible in this excerpt; confirm it matches the layout
 * built by pgstat_forkexec().
 */
479 pgstat_parseArgs(int argc, char *argv[])
484 StrNCpy(postgres_exec_path, argv[argc++], MAXPGPATH);
/* The descriptors were written as decimal text with "%d". */
485 pgStatPipe[0] = atoi(argv[argc++]);
486 pgStatPipe[1] = atoi(argv[argc++]);
489 #endif /* EXEC_BACKEND */
495 * Called from postmaster at startup or after an existing collector
496 * died. Attempt to fire up a fresh statistics collector.
498 * Note: if fail, we will be called again from the postmaster main loop.
507 * Do nothing if no collector needed
/*
 * Body of the routine that launches the statistics buffer process.
 * NOTE(review): the signature line, braces, return statements and some
 * conditions are not part of this excerpt.
 *
 * Visible steps: do nothing when a collector is already running or not
 * wanted; throttle restarts to one per PGSTAT_RESTART_INTERVAL; fork the
 * buffer process, which runs PgstatBufferMain() in the child; finally mark
 * the collector as running.
 */
509 if (pgstat_is_running || !pgstat_collect_startcollector)
513 * Do nothing if too soon since last collector start. This is a
514 * safety valve to protect against continuous respawn attempts if the
515 * collector is dying immediately at launch. Note that since we will
516 * be re-called from the postmaster main loop, we will get another
519 curtime = time(NULL);
/* The elapsed time is compared as unsigned int on both sides. */
520 if ((unsigned int) (curtime - last_pgstat_start_time) <
521 (unsigned int) PGSTAT_RESTART_INTERVAL)
523 last_pgstat_start_time = curtime;
526 * Check that the socket is there, else pgstat_init failed.
531 (errmsg("statistics collector startup skipped")));
534 * We can only get here if someone tries to manually turn
535 * pgstat_collect_startcollector on after it had been off.
537 pgstat_collect_startcollector = false;
542 * Okay, fork off the collector. Remember its PID for
550 /* Specific beos actions before backend startup */
551 beos_before_backend_startup();
/*
 * NOTE(review): the two switch lines below are presumably alternatives
 * selected by a preprocessor conditional that is not visible here
 * (fork+exec via pgstat_forkexec versus a plain fork).
 */
555 switch ((pgStatPid = (int) pgstat_forkexec(STAT_PROC_BUFFER)))
557 switch ((pgStatPid = (int) fork()))
562 /* Specific beos actions */
563 beos_backend_startup_failed();
566 (errmsg("could not fork statistics buffer: %m")));
571 /* in postmaster child ... */
573 /* Specific beos actions after backend startup */
574 beos_backend_startup();
576 /* Close the postmaster's sockets */
577 ClosePostmasterPorts();
579 /* Drop our connection to postmaster's shared memory, as well */
580 PGSharedMemoryDetach();
/* The argc/argv parameters are unused on this path, hence (0, NULL). */
582 PgstatBufferMain(0, NULL);
587 pgstat_is_running = true;
594 * pgstat_ispgstat() -
596 * Called from postmaster to check if a terminated child process
597 * was the statistics collector.
/*
 * Decide whether the terminated child "pid" was the statistics process
 * started from this file.
 *
 * The two tests reject the call when no collector is marked running or
 * when pid differs from the remembered pgStatPid; otherwise the running
 * flag is cleared.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
601 pgstat_ispgstat(int pid)
603 if (!pgstat_is_running)
606 if (pgStatPid != pid)
610 pgstat_is_running = false;
619 * Called from postmaster to tell collector a backend terminated.
/*
 * Send a PGSTAT_MTYPE_BETERM message for the backend whose process id is
 * "pid".
 *
 * The header is zeroed and filled in by hand rather than through
 * pgstat_setheader(), so m_procpid carries the terminated backend's pid
 * instead of the caller's MyProcPid.
 */
623 pgstat_beterm(int pid)
625 PgStat_MsgBeterm msg;
630 MemSet(&(msg.m_hdr), 0, sizeof(msg.m_hdr));
631 msg.m_hdr.m_type = PGSTAT_MTYPE_BETERM;
632 msg.m_hdr.m_procpid = pid;
634 pgstat_send(&msg, sizeof(msg));
638 /* ------------------------------------------------------------
639 * Public functions used by backends follow
640 *------------------------------------------------------------
647 * Tell the collector that this new backend will soon be ready to process
648 * queries. Called from tcop/postgres.c before entering the mainloop.
/*
 * Body of the backend-start report: fill in the common header with type
 * PGSTAT_MTYPE_BESTART and send the whole message.
 * NOTE(review): the signature line and any early-exit test are not part of
 * this excerpt.
 */
654 PgStat_MsgBestart msg;
659 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_BESTART);
660 pgstat_send(&msg, sizeof(msg));
665 * pgstat_report_activity() -
667 * Called in tcop/postgres.c to tell the collector what the backend
668 * is actually doing (usually "<IDLE>" or the start of the query to
/*
 * Report the string "what" to the collector as this backend's current
 * activity.
 *
 * Nothing is sent unless pgstat_collect_querystring is on and a socket
 * exists.  The text is clipped to at most PGSTAT_ACTIVITY_SIZE - 1 bytes
 * and NUL-terminated in the message.
 * NOTE(review): the initial computation of len is not visible in this
 * excerpt; pg_mbcliplen() presumably clips on a multibyte character
 * boundary.
 */
673 pgstat_report_activity(const char *what)
675 PgStat_MsgActivity msg;
678 if (!pgstat_collect_querystring || pgStatSock < 0)
682 len = pg_mbcliplen((const unsigned char *) what, len,
683 PGSTAT_ACTIVITY_SIZE - 1);
685 memcpy(msg.m_what, what, len);
686 msg.m_what[len] = '\0';
/* Send only the used part: bytes before m_what, the text, and its NUL. */
687 len += offsetof(PgStat_MsgActivity, m_what) +1;
689 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ACTIVITY);
690 pgstat_send(&msg, len);
695 * pgstat_report_tabstat() -
697 * Called from tcop/postgres.c to send the so far collected
698 * per table access statistics to the collector.
/*
 * Send every table-access message buffer filled since the last report,
 * then mark all buffers unused.
 *
 * When there is no socket or no collection switch is on, the buffers are
 * simply discarded by resetting pgStatTabstatUsed.
 */
702 pgstat_report_tabstat(void)
706 if (pgStatSock < 0 ||
707 !(pgstat_collect_querystring ||
708 pgstat_collect_tuplelevel ||
709 pgstat_collect_blocklevel))
711 /* Not reporting stats, so just flush whatever we have */
712 pgStatTabstatUsed = 0;
717 * For each message buffer used during the last query set the header
718 * fields and send it out.
720 for (i = 0; i < pgStatTabstatUsed; i++)
722 PgStat_MsgTabstat *tsmsg = pgStatTabstatMessages[i];
/* Message length covers only the m_nentries entries actually filled. */
726 n = tsmsg->m_nentries;
727 len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
728 n * sizeof(PgStat_TableEntry);
/*
 * The transaction counters are zeroed right after being copied, so only
 * the first message sent carries the accumulated counts.
 */
730 tsmsg->m_xact_commit = pgStatXactCommit;
731 tsmsg->m_xact_rollback = pgStatXactRollback;
732 pgStatXactCommit = 0;
733 pgStatXactRollback = 0;
735 pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
736 pgstat_send(tsmsg, len);
739 pgStatTabstatUsed = 0;
744 * pgstat_vacuum_tabstat() -
746 * Tell the collector about objects it can get rid of.
750 pgstat_vacuum_tabstat(void)
758 HASH_SEQ_STATUS hstat;
759 PgStat_StatDBEntry *dbentry;
760 PgStat_StatTabEntry *tabentry;
763 PgStat_MsgTabpurge msg;
771 * If not done for this transaction, read the statistics collector
772 * stats file into some hash tables.
774 if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId()))
776 pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
777 &pgStatBeTable, &pgStatNumBackends);
778 pgStatDBHashXact = GetCurrentTransactionId();
782 * Lookup our own database entry
784 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
785 (void *) &MyDatabaseId,
790 if (dbentry->tables == NULL)
794 * Initialize our messages table counter to zero
799 * Check for all tables if they still exist.
801 hash_seq_init(&hstat, dbentry->tables);
802 while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
805 * Check if this relation is still alive by looking up its
806 * pg_class tuple in the system catalog cache.
808 reltup = SearchSysCache(RELOID,
809 ObjectIdGetDatum(tabentry->tableid),
811 if (HeapTupleIsValid(reltup))
813 ReleaseSysCache(reltup);
818 * Add this table's Oid to the message
820 msg.m_tableid[msg.m_nentries++] = tabentry->tableid;
824 * If the message is full, send it out and reinitialize to zero
826 if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
828 len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
829 +msg.m_nentries * sizeof(Oid);
831 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
832 pgstat_send(&msg, len);
841 if (msg.m_nentries > 0)
843 len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
844 +msg.m_nentries * sizeof(Oid);
846 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
847 pgstat_send(&msg, len);
851 * Read pg_database and remember the Oids of all existing databases
855 dbidlist = (Oid *) palloc(sizeof(Oid) * dbidalloc);
857 dbrel = heap_openr(DatabaseRelationName, AccessShareLock);
858 dbscan = heap_beginscan(dbrel, SnapshotNow, 0, NULL);
859 while ((dbtup = heap_getnext(dbscan, ForwardScanDirection)) != NULL)
861 if (dbidused >= dbidalloc)
864 dbidlist = (Oid *) repalloc((char *) dbidlist,
865 sizeof(Oid) * dbidalloc);
867 dbidlist[dbidused++] = HeapTupleGetOid(dbtup);
869 heap_endscan(dbscan);
870 heap_close(dbrel, AccessShareLock);
873 * Search the database hash table for dead databases and tell the
874 * collector to drop them as well.
876 hash_seq_init(&hstat, pgStatDBHash);
877 while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
879 Oid dbid = dbentry->databaseid;
881 for (i = 0; i < dbidused; i++)
883 if (dbidlist[i] == dbid)
890 if (dbid != InvalidOid)
893 pgstat_drop_database(dbid);
898 * Free the dbid list.
900 pfree((char *) dbidlist);
903 * Tell the caller how many removable objects we found
910 * pgstat_drop_database() -
912 * Tell the collector that we just dropped a database.
913 * This is the only message that shouldn't get lost in space. Otherwise
914 * the collector will keep the statistics for the dead DB until its
915 * stats file gets removed while the postmaster is down.
/*
 * Send a PGSTAT_MTYPE_DROPDB message naming "databaseid" to the collector.
 *
 * The database being dropped travels in m_databaseid; the header is filled
 * by pgstat_setheader(), which records the sender's own MyDatabaseId.
 * NOTE(review): any early-exit test before the message is built is not
 * visible in this excerpt.
 */
919 pgstat_drop_database(Oid databaseid)
921 PgStat_MsgDropdb msg;
926 msg.m_databaseid = databaseid;
928 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
929 pgstat_send(&msg, sizeof(msg));
934 * pgstat_reset_counters() -
936 * Tell the statistics collector to reset counters for our database.
/*
 * Send a PGSTAT_MTYPE_RESETCOUNTER message to the collector.
 *
 * The message has no payload set here beyond the common header, so the
 * database is identified by the header's m_databaseid (MyDatabaseId).
 * An ERRCODE_INSUFFICIENT_PRIVILEGE error is reported for non-superusers.
 * NOTE(review): the condition guarding that error, and any socket check,
 * are not visible in this excerpt.
 */
940 pgstat_reset_counters(void)
942 PgStat_MsgResetcounter msg;
949 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
950 errmsg("must be superuser to reset statistics counters")));
952 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER);
953 pgstat_send(&msg, sizeof(msg));
960 * Send some junk data to the collector to increase traffic.
/*
 * Body of the dummy-message sender: stamp the common header with type
 * PGSTAT_MTYPE_DUMMY and send the whole message.
 * NOTE(review): the signature line and the declaration of msg are not part
 * of this excerpt.
 */
971 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY);
972 pgstat_send(&msg, sizeof(msg));
976 * Create or enlarge the pgStatTabstatMessages array
/*
 * Grow the pool of table-access message buffers by TABSTAT_QUANTUM.
 *
 * A new run of TABSTAT_QUANTUM zeroed message buffers is allocated and the
 * pointer array is created or enlarged to hold pointers to them.  The
 * globals pgStatTabstatMessages and pgStatTabstatAlloc are updated only
 * after both allocations have succeeded.
 *
 * Callers test the result with "!more_tabstat_space()", so a false result
 * means failure.
 * NOTE(review): the return statements, the log level of the out-of-memory
 * reports, and the cleanup of newMessages when the pointer-array
 * allocation fails are not visible in this excerpt; confirm that
 * newMessages is freed on that path.
 */
979 more_tabstat_space(void)
981 PgStat_MsgTabstat *newMessages;
982 PgStat_MsgTabstat **msgArray;
983 int newAlloc = pgStatTabstatAlloc + TABSTAT_QUANTUM;
986 /* Create (another) quantum of message buffers */
987 newMessages = (PgStat_MsgTabstat *)
988 malloc(sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM);
989 if (newMessages == NULL)
992 (errcode(ERRCODE_OUT_OF_MEMORY),
993 errmsg("out of memory")));
997 /* Create or enlarge the pointer array */
998 if (pgStatTabstatMessages == NULL)
999 msgArray = (PgStat_MsgTabstat **)
1000 malloc(sizeof(PgStat_MsgTabstat *) * newAlloc);
/* The realloc result goes to a temporary, so the old array pointer survives a failure. */
1002 msgArray = (PgStat_MsgTabstat **)
1003 realloc(pgStatTabstatMessages,
1004 sizeof(PgStat_MsgTabstat *) * newAlloc);
1005 if (msgArray == NULL)
1009 (errcode(ERRCODE_OUT_OF_MEMORY),
1010 errmsg("out of memory")));
1014 MemSet(newMessages, 0, sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM);
/* Point the new tail slots of the array at consecutive new buffers. */
1015 for (i = 0; i < TABSTAT_QUANTUM; i++)
1016 msgArray[pgStatTabstatAlloc + i] = newMessages++;
1017 pgStatTabstatMessages = msgArray;
1018 pgStatTabstatAlloc = newAlloc;
1024 * pgstat_initstats() -
1026 * Called from various places usually dealing with initialization
1027 * of Relation or Scan structures. The data placed into these
1028 * structures from here tell where later to count for buffer reads,
1029 * scans and tuples fetched.
/*
 * Attach a per-table counter slot for relation "rel" to "stats".
 *
 * stats->tabentry ends up pointing at the PgStat_TableEntry for rel inside
 * one of the pending message buffers.  The slot is found in three steps:
 *   1. reuse an entry with the same relation id in an already-used buffer;
 *   2. otherwise take a free entry in a used buffer that is not yet full;
 *   3. otherwise start the next buffer, growing the pool if necessary.
 * stats->no_stats is set to TRUE when there is no socket, when neither
 * tuple-level nor block-level collection is on, or when the pool cannot
 * be grown.
 * NOTE(review): the return/break statements that end each step are not
 * visible in this excerpt.
 */
1033 pgstat_initstats(PgStat_Info *stats, Relation rel)
1035 Oid rel_id = rel->rd_id;
1036 PgStat_TableEntry *useent;
1037 PgStat_MsgTabstat *tsmsg;
1042 * Initialize data not to count at all.
1044 stats->tabentry = NULL;
1045 stats->no_stats = FALSE;
1046 stats->heap_scan_counted = FALSE;
1047 stats->index_scan_counted = FALSE;
1049 if (pgStatSock < 0 ||
1050 !(pgstat_collect_tuplelevel ||
1051 pgstat_collect_blocklevel))
1053 stats->no_stats = TRUE;
1058 * Search the already-used message slots for this relation.
1060 for (mb = 0; mb < pgStatTabstatUsed; mb++)
1062 tsmsg = pgStatTabstatMessages[mb];
/* Entries are scanned from the most recently added backwards. */
1064 for (i = tsmsg->m_nentries; --i >= 0; )
1066 if (tsmsg->m_entry[i].t_id == rel_id)
1068 stats->tabentry = (void *) &(tsmsg->m_entry[i]);
/* A buffer holds at most PGSTAT_NUM_TABENTRIES entries. */
1073 if (tsmsg->m_nentries >= PGSTAT_NUM_TABENTRIES)
1077 * Not found, but found a message buffer with an empty slot
1078 * instead. Fine, let's use this one.
1080 i = tsmsg->m_nentries++;
1081 useent = &tsmsg->m_entry[i];
1082 MemSet(useent, 0, sizeof(PgStat_TableEntry));
1083 useent->t_id = rel_id;
1084 stats->tabentry = (void *) useent;
1089 * If we ran out of message buffers, we just allocate more.
1091 if (pgStatTabstatUsed >= pgStatTabstatAlloc)
1093 if (!more_tabstat_space())
1095 stats->no_stats = TRUE;
1098 Assert(pgStatTabstatUsed < pgStatTabstatAlloc);
1102 * Use the first entry of the next message buffer.
1104 mb = pgStatTabstatUsed++;
1105 tsmsg = pgStatTabstatMessages[mb];
1106 tsmsg->m_nentries = 1;
1107 useent = &tsmsg->m_entry[0];
1108 MemSet(useent, 0, sizeof(PgStat_TableEntry));
1109 useent->t_id = rel_id;
1110 stats->tabentry = (void *) useent;
1115 * pgstat_count_xact_commit() -
1117 * Called from access/transam/xact.c to count transaction commits.
/*
 * Record a transaction commit for the next statistics report.
 *
 * Does nothing unless at least one collection switch is on.  To make sure
 * the next pgstat_report_tabstat() sends something, one message buffer is
 * marked as used with zero table entries when none is in use yet.
 * NOTE(review): the increment of the commit counter is not among the lines
 * visible in this excerpt (pgstat_count_xact_rollback increments
 * pgStatXactRollback at the corresponding point); confirm against the
 * full file.
 */
1121 pgstat_count_xact_commit(void)
1123 if (!(pgstat_collect_querystring ||
1124 pgstat_collect_tuplelevel ||
1125 pgstat_collect_blocklevel))
1131 * If there was no relation activity yet, just make one existing
1132 * message buffer used without slots, causing the next report to tell
1133 * new xact-counters.
/* No buffers allocated at all yet: allocate the first quantum. */
1135 if (pgStatTabstatAlloc == 0)
1137 if (!more_tabstat_space())
1140 if (pgStatTabstatUsed == 0)
1142 pgStatTabstatUsed++;
1143 pgStatTabstatMessages[0]->m_nentries = 0;
1149 * pgstat_count_xact_rollback() -
1151 * Called from access/transam/xact.c to count transaction rollbacks.
/*
 * Record a transaction rollback for the next statistics report.
 *
 * Does nothing unless at least one collection switch is on.  Otherwise
 * pgStatXactRollback is incremented and, when no message buffer is in use
 * yet, one is marked as used with zero table entries so that the next
 * pgstat_report_tabstat() has a message to carry the counter.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
1155 pgstat_count_xact_rollback(void)
1157 if (!(pgstat_collect_querystring ||
1158 pgstat_collect_tuplelevel ||
1159 pgstat_collect_blocklevel))
1162 pgStatXactRollback++;
1165 * If there was no relation activity yet, just make one existing
1166 * message buffer used without slots, causing the next report to tell
1167 * new xact-counters.
/* No buffers allocated at all yet: allocate the first quantum. */
1169 if (pgStatTabstatAlloc == 0)
1171 if (!more_tabstat_space())
1174 if (pgStatTabstatUsed == 0)
1176 pgStatTabstatUsed++;
1177 pgStatTabstatMessages[0]->m_nentries = 0;
1183 * pgstat_fetch_stat_dbentry() -
1185 * Support function for the SQL-callable pgstat* functions. Returns
1186 * the collected statistics for one database or NULL. NULL doesn't mean
1187 * that the database doesn't exist, it is just not yet known by the
1188 * collector, so the caller is better off to report ZERO instead.
/*
 * Look up the collected statistics entry for database "dbid" in
 * pgStatDBHash.
 *
 * The stats file is (re)read at most once per transaction: the read is
 * skipped when pgStatDBHashXact already equals the current transaction id.
 * Note that the read passes MyDatabaseId, not dbid, as the "onlydb"
 * argument.
 * NOTE(review): the remaining hash_search() arguments and the return
 * statements are not visible in this excerpt.
 */
1191 PgStat_StatDBEntry *
1192 pgstat_fetch_stat_dbentry(Oid dbid)
1194 PgStat_StatDBEntry *dbentry;
1197 * If not done for this transaction, read the statistics collector
1198 * stats file into some hash tables. Be careful with the
1199 * read_statsfile() call below!
1201 if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId()))
1203 pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
1204 &pgStatBeTable, &pgStatNumBackends);
1205 pgStatDBHashXact = GetCurrentTransactionId();
1209 * Lookup the requested database
1211 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1214 if (dbentry == NULL)
1222 * pgstat_fetch_stat_tabentry() -
1224 * Support function for the SQL-callable pgstat* functions. Returns
1225 * the collected statistics for one table or NULL. NULL doesn't mean
1226 * that the table doesn't exist, it is just not yet known by the
1227 * collector, so the caller is better off to report ZERO instead.
/*
 * Look up the collected statistics entry for table "relid".
 *
 * The lookup is two-level: first the entry for MyDatabaseId in
 * pgStatDBHash, then the table within that entry's "tables" hash.  A
 * missing database entry, a NULL tables hash and a missing table entry
 * are each tested separately.
 * The stats file is (re)read at most once per transaction.
 * NOTE(review): the remaining hash_search() arguments and the return
 * statements are not visible in this excerpt.
 */
1230 PgStat_StatTabEntry *
1231 pgstat_fetch_stat_tabentry(Oid relid)
1233 PgStat_StatDBEntry *dbentry;
1234 PgStat_StatTabEntry *tabentry;
1237 * If not done for this transaction, read the statistics collector
1238 * stats file into some hash tables. Be careful with the
1239 * read_statsfile() call below!
1241 if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId()))
1243 pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
1244 &pgStatBeTable, &pgStatNumBackends);
1245 pgStatDBHashXact = GetCurrentTransactionId();
1249 * Lookup our database.
1251 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1252 (void *) &MyDatabaseId,
1254 if (dbentry == NULL)
1258 * Now inside the DB's table hash table lookup the requested one.
1260 if (dbentry->tables == NULL)
1262 tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
1265 if (tabentry == NULL)
1273 * pgstat_fetch_stat_beentry() -
1275 * Support function for the SQL-callable pgstat* functions. Returns
1276 * the actual activity slot of one active backend. The caller is
1277 * responsible for a check if the actual user is permitted to see
1278 * that info (especially the querystring).
/*
 * Return the activity slot of backend number "beid" from pgStatBeTable.
 *
 * beid is 1-based: valid values are 1 .. pgStatNumBackends, and slot
 * beid - 1 of the table is returned.  Out-of-range values are rejected by
 * the range test.
 * The stats file is (re)read at most once per transaction.
 * NOTE(review): the statement executed for an out-of-range beid is not
 * visible in this excerpt.
 */
1281 PgStat_StatBeEntry *
1282 pgstat_fetch_stat_beentry(int beid)
1284 if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId()))
1286 pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
1287 &pgStatBeTable, &pgStatNumBackends);
1288 pgStatDBHashXact = GetCurrentTransactionId();
1291 if (beid < 1 || beid > pgStatNumBackends)
1294 return &pgStatBeTable[beid - 1];
1299 * pgstat_fetch_stat_numbackends() -
1301 * Support function for the SQL-callable pgstat* functions. Returns
1302 * the maximum current backend id.
/*
 * Return pgStatNumBackends, the backend count filled in by
 * pgstat_read_statsfile().
 *
 * The stats file is (re)read first unless that was already done for the
 * current transaction.
 */
1306 pgstat_fetch_stat_numbackends(void)
1308 if (!TransactionIdEquals(pgStatDBHashXact, GetCurrentTransactionId()))
1310 pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId,
1311 &pgStatBeTable, &pgStatNumBackends);
1312 pgStatDBHashXact = GetCurrentTransactionId();
1315 return pgStatNumBackends;
1320 /* ------------------------------------------------------------
1321 * Local support functions follow
1322 * ------------------------------------------------------------
1327 * pgstat_setheader() -
1329 * Set common header fields in a statistics message
/*
 * Fill in the common header "hdr" of an outgoing statistics message.
 *
 * mtype is stored as the message type; the remaining fields identify the
 * sender: its backend id, process id, database and session user.  The
 * m_size field is not set here; pgstat_send() sets it.
 */
1333 pgstat_setheader(PgStat_MsgHdr *hdr, int mtype)
1335 hdr->m_type = mtype;
1336 hdr->m_backendid = MyBackendId;
1337 hdr->m_procpid = MyProcPid;
1338 hdr->m_databaseid = MyDatabaseId;
1339 hdr->m_userid = GetSessionUserId();
1346 * Send out one statistics message to the collector
/*
 * Transmit the "len"-byte message "msg" on pgStatSock.
 *
 * msg must begin with a PgStat_MsgHdr: its m_size field is overwritten
 * with len before sending.  The result of send() is not examined.
 * NOTE(review): any socket-validity test before the send is not visible
 * in this excerpt.
 */
1350 pgstat_send(void *msg, int len)
1355 ((PgStat_MsgHdr *) msg)->m_size = len;
1357 send(pgStatSock, msg, len, 0);
1358 /* We deliberately ignore any error from send() */
1363 * PgstatBufferMain() -
1365 * Start up the statistics buffer process. This is the body of the
1366 * postmaster child process.
1368 * The argc/argv parameters are valid only in EXEC_BACKEND case.
/*
 * Main routine of the statistics buffer process.
 *
 * Sets up signal handling, creates the pipe to the collector, starts the
 * collector as a child process and then runs pgstat_recvbuffer() in the
 * parent.  argc/argv are handed to pgstat_parseArgs().
 * NOTE(review): the preprocessor conditionals around the argument parsing
 * and the fork, the fork call of the non-EXEC_BACKEND path, and the exit
 * calls are not fully visible in this excerpt.
 */
1371 NON_EXEC_STATIC void
1372 PgstatBufferMain(int argc, char *argv[])
1374 IsUnderPostmaster = true; /* we are a postmaster subprocess now */
1376 MyProcPid = getpid(); /* reset MyProcPid */
1378 /* Lose the postmaster's on-exit routines */
1382 * Ignore all signals usually bound to some action in the postmaster,
1383 * except for SIGCHLD --- see pgstat_recvbuffer.
1385 pqsignal(SIGHUP, SIG_IGN);
1386 pqsignal(SIGINT, SIG_IGN);
1387 pqsignal(SIGTERM, SIG_IGN);
1388 pqsignal(SIGQUIT, SIG_IGN);
1389 pqsignal(SIGALRM, SIG_IGN);
1390 pqsignal(SIGPIPE, SIG_IGN);
1391 pqsignal(SIGUSR1, SIG_IGN);
1392 pqsignal(SIGUSR2, SIG_IGN);
/* SIGCHLD is the one signal with a handler: the collector is our child. */
1393 pqsignal(SIGCHLD, pgstat_die);
1394 pqsignal(SIGTTIN, SIG_DFL);
1395 pqsignal(SIGTTOU, SIG_DFL);
1396 pqsignal(SIGCONT, SIG_DFL);
1397 pqsignal(SIGWINCH, SIG_DFL);
1398 /* unblock will happen in pgstat_recvbuffer */
1401 pgstat_parseArgs(argc,argv);
1405 * Start a buffering process to read from the socket, so we have a
1406 * little more time to process incoming messages.
1408 * NOTE: the process structure is: postmaster is parent of buffer process
1409 * is parent of collector process. This way, the buffer can detect
1410 * collector failure via SIGCHLD, whereas otherwise it wouldn't notice
1411 * collector failure until it tried to write on the pipe. That would
1412 * mean that after the postmaster started a new collector, we'd have
1413 * two buffer processes competing to read from the UDP socket --- not
1416 if (pgpipe(pgStatPipe) < 0)
1419 (errcode_for_socket_access(),
1420 errmsg("could not create pipe for statistics buffer: %m")));
1425 /* child becomes collector process */
1426 switch (pgstat_forkexec(STAT_PROC_COLLECTOR))
1433 (errmsg("could not fork statistics collector: %m")));
1436 #ifndef EXEC_BACKEND
1438 /* child becomes collector process */
1439 PgstatCollectorMain(0, NULL);
1444 /* parent becomes buffer process */
/* The buffer process closes pgStatPipe[0], the end the collector reads. */
1445 closesocket(pgStatPipe[0]);
1446 pgstat_recvbuffer();
1453 * PgstatCollectorMain() -
1455 * Start up the statistics collector itself. This is the body of the
1456 * postmaster grandchild process.
1458 * The argc/argv parameters are valid only in EXEC_BACKEND case.
/*
 * Statistics collector main loop: reads messages from pgStatPipe[0],
 * dispatches each one on its m_type, and rewrites the stats file when the
 * select() timeout computed from next_statwrite expires.
 */
1461 NON_EXEC_STATIC void
1462 PgstatCollectorMain(int argc, char *argv[])
1469 struct timeval timeout;
1470 struct timeval next_statwrite;
1471 bool need_statwrite;
1474 MyProcPid = getpid(); /* reset MyProcPid */
1477 * Reset signal handling. With the exception of restoring default
1478 * SIGCHLD handling, this is a no-op in the non-EXEC_BACKEND case
1479 * because we'll have inherited these settings from the buffer process;
1480 * but it's not a no-op for EXEC_BACKEND.
1482 pqsignal(SIGHUP, SIG_IGN);
1483 pqsignal(SIGINT, SIG_IGN);
1484 pqsignal(SIGTERM, SIG_IGN);
1485 pqsignal(SIGQUIT, SIG_IGN);
1486 pqsignal(SIGALRM, SIG_IGN);
1487 pqsignal(SIGPIPE, SIG_IGN);
1488 pqsignal(SIGUSR1, SIG_IGN);
1489 pqsignal(SIGUSR2, SIG_IGN);
1490 pqsignal(SIGCHLD, SIG_DFL);
1491 pqsignal(SIGTTIN, SIG_DFL);
1492 pqsignal(SIGTTOU, SIG_DFL);
1493 pqsignal(SIGCONT, SIG_DFL);
1494 pqsignal(SIGWINCH, SIG_DFL);
1495 PG_SETMASK(&UnBlockSig);
1498 pgstat_parseArgs(argc,argv);
1501 /* Close unwanted files */
/* Only pgStatPipe[0] (assigned to readPipe below) is used by this process. */
1502 closesocket(pgStatPipe[1]);
1503 closesocket(pgStatSock);
1506 * Identify myself via ps
1508 init_ps_display("stats collector process", "", "");
1512 * Initialize filenames needed for status reports.
1514 snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
1515 /* tmpfname need only be set correctly in this process */
1516 snprintf(pgStat_tmpfname, MAXPGPATH, PGSTAT_STAT_TMPFILE,
1520 * Arrange to write the initial status file right away
1522 gettimeofday(&next_statwrite, NULL);
1523 need_statwrite = TRUE;
1526 * Read in an existing statistics stats file or initialize the stats
/* The flag is set before the call because pgstat_read_statsfile() tests it. */
1529 pgStatRunningInCollector = TRUE;
1530 pgstat_read_statsfile(&pgStatDBHash, InvalidOid, NULL, NULL);
1533 * Create the dead backend hashtable
1535 memset(&hash_ctl, 0, sizeof(hash_ctl));
1536 hash_ctl.keysize = sizeof(int);
1537 hash_ctl.entrysize = sizeof(PgStat_StatBeDead);
1538 hash_ctl.hash = tag_hash;
1539 pgStatBeDead = hash_create("Dead Backends", PGSTAT_BE_HASH_SIZE,
1540 &hash_ctl, HASH_ELEM | HASH_FUNCTION);
1541 if (pgStatBeDead == NULL)
1543 /* assume the problem is out-of-memory */
1545 (errcode(ERRCODE_OUT_OF_MEMORY),
1546 errmsg("out of memory in statistics collector --- abort")));
1551 * Create the known backends table
/* One PgStat_StatBeEntry per possible backend (MaxBackends), zero-filled. */
1553 pgStatBeTable = (PgStat_StatBeEntry *) malloc(
1554 sizeof(PgStat_StatBeEntry) * MaxBackends);
1555 if (pgStatBeTable == NULL)
1558 (errcode(ERRCODE_OUT_OF_MEMORY),
1559 errmsg("out of memory in statistics collector --- abort")));
1562 memset(pgStatBeTable, 0, sizeof(PgStat_StatBeEntry) * MaxBackends);
1564 readPipe = pgStatPipe[0];
1567 * Process incoming messages and handle all the reporting stuff until
1568 * there are no more messages.
1573 * If we need to write the status file again (there have been
1574 * changes in the statistics since we wrote it last) calculate the
1575 * timeout until we have to do so.
1581 gettimeofday(&now, NULL);
1582 /* avoid assuming that tv_sec is signed */
1583 if (now.tv_sec > next_statwrite.tv_sec ||
1584 (now.tv_sec == next_statwrite.tv_sec &&
1585 now.tv_usec >= next_statwrite.tv_usec))
1588 timeout.tv_usec = 0;
1592 timeout.tv_sec = next_statwrite.tv_sec - now.tv_sec;
1593 timeout.tv_usec = next_statwrite.tv_usec - now.tv_usec;
1594 if (timeout.tv_usec < 0)
1597 timeout.tv_usec += 1000000;
1603 * Setup the descriptor set for select(2)
1606 FD_SET(readPipe, &rfds);
1609 * Now wait for something to do.
/* With no write pending the timeout pointer is NULL, so select() blocks. */
1611 nready = select(readPipe+1, &rfds, NULL, NULL,
1612 (need_statwrite) ? &timeout : NULL);
1618 (errcode_for_socket_access(),
1619 errmsg("select() failed in statistics collector: %m")));
1624 * If there are no descriptors ready, our timeout for writing the
1625 * stats file happened.
1629 pgstat_write_statsfile();
1630 need_statwrite = FALSE;
1636 * Check if there is a new statistics message to collect.
1638 if (FD_ISSET(readPipe, &rfds))
1641 * We may need to issue multiple read calls in case the buffer
1642 * process didn't write the message in a single write, which
1643 * is possible since it dumps its buffer bytewise. In any
1644 * case, we'd need two reads since we don't know the message
/*
 * targetlen starts as the header size; once exactly that many bytes have
 * arrived it is replaced by the full message length taken from m_size.
 */
1648 int targetlen = sizeof(PgStat_MsgHdr); /* initial */
1649 bool pipeEOF = false;
1651 while (nread < targetlen)
1653 len = piperead(readPipe, ((char *) &msg) + nread,
1660 (errcode_for_socket_access(),
1661 errmsg("could not read from statistics collector pipe: %m")));
1664 if (len == 0) /* EOF on the pipe! */
1670 if (nread == sizeof(PgStat_MsgHdr))
1672 /* we have the header, compute actual msg length */
1673 targetlen = msg.msg_hdr.m_size;
/* Accept only lengths in [sizeof(PgStat_MsgHdr), sizeof(msg)]. */
1674 if (targetlen < (int) sizeof(PgStat_MsgHdr) ||
1675 targetlen > (int) sizeof(msg))
1678 * Bogus message length implies that we got out of
1679 * sync with the buffer process somehow. Abort so
1680 * that we can restart both processes.
1683 (errmsg("invalid statistics message length")));
1690 * EOF on the pipe implies that the buffer process exited.
1691 * Fall out of outer loop.
1697 * Distribute the message to the specific function handling
/* Each handler receives the message cast to its own type plus nread. */
1700 switch (msg.msg_hdr.m_type)
1702 case PGSTAT_MTYPE_DUMMY:
1705 case PGSTAT_MTYPE_BESTART:
1706 pgstat_recv_bestart((PgStat_MsgBestart *) &msg, nread);
1709 case PGSTAT_MTYPE_BETERM:
1710 pgstat_recv_beterm((PgStat_MsgBeterm *) &msg, nread);
1713 case PGSTAT_MTYPE_TABSTAT:
1714 pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, nread);
1717 case PGSTAT_MTYPE_TABPURGE:
1718 pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, nread);
1721 case PGSTAT_MTYPE_ACTIVITY:
1722 pgstat_recv_activity((PgStat_MsgActivity *) &msg, nread);
1725 case PGSTAT_MTYPE_DROPDB:
1726 pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, nread);
1729 case PGSTAT_MTYPE_RESETCOUNTER:
1730 pgstat_recv_resetcounter((PgStat_MsgResetcounter *) &msg,
1739 * Globally count messages.
1741 pgStatNumMessages++;
1744 * If this is the first message after we wrote the stats file
1745 * the last time, setup the timeout that it'd be written.
1747 if (!need_statwrite)
1749 gettimeofday(&next_statwrite, NULL);
/*
 * PGSTAT_STAT_INTERVAL is scaled by 1000 into microseconds, i.e. it is
 * treated as milliseconds; the overflow is then carried into tv_sec.
 */
1750 next_statwrite.tv_usec += ((PGSTAT_STAT_INTERVAL) * 1000);
1751 next_statwrite.tv_sec += (next_statwrite.tv_usec / 1000000);
1752 next_statwrite.tv_usec %= 1000000;
1753 need_statwrite = TRUE;
1758 * Note that we do NOT check for postmaster exit inside the loop;
1759 * only EOF on the buffer pipe causes us to fall out. This
1760 * ensures we don't exit prematurely if there are still a few
1761 * messages in the buffer or pipe at postmaster shutdown.
1766 * Okay, we saw EOF on the buffer pipe, so there are no more messages
1767 * to process. If the buffer process quit because of postmaster
1768 * shutdown, we want to save the final stats to reuse at next startup.
1769 * But if the buffer process failed, it seems best not to (there may
1770 * even now be a new collector firing up, and we don't want it to read
1771 * a partially-rewritten stats file).
/* The final write happens only when the postmaster is reported gone. */
1773 if (!PostmasterIsAlive(false))
1774 pgstat_write_statsfile();
1779 * pgstat_recvbuffer() -
1781 * This is the body of the separate buffering process. Its only
1782 * purpose is to receive messages from the UDP socket as fast as
1783 * possible and forward them over a pipe into the collector itself.
1784 * If the collector is slow to absorb messages, they are buffered here.
/*
 * Buffer process body: select()s on the UDP socket (pgStatSock) and on the
 * nonblocking write end of the pipe, staging accepted messages in a
 * malloc'd buffer of PGSTAT_RECVBUFFERSZ bytes in between.
 */
1788 pgstat_recvbuffer(void)
1792 struct timeval timeout;
1793 int writePipe = pgStatPipe[1];
1799 PgStat_Msg input_buffer;
1801 int msg_send = 0; /* next send index in buffer */
1802 int msg_recv = 0; /* next receive index */
1803 int msg_have = 0; /* number of bytes stored */
1804 bool overflow = false;
1807 * Identify myself via ps
1809 init_ps_display("stats buffer process", "", "");
1813 * We want to die if our child collector process does. There are two
1814 * ways we might notice that it has died: receive SIGCHLD, or get a
1815 * write failure on the pipe leading to the child. We can set SIGPIPE
1816 * to kill us here. Our SIGCHLD handler was already set up before we
1817 * forked (must do it that way, else it's a race condition).
1819 pqsignal(SIGPIPE, SIG_DFL);
1820 PG_SETMASK(&UnBlockSig);
1823 * Set the write pipe to nonblock mode, so that we cannot block when
1824 * the collector falls behind.
1826 if (!set_noblock(writePipe))
1829 (errcode_for_socket_access(),
1830 errmsg("could not set statistics collector pipe to nonblocking mode: %m")));
1835 * Allocate the message buffer
1837 msgbuffer = (char *) malloc(PGSTAT_RECVBUFFERSZ);
1838 if (msgbuffer == NULL)
1841 (errcode(ERRCODE_OUT_OF_MEMORY),
1842 errmsg("out of memory in statistics collector --- abort")));
1856 * As long as we have buffer space we add the socket to the read
/*
 * The socket is polled only while at least sizeof(PgStat_Msg) bytes remain
 * free, so a maximum-size message always fits.
 */
1859 if (msg_have <= (int) (PGSTAT_RECVBUFFERSZ - sizeof(PgStat_Msg)))
1861 FD_SET(pgStatSock, &rfds);
1870 (errmsg("statistics buffer is full")));
1876 * If we have messages to write out, we add the pipe to the write
1881 FD_SET(writePipe, &wfds);
1882 if (writePipe > maxfd)
1887 * Wait for some work to do; but not for more than 10 seconds
1888 * (this determines how quickly we will shut down after postmaster
1891 timeout.tv_sec = 10;
1892 timeout.tv_usec = 0;
1894 nready = select(maxfd + 1, &rfds, &wfds, NULL, &timeout);
1900 (errcode_for_socket_access(),
1901 errmsg("select() failed in statistics buffer: %m")));
1906 * If there is a message on the socket, read it and check for
1909 if (FD_ISSET(pgStatSock, &rfds))
/* One recv() into input_buffer, asking for at most sizeof(PgStat_Msg). */
1911 len = recv(pgStatSock, (char *) &input_buffer,
1912 sizeof(PgStat_Msg), 0);
1916 (errcode_for_socket_access(),
1917 errmsg("could not read statistics message: %m")));
1922 * We ignore messages that are smaller than our common header
1924 if (len < sizeof(PgStat_MsgHdr))
1928 * The received length must match the length in the header
1930 if (input_buffer.msg_hdr.m_size != len)
1934 * O.K. - we accept this message. Copy it to the circular
/*
 * xfr is first the room left before the end of msgbuffer.
 * NOTE(review): presumably msg_recv wraps to 0 when it reaches
 * PGSTAT_RECVBUFFERSZ -- the statement after that test is not visible here.
 */
1940 xfr = PGSTAT_RECVBUFFERSZ - msg_recv;
1944 memcpy(msgbuffer + msg_recv,
1945 ((char *) &input_buffer) + frm,
1948 if (msg_recv == PGSTAT_RECVBUFFERSZ)
1957 * If the collector is ready to receive, write some data into his
1958 * pipe. We may or may not be able to write all that we have.
1960 * NOTE: if what we have is less than PIPE_BUF bytes but more than
1961 * the space available in the pipe buffer, most kernels will
1962 * refuse to write any of it, and will return EAGAIN. This means
1963 * we will busy-loop until the situation changes (either because
1964 * the collector caught up, or because more data arrives so that
1965 * we have more than PIPE_BUF bytes buffered). This is not good,
1966 * but is there any way around it? We have no way to tell when
1967 * the collector has caught up...
1969 if (FD_ISSET(writePipe, &wfds))
1971 xfr = PGSTAT_RECVBUFFERSZ - msg_send;
1975 len = pipewrite(writePipe, msgbuffer + msg_send, xfr);
/* EINTR and EAGAIN are retried on the next iteration, not reported. */
1978 if (errno == EINTR || errno == EAGAIN)
1979 continue; /* not enough space in pipe */
1981 (errcode_for_socket_access(),
1982 errmsg("could not write to statistics collector pipe: %m")));
1985 /* NB: len < xfr is okay */
1987 if (msg_send == PGSTAT_RECVBUFFERSZ)
1993 * Make sure we forwarded all messages before we check for
1994 * Postmaster termination.
1996 if (msg_have != 0 || FD_ISSET(pgStatSock, &rfds))
2000 * If the postmaster has terminated, we've done our job.
2002 if (!PostmasterIsAlive(true))
/*
 * NOTE(review): only the signature is visible here. Presumably this is the
 * SIGCHLD handler that pgstat_recvbuffer() says was installed before the
 * fork -- confirm against the handler registration.
 */
2008 pgstat_die(SIGNAL_ARGS)
2015 * pgstat_add_backend() -
2017 * Support function to keep our backend list up to date.
/*
 * Registers the sender of a message in pgStatBeTable and makes sure its
 * database has an entry (with a table hash) in pgStatDBHash.
 *
 * msg: common header of the incoming message; m_backendid, m_procpid,
 *      m_databaseid and m_userid are read from it.
 *
 * NOTE(review): callers in this file test the result with "< 0" and "!= 0";
 * the return statements are not visible here, so confirm the exact codes.
 */
2021 pgstat_add_backend(PgStat_MsgHdr *msg)
2023 PgStat_StatDBEntry *dbentry;
2024 PgStat_StatBeEntry *beentry;
2025 PgStat_StatBeDead *deadbe;
2029 * Check that the backend ID is valid
/* Backend IDs are 1-based: valid values are 1..MaxBackends. */
2031 if (msg->m_backendid < 1 || msg->m_backendid > MaxBackends)
2034 (errmsg("invalid server process ID %d", msg->m_backendid)));
2039 * Get the slot for this backendid.
/* A slot counts as in use when its databaseid is not InvalidOid. */
2041 beentry = &pgStatBeTable[msg->m_backendid - 1];
2042 if (beentry->databaseid != InvalidOid)
2045 * If the slot contains the PID of this backend, everything is
2046 * fine and we got nothing to do.
2048 if (beentry->procpid == msg->m_procpid)
2053 * Lookup if this backend is known to be dead. This can be caused due
2054 * to messages arriving in the wrong order - i.e. Postmaster's BETERM
2055 * message might have arrived before we received all the backends
2056 * stats messages, or even a new backend with the same backendid was
2057 * faster in sending his BESTART.
2059 * If the backend is known to be dead, we ignore this add.
/* The dead-backend hash is keyed by process ID. */
2061 deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
2062 (void *) &(msg->m_procpid),
2068 * Backend isn't known to be dead. If it's slot is currently used, we
2069 * have to kick out the old backend.
2071 if (beentry->databaseid != InvalidOid)
2072 pgstat_sub_backend(beentry->procpid);
2075 * Put this new backend into the slot.
/* Activity text and its start timestamp begin cleared for the new occupant. */
2077 beentry->databaseid = msg->m_databaseid;
2078 beentry->procpid = msg->m_procpid;
2079 beentry->userid = msg->m_userid;
2080 beentry->activity_start_sec = 0;
2081 beentry->activity_start_usec = 0;
2082 MemSet(beentry->activity, 0, PGSTAT_ACTIVITY_SIZE);
2085 * Lookup or create the database entry for this backends DB.
2087 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2088 (void *) &(msg->m_databaseid),
2089 HASH_ENTER, &found);
2090 if (dbentry == NULL)
2093 (errcode(ERRCODE_OUT_OF_MEMORY),
2094 errmsg("out of memory in statistics collector --- abort")));
2099 * If not found, initialize the new one.
2105 dbentry->tables = NULL;
2106 dbentry->n_xact_commit = 0;
2107 dbentry->n_xact_rollback = 0;
2108 dbentry->n_blocks_fetched = 0;
2109 dbentry->n_blocks_hit = 0;
2110 dbentry->n_connects = 0;
2111 dbentry->destroy = 0;
/* Per-database table hash: keyed by Oid, entries are PgStat_StatTabEntry. */
2113 memset(&hash_ctl, 0, sizeof(hash_ctl));
2114 hash_ctl.keysize = sizeof(Oid);
2115 hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2116 hash_ctl.hash = tag_hash;
2117 dbentry->tables = hash_create("Per-database table",
2118 PGSTAT_TAB_HASH_SIZE,
2120 HASH_ELEM | HASH_FUNCTION);
2121 if (dbentry->tables == NULL)
2123 /* assume the problem is out-of-memory */
2125 (errcode(ERRCODE_OUT_OF_MEMORY),
2126 errmsg("out of memory in statistics collector --- abort")));
2132 * Count number of connects to the database
2134 dbentry->n_connects++;
2141 * pgstat_sub_backend() -
2143 * Remove a backend from the actual backends list.
/*
 * Scans pgStatBeTable for the in-use slot whose procpid matches, records the
 * backend in the dead-backend hash with destroy = PGSTAT_DESTROY_COUNT, and
 * marks the slot free by setting its databaseid to InvalidOid.
 *
 * procpid: process ID of the backend to remove.
 */
2147 pgstat_sub_backend(int procpid)
2150 PgStat_StatBeDead *deadbe;
2154 * Search in the known-backends table for the slot containing this
2157 for (i = 0; i < MaxBackends; i++)
/* Free slots (databaseid == InvalidOid) are skipped. */
2159 if (pgStatBeTable[i].databaseid != InvalidOid &&
2160 pgStatBeTable[i].procpid == procpid)
2163 * That's him. Add an entry to the known to be dead backends.
2164 * Due to possible misorder in the arrival of UDP packets it's
2165 * possible that even if we know the backend is dead, there
2166 * could still be messages queued that arrive later. Those
2167 * messages must not cause our number of backends statistics
2168 * to get screwed up, so we remember for a couple of seconds
2169 * that this PID is dead and ignore them (only the counting of
2170 * backends, not the table access stats they sent).
2172 deadbe = (PgStat_StatBeDead *) hash_search(pgStatBeDead,
2179 (errcode(ERRCODE_OUT_OF_MEMORY),
2180 errmsg("out of memory in statistics collector --- abort")));
/* Slot index i is stored as a 1-based backend ID. */
2185 deadbe->backendid = i + 1;
2186 deadbe->destroy = PGSTAT_DESTROY_COUNT;
2190 * Declare the backend slot empty.
2192 pgStatBeTable[i].databaseid = InvalidOid;
2198 * No big problem if not found. This can happen if UDP messages arrive
2199 * out of order here.
2205 * pgstat_write_statsfile() -
/*
 * Dumps pgStatDBHash, each database's table hash and the in-use slots of
 * pgStatBeTable to pgStat_tmpfname, then rename()s that file onto
 * pgStat_fname. Entries whose destroy counter reaches zero during this pass
 * are removed rather than written. Afterwards the dead-backend hash is aged
 * with the same countdown.
 *
 * NOTE(review): none of the visible fwrite() results are checked, so a short
 * write is noticed only if fclose() reports it -- consider testing
 * ferror(fpout) before closing.
 */
2211 pgstat_write_statsfile(void)
2213 HASH_SEQ_STATUS hstat;
2214 HASH_SEQ_STATUS tstat;
2215 PgStat_StatDBEntry *dbentry;
2216 PgStat_StatTabEntry *tabentry;
2217 PgStat_StatBeDead *deadbe;
2222 * Open the statistics temp file to write out the current values.
2224 fpout = fopen(pgStat_tmpfname, PG_BINARY_W);
2228 (errcode_for_file_access(),
2229 errmsg("could not open temporary statistics file \"%s\": %m",
2235 * Walk through the database table.
2237 hash_seq_init(&hstat, pgStatDBHash);
2238 while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
2241 * If this database is marked destroyed, count down and do so if
2244 if (dbentry->destroy > 0)
2246 if (--(dbentry->destroy) == 0)
2248 if (dbentry->tables != NULL)
2249 hash_destroy(dbentry->tables);
/*
 * The entry is removed from pgStatDBHash while the sequential scan of that
 * same table is still running.
 * NOTE(review): confirm the hash implementation permits deleting the entry
 * just returned by hash_seq_search().
 */
2251 if (hash_search(pgStatDBHash,
2252 (void *) &(dbentry->databaseid),
2253 HASH_REMOVE, NULL) == NULL)
2256 (errmsg("database hash table corrupted "
2257 "during cleanup --- abort")));
2263 * Don't include statistics for it.
2269 * Write out the DB line including the number of life backends.
/* The struct is written as raw bytes. */
2272 fwrite(dbentry, sizeof(PgStat_StatDBEntry), 1, fpout);
2275 * Walk through the databases access stats per table.
2277 hash_seq_init(&tstat, dbentry->tables);
2278 while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
2281 * If table entry marked for destruction, same as above for
2282 * the database entry.
2284 if (tabentry->destroy > 0)
2286 if (--(tabentry->destroy) == 0)
2288 if (hash_search(dbentry->tables,
2289 (void *) &(tabentry->tableid),
2290 HASH_REMOVE, NULL) == NULL)
2293 (errmsg("tables hash table for "
2294 "database %u corrupted during "
2295 "cleanup --- abort",
2296 dbentry->databaseid)));
2304 * At least we think this is still a life table. Print it's
2308 fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
2312 * Mark the end of this DB
2318 * Write out the known running backends to the stats file.
2322 fwrite(&i, sizeof(i), 1, fpout);
2324 for (i = 0; i < MaxBackends; i++)
/* Only in-use slots (databaseid != InvalidOid) are written. */
2326 if (pgStatBeTable[i].databaseid != InvalidOid)
2329 fwrite(&pgStatBeTable[i], sizeof(PgStat_StatBeEntry), 1, fpout);
2334 * No more output to be done. Close the temp file and replace the old
2335 * pgstat.stat with it.
2338 if (fclose(fpout) < 0)
2341 (errcode_for_file_access(),
2342 errmsg("could not close temporary statistics file \"%s\": %m",
2347 if (rename(pgStat_tmpfname, pgStat_fname) < 0)
2350 (errcode_for_file_access(),
2351 errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
2352 pgStat_tmpfname, pgStat_fname)));
2357 * Clear out the dead backends table
2359 hash_seq_init(&hstat, pgStatBeDead);
2360 while ((deadbe = (PgStat_StatBeDead *) hash_seq_search(&hstat)) != NULL)
2363 * Count down the destroy delay and remove entries where it
/* Each call of this function ages every dead-backend entry by one step. */
2366 if (--(deadbe->destroy) <= 0)
2368 if (hash_search(pgStatBeDead,
2369 (void *) &(deadbe->procpid),
2370 HASH_REMOVE, NULL) == NULL)
2373 (errmsg("dead-server-process hash table corrupted "
2374 "during cleanup --- abort")));
2383 * pgstat_read_statsfile() -
2385 * Reads in an existing statistics collector file and initializes the
2386 * databases hash table (whose entries point to the tables hash tables)
2387 * and the current backend table.
/*
 * Creates a new database hash in *dbhash and fills it from pgStat_fname.
 *
 * dbhash:      out: receives the newly created database hash table.
 * onlydb:      when not InvalidOid, table hashes are built only for this
 *              database; other databases get an entry with tables == NULL.
 * betab:       out, may be NULL: receives the allocated backend array.
 * numbackends: out, may be NULL: receives the number of backends read.
 *
 * The 'M' and 'B' cases test betab/numbackends for NULL before touching any
 * backend data (the action taken in that case is not visible here).
 */
2391 pgstat_read_statsfile(HTAB **dbhash, Oid onlydb,
2392 PgStat_StatBeEntry **betab, int *numbackends)
2394 PgStat_StatDBEntry *dbentry;
2395 PgStat_StatDBEntry dbbuf;
2396 PgStat_StatTabEntry *tabentry;
2397 PgStat_StatTabEntry tabbuf;
2399 HTAB *tabhash = NULL;
2401 int maxbackends = 0;
2402 int havebackends = 0;
2404 MemoryContext use_mcxt;
2408 * If running in the collector we use the DynaHashCxt memory context.
2409 * If running in a backend, we use the TopTransactionContext instead,
2410 * so the caller must only know the last XactId when this call
2411 * happened to know if his tables are still valid or already gone!
2413 if (pgStatRunningInCollector)
2420 use_mcxt = TopTransactionContext;
2421 mcxt_flags = HASH_CONTEXT;
2425 * Create the DB hashtable
2427 memset(&hash_ctl, 0, sizeof(hash_ctl));
2428 hash_ctl.keysize = sizeof(Oid);
2429 hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
2430 hash_ctl.hash = tag_hash;
2431 hash_ctl.hcxt = use_mcxt;
2432 *dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
2433 HASH_ELEM | HASH_FUNCTION | mcxt_flags);
2434 if (*dbhash == NULL)
2436 /* assume the problem is out-of-memory */
2437 if (pgStatRunningInCollector)
2440 (errcode(ERRCODE_OUT_OF_MEMORY),
2441 errmsg("out of memory in statistics collector --- abort")));
2444 /* in backend, can do normal error */
2446 (errcode(ERRCODE_OUT_OF_MEMORY),
2447 errmsg("out of memory")));
2451 * Initialize the number of known backends to zero, just in case we do
2452 * a silent error return below.
2454 if (numbackends != NULL)
2460 * In EXEC_BACKEND case, we won't have inherited pgStat_fname from
2461 * postmaster, so compute it first time through.
2464 if (pgStat_fname[0] == '\0')
2466 Assert(DataDir != NULL);
2467 snprintf(pgStat_fname, MAXPGPATH, PGSTAT_STAT_FILENAME, DataDir);
2472 * Try to open the status file. If it doesn't exist, the backends
2473 * simply return zero for anything and the collector simply starts
2474 * from scratch with empty counters.
2476 if ((fpin = fopen(pgStat_fname, PG_BINARY_R)) == NULL)
2480 * We found an existing collector stats file. Read it and put all the
2481 * hashtable entries into place.
/* Each record begins with a one-byte tag, which selects the case below. */
2485 switch (fgetc(fpin))
2488 * 'D' A PgStat_StatDBEntry struct describing a database
2489 * follows. Subsequently, zero to many 'T' entries will
2490 * follow until a 'd' is encountered.
2493 if (fread(&dbbuf, 1, sizeof(dbbuf), fpin) != sizeof(dbbuf))
2495 ereport(pgStatRunningInCollector ? LOG : WARNING,
2496 (errmsg("corrupted pgstat.stat file")));
2502 * Add to the DB hash
2504 dbentry = (PgStat_StatDBEntry *) hash_search(*dbhash,
2505 (void *) &dbbuf.databaseid,
2508 if (dbentry == NULL)
2510 if (pgStatRunningInCollector)
2513 (errcode(ERRCODE_OUT_OF_MEMORY),
2514 errmsg("out of memory in statistics collector --- abort")));
2521 (errcode(ERRCODE_OUT_OF_MEMORY),
2522 errmsg("out of memory")));
2527 ereport(pgStatRunningInCollector ? LOG : WARNING,
2528 (errmsg("corrupted pgstat.stat file")));
/*
 * The entry is copied from the file, then tables, destroy and n_backends
 * are overwritten: those three are rebuilt at load time, not taken from
 * the file.
 */
2533 memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
2534 dbentry->tables = NULL;
2535 dbentry->destroy = 0;
2536 dbentry->n_backends = 0;
2539 * Don't collect tables if not the requested DB
2541 if (onlydb != InvalidOid && onlydb != dbbuf.databaseid)
2544 memset(&hash_ctl, 0, sizeof(hash_ctl));
2545 hash_ctl.keysize = sizeof(Oid);
2546 hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2547 hash_ctl.hash = tag_hash;
2548 hash_ctl.hcxt = use_mcxt;
2549 dbentry->tables = hash_create("Per-database table",
2550 PGSTAT_TAB_HASH_SIZE,
2552 HASH_ELEM | HASH_FUNCTION | mcxt_flags);
2553 if (dbentry->tables == NULL)
2555 /* assume the problem is out-of-memory */
2556 if (pgStatRunningInCollector)
2559 (errcode(ERRCODE_OUT_OF_MEMORY),
2560 errmsg("out of memory in statistics collector --- abort")));
2563 /* in backend, can do normal error */
2566 (errcode(ERRCODE_OUT_OF_MEMORY),
2567 errmsg("out of memory")));
2571 * Arrange that following 'T's add entries to this
2572 * databases tables hash table.
2574 tabhash = dbentry->tables;
2578 * 'd' End of this database.
2585 * 'T' A PgStat_StatTabEntry follows.
2588 if (fread(&tabbuf, 1, sizeof(tabbuf), fpin) != sizeof(tabbuf))
2590 ereport(pgStatRunningInCollector ? LOG : WARNING,
2591 (errmsg("corrupted pgstat.stat file")));
2597 * Skip if table belongs to a not requested database.
2599 if (tabhash == NULL)
2602 tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
2603 (void *) &tabbuf.tableid,
2604 HASH_ENTER, &found);
2605 if (tabentry == NULL)
2607 if (pgStatRunningInCollector)
2610 (errcode(ERRCODE_OUT_OF_MEMORY),
2611 errmsg("out of memory in statistics collector --- abort")));
2614 /* in backend, can do normal error */
2617 (errcode(ERRCODE_OUT_OF_MEMORY),
2618 errmsg("out of memory")));
2623 ereport(pgStatRunningInCollector ? LOG : WARNING,
2624 (errmsg("corrupted pgstat.stat file")));
2629 memcpy(tabentry, &tabbuf, sizeof(tabbuf));
2633 * 'M' The maximum number of backends to expect follows.
2636 if (betab == NULL || numbackends == NULL)
2641 if (fread(&maxbackends, 1, sizeof(maxbackends), fpin) !=
2642 sizeof(maxbackends))
2644 ereport(pgStatRunningInCollector ? LOG : WARNING,
2645 (errmsg("corrupted pgstat.stat file")));
/*
 * NOTE(review): maxbackends comes straight from the file and sizes the
 * allocation below; only the == 0 test is visible here. Confirm that a
 * negative or absurdly large value from a damaged file cannot get through.
 */
2649 if (maxbackends == 0)
2656 * Allocate space (in TopTransactionContext too) for the
/*
 * NOTE(review): presumably use_mcxt is NULL in the collector -- the
 * assignment in that branch is not visible here -- which selects malloc().
 */
2659 if (use_mcxt == NULL)
2660 *betab = (PgStat_StatBeEntry *) malloc(
2661 sizeof(PgStat_StatBeEntry) * maxbackends);
2663 *betab = (PgStat_StatBeEntry *) MemoryContextAlloc(
2665 sizeof(PgStat_StatBeEntry) * maxbackends);
2669 * 'B' A PgStat_StatBeEntry follows.
2672 if (betab == NULL || numbackends == NULL)
2684 * Read it directly into the table.
2686 if (fread(&(*betab)[havebackends], 1,
2687 sizeof(PgStat_StatBeEntry), fpin) !=
2688 sizeof(PgStat_StatBeEntry))
2690 ereport(pgStatRunningInCollector ? LOG : WARNING,
2691 (errmsg("corrupted pgstat.stat file")));
2697 * Count backends per database here.
2699 dbentry = (PgStat_StatDBEntry *) hash_search(*dbhash,
2700 (void *) &((*betab)[havebackends].databaseid),
2703 dbentry->n_backends++;
/* "numbackends != 0" compares the pointer with zero, i.e. a NULL check. */
2706 if (numbackends != 0)
2707 *numbackends = havebackends;
2708 if (havebackends >= maxbackends)
2716 * 'E' The EOF marker of a complete stats file.
2723 ereport(pgStatRunningInCollector ? LOG : WARNING,
2724 (errmsg("corrupted pgstat.stat file")));
2735 * pgstat_recv_bestart() -
2737 * Process a backend startup message.
2741 pgstat_recv_bestart(PgStat_MsgBestart *msg, int len)
2743 pgstat_add_backend(&msg->m_hdr);
2748 * pgstat_recv_beterm() -
2750 * Process a backend termination message.
2754 pgstat_recv_beterm(PgStat_MsgBeterm *msg, int len)
2756 pgstat_sub_backend(msg->m_hdr.m_procpid);
2761 * pgstat_recv_activity() -
2763 * Remember what the backend is doing.
2767 pgstat_recv_activity(PgStat_MsgActivity *msg, int len)
2769 PgStat_StatBeEntry *entry;
2772 * Here we check explicitly for 0 return, since we don't want to
2773 * mangle the activity of an active backend by a delayed packed from a
2776 if (pgstat_add_backend(&msg->m_hdr) != 0)
2779 entry = &(pgStatBeTable[msg->m_hdr.m_backendid - 1]);
2781 strncpy(entry->activity, msg->m_what, PGSTAT_ACTIVITY_SIZE);
2783 entry->activity_start_sec =
2784 GetCurrentAbsoluteTimeUsec(&entry->activity_start_usec);
2789 * pgstat_recv_tabstat() -
2791 * Count what the backend has done.
/*
 * Folds the per-table counters carried by a TABSTAT message into the sending
 * backend's database entry: transaction counts are added at database level,
 * each table's counters are created or incremented, and block I/O is also
 * accumulated at database level.
 */
2795 pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
2797 PgStat_TableEntry *tabmsg = &(msg->m_entry[0]);
2798 PgStat_StatDBEntry *dbentry;
2799 PgStat_StatTabEntry *tabentry;
2804 * Make sure the backend is counted for.
2806 if (pgstat_add_backend(&msg->m_hdr) < 0)
2810 * Lookup the database in the hashtable.
2812 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2813 (void *) &(msg->m_hdr.m_databaseid),
2819 * If the database is marked for destroy, this is a delayed UDP packet
2820 * and not worth being counted.
2822 if (dbentry->destroy > 0)
2825 dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
2826 dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
2829 * Process all table entries in the message.
/*
 * NOTE(review): m_nentries is taken from the message and used directly as
 * the loop bound; no check against len or the capacity of m_entry[] is
 * visible here. Confirm the count is bounded before this point.
 */
2831 for (i = 0; i < msg->m_nentries; i++)
2833 tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2834 (void *) &(tabmsg[i].t_id),
2835 HASH_ENTER, &found);
2836 if (tabentry == NULL)
2839 (errcode(ERRCODE_OUT_OF_MEMORY),
2840 errmsg("out of memory in statistics collector --- abort")));
2847 * If it's a new table entry, initialize counters to the
2848 * values we just got.
2850 tabentry->numscans = tabmsg[i].t_numscans;
2851 tabentry->tuples_returned = tabmsg[i].t_tuples_returned;
2852 tabentry->tuples_fetched = tabmsg[i].t_tuples_fetched;
2853 tabentry->tuples_inserted = tabmsg[i].t_tuples_inserted;
2854 tabentry->tuples_updated = tabmsg[i].t_tuples_updated;
2855 tabentry->tuples_deleted = tabmsg[i].t_tuples_deleted;
2856 tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched;
2857 tabentry->blocks_hit = tabmsg[i].t_blocks_hit;
2859 tabentry->destroy = 0;
2864 * Otherwise add the values to the existing entry.
2866 tabentry->numscans += tabmsg[i].t_numscans;
2867 tabentry->tuples_returned += tabmsg[i].t_tuples_returned;
2868 tabentry->tuples_fetched += tabmsg[i].t_tuples_fetched;
2869 tabentry->tuples_inserted += tabmsg[i].t_tuples_inserted;
2870 tabentry->tuples_updated += tabmsg[i].t_tuples_updated;
2871 tabentry->tuples_deleted += tabmsg[i].t_tuples_deleted;
2872 tabentry->blocks_fetched += tabmsg[i].t_blocks_fetched;
2873 tabentry->blocks_hit += tabmsg[i].t_blocks_hit;
2877 * And add the block IO to the database entry.
2879 dbentry->n_blocks_fetched += tabmsg[i].t_blocks_fetched;
2880 dbentry->n_blocks_hit += tabmsg[i].t_blocks_hit;
2886 * pgstat_recv_tabpurge() -
2888 * Arrange for dead table removal.
/*
 * Marks the tables listed in the message with destroy = PGSTAT_DESTROY_COUNT.
 * pgstat_write_statsfile() counts that value down and removes the entry when
 * it reaches zero, so removal is deferred rather than immediate.
 */
2892 pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
2894 PgStat_StatDBEntry *dbentry;
2895 PgStat_StatTabEntry *tabentry;
2899 * Make sure the backend is counted for.
2901 if (pgstat_add_backend(&msg->m_hdr) < 0)
2905 * Lookup the database in the hashtable.
2907 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2908 (void *) &(msg->m_hdr.m_databaseid),
2914 * If the database is marked for destroy, this is a delayed UDP packet
2915 * and the tables will go away at DB destruction.
2917 if (dbentry->destroy > 0)
2921 * Process all table entries in the message.
/*
 * NOTE(review): m_nentries comes from the message; no bound check against
 * len or the capacity of m_tableid[] is visible here -- confirm.
 */
2923 for (i = 0; i < msg->m_nentries; i++)
2925 tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2926 (void *) &(msg->m_tableid[i]),
2929 tabentry->destroy = PGSTAT_DESTROY_COUNT;
2935 * pgstat_recv_dropdb() -
2937 * Arrange for dead database removal
/*
 * Marks a database entry with destroy = PGSTAT_DESTROY_COUNT so that
 * pgstat_write_statsfile() removes it once the countdown reaches zero.
 */
2941 pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
2943 PgStat_StatDBEntry *dbentry;
2946 * Make sure the backend is counted for.
2948 if (pgstat_add_backend(&msg->m_hdr) < 0)
2952 * Lookup the database in the hashtable.
/*
 * The key is the message's own m_databaseid field, not the header's
 * m_hdr.m_databaseid that the other handlers look up.
 */
2954 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2955 (void *) &(msg->m_databaseid),
2961 * Mark the database for destruction.
2963 dbentry->destroy = PGSTAT_DESTROY_COUNT;
2968 * pgstat_recv_resetcounter() -
2970 * Reset the statistics for the sending backend's database.
/*
 * Resets the statistics of the sending backend's database
 * (m_hdr.m_databaseid): the table hash is destroyed, the database-level
 * counters are zeroed, and a new empty table hash is created.
 */
2974 pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
2977 PgStat_StatDBEntry *dbentry;
2980 * Make sure the backend is counted for.
2982 if (pgstat_add_backend(&msg->m_hdr) < 0)
2986 * Lookup the database in the hashtable.
2988 dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2989 (void *) &(msg->m_hdr.m_databaseid),
2995 * We simply throw away all the databases table entries by recreating
2996 * a new hash table for them.
2998 if (dbentry->tables != NULL)
2999 hash_destroy(dbentry->tables);
/* n_connects is zeroed too, and any pending destroy countdown is cancelled. */
3001 dbentry->tables = NULL;
3002 dbentry->n_xact_commit = 0;
3003 dbentry->n_xact_rollback = 0;
3004 dbentry->n_blocks_fetched = 0;
3005 dbentry->n_blocks_hit = 0;
3006 dbentry->n_connects = 0;
3007 dbentry->destroy = 0;
/* Same hash parameters as the table hash built in pgstat_add_backend(). */
3009 memset(&hash_ctl, 0, sizeof(hash_ctl));
3010 hash_ctl.keysize = sizeof(Oid);
3011 hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
3012 hash_ctl.hash = tag_hash;
3013 dbentry->tables = hash_create("Per-database table",
3014 PGSTAT_TAB_HASH_SIZE,
3016 HASH_ELEM | HASH_FUNCTION);
3017 if (dbentry->tables == NULL)
3019 /* assume the problem is out-of-memory */
3021 (errcode(ERRCODE_OUT_OF_MEMORY),
3022 errmsg("out of memory in statistics collector --- abort")));