granicus.if.org Git - postgresql/blob - src/backend/postmaster/pgstat.c

   1 /* ----------
   2  * pgstat.c
   3  *
   4  *      All the statistics collector stuff hacked up in one big, ugly file.
   5  *
   6  *      TODO:   - Separate collector, postmaster and backend stuff
   7  *                        into different files.
   8  *
   9  *                      - Add some automatic call for pgstat vacuuming.
  10  *
  11  *                      - Add a pgstat config column to pg_database, so this
  12  *                        entire thing can be enabled/disabled on a per db basis.
  13  *
  14  *      Copyright (c) 2001-2006, PostgreSQL Global Development Group
  15  *
  16  *      $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.131 2006/06/27 03:45:16 alvherre Exp $
  17  * ----------
  18  */
  19 #include "postgres.h"
  20
  21 #include <unistd.h>
  22 #include <fcntl.h>
  23 #include <sys/param.h>
  24 #include <sys/time.h>
  25 #include <sys/socket.h>
  26 #include <netdb.h>
  27 #include <netinet/in.h>
  28 #include <arpa/inet.h>
  29 #include <signal.h>
  30 #include <time.h>
  31
  32 #include "pgstat.h"
  33
  34 #include "access/heapam.h"
  35 #include "access/xact.h"
  36 #include "catalog/pg_database.h"
  37 #include "libpq/libpq.h"
  38 #include "libpq/pqsignal.h"
  39 #include "mb/pg_wchar.h"
  40 #include "miscadmin.h"
  41 #include "postmaster/autovacuum.h"
  42 #include "postmaster/fork_process.h"
  43 #include "postmaster/postmaster.h"
  44 #include "storage/backendid.h"
  45 #include "storage/fd.h"
  46 #include "storage/ipc.h"
  47 #include "storage/pg_shmem.h"
  48 #include "storage/pmsignal.h"
  49 #include "storage/procarray.h"
  50 #include "tcop/tcopprot.h"
  51 #include "utils/hsearch.h"
  52 #include "utils/memutils.h"
  53 #include "utils/ps_status.h"
  54 #include "utils/rel.h"
  55 #include "utils/syscache.h"
  56
  57
  58 /* ----------
  59  * Paths for the statistics files (relative to installation's $PGDATA).
  60  * ----------
  61  */
  62 #define PGSTAT_STAT_FILENAME    "global/pgstat.stat"
  63 #define PGSTAT_STAT_TMPFILE             "global/pgstat.tmp"
  64
  65 /* ----------
  66  * Timer definitions.
  67  * ----------
  68  */
  69 #define PGSTAT_STAT_INTERVAL    500             /* How often to write the status file;
  70                                                                                  * in milliseconds. */
  71
  72 #define PGSTAT_RESTART_INTERVAL 60              /* How often to attempt to restart a
  73                                                                                  * failed statistics collector; in
  74                                                                                  * seconds. */
  75
  76 /* ----------
  77  * Amount of space reserved in pgstat_recvbuffer().
  78  * ----------
  79  */
  80 #define PGSTAT_RECVBUFFERSZ             ((int) (1024 * sizeof(PgStat_Msg)))
  81
  82 /* ----------
  83  * The initial size hints for the hash tables used in the collector.
  84  * ----------
  85  */
  86 #define PGSTAT_DB_HASH_SIZE             16
  87 #define PGSTAT_TAB_HASH_SIZE    512
  88
  89
  90 /* ----------
  91  * GUC parameters
  92  * ----------
  93  */
  94 bool            pgstat_collect_startcollector = true;
  95 bool            pgstat_collect_resetonpmstart = false;
  96 bool            pgstat_collect_tuplelevel = false;
  97 bool            pgstat_collect_blocklevel = false;
  98 bool            pgstat_collect_querystring = false;
  99
 100 /* ----------
 101  * Local data
 102  * ----------
 103  */
 104 NON_EXEC_STATIC int pgStatSock = -1;
 105 NON_EXEC_STATIC int pgStatPipe[2] = {-1, -1};
 106
 107 static struct sockaddr_storage pgStatAddr;
 108
 109 static pid_t pgStatCollectorPid = 0;
 110
 111 static time_t last_pgstat_start_time;
 112
 113 static bool pgStatRunningInCollector = false;
 114
 115 /*
 116  * Place where backends store per-table info to be sent to the collector.
 117  * We store shared relations separately from non-shared ones, to be able to
 118  * send them in separate messages.
 119  */
 120 typedef struct TabStatArray
 121 {
 122         int                     tsa_alloc;              /* num allocated */
 123         int                     tsa_used;               /* num actually used */
 124         PgStat_MsgTabstat **tsa_messages;       /* the array itself */
 125 } TabStatArray;
 126
 127 #define TABSTAT_QUANTUM         4       /* we alloc this many at a time */
 128
 129 static TabStatArray RegularTabStat = {0, 0, NULL};
 130 static TabStatArray SharedTabStat = {0, 0, NULL};
 131
 132 static int      pgStatXactCommit = 0;
 133 static int      pgStatXactRollback = 0;
 134
 135 static TransactionId pgStatDBHashXact = InvalidTransactionId;
 136 static HTAB *pgStatDBHash = NULL;
 137 static TransactionId pgStatLocalStatusXact = InvalidTransactionId;
 138 static PgBackendStatus *localBackendStatusTable = NULL;
 139 static int      localNumBackends = 0;
 140
 141 static volatile bool    need_statwrite;
 142
 143
 144 /* ----------
 145  * Local function forward declarations
 146  * ----------
 147  */
 148 #ifdef EXEC_BACKEND
 149
 150 typedef enum STATS_PROCESS_TYPE
 151 {
 152         STAT_PROC_BUFFER,
 153         STAT_PROC_COLLECTOR
 154 }       STATS_PROCESS_TYPE;
 155
 156 static pid_t pgstat_forkexec(STATS_PROCESS_TYPE procType);
 157 static void pgstat_parseArgs(int argc, char *argv[]);
 158 #endif
 159
 160 NON_EXEC_STATIC void PgstatBufferMain(int argc, char *argv[]);
 161 NON_EXEC_STATIC void PgstatCollectorMain(int argc, char *argv[]);
 162 static void force_statwrite(SIGNAL_ARGS);
 163 static void pgstat_recvbuffer(void);
 164 static void pgstat_exit(SIGNAL_ARGS);
 165 static void pgstat_die(SIGNAL_ARGS);
 166 static void pgstat_beshutdown_hook(int code, Datum arg);
 167
 168 static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
 169 static void pgstat_drop_database(Oid databaseid);
 170 static void pgstat_write_statsfile(void);
 171 static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb);
 172 static void backend_read_statsfile(void);
 173 static void pgstat_read_current_status(void);
 174
 175 static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
 176 static void pgstat_send(void *msg, int len);
 177
 178 static void pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len);
 179 static void pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len);
 180 static void pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len);
 181 static void pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len);
 182 static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 183 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 184 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 185
 186
 187 /* ------------------------------------------------------------
 188  * Public functions called from postmaster follow
 189  * ------------------------------------------------------------
 190  */
 191
 192 /* ----------
 193  * pgstat_init() -
 194  *
 195  *      Called from postmaster at startup. Create the resources required
 196  *      by the statistics collector process.  If unable to do so, do not
 197  *      fail --- better to let the postmaster start with stats collection
 198  *      disabled.
 199  * ----------
 200  */
 201 void
 202 pgstat_init(void)
 203 {
 204         ACCEPT_TYPE_ARG3 alen;
 205         struct addrinfo *addrs = NULL,
 206                            *addr,
 207                                 hints;
 208         int                     ret;
 209         fd_set          rset;
 210         struct timeval tv;
 211         char            test_byte;
 212         int                     sel_res;
 213         int                     tries = 0;
 214
 215 #define TESTBYTEVAL ((char) 199)
 216
 217         /*
 218          * Force start of collector daemon if something to collect.  Note that
 219          * pgstat_collect_querystring is now an independent facility that does
 220          * not require the collector daemon.
 221          */
 222         if (pgstat_collect_tuplelevel ||
 223                 pgstat_collect_blocklevel)
 224                 pgstat_collect_startcollector = true;
 225
 226         /*
 227          * If we don't have to start a collector or should reset the collected
 228          * statistics on postmaster start, simply remove the stats file.
 229          */
 230         if (!pgstat_collect_startcollector || pgstat_collect_resetonpmstart)
 231                 pgstat_reset_all();
 232
 233         /*
 234          * Nothing else required if collector will not get started
 235          */
 236         if (!pgstat_collect_startcollector)
 237                 return;
 238
 239         /*
 240          * Create the UDP socket for sending and receiving statistic messages
 241          */
 242         hints.ai_flags = AI_PASSIVE;
 243         hints.ai_family = PF_UNSPEC;
 244         hints.ai_socktype = SOCK_DGRAM;
 245         hints.ai_protocol = 0;
 246         hints.ai_addrlen = 0;
 247         hints.ai_addr = NULL;
 248         hints.ai_canonname = NULL;
 249         hints.ai_next = NULL;
 250         ret = pg_getaddrinfo_all("localhost", NULL, &hints, &addrs);
 251         if (ret || !addrs)
 252         {
 253                 ereport(LOG,
 254                                 (errmsg("could not resolve \"localhost\": %s",
 255                                                 gai_strerror(ret))));
 256                 goto startup_failed;
 257         }
 258
 259         /*
 260          * On some platforms, pg_getaddrinfo_all() may return multiple addresses
 261          * only one of which will actually work (eg, both IPv6 and IPv4 addresses
 262          * when kernel will reject IPv6).  Worse, the failure may occur at the
 263          * bind() or perhaps even connect() stage.      So we must loop through the
 264          * results till we find a working combination. We will generate LOG
 265          * messages, but no error, for bogus combinations.
 266          */
 267         for (addr = addrs; addr; addr = addr->ai_next)
 268         {
 269 #ifdef HAVE_UNIX_SOCKETS
 270                 /* Ignore AF_UNIX sockets, if any are returned. */
 271                 if (addr->ai_family == AF_UNIX)
 272                         continue;
 273 #endif
 274
 275                 if (++tries > 1)
 276                         ereport(LOG,
 277                                 (errmsg("trying another address for the statistics collector")));
 278
 279                 /*
 280                  * Create the socket.
 281                  */
 282                 if ((pgStatSock = socket(addr->ai_family, SOCK_DGRAM, 0)) < 0)
 283                 {
 284                         ereport(LOG,
 285                                         (errcode_for_socket_access(),
 286                         errmsg("could not create socket for statistics collector: %m")));
 287                         continue;
 288                 }
 289
 290                 /*
 291                  * Bind it to a kernel assigned port on localhost and get the assigned
 292                  * port via getsockname().
 293                  */
 294                 if (bind(pgStatSock, addr->ai_addr, addr->ai_addrlen) < 0)
 295                 {
 296                         ereport(LOG,
 297                                         (errcode_for_socket_access(),
 298                           errmsg("could not bind socket for statistics collector: %m")));
 299                         closesocket(pgStatSock);
 300                         pgStatSock = -1;
 301                         continue;
 302                 }
 303
 304                 alen = sizeof(pgStatAddr);
 305                 if (getsockname(pgStatSock, (struct sockaddr *) & pgStatAddr, &alen) < 0)
 306                 {
 307                         ereport(LOG,
 308                                         (errcode_for_socket_access(),
 309                                          errmsg("could not get address of socket for statistics collector: %m")));
 310                         closesocket(pgStatSock);
 311                         pgStatSock = -1;
 312                         continue;
 313                 }
 314
 315                 /*
 316                  * Connect the socket to its own address.  This saves a few cycles by
 317                  * not having to respecify the target address on every send. This also
 318                  * provides a kernel-level check that only packets from this same
 319                  * address will be received.
 320                  */
 321                 if (connect(pgStatSock, (struct sockaddr *) & pgStatAddr, alen) < 0)
 322                 {
 323                         ereport(LOG,
 324                                         (errcode_for_socket_access(),
 325                         errmsg("could not connect socket for statistics collector: %m")));
 326                         closesocket(pgStatSock);
 327                         pgStatSock = -1;
 328                         continue;
 329                 }
 330
 331                 /*
 332                  * Try to send and receive a one-byte test message on the socket. This
 333                  * is to catch situations where the socket can be created but will not
 334                  * actually pass data (for instance, because kernel packet filtering
 335                  * rules prevent it).
 336                  */
 337                 test_byte = TESTBYTEVAL;
 338                 if (send(pgStatSock, &test_byte, 1, 0) != 1)
 339                 {
 340                         ereport(LOG,
 341                                         (errcode_for_socket_access(),
 342                                          errmsg("could not send test message on socket for statistics collector: %m")));
 343                         closesocket(pgStatSock);
 344                         pgStatSock = -1;
 345                         continue;
 346                 }
 347
 348                 /*
 349                  * There could possibly be a little delay before the message can be
 350                  * received.  We arbitrarily allow up to half a second before deciding
 351                  * it's broken.
 352                  */
 353                 for (;;)                                /* need a loop to handle EINTR */
 354                 {
 355                         FD_ZERO(&rset);
 356                         FD_SET(pgStatSock, &rset);
 357                         tv.tv_sec = 0;
 358                         tv.tv_usec = 500000;
 359                         sel_res = select(pgStatSock + 1, &rset, NULL, NULL, &tv);
 360                         if (sel_res >= 0 || errno != EINTR)
 361                                 break;
 362                 }
 363                 if (sel_res < 0)
 364                 {
 365                         ereport(LOG,
 366                                         (errcode_for_socket_access(),
 367                                          errmsg("select() failed in statistics collector: %m")));
 368                         closesocket(pgStatSock);
 369                         pgStatSock = -1;
 370                         continue;
 371                 }
 372                 if (sel_res == 0 || !FD_ISSET(pgStatSock, &rset))
 373                 {
 374                         /*
 375                          * This is the case we actually think is likely, so take pains to
 376                          * give a specific message for it.
 377                          *
 378                          * errno will not be set meaningfully here, so don't use it.
 379                          */
 380                         ereport(LOG,
 381                                         (errcode(ERRCODE_CONNECTION_FAILURE),
 382                                          errmsg("test message did not get through on socket for statistics collector")));
 383                         closesocket(pgStatSock);
 384                         pgStatSock = -1;
 385                         continue;
 386                 }
 387
 388                 test_byte++;                    /* just make sure variable is changed */
 389
 390                 if (recv(pgStatSock, &test_byte, 1, 0) != 1)
 391                 {
 392                         ereport(LOG,
 393                                         (errcode_for_socket_access(),
 394                                          errmsg("could not receive test message on socket for statistics collector: %m")));
 395                         closesocket(pgStatSock);
 396                         pgStatSock = -1;
 397                         continue;
 398                 }
 399
 400                 if (test_byte != TESTBYTEVAL)   /* strictly paranoia ... */
 401                 {
 402                         ereport(LOG,
 403                                         (errcode(ERRCODE_INTERNAL_ERROR),
 404                                          errmsg("incorrect test message transmission on socket for statistics collector")));
 405                         closesocket(pgStatSock);
 406                         pgStatSock = -1;
 407                         continue;
 408                 }
 409
 410                 /* If we get here, we have a working socket */
 411                 break;
 412         }
 413
 414         /* Did we find a working address? */
 415         if (!addr || pgStatSock < 0)
 416                 goto startup_failed;
 417
 418         /*
 419          * Set the socket to non-blocking IO.  This ensures that if the collector
 420          * falls behind (despite the buffering process), statistics messages will
 421          * be discarded; backends won't block waiting to send messages to the
 422          * collector.
 423          */
 424         if (!pg_set_noblock(pgStatSock))
 425         {
 426                 ereport(LOG,
 427                                 (errcode_for_socket_access(),
 428                                  errmsg("could not set statistics collector socket to nonblocking mode: %m")));
 429                 goto startup_failed;
 430         }
 431
 432         pg_freeaddrinfo_all(hints.ai_family, addrs);
 433
 434         return;
 435
 436 startup_failed:
 437         ereport(LOG,
 438           (errmsg("disabling statistics collector for lack of working socket")));
 439
 440         if (addrs)
 441                 pg_freeaddrinfo_all(hints.ai_family, addrs);
 442
 443         if (pgStatSock >= 0)
 444                 closesocket(pgStatSock);
 445         pgStatSock = -1;
 446
 447         /* Adjust GUC variables to suppress useless activity */
 448         pgstat_collect_startcollector = false;
 449         pgstat_collect_tuplelevel = false;
 450         pgstat_collect_blocklevel = false;
 451 }
 452
 453 /*
 454  * pgstat_reset_all() -
 455  *
 456  * Remove the stats file.  This is used on server start if the
 457  * stats_reset_on_server_start feature is enabled, or if WAL
 458  * recovery is needed after a crash.
 459  */
 460 void
 461 pgstat_reset_all(void)
 462 {
 463         unlink(PGSTAT_STAT_FILENAME);
 464 }
 465
 466 #ifdef EXEC_BACKEND
 467
 468 /*
 469  * pgstat_forkexec() -
 470  *
 471  * Format up the arglist for, then fork and exec, statistics
 472  * (buffer and collector) processes
 473  */
 474 static pid_t
 475 pgstat_forkexec(STATS_PROCESS_TYPE procType)
 476 {
 477         char       *av[10];
 478         int                     ac = 0,
 479                                 bufc = 0,
 480                                 i;
 481         char            pgstatBuf[2][32];
 482
 483         av[ac++] = "postgres";
 484
 485         switch (procType)
 486         {
 487                 case STAT_PROC_BUFFER:
 488                         av[ac++] = "--forkbuf";
 489                         break;
 490
 491                 case STAT_PROC_COLLECTOR:
 492                         av[ac++] = "--forkcol";
 493                         break;
 494
 495                 default:
 496                         Assert(false);
 497         }
 498
 499         av[ac++] = NULL;                        /* filled in by postmaster_forkexec */
 500
 501         /* postgres_exec_path is not passed by write_backend_variables */
 502         av[ac++] = postgres_exec_path;
 503
 504         /* Add to the arg list */
 505         Assert(bufc <= lengthof(pgstatBuf));
 506         for (i = 0; i < bufc; i++)
 507                 av[ac++] = pgstatBuf[i];
 508
 509         av[ac] = NULL;
 510         Assert(ac < lengthof(av));
 511
 512         return postmaster_forkexec(ac, av);
 513 }
 514
 515
 516 /*
 517  * pgstat_parseArgs() -
 518  *
 519  * Extract data from the arglist for exec'ed statistics
 520  * (buffer and collector) processes
 521  */
 522 static void
 523 pgstat_parseArgs(int argc, char *argv[])
 524 {
 525         Assert(argc == 4);
 526
 527         argc = 3;
 528         StrNCpy(postgres_exec_path, argv[argc++], MAXPGPATH);
 529 }
 530 #endif   /* EXEC_BACKEND */
 531
 532
 533 /* ----------
 534  * pgstat_start() -
 535  *
 536  *      Called from postmaster at startup or after an existing collector
 537  *      died.  Attempt to fire up a fresh statistics collector.
 538  *
 539  *      Returns PID of child process, or 0 if fail.
 540  *
 541  *      Note: if fail, we will be called again from the postmaster main loop.
 542  * ----------
 543  */
 544 int
 545 pgstat_start(void)
 546 {
 547         time_t          curtime;
 548         pid_t           pgStatPid;
 549
 550         /*
 551          * Do nothing if no collector needed
 552          */
 553         if (!pgstat_collect_startcollector)
 554                 return 0;
 555
 556         /*
 557          * Do nothing if too soon since last collector start.  This is a safety
 558          * valve to protect against continuous respawn attempts if the collector
 559          * is dying immediately at launch.      Note that since we will be re-called
 560          * from the postmaster main loop, we will get another chance later.
 561          */
 562         curtime = time(NULL);
 563         if ((unsigned int) (curtime - last_pgstat_start_time) <
 564                 (unsigned int) PGSTAT_RESTART_INTERVAL)
 565                 return 0;
 566         last_pgstat_start_time = curtime;
 567
 568         /*
 569          * Check that the socket is there, else pgstat_init failed.
 570          */
 571         if (pgStatSock < 0)
 572         {
 573                 ereport(LOG,
 574                                 (errmsg("statistics collector startup skipped")));
 575
 576                 /*
 577                  * We can only get here if someone tries to manually turn
 578                  * pgstat_collect_startcollector on after it had been off.
 579                  */
 580                 pgstat_collect_startcollector = false;
 581                 return 0;
 582         }
 583
 584         /*
 585          * Okay, fork off the collector.
 586          */
 587 #ifdef EXEC_BACKEND
 588         switch ((pgStatPid = pgstat_forkexec(STAT_PROC_BUFFER)))
 589 #else
 590         switch ((pgStatPid = fork_process()))
 591 #endif
 592         {
 593                 case -1:
 594                         ereport(LOG,
 595                                         (errmsg("could not fork statistics buffer: %m")));
 596                         return 0;
 597
 598 #ifndef EXEC_BACKEND
 599                 case 0:
 600                         /* in postmaster child ... */
 601                         /* Close the postmaster's sockets */
 602                         ClosePostmasterPorts(false);
 603
 604                         /* Lose the postmaster's on-exit routines */
 605                         on_exit_reset();
 606
 607                         /* Drop our connection to postmaster's shared memory, as well */
 608                         PGSharedMemoryDetach();
 609
 610                         PgstatBufferMain(0, NULL);
 611                         break;
 612 #endif
 613
 614                 default:
 615                         return (int) pgStatPid;
 616         }
 617
 618         /* shouldn't get here */
 619         return 0;
 620 }
 621
 622
 623 /* ------------------------------------------------------------
 624  * Public functions used by backends follow
 625  *------------------------------------------------------------
 626  */
 627
 628
 629 /* ----------
 630  * pgstat_report_tabstat() -
 631  *
 632  *      Called from tcop/postgres.c to send the so far collected
 633  *      per table access statistics to the collector.
 634  * ----------
 635  */
 636 void
 637 pgstat_report_tabstat(void)
 638 {
 639         int                     i;
 640
 641         if (pgStatSock < 0 ||
 642                 (!pgstat_collect_tuplelevel &&
 643                  !pgstat_collect_blocklevel))
 644         {
 645                 /* Not reporting stats, so just flush whatever we have */
 646                 RegularTabStat.tsa_used = 0;
 647                 SharedTabStat.tsa_used = 0;
 648                 return;
 649         }
 650
 651         /*
 652          * For each message buffer used during the last query set the header
 653          * fields and send it out.
 654          */
 655         for (i = 0; i < RegularTabStat.tsa_used; i++)
 656         {
 657                 PgStat_MsgTabstat *tsmsg = RegularTabStat.tsa_messages[i];
 658                 int                     n;
 659                 int                     len;
 660
 661                 n = tsmsg->m_nentries;
 662                 len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
 663                         n * sizeof(PgStat_TableEntry);
 664
 665                 tsmsg->m_xact_commit = pgStatXactCommit;
 666                 tsmsg->m_xact_rollback = pgStatXactRollback;
 667                 pgStatXactCommit = 0;
 668                 pgStatXactRollback = 0;
 669
 670                 pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
 671                 tsmsg->m_databaseid = MyDatabaseId;
 672                 pgstat_send(tsmsg, len);
 673         }
 674         RegularTabStat.tsa_used = 0;
 675
 676         /* Ditto, for shared relations */
 677         for (i = 0; i < SharedTabStat.tsa_used; i++)
 678         {
 679                 PgStat_MsgTabstat *tsmsg = SharedTabStat.tsa_messages[i];
 680                 int                     n;
 681                 int                     len;
 682
 683                 n = tsmsg->m_nentries;
 684                 len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
 685                         n * sizeof(PgStat_TableEntry);
 686
 687                 /* We don't report transaction commit/abort here */
 688                 tsmsg->m_xact_commit = 0;
 689                 tsmsg->m_xact_rollback = 0;
 690
 691                 pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
 692                 tsmsg->m_databaseid = InvalidOid;
 693                 pgstat_send(tsmsg, len);
 694         }
 695         SharedTabStat.tsa_used = 0;
 696 }
 697
 698
 699 /* ----------
 700  * pgstat_vacuum_tabstat() -
 701  *
 702  *      Will tell the collector about objects he can get rid of.
 703  * ----------
 704  */
 705 void
 706 pgstat_vacuum_tabstat(void)
 707 {
 708         List       *oidlist;
 709         Relation        rel;
 710         HeapScanDesc scan;
 711         HeapTuple       tup;
 712         PgStat_MsgTabpurge msg;
 713         HASH_SEQ_STATUS hstat;
 714         PgStat_StatDBEntry *dbentry;
 715         PgStat_StatTabEntry *tabentry;
 716         int                     len;
 717
 718         if (pgStatSock < 0)
 719                 return;
 720
 721         /*
 722          * If not done for this transaction, read the statistics collector stats
 723          * file into some hash tables.
 724          */
 725         backend_read_statsfile();
 726
 727         /*
 728          * Read pg_database and make a list of OIDs of all existing databases
 729          */
 730         oidlist = NIL;
 731         rel = heap_open(DatabaseRelationId, AccessShareLock);
 732         scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
 733         while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
 734         {
 735                 oidlist = lappend_oid(oidlist, HeapTupleGetOid(tup));
 736         }
 737         heap_endscan(scan);
 738         heap_close(rel, AccessShareLock);
 739
 740         /*
 741          * Search the database hash table for dead databases and tell the
 742          * collector to drop them.
 743          */
 744         hash_seq_init(&hstat, pgStatDBHash);
 745         while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
 746         {
 747                 Oid                     dbid = dbentry->databaseid;
 748
 749                 if (!list_member_oid(oidlist, dbid))
 750                         pgstat_drop_database(dbid);
 751         }
 752
 753         /* Clean up */
 754         list_free(oidlist);
 755
 756         /*
 757          * Lookup our own database entry; if not found, nothing more to do.
 758          */
 759         dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
 760                                                                                                  (void *) &MyDatabaseId,
 761                                                                                                  HASH_FIND, NULL);
 762         if (dbentry == NULL || dbentry->tables == NULL)
 763                 return;
 764
 765         /*
 766          * Similarly to above, make a list of all known relations in this DB.
 767          */
 768         oidlist = NIL;
 769         rel = heap_open(RelationRelationId, AccessShareLock);
 770         scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
 771         while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
 772         {
 773                 oidlist = lappend_oid(oidlist, HeapTupleGetOid(tup));
 774         }
 775         heap_endscan(scan);
 776         heap_close(rel, AccessShareLock);
 777
 778         /*
 779          * Initialize our messages table counter to zero
 780          */
 781         msg.m_nentries = 0;
 782
 783         /*
 784          * Check for all tables listed in stats hashtable if they still exist.
 785          */
 786         hash_seq_init(&hstat, dbentry->tables);
 787         while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != NULL)
 788         {
 789                 if (list_member_oid(oidlist, tabentry->tableid))
 790                         continue;
 791
 792                 /*
 793                  * Not there, so add this table's Oid to the message
 794                  */
 795                 msg.m_tableid[msg.m_nentries++] = tabentry->tableid;
 796
 797                 /*
 798                  * If the message is full, send it out and reinitialize to empty
 799                  */
 800                 if (msg.m_nentries >= PGSTAT_NUM_TABPURGE)
 801                 {
 802                         len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
 803                                 +msg.m_nentries * sizeof(Oid);
 804
 805                         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
 806                         msg.m_databaseid = MyDatabaseId;
 807                         pgstat_send(&msg, len);
 808
 809                         msg.m_nentries = 0;
 810                 }
 811         }
 812
 813         /*
 814          * Send the rest
 815          */
 816         if (msg.m_nentries > 0)
 817         {
 818                 len = offsetof(PgStat_MsgTabpurge, m_tableid[0])
 819                         +msg.m_nentries * sizeof(Oid);
 820
 821                 pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
 822                 msg.m_databaseid = MyDatabaseId;
 823                 pgstat_send(&msg, len);
 824         }
 825
 826         /* Clean up */
 827         list_free(oidlist);
 828 }
 829
 830
 831 /* ----------
 832  * pgstat_drop_database() -
 833  *
 834  *      Tell the collector that we just dropped a database.
 835  *      (If the message gets lost, we will still clean the dead DB eventually
 836  *      via future invocations of pgstat_vacuum_tabstat().)
 837  * ----------
 838  */
 839 static void
 840 pgstat_drop_database(Oid databaseid)
 841 {
 842         PgStat_MsgDropdb msg;
 843
 844         if (pgStatSock < 0)
 845                 return;
 846
 847         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DROPDB);
 848         msg.m_databaseid = databaseid;
 849         pgstat_send(&msg, sizeof(msg));
 850 }
 851
 852
 853 /* ----------
 854  * pgstat_drop_relation() -
 855  *
 856  *      Tell the collector that we just dropped a relation.
 857  *      (If the message gets lost, we will still clean the dead entry eventually
 858  *      via future invocations of pgstat_vacuum_tabstat().)
 859  * ----------
 860  */
 861 void
 862 pgstat_drop_relation(Oid relid)
 863 {
 864         PgStat_MsgTabpurge msg;
 865         int                     len;
 866
 867         if (pgStatSock < 0)
 868                 return;
 869
 870         msg.m_tableid[0] = relid;
 871         msg.m_nentries = 1;
 872
 873         len = offsetof(PgStat_MsgTabpurge, m_tableid[0]) + sizeof(Oid);
 874
 875         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_TABPURGE);
 876         msg.m_databaseid = MyDatabaseId;
 877         pgstat_send(&msg, len);
 878 }
 879
 880
 881 /* ----------
 882  * pgstat_reset_counters() -
 883  *
 884  *      Tell the statistics collector to reset counters for our database.
 885  * ----------
 886  */
 887 void
 888 pgstat_reset_counters(void)
 889 {
 890         PgStat_MsgResetcounter msg;
 891
 892         if (pgStatSock < 0)
 893                 return;
 894
 895         if (!superuser())
 896                 ereport(ERROR,
 897                                 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
 898                                  errmsg("must be superuser to reset statistics counters")));
 899
 900         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETCOUNTER);
 901         msg.m_databaseid = MyDatabaseId;
 902         pgstat_send(&msg, sizeof(msg));
 903 }
 904
 905
 906 /* ----------
 907  * pgstat_report_autovac() -
 908  *
 909  *      Called from autovacuum.c to report startup of an autovacuum process.
 910  *      We are called before InitPostgres is done, so can't rely on MyDatabaseId;
 911  *      the db OID must be passed in, instead.
 912  * ----------
 913  */
 914 void
 915 pgstat_report_autovac(Oid dboid)
 916 {
 917         PgStat_MsgAutovacStart msg;
 918
 919         if (pgStatSock < 0)
 920                 return;
 921
 922         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_AUTOVAC_START);
 923         msg.m_databaseid = dboid;
 924         msg.m_start_time = GetCurrentTimestamp();
 925
 926         pgstat_send(&msg, sizeof(msg));
 927 }
 928
 929
 930 /* ---------
 931  * pgstat_report_vacuum() -
 932  *
 933  *      Tell the collector about the table we just vacuumed.
 934  * ---------
 935  */
 936 void
 937 pgstat_report_vacuum(Oid tableoid, bool shared,
 938                                          bool analyze, PgStat_Counter tuples)
 939 {
 940         PgStat_MsgVacuum msg;
 941
 942         if (pgStatSock < 0 ||
 943                 !pgstat_collect_tuplelevel)
 944                 return;
 945
 946         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
 947         msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
 948         msg.m_tableoid = tableoid;
 949         msg.m_analyze = analyze;
 950         msg.m_autovacuum = IsAutoVacuumProcess(); /* is this autovacuum? */
 951         msg.m_vacuumtime = GetCurrentTimestamp();
 952         msg.m_tuples = tuples;
 953         pgstat_send(&msg, sizeof(msg));
 954 }
 955
 956 /* --------
 957  * pgstat_report_analyze() -
 958  *
 959  *      Tell the collector about the table we just analyzed.
 960  * --------
 961  */
 962 void
 963 pgstat_report_analyze(Oid tableoid, bool shared, PgStat_Counter livetuples,
 964                                           PgStat_Counter deadtuples)
 965 {
 966         PgStat_MsgAnalyze msg;
 967
 968         if (pgStatSock < 0 ||
 969                 !pgstat_collect_tuplelevel)
 970                 return;
 971
 972         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
 973         msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
 974         msg.m_tableoid = tableoid;
 975         msg.m_autovacuum = IsAutoVacuumProcess(); /* is this autovacuum? */
 976         msg.m_analyzetime = GetCurrentTimestamp();
 977         msg.m_live_tuples = livetuples;
 978         msg.m_dead_tuples = deadtuples;
 979         pgstat_send(&msg, sizeof(msg));
 980 }
 981
 982
 983 /* ----------
 984  * pgstat_ping() -
 985  *
 986  *      Send some junk data to the collector to increase traffic.
 987  * ----------
 988  */
 989 void
 990 pgstat_ping(void)
 991 {
 992         PgStat_MsgDummy msg;
 993
 994         if (pgStatSock < 0)
 995                 return;
 996
 997         pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DUMMY);
 998         pgstat_send(&msg, sizeof(msg));
 999 }
1000
1001 /*
1002  * Enlarge a TabStatArray
1003  */
1004 static void
1005 more_tabstat_space(TabStatArray *tsarr)
1006 {
1007         PgStat_MsgTabstat *newMessages;
1008         PgStat_MsgTabstat **msgArray;
1009         int                     newAlloc;
1010         int                     i;
1011
1012         AssertArg(PointerIsValid(tsarr));
1013
1014         newAlloc = tsarr->tsa_alloc + TABSTAT_QUANTUM;
1015
1016         /* Create (another) quantum of message buffers */
1017         newMessages = (PgStat_MsgTabstat *)
1018                 MemoryContextAllocZero(TopMemoryContext,
1019                                                            sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM);
1020
1021         /* Create or enlarge the pointer array */
1022         if (tsarr->tsa_messages == NULL)
1023                 msgArray = (PgStat_MsgTabstat **)
1024                         MemoryContextAlloc(TopMemoryContext,
1025                                                            sizeof(PgStat_MsgTabstat *) * newAlloc);
1026         else
1027                 msgArray = (PgStat_MsgTabstat **)
1028                         repalloc(tsarr->tsa_messages,
1029                                          sizeof(PgStat_MsgTabstat *) * newAlloc);
1030
1031         for (i = 0; i < TABSTAT_QUANTUM; i++)
1032                 msgArray[tsarr->tsa_alloc + i] = newMessages++;
1033         tsarr->tsa_messages = msgArray;
1034         tsarr->tsa_alloc = newAlloc;
1035
1036         Assert(tsarr->tsa_used < tsarr->tsa_alloc);
1037 }
1038
1039 /* ----------
1040  * pgstat_initstats() -
1041  *
1042  *      Called from various places usually dealing with initialization
1043  *      of Relation or Scan structures. The data placed into these
1044  *      structures from here tell where later to count for buffer reads,
1045  *      scans and tuples fetched.
1046  * ----------
1047  */
1048 void
1049 pgstat_initstats(PgStat_Info *stats, Relation rel)
1050 {
1051         Oid                     rel_id = rel->rd_id;
1052         PgStat_TableEntry *useent;
1053         TabStatArray *tsarr;
1054         PgStat_MsgTabstat *tsmsg;
1055         int                     mb;
1056         int                     i;
1057
1058         /*
1059          * Initialize data not to count at all.
1060          */
1061         stats->tabentry = NULL;
1062
1063         if (pgStatSock < 0 ||
1064                 !(pgstat_collect_tuplelevel ||
1065                   pgstat_collect_blocklevel))
1066                 return;
1067
1068         tsarr = rel->rd_rel->relisshared ? &SharedTabStat : &RegularTabStat;
1069
1070         /*
1071          * Search the already-used message slots for this relation.
1072          */
1073         for (mb = 0; mb < tsarr->tsa_used; mb++)
1074         {
1075                 tsmsg = tsarr->tsa_messages[mb];
1076
1077                 for (i = tsmsg->m_nentries; --i >= 0;)
1078                 {
1079                         if (tsmsg->m_entry[i].t_id == rel_id)
1080                         {
1081                                 stats->tabentry = (void *) &(tsmsg->m_entry[i]);
1082                                 return;
1083                         }
1084                 }
1085
1086                 if (tsmsg->m_nentries >= PGSTAT_NUM_TABENTRIES)
1087                         continue;
1088
1089                 /*
1090                  * Not found, but found a message buffer with an empty slot instead.
1091                  * Fine, let's use this one.
1092                  */
1093                 i = tsmsg->m_nentries++;
1094                 useent = &tsmsg->m_entry[i];
1095                 MemSet(useent, 0, sizeof(PgStat_TableEntry));
1096                 useent->t_id = rel_id;
1097                 stats->tabentry = (void *) useent;
1098                 return;
1099         }
1100
1101         /*
1102          * If we ran out of message buffers, we just allocate more.
1103          */
1104         if (tsarr->tsa_used >= tsarr->tsa_alloc)
1105                 more_tabstat_space(tsarr);
1106
1107         /*
1108          * Use the first entry of the next message buffer.
1109          */
1110         mb = tsarr->tsa_used++;
1111         tsmsg = tsarr->tsa_messages[mb];
1112         tsmsg->m_nentries = 1;
1113         useent = &tsmsg->m_entry[0];
1114         MemSet(useent, 0, sizeof(PgStat_TableEntry));
1115         useent->t_id = rel_id;
1116         stats->tabentry = (void *) useent;
1117 }
1118
1119
1120 /* ----------
1121  * pgstat_count_xact_commit() -
1122  *
1123  *      Called from access/transam/xact.c to count transaction commits.
1124  * ----------
1125  */
1126 void
1127 pgstat_count_xact_commit(void)
1128 {
1129         if      (!pgstat_collect_tuplelevel &&
1130                  !pgstat_collect_blocklevel)
1131                 return;
1132
1133         pgStatXactCommit++;
1134
1135         /*
1136          * If there was no relation activity yet, just make one existing message
1137          * buffer used without slots, causing the next report to tell new
1138          * xact-counters.
1139          */
1140         if (RegularTabStat.tsa_alloc == 0)
1141                 more_tabstat_space(&RegularTabStat);
1142
1143         if (RegularTabStat.tsa_used == 0)
1144         {
1145                 RegularTabStat.tsa_used++;
1146                 RegularTabStat.tsa_messages[0]->m_nentries = 0;
1147         }
1148 }
1149
1150
1151 /* ----------
1152  * pgstat_count_xact_rollback() -
1153  *
1154  *      Called from access/transam/xact.c to count transaction rollbacks.
1155  * ----------
1156  */
1157 void
1158 pgstat_count_xact_rollback(void)
1159 {
1160         if      (!pgstat_collect_tuplelevel &&
1161                  !pgstat_collect_blocklevel)
1162                 return;
1163
1164         pgStatXactRollback++;
1165
1166         /*
1167          * If there was no relation activity yet, just make one existing message
1168          * buffer used without slots, causing the next report to tell new
1169          * xact-counters.
1170          */
1171         if (RegularTabStat.tsa_alloc == 0)
1172                 more_tabstat_space(&RegularTabStat);
1173
1174         if (RegularTabStat.tsa_used == 0)
1175         {
1176                 RegularTabStat.tsa_used++;
1177                 RegularTabStat.tsa_messages[0]->m_nentries = 0;
1178         }
1179 }
1180
1181
1182 /* ----------
1183  * pgstat_fetch_stat_dbentry() -
1184  *
1185  *      Support function for the SQL-callable pgstat* functions. Returns
1186  *      the collected statistics for one database or NULL. NULL doesn't mean
1187  *      that the database doesn't exist, it is just not yet known by the
1188  *      collector, so the caller is better off to report ZERO instead.
1189  * ----------
1190  */
1191 PgStat_StatDBEntry *
1192 pgstat_fetch_stat_dbentry(Oid dbid)
1193 {
1194         /*
1195          * If not done for this transaction, read the statistics collector stats
1196          * file into some hash tables.
1197          */
1198         backend_read_statsfile();
1199
1200         /*
1201          * Lookup the requested database; return NULL if not found
1202          */
1203         return (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1204                                                                                           (void *) &dbid,
1205                                                                                           HASH_FIND, NULL);
1206 }
1207
1208
1209 /* ----------
1210  * pgstat_fetch_stat_tabentry() -
1211  *
1212  *      Support function for the SQL-callable pgstat* functions. Returns
1213  *      the collected statistics for one table or NULL. NULL doesn't mean
1214  *      that the table doesn't exist, it is just not yet known by the
1215  *      collector, so the caller is better off to report ZERO instead.
1216  * ----------
1217  */
1218 PgStat_StatTabEntry *
1219 pgstat_fetch_stat_tabentry(Oid relid)
1220 {
1221         Oid                     dbid;
1222         PgStat_StatDBEntry *dbentry;
1223         PgStat_StatTabEntry *tabentry;
1224
1225         /*
1226          * If not done for this transaction, read the statistics collector stats
1227          * file into some hash tables.
1228          */
1229         backend_read_statsfile();
1230
1231         /*
1232          * Lookup our database, then look in its table hash table.
1233          */
1234         dbid = MyDatabaseId;
1235         dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1236                                                                                                  (void *) &dbid,
1237                                                                                                  HASH_FIND, NULL);
1238         if (dbentry != NULL && dbentry->tables != NULL)
1239         {
1240                 tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
1241                                                                                                            (void *) &relid,
1242                                                                                                            HASH_FIND, NULL);
1243                 if (tabentry)
1244                         return tabentry;
1245         }
1246
1247         /*
1248          * If we didn't find it, maybe it's a shared table.
1249          */
1250         dbid = InvalidOid;
1251         dbentry = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
1252                                                                                                  (void *) &dbid,
1253                                                                                                  HASH_FIND, NULL);
1254         if (dbentry != NULL && dbentry->tables != NULL)
1255         {
1256                 tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
1257                                                                                                            (void *) &relid,
1258                                                                                                            HASH_FIND, NULL);
1259                 if (tabentry)
1260                         return tabentry;
1261         }
1262
1263         return NULL;
1264 }
1265
1266
1267 /* ----------
1268  * pgstat_fetch_stat_beentry() -
1269  *
1270  *      Support function for the SQL-callable pgstat* functions. Returns
1271  *      our local copy of the current-activity entry for one backend.
1272  *
1273  *      NB: caller is responsible for a check if the user is permitted to see
1274  *      this info (especially the querystring).
1275  * ----------
1276  */
1277 PgBackendStatus *
1278 pgstat_fetch_stat_beentry(int beid)
1279 {
1280         pgstat_read_current_status();
1281
1282         if (beid < 1 || beid > localNumBackends)
1283                 return NULL;
1284
1285         return &localBackendStatusTable[beid - 1];
1286 }
1287
1288
1289 /* ----------
1290  * pgstat_fetch_stat_numbackends() -
1291  *
1292  *      Support function for the SQL-callable pgstat* functions. Returns
1293  *      the maximum current backend id.
1294  * ----------
1295  */
1296 int
1297 pgstat_fetch_stat_numbackends(void)
1298 {
1299         pgstat_read_current_status();
1300
1301         return localNumBackends;
1302 }
1303
1304
1305 /* ------------------------------------------------------------
1306  * Functions for management of the shared-memory PgBackendStatus array
1307  * ------------------------------------------------------------
1308  */
1309
/* Start of the shared PgBackendStatus array; set by CreateSharedBackendStatus() */
static PgBackendStatus *BackendStatusArray = NULL;
/* This process's own slot in that array; NULL until pgstat_bestart() sets it */
static PgBackendStatus *MyBEEntry = NULL;
1312
1313
1314 /*
1315  * Report shared-memory space needed by CreateSharedBackendStatus.
1316  */
1317 Size
1318 BackendStatusShmemSize(void)
1319 {
1320         Size            size;
1321
1322         size = mul_size(sizeof(PgBackendStatus), MaxBackends);
1323         return size;
1324 }
1325
1326 /*
1327  * Initialize the shared status array during postmaster startup.
1328  */
1329 void
1330 CreateSharedBackendStatus(void)
1331 {
1332         Size            size = BackendStatusShmemSize();
1333         bool            found;
1334
1335         /* Create or attach to the shared array */
1336         BackendStatusArray = (PgBackendStatus *)
1337                 ShmemInitStruct("Backend Status Array", size, &found);
1338
1339         if (!found)
1340         {
1341                 /*
1342                  * We're the first - initialize.
1343                  */
1344                 MemSet(BackendStatusArray, 0, size);
1345         }
1346 }
1347
1348
1349 /* ----------
1350  * pgstat_bestart() -
1351  *
1352  *      Initialize this backend's entry in the PgBackendStatus array,
1353  *      and set up an on-proc-exit hook that will clear it again.
1354  *      Called from InitPostgres.  MyBackendId and MyDatabaseId must be set.
1355  * ----------
1356  */
1357 void
1358 pgstat_bestart(void)
1359 {
1360         volatile PgBackendStatus *beentry;
1361         TimestampTz proc_start_timestamp;
1362         Oid                     userid;
1363         SockAddr        clientaddr;
1364
1365         Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends);
1366         MyBEEntry = &BackendStatusArray[MyBackendId - 1];
1367
1368         /*
1369          * To minimize the time spent modifying the entry, fetch all the
1370          * needed data first.
1371          *
1372          * If we have a MyProcPort, use its session start time (for consistency,
1373          * and to save a kernel call).
1374          */
1375         if (MyProcPort)
1376                 proc_start_timestamp = MyProcPort->SessionStartTime;
1377         else
1378                 proc_start_timestamp = GetCurrentTimestamp();
1379         userid = GetSessionUserId();
1380
1381         /*
1382          * We may not have a MyProcPort (eg, if this is the autovacuum process).
1383          * If so, use all-zeroes client address, which is dealt with specially in
1384          * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port.
1385          */
1386         if (MyProcPort)
1387                 memcpy(&clientaddr, &MyProcPort->raddr, sizeof(clientaddr));
1388         else
1389                 MemSet(&clientaddr, 0, sizeof(clientaddr));
1390
1391         /*
1392          * Initialize my status entry, following the protocol of bumping
1393          * st_changecount before and after; and make sure it's even afterwards.
1394          * We use a volatile pointer here to ensure the compiler doesn't try to
1395          * get cute.
1396          */
1397         beentry = MyBEEntry;
1398         do {
1399                 beentry->st_changecount++;
1400         } while ((beentry->st_changecount & 1) == 0);
1401
1402         beentry->st_procpid = MyProcPid;
1403         beentry->st_proc_start_timestamp = proc_start_timestamp;
1404         beentry->st_activity_start_timestamp = 0;
1405         beentry->st_databaseid = MyDatabaseId;
1406         beentry->st_userid = userid;
1407         beentry->st_clientaddr = clientaddr;
1408         beentry->st_activity[0] = '\0';
1409         /* Also make sure the last byte in the string area is always 0 */
1410         beentry->st_activity[PGBE_ACTIVITY_SIZE - 1] = '\0';
1411
1412         beentry->st_changecount++;
1413         Assert((beentry->st_changecount & 1) == 0);
1414
1415         /*
1416          * Set up a process-exit hook to clean up.
1417          */
1418         on_shmem_exit(pgstat_beshutdown_hook, 0);
1419 }
1420
1421 /*
1422  * Shut down a single backend's statistics reporting at process exit.
1423  *
1424  * Flush any remaining statistics counts out to the collector.
1425  * Without this, operations triggered during backend exit (such as
1426  * temp table deletions) won't be counted.
1427  *
1428  * Lastly, clear out our entry in the PgBackendStatus array.
1429  */
1430 static void
1431 pgstat_beshutdown_hook(int code, Datum arg)
1432 {
1433         volatile PgBackendStatus *beentry;
1434
1435         pgstat_report_tabstat();
1436
1437         /*
1438          * Clear my status entry, following the protocol of bumping
1439          * st_changecount before and after.  We use a volatile pointer here
1440          * to ensure the compiler doesn't try to get cute.
1441          */
1442         beentry = MyBEEntry;
1443         beentry->st_changecount++;
1444
1445         beentry->st_procpid = 0;        /* mark invalid */
1446
1447         beentry->st_changecount++;
1448         Assert((beentry->st_changecount & 1) == 0);
1449 }
1450
1451
1452 /* ----------
1453  * pgstat_report_activity() -
1454  *
1455  *      Called from tcop/postgres.c to report what the backend is actually doing
1456  *      (usually "<IDLE>" or the start of the query to be executed).
1457  * ----------
1458  */
1459 void
1460 pgstat_report_activity(const char *cmd_str)
1461 {
1462         volatile PgBackendStatus *beentry;
1463         TimestampTz start_timestamp;
1464         int                     len;
1465
1466         if (!pgstat_collect_querystring)
1467                 return;
1468
1469         /*
1470          * To minimize the time spent modifying the entry, fetch all the
1471          * needed data first.
1472          */
1473         start_timestamp = GetCurrentStatementStartTimestamp();
1474
1475         len = strlen(cmd_str);
1476         len = pg_mbcliplen(cmd_str, len, PGBE_ACTIVITY_SIZE - 1);
1477
1478         /*
1479          * Update my status entry, following the protocol of bumping
1480          * st_changecount before and after.  We use a volatile pointer here
1481          * to ensure the compiler doesn't try to get cute.
1482          */
1483         beentry = MyBEEntry;
1484         beentry->st_changecount++;
1485
1486         beentry->st_activity_start_timestamp = start_timestamp;
1487         memcpy((char *) beentry->st_activity, cmd_str, len);
1488         beentry->st_activity[len] = '\0';
1489
1490         beentry->st_changecount++;
1491         Assert((beentry->st_changecount & 1) == 0);
1492 }
1493
1494
1495 /* ----------
1496  * pgstat_read_current_status() -
1497  *
1498  *      Copy the current contents of the PgBackendStatus array to local memory,
1499  *      if not already done in this transaction.
1500  * ----------
1501  */
1502 static void
1503 pgstat_read_current_status(void)
1504 {
1505         TransactionId topXid = GetTopTransactionId();
1506         volatile PgBackendStatus *beentry;
1507         PgBackendStatus *localentry;
1508         int                     i;
1509
1510         Assert(!pgStatRunningInCollector);
1511         if (TransactionIdEquals(pgStatLocalStatusXact, topXid))
1512                 return;                                 /* already done */
1513
1514         localBackendStatusTable = (PgBackendStatus *)
1515                 MemoryContextAlloc(TopTransactionContext,
1516                                                    sizeof(PgBackendStatus) * MaxBackends);
1517         localNumBackends = 0;
1518
1519         beentry = BackendStatusArray;
1520         localentry = localBackendStatusTable;
1521         for (i = 1; i <= MaxBackends; i++)
1522         {
1523                 /*
1524                  * Follow the protocol of retrying if st_changecount changes while
1525                  * we copy the entry, or if it's odd.  (The check for odd is needed
1526                  * to cover the case where we are able to completely copy the entry
1527                  * while the source backend is between increment steps.)  We use a
1528                  * volatile pointer here to ensure the compiler doesn't try to get
1529                  * cute.
1530                  */
1531                 for (;;)
1532                 {
1533                         int             save_changecount = beentry->st_changecount;
1534
1535                         /*
1536                          * XXX if PGBE_ACTIVITY_SIZE is really large, it might be best
1537                          * to use strcpy not memcpy for copying the activity string?
1538                          */
1539                         memcpy(localentry, (char *) beentry, sizeof(PgBackendStatus));
1540
1541                         if (save_changecount == beentry->st_changecount &&
1542                                 (save_changecount & 1) == 0)
1543                                 break;
1544
1545                         /* Make sure we can break out of loop if stuck... */
1546                         CHECK_FOR_INTERRUPTS();
1547                 }
1548
1549                 beentry++;
1550                 /* Only valid entries get included into the local array */
1551                 if (localentry->st_procpid > 0)
1552                 {
1553                         localentry++;
1554                         localNumBackends++;
1555                 }
1556         }
1557
1558         pgStatLocalStatusXact = topXid;
1559 }
1560
1561
1562 /* ------------------------------------------------------------
1563  * Local support functions follow
1564  * ------------------------------------------------------------
1565  */
1566
1567
1568 /* ----------
1569  * pgstat_setheader() -
1570  *
1571  *              Set common header fields in a statistics message
1572  * ----------
1573  */
1574 static void
1575 pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype)
1576 {
1577         hdr->m_type = mtype;
1578 }
1579
1580
1581 /* ----------
1582  * pgstat_send() -
1583  *
1584  *              Send out one statistics message to the collector
1585  * ----------
1586  */
1587 static void
1588 pgstat_send(void *msg, int len)
1589 {
1590         if (pgStatSock < 0)
1591                 return;
1592
1593         ((PgStat_MsgHdr *) msg)->m_size = len;
1594
1595 #ifdef USE_ASSERT_CHECKING
1596         if (send(pgStatSock, msg, len, 0) < 0)
1597                 elog(LOG, "could not send to statistics collector: %m");
1598 #else
1599         send(pgStatSock, msg, len, 0);
1600         /* We deliberately ignore any error from send() */
1601 #endif
1602 }
1603
1604
1605 /* ----------
1606  * PgstatBufferMain() -
1607  *
1608  *      Start up the statistics buffer process.  This is the body of the
1609  *      postmaster child process.
1610  *
1611  *      The argc/argv parameters are valid only in EXEC_BACKEND case.
1612  * ----------
1613  */
1614 NON_EXEC_STATIC void
1615 PgstatBufferMain(int argc, char *argv[])
1616 {
1617         IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
1618
1619         MyProcPid = getpid();           /* reset MyProcPid */
1620
1621         /*
1622          * Ignore all signals usually bound to some action in the postmaster,
1623          * except for SIGCHLD and SIGQUIT --- see pgstat_recvbuffer.
1624          */
1625         pqsignal(SIGHUP, SIG_IGN);
1626         pqsignal(SIGINT, SIG_IGN);
1627         pqsignal(SIGTERM, SIG_IGN);
1628         pqsignal(SIGQUIT, pgstat_exit);
1629         pqsignal(SIGALRM, SIG_IGN);
1630         pqsignal(SIGPIPE, SIG_IGN);
1631         pqsignal(SIGUSR1, SIG_IGN);
1632         pqsignal(SIGUSR2, SIG_IGN);
1633         pqsignal(SIGCHLD, pgstat_die);
1634         pqsignal(SIGTTIN, SIG_DFL);
1635         pqsignal(SIGTTOU, SIG_DFL);
1636         pqsignal(SIGCONT, SIG_DFL);
1637         pqsignal(SIGWINCH, SIG_DFL);
1638         /* unblock will happen in pgstat_recvbuffer */
1639
1640 #ifdef EXEC_BACKEND
1641         pgstat_parseArgs(argc, argv);
1642 #endif
1643
1644         /*
1645          * Start a buffering process to read from the socket, so we have a little
1646          * more time to process incoming messages.
1647          *
1648          * NOTE: the process structure is: postmaster is parent of buffer process
1649          * is parent of collector process.      This way, the buffer can detect
1650          * collector failure via SIGCHLD, whereas otherwise it wouldn't notice
1651          * collector failure until it tried to write on the pipe.  That would mean
1652          * that after the postmaster started a new collector, we'd have two buffer
1653          * processes competing to read from the UDP socket --- not good.
1654          */
1655         if (pgpipe(pgStatPipe) < 0)
1656                 ereport(ERROR,
1657                                 (errcode_for_socket_access(),
1658                                  errmsg("could not create pipe for statistics buffer: %m")));
1659
1660         /* child becomes collector process */
1661 #ifdef EXEC_BACKEND
1662         pgStatCollectorPid = pgstat_forkexec(STAT_PROC_COLLECTOR);
1663 #else
1664         pgStatCollectorPid = fork();
1665 #endif
1666         switch (pgStatCollectorPid)
1667         {
1668                 case -1:
1669                         ereport(ERROR,
1670                                         (errmsg("could not fork statistics collector: %m")));
1671
1672 #ifndef EXEC_BACKEND
1673                 case 0:
1674                         /* child becomes collector process */
1675                         PgstatCollectorMain(0, NULL);
1676                         break;
1677 #endif
1678
1679                 default:
1680                         /* parent becomes buffer process */
1681                         closesocket(pgStatPipe[0]);
1682                         pgstat_recvbuffer();
1683         }
1684         exit(0);
1685 }
1686
1687
1688 /* ----------
1689  * PgstatCollectorMain() -
1690  *
1691  *      Start up the statistics collector itself.  This is the body of the
1692  *      postmaster grandchild process.
1693  *
1694  *      The argc/argv parameters are valid only in EXEC_BACKEND case.
1695  * ----------
1696  */
1697 NON_EXEC_STATIC void
1698 PgstatCollectorMain(int argc, char *argv[])
1699 {
1700         PgStat_Msg      msg;
1701         fd_set          rfds;
1702         int                     readPipe;
1703         int                     len = 0;
1704         struct itimerval timeout;
1705         bool            need_timer = false;
1706
1707         MyProcPid = getpid();           /* reset MyProcPid */
1708
1709         /*
1710          * Reset signal handling.  With the exception of restoring default SIGCHLD
1711          * and SIGQUIT handling, this is a no-op in the non-EXEC_BACKEND case
1712          * because we'll have inherited these settings from the buffer process;
1713          * but it's not a no-op for EXEC_BACKEND.
1714          */
1715         pqsignal(SIGHUP, SIG_IGN);
1716         pqsignal(SIGINT, SIG_IGN);
1717         pqsignal(SIGTERM, SIG_IGN);
1718 #ifndef WIN32
1719         pqsignal(SIGQUIT, SIG_IGN);
1720 #else
1721         /* kluge to allow buffer process to kill collector; FIXME */
1722         pqsignal(SIGQUIT, pgstat_exit);
1723 #endif
1724         pqsignal(SIGALRM, force_statwrite);
1725         pqsignal(SIGPIPE, SIG_IGN);
1726         pqsignal(SIGUSR1, SIG_IGN);
1727         pqsignal(SIGUSR2, SIG_IGN);
1728         pqsignal(SIGCHLD, SIG_DFL);
1729         pqsignal(SIGTTIN, SIG_DFL);
1730         pqsignal(SIGTTOU, SIG_DFL);
1731         pqsignal(SIGCONT, SIG_DFL);
1732         pqsignal(SIGWINCH, SIG_DFL);
1733         PG_SETMASK(&UnBlockSig);
1734
1735 #ifdef EXEC_BACKEND
1736         pgstat_parseArgs(argc, argv);
1737 #endif
1738
1739         /* Close unwanted files */
1740         closesocket(pgStatPipe[1]);
1741         closesocket(pgStatSock);
1742
1743         /*
1744          * Identify myself via ps
1745          */
1746         init_ps_display("stats collector process", "", "");
1747         set_ps_display("");
1748
1749         /*
1750          * Arrange to write the initial status file right away
1751          */
1752         need_statwrite = true;
1753
1754         /* Preset the delay between status file writes */
1755         MemSet(&timeout, 0, sizeof(struct itimerval));
1756         timeout.it_value.tv_sec = PGSTAT_STAT_INTERVAL / 1000;
1757         timeout.it_value.tv_usec = PGSTAT_STAT_INTERVAL % 1000;
1758
1759         /*
1760          * Read in an existing statistics stats file or initialize the stats to
1761          * zero.
1762          */
1763         pgStatRunningInCollector = true;
1764         pgstat_read_statsfile(&pgStatDBHash, InvalidOid);
1765
1766         readPipe = pgStatPipe[0];
1767
1768         /*
1769          * Process incoming messages and handle all the reporting stuff until
1770          * there are no more messages.
1771          */
1772         for (;;)
1773         {
1774                 /*
1775                  * If time to write the stats file, do so.  Note that the alarm
1776                  * interrupt isn't re-enabled immediately, but only after we next
1777                  * receive a stats message; so no cycles are wasted when there is
1778                  * nothing going on.
1779                  */
1780                 if (need_statwrite)
1781                 {
1782                         pgstat_write_statsfile();
1783                         need_statwrite = false;
1784                         need_timer = true;
1785                 }
1786
1787                 /*
1788                  * Setup the descriptor set for select(2)
1789                  */
1790                 FD_ZERO(&rfds);
1791                 FD_SET(readPipe, &rfds);
1792
1793                 /*
1794                  * Now wait for something to do.
1795                  */
1796                 if (select(readPipe + 1, &rfds, NULL, NULL, NULL) < 0)
1797                 {
1798                         if (errno == EINTR)
1799                                 continue;
1800                         ereport(ERROR,
1801                                         (errcode_for_socket_access(),
1802                                          errmsg("select() failed in statistics collector: %m")));
1803                 }
1804
1805                 /*
1806                  * Check if there is a new statistics message to collect.
1807                  */
1808                 if (FD_ISSET(readPipe, &rfds))
1809                 {
1810                         /*
1811                          * We may need to issue multiple read calls in case the buffer
1812                          * process didn't write the message in a single write, which is
1813                          * possible since it dumps its buffer bytewise. In any case, we'd
1814                          * need two reads since we don't know the message length
1815                          * initially.
1816                          */
1817                         int                     nread = 0;
1818                         int                     targetlen = sizeof(PgStat_MsgHdr);              /* initial */
1819                         bool            pipeEOF = false;
1820
1821                         while (nread < targetlen)
1822                         {
1823                                 len = piperead(readPipe, ((char *) &msg) + nread,
1824                                                            targetlen - nread);
1825                                 if (len < 0)
1826                                 {
1827                                         if (errno == EINTR)
1828                                                 continue;
1829                                         ereport(ERROR,
1830                                                         (errcode_for_socket_access(),
1831                                                          errmsg("could not read from statistics collector pipe: %m")));
1832                                 }
1833                                 if (len == 0)   /* EOF on the pipe! */
1834                                 {
1835                                         pipeEOF = true;
1836                                         break;
1837                                 }
1838                                 nread += len;
1839                                 if (nread == sizeof(PgStat_MsgHdr))
1840                                 {
1841                                         /* we have the header, compute actual msg length */
1842                                         targetlen = msg.msg_hdr.m_size;
1843                                         if (targetlen < (int) sizeof(PgStat_MsgHdr) ||
1844                                                 targetlen > (int) sizeof(msg))
1845                                         {
1846                                                 /*
1847                                                  * Bogus message length implies that we got out of
1848                                                  * sync with the buffer process somehow. Abort so that
1849                                                  * we can restart both processes.
1850                                                  */
1851                                                 ereport(ERROR,
1852                                                           (errmsg("invalid statistics message length")));
1853                                         }
1854                                 }
1855                         }
1856
1857                         /*
1858                          * EOF on the pipe implies that the buffer process exited. Fall
1859                          * out of outer loop.
1860                          */
1861                         if (pipeEOF)
1862                                 break;
1863
1864                         /*
1865                          * Distribute the message to the specific function handling it.
1866                          */
1867                         switch (msg.msg_hdr.m_type)
1868                         {
1869                                 case PGSTAT_MTYPE_DUMMY:
1870                                         break;
1871
1872                                 case PGSTAT_MTYPE_TABSTAT:
1873                                         pgstat_recv_tabstat((PgStat_MsgTabstat *) &msg, nread);
1874                                         break;
1875
1876                                 case PGSTAT_MTYPE_TABPURGE:
1877                                         pgstat_recv_tabpurge((PgStat_MsgTabpurge *) &msg, nread);
1878                                         break;
1879
1880                                 case PGSTAT_MTYPE_DROPDB:
1881                                         pgstat_recv_dropdb((PgStat_MsgDropdb *) &msg, nread);
1882                                         break;
1883
1884                                 case PGSTAT_MTYPE_RESETCOUNTER:
1885                                         pgstat_recv_resetcounter((PgStat_MsgResetcounter *) &msg,
1886                                                                                          nread);
1887                                         break;
1888
1889                                 case PGSTAT_MTYPE_AUTOVAC_START:
1890                                         pgstat_recv_autovac((PgStat_MsgAutovacStart *) &msg, nread);
1891                                         break;
1892
1893                                 case PGSTAT_MTYPE_VACUUM:
1894                                         pgstat_recv_vacuum((PgStat_MsgVacuum *) &msg, nread);
1895                                         break;
1896
1897                                 case PGSTAT_MTYPE_ANALYZE:
1898                                         pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, nread);
1899                                         break;
1900
1901                                 default:
1902                                         break;
1903                         }
1904
1905                         /*
1906                          * If this is the first message after we wrote the stats file the
1907                          * last time, enable the alarm interrupt to make it be written
1908                          * again later.
1909                          */
1910                         if (need_timer)
1911                         {
1912                                 if (setitimer(ITIMER_REAL, &timeout, NULL))
1913                                         ereport(ERROR,
1914                                                   (errmsg("could not set statistics collector timer: %m")));
1915                                 need_timer = false;
1916                         }
1917                 }
1918
1919                 /*
1920                  * Note that we do NOT check for postmaster exit inside the loop; only
1921                  * EOF on the buffer pipe causes us to fall out.  This ensures we
1922                  * don't exit prematurely if there are still a few messages in the
1923                  * buffer or pipe at postmaster shutdown.
1924                  */
1925         }
1926
1927         /*
1928          * Okay, we saw EOF on the buffer pipe, so there are no more messages to
1929          * process.  If the buffer process quit because of postmaster shutdown, we
1930          * want to save the final stats to reuse at next startup. But if the
1931          * buffer process failed, it seems best not to (there may even now be a
1932          * new collector firing up, and we don't want it to read a
1933          * partially-rewritten stats file).
1934          */
1935         if (!PostmasterIsAlive(false))
1936                 pgstat_write_statsfile();
1937 }
1938
1939
1940 /* SIGALRM signal handler for collector process */
1941 static void
1942 force_statwrite(SIGNAL_ARGS)
1943 {
1944         need_statwrite = true;
1945 }
1946
1947
1948 /* ----------
1949  * pgstat_recvbuffer() -
1950  *
1951  *      This is the body of the separate buffering process. Its only
1952  *      purpose is to receive messages from the UDP socket as fast as
1953  *      possible and forward them over a pipe into the collector itself.
1954  *      If the collector is slow to absorb messages, they are buffered here.
1955  * ----------
1956  */
1957 static void
1958 pgstat_recvbuffer(void)
1959 {
1960         fd_set          rfds;
1961         fd_set          wfds;
1962         struct timeval timeout;
1963         int                     writePipe = pgStatPipe[1];
1964         int                     maxfd;
1965         int                     len;
1966         int                     xfr;
1967         int                     frm;
1968         PgStat_Msg      input_buffer;
1969         char       *msgbuffer;
1970         int                     msg_send = 0;   /* next send index in buffer */
1971         int                     msg_recv = 0;   /* next receive index */
1972         int                     msg_have = 0;   /* number of bytes stored */
1973         bool            overflow = false;
1974
1975         /*
1976          * Identify myself via ps
1977          */
1978         init_ps_display("stats buffer process", "", "");
1979         set_ps_display("");
1980
1981         /*
1982          * We want to die if our child collector process does.  There are two ways
1983          * we might notice that it has died: receive SIGCHLD, or get a write
1984          * failure on the pipe leading to the child.  We can set SIGPIPE to kill
1985          * us here.  Our SIGCHLD handler was already set up before we forked (must
1986          * do it that way, else it's a race condition).
1987          */
1988         pqsignal(SIGPIPE, SIG_DFL);
1989         PG_SETMASK(&UnBlockSig);
1990
1991         /*
1992          * Set the write pipe to nonblock mode, so that we cannot block when the
1993          * collector falls behind.
1994          */
1995         if (!pg_set_noblock(writePipe))
1996                 ereport(ERROR,
1997                                 (errcode_for_socket_access(),
1998                                  errmsg("could not set statistics collector pipe to nonblocking mode: %m")));
1999
2000         /*
2001          * Allocate the message buffer
2002          */
2003         msgbuffer = (char *) palloc(PGSTAT_RECVBUFFERSZ);
2004
2005         /*
2006          * Loop forever
2007          */
2008         for (;;)
2009         {
2010                 FD_ZERO(&rfds);
2011                 FD_ZERO(&wfds);
2012                 maxfd = -1;
2013
2014                 /*
2015                  * As long as we have buffer space we add the socket to the read
2016                  * descriptor set.
2017                  */
2018                 if (msg_have <= (int) (PGSTAT_RECVBUFFERSZ - sizeof(PgStat_Msg)))
2019                 {
2020                         FD_SET(pgStatSock, &rfds);
2021                         maxfd = pgStatSock;
2022                         overflow = false;
2023                 }
2024                 else
2025                 {
2026                         if (!overflow)
2027                         {
2028                                 ereport(LOG,
2029                                                 (errmsg("statistics buffer is full")));
2030                                 overflow = true;
2031                         }
2032                 }
2033
2034                 /*
2035                  * If we have messages to write out, we add the pipe to the write
2036                  * descriptor set.
2037                  */
2038                 if (msg_have > 0)
2039                 {
2040                         FD_SET(writePipe, &wfds);
2041                         if (writePipe > maxfd)
2042                                 maxfd = writePipe;
2043                 }
2044
2045                 /*
2046                  * Wait for some work to do; but not for more than 10 seconds. (This
2047                  * determines how quickly we will shut down after an ungraceful
2048                  * postmaster termination; so it needn't be very fast.)
2049                  *
2050                  * struct timeout is modified by select() on some operating systems,
2051                  * so re-fill it each time.
2052                  */
2053                 timeout.tv_sec = 10;
2054                 timeout.tv_usec = 0;
2055
2056                 if (select(maxfd + 1, &rfds, &wfds, NULL, &timeout) < 0)
2057                 {
2058                         if (errno == EINTR)
2059                                 continue;
2060                         ereport(ERROR,
2061                                         (errcode_for_socket_access(),
2062                                          errmsg("select() failed in statistics buffer: %m")));
2063                 }
2064
2065                 /*
2066                  * If there is a message on the socket, read it and check for
2067                  * validity.
2068                  */
2069                 if (FD_ISSET(pgStatSock, &rfds))
2070                 {
2071                         len = recv(pgStatSock, (char *) &input_buffer,
2072                                            sizeof(PgStat_Msg), 0);
2073                         if (len < 0)
2074                                 ereport(ERROR,
2075                                                 (errcode_for_socket_access(),
2076                                                  errmsg("could not read statistics message: %m")));
2077
2078                         /*
2079                          * We ignore messages that are smaller than our common header
2080                          */
2081                         if (len < sizeof(PgStat_MsgHdr))
2082                                 continue;
2083
2084                         /*
2085                          * The received length must match the length in the header
2086                          */
2087                         if (input_buffer.msg_hdr.m_size != len)
2088                                 continue;
2089
2090                         /*
2091                          * O.K. - we accept this message.  Copy it to the circular
2092                          * msgbuffer.
2093                          */
2094                         frm = 0;
2095                         while (len > 0)
2096                         {
2097                                 xfr = PGSTAT_RECVBUFFERSZ - msg_recv;
2098                                 if (xfr > len)
2099                                         xfr = len;
2100                                 Assert(xfr > 0);
2101                                 memcpy(msgbuffer + msg_recv,
2102                                            ((char *) &input_buffer) + frm,
2103                                            xfr);
2104                                 msg_recv += xfr;
2105                                 if (msg_recv == PGSTAT_RECVBUFFERSZ)
2106                                         msg_recv = 0;
2107                                 msg_have += xfr;
2108                                 frm += xfr;
2109                                 len -= xfr;
2110                         }
2111                 }
2112
2113                 /*
2114                  * If the collector is ready to receive, write some data into his
2115                  * pipe.  We may or may not be able to write all that we have.
2116                  *
2117                  * NOTE: if what we have is less than PIPE_BUF bytes but more than the
2118                  * space available in the pipe buffer, most kernels will refuse to
2119                  * write any of it, and will return EAGAIN.  This means we will
2120                  * busy-loop until the situation changes (either because the collector
2121                  * caught up, or because more data arrives so that we have more than
2122                  * PIPE_BUF bytes buffered).  This is not good, but is there any way
2123                  * around it?  We have no way to tell when the collector has caught
2124                  * up...
2125                  */
2126                 if (FD_ISSET(writePipe, &wfds))
2127                 {
2128                         xfr = PGSTAT_RECVBUFFERSZ - msg_send;
2129                         if (xfr > msg_have)
2130                                 xfr = msg_have;
2131                         Assert(xfr > 0);
2132                         len = pipewrite(writePipe, msgbuffer + msg_send, xfr);
2133                         if (len < 0)
2134                         {
2135                                 if (errno == EINTR || errno == EAGAIN)
2136                                         continue;       /* not enough space in pipe */
2137                                 ereport(ERROR,
2138                                                 (errcode_for_socket_access(),
2139                                 errmsg("could not write to statistics collector pipe: %m")));
2140                         }
2141                         /* NB: len < xfr is okay */
2142                         msg_send += len;
2143                         if (msg_send == PGSTAT_RECVBUFFERSZ)
2144                                 msg_send = 0;
2145                         msg_have -= len;
2146                 }
2147
2148                 /*
2149                  * Make sure we forwarded all messages before we check for postmaster
2150                  * termination.
2151                  */
2152                 if (msg_have != 0 || FD_ISSET(pgStatSock, &rfds))
2153                         continue;
2154
2155                 /*
2156                  * If the postmaster has terminated, we die too.  (This is no longer
2157                  * the normal exit path, however.)
2158                  */
2159                 if (!PostmasterIsAlive(true))
2160                         exit(0);
2161         }
2162 }
2163
/*
 * pgstat_exit() -
 *
 *	SIGQUIT signal handler for buffer process.  Terminates the process at
 *	once with exit status 0.
 *
 *	No attempt is made to flush pending messages first: that might be
 *	cleaner, but it would trade away speed of exit.
 */
static void
pgstat_exit(SIGNAL_ARGS)
{
#ifdef WIN32

	/*
	 * When running in the bufferer, take the collector down with us.  On
	 * some broken win32 systems it does not shut down automatically because
	 * of issues with socket inheritance.  XXX so why not fix the socket
	 * inheritance...
	 */
	if (pgStatCollectorPid > 0)
		kill(pgStatCollectorPid, SIGQUIT);
#endif

	exit(0);
}
2185
/*
 * pgstat_die() -
 *
 *	SIGCHLD signal handler for buffer process.  Ends the process
 *	immediately, reporting exit status 1.
 */
static void
pgstat_die(SIGNAL_ARGS)
{
	/* Nothing to clean up here; just leave with a nonzero status. */
	exit(1);
}
2192
2193
2194 /*
2195  * Lookup the hash table entry for the specified database. If no hash
2196  * table entry exists, initialize it, if the create parameter is true.
2197  * Else, return NULL.
2198  */
2199 static PgStat_StatDBEntry *
2200 pgstat_get_db_entry(Oid databaseid, bool create)
2201 {
2202         PgStat_StatDBEntry *result;
2203         bool            found;
2204         HASHACTION      action = (create ? HASH_ENTER : HASH_FIND);
2205
2206         /* Lookup or create the hash table entry for this database */
2207         result = (PgStat_StatDBEntry *) hash_search(pgStatDBHash,
2208                                                                                                 &databaseid,
2209                                                                                                 action, &found);
2210
2211         if (!create && !found)
2212                 return NULL;
2213
2214         /* If not found, initialize the new one. */
2215         if (!found)
2216         {
2217                 HASHCTL         hash_ctl;
2218
2219                 result->tables = NULL;
2220                 result->n_xact_commit = 0;
2221                 result->n_xact_rollback = 0;
2222                 result->n_blocks_fetched = 0;
2223                 result->n_blocks_hit = 0;
2224                 result->last_autovac_time = 0;
2225
2226                 memset(&hash_ctl, 0, sizeof(hash_ctl));
2227                 hash_ctl.keysize = sizeof(Oid);
2228                 hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2229                 hash_ctl.hash = oid_hash;
2230                 result->tables = hash_create("Per-database table",
2231                                                                          PGSTAT_TAB_HASH_SIZE,
2232                                                                          &hash_ctl,
2233                                                                          HASH_ELEM | HASH_FUNCTION);
2234         }
2235
2236         return result;
2237 }
2238
2239
2240 /* ----------
2241  * pgstat_write_statsfile() -
2242  *
2243  *      Tell the news.
2244  * ----------
2245  */
2246 static void
2247 pgstat_write_statsfile(void)
2248 {
2249         HASH_SEQ_STATUS hstat;
2250         HASH_SEQ_STATUS tstat;
2251         PgStat_StatDBEntry *dbentry;
2252         PgStat_StatTabEntry *tabentry;
2253         FILE       *fpout;
2254         int32           format_id;
2255
2256         /*
2257          * Open the statistics temp file to write out the current values.
2258          */
2259         fpout = fopen(PGSTAT_STAT_TMPFILE, PG_BINARY_W);
2260         if (fpout == NULL)
2261         {
2262                 ereport(LOG,
2263                                 (errcode_for_file_access(),
2264                                  errmsg("could not open temporary statistics file \"%s\": %m",
2265                                                 PGSTAT_STAT_TMPFILE)));
2266                 return;
2267         }
2268
2269         /*
2270          * Write the file header --- currently just a format ID.
2271          */
2272         format_id = PGSTAT_FILE_FORMAT_ID;
2273         fwrite(&format_id, sizeof(format_id), 1, fpout);
2274
2275         /*
2276          * Walk through the database table.
2277          */
2278         hash_seq_init(&hstat, pgStatDBHash);
2279         while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != NULL)
2280         {
2281                 /*
2282                  * Write out the DB entry including the number of live backends.
2283                  * We don't write the tables pointer since it's of no use to any
2284                  * other process.
2285                  */
2286                 fputc('D', fpout);
2287                 fwrite(dbentry, offsetof(PgStat_StatDBEntry, tables), 1, fpout);
2288
2289                 /*
2290                  * Walk through the database's access stats per table.
2291                  */
2292                 hash_seq_init(&tstat, dbentry->tables);
2293                 while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != NULL)
2294                 {
2295                         fputc('T', fpout);
2296                         fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
2297                 }
2298
2299                 /*
2300                  * Mark the end of this DB
2301                  */
2302                 fputc('d', fpout);
2303         }
2304
2305         /*
2306          * No more output to be done. Close the temp file and replace the old
2307          * pgstat.stat with it.  The ferror() check replaces testing for error
2308          * after each individual fputc or fwrite above.
2309          */
2310         fputc('E', fpout);
2311
2312         if (ferror(fpout))
2313         {
2314                 ereport(LOG,
2315                                 (errcode_for_file_access(),
2316                                  errmsg("could not write temporary statistics file \"%s\": %m",
2317                                                 PGSTAT_STAT_TMPFILE)));
2318                 fclose(fpout);
2319                 unlink(PGSTAT_STAT_TMPFILE);
2320         }
2321         else if (fclose(fpout) < 0)
2322         {
2323                 ereport(LOG,
2324                                 (errcode_for_file_access(),
2325                            errmsg("could not close temporary statistics file \"%s\": %m",
2326                                           PGSTAT_STAT_TMPFILE)));
2327                 unlink(PGSTAT_STAT_TMPFILE);
2328         }
2329         else if (rename(PGSTAT_STAT_TMPFILE, PGSTAT_STAT_FILENAME) < 0)
2330         {
2331                 ereport(LOG,
2332                                 (errcode_for_file_access(),
2333                                  errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m",
2334                                                 PGSTAT_STAT_TMPFILE, PGSTAT_STAT_FILENAME)));
2335                 unlink(PGSTAT_STAT_TMPFILE);
2336         }
2337 }
2338
2339
2340 /* ----------
2341  * pgstat_read_statsfile() -
2342  *
2343  *      Reads in an existing statistics collector file and initializes the
2344  *      databases' hash table (whose entries point to the tables' hash tables).
2345  * ----------
2346  */
2347 static void
2348 pgstat_read_statsfile(HTAB **dbhash, Oid onlydb)
2349 {
2350         PgStat_StatDBEntry *dbentry;
2351         PgStat_StatDBEntry dbbuf;
2352         PgStat_StatTabEntry *tabentry;
2353         PgStat_StatTabEntry tabbuf;
2354         HASHCTL         hash_ctl;
2355         HTAB       *tabhash = NULL;
2356         FILE       *fpin;
2357         int32           format_id;
2358         bool            found;
2359         MemoryContext use_mcxt;
2360         int                     mcxt_flags;
2361
2362         /*
2363          * If running in the collector or the autovacuum process, we use the
2364          * DynaHashCxt memory context.  If running in a backend, we use the
2365          * TopTransactionContext instead, so the caller must only know the last
2366          * XactId when this call happened to know if his tables are still valid or
2367          * already gone!
2368          */
2369         if (pgStatRunningInCollector || IsAutoVacuumProcess())
2370         {
2371                 use_mcxt = NULL;
2372                 mcxt_flags = 0;
2373         }
2374         else
2375         {
2376                 use_mcxt = TopTransactionContext;
2377                 mcxt_flags = HASH_CONTEXT;
2378         }
2379
2380         /*
2381          * Create the DB hashtable
2382          */
2383         memset(&hash_ctl, 0, sizeof(hash_ctl));
2384         hash_ctl.keysize = sizeof(Oid);
2385         hash_ctl.entrysize = sizeof(PgStat_StatDBEntry);
2386         hash_ctl.hash = oid_hash;
2387         hash_ctl.hcxt = use_mcxt;
2388         *dbhash = hash_create("Databases hash", PGSTAT_DB_HASH_SIZE, &hash_ctl,
2389                                                   HASH_ELEM | HASH_FUNCTION | mcxt_flags);
2390
2391         /*
2392          * Try to open the status file. If it doesn't exist, the backends simply
2393          * return zero for anything and the collector simply starts from scratch
2394          * with empty counters.
2395          */
2396         if ((fpin = AllocateFile(PGSTAT_STAT_FILENAME, PG_BINARY_R)) == NULL)
2397                 return;
2398
2399         /*
2400          * Verify it's of the expected format.
2401          */
2402         if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
2403                 || format_id != PGSTAT_FILE_FORMAT_ID)
2404         {
2405                 ereport(pgStatRunningInCollector ? LOG : WARNING,
2406                                 (errmsg("corrupted pgstat.stat file")));
2407                 goto done;
2408         }
2409
2410         /*
2411          * We found an existing collector stats file. Read it and put all the
2412          * hashtable entries into place.
2413          */
2414         for (;;)
2415         {
2416                 switch (fgetc(fpin))
2417                 {
2418                                 /*
2419                                  * 'D'  A PgStat_StatDBEntry struct describing a database
2420                                  * follows. Subsequently, zero to many 'T' entries will follow
2421                                  * until a 'd' is encountered.
2422                                  */
2423                         case 'D':
2424                                 if (fread(&dbbuf, 1, offsetof(PgStat_StatDBEntry, tables),
2425                                                   fpin) != offsetof(PgStat_StatDBEntry, tables))
2426                                 {
2427                                         ereport(pgStatRunningInCollector ? LOG : WARNING,
2428                                                         (errmsg("corrupted pgstat.stat file")));
2429                                         goto done;
2430                                 }
2431
2432                                 /*
2433                                  * Add to the DB hash
2434                                  */
2435                                 dbentry = (PgStat_StatDBEntry *) hash_search(*dbhash,
2436                                                                                                   (void *) &dbbuf.databaseid,
2437                                                                                                                          HASH_ENTER,
2438                                                                                                                          &found);
2439                                 if (found)
2440                                 {
2441                                         ereport(pgStatRunningInCollector ? LOG : WARNING,
2442                                                         (errmsg("corrupted pgstat.stat file")));
2443                                         goto done;
2444                                 }
2445
2446                                 memcpy(dbentry, &dbbuf, sizeof(PgStat_StatDBEntry));
2447                                 dbentry->tables = NULL;
2448
2449                                 /*
2450                                  * Don't collect tables if not the requested DB (or the
2451                                  * shared-table info)
2452                                  */
2453                                 if (onlydb != InvalidOid)
2454                                 {
2455                                         if (dbbuf.databaseid != onlydb &&
2456                                                 dbbuf.databaseid != InvalidOid)
2457                                                 break;
2458                                 }
2459
2460                                 memset(&hash_ctl, 0, sizeof(hash_ctl));
2461                                 hash_ctl.keysize = sizeof(Oid);
2462                                 hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2463                                 hash_ctl.hash = oid_hash;
2464                                 hash_ctl.hcxt = use_mcxt;
2465                                 dbentry->tables = hash_create("Per-database table",
2466                                                                                           PGSTAT_TAB_HASH_SIZE,
2467                                                                                           &hash_ctl,
2468                                                                          HASH_ELEM | HASH_FUNCTION | mcxt_flags);
2469
2470                                 /*
2471                                  * Arrange that following 'T's add entries to this database's
2472                                  * tables hash table.
2473                                  */
2474                                 tabhash = dbentry->tables;
2475                                 break;
2476
2477                                 /*
2478                                  * 'd'  End of this database.
2479                                  */
2480                         case 'd':
2481                                 tabhash = NULL;
2482                                 break;
2483
2484                                 /*
2485                                  * 'T'  A PgStat_StatTabEntry follows.
2486                                  */
2487                         case 'T':
2488                                 if (fread(&tabbuf, 1, sizeof(PgStat_StatTabEntry),
2489                                                   fpin) != sizeof(PgStat_StatTabEntry))
2490                                 {
2491                                         ereport(pgStatRunningInCollector ? LOG : WARNING,
2492                                                         (errmsg("corrupted pgstat.stat file")));
2493                                         goto done;
2494                                 }
2495
2496                                 /*
2497                                  * Skip if table belongs to a not requested database.
2498                                  */
2499                                 if (tabhash == NULL)
2500                                         break;
2501
2502                                 tabentry = (PgStat_StatTabEntry *) hash_search(tabhash,
2503                                                                                                         (void *) &tabbuf.tableid,
2504                                                                                                                  HASH_ENTER, &found);
2505
2506                                 if (found)
2507                                 {
2508                                         ereport(pgStatRunningInCollector ? LOG : WARNING,
2509                                                         (errmsg("corrupted pgstat.stat file")));
2510                                         goto done;
2511                                 }
2512
2513                                 memcpy(tabentry, &tabbuf, sizeof(tabbuf));
2514                                 break;
2515
2516                                 /*
2517                                  * 'E'  The EOF marker of a complete stats file.
2518                                  */
2519                         case 'E':
2520                                 goto done;
2521
2522                         default:
2523                                 ereport(pgStatRunningInCollector ? LOG : WARNING,
2524                                                 (errmsg("corrupted pgstat.stat file")));
2525                                 goto done;
2526                 }
2527         }
2528
2529 done:
2530         FreeFile(fpin);
2531 }
2532
2533 /*
2534  * If not done for this transaction, read the statistics collector
2535  * stats file into some hash tables.
2536  *
2537  * Because we store the tables in TopTransactionContext, the result
2538  * is good for the entire current main transaction.
2539  *
2540  * Inside the autovacuum process, the statfile is assumed to be valid
2541  * "forever", that is one iteration, within one database.  This means
2542  * we only consider the statistics as they were when the autovacuum
2543  * iteration started.
2544  */
2545 static void
2546 backend_read_statsfile(void)
2547 {
2548         if (IsAutoVacuumProcess())
2549         {
2550                 /* already read it? */
2551                 if (pgStatDBHash)
2552                         return;
2553                 Assert(!pgStatRunningInCollector);
2554                 pgstat_read_statsfile(&pgStatDBHash, InvalidOid);
2555         }
2556         else
2557         {
2558                 TransactionId topXid = GetTopTransactionId();
2559
2560                 if (!TransactionIdEquals(pgStatDBHashXact, topXid))
2561                 {
2562                         Assert(!pgStatRunningInCollector);
2563                         pgstat_read_statsfile(&pgStatDBHash, MyDatabaseId);
2564                         pgStatDBHashXact = topXid;
2565                 }
2566         }
2567 }
2568
2569 /* ----------
2570  * pgstat_recv_tabstat() -
2571  *
2572  *      Count what the backend has done.
2573  * ----------
2574  */
2575 static void
2576 pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
2577 {
2578         PgStat_TableEntry *tabmsg = &(msg->m_entry[0]);
2579         PgStat_StatDBEntry *dbentry;
2580         PgStat_StatTabEntry *tabentry;
2581         int                     i;
2582         bool            found;
2583
2584         dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
2585
2586         /*
2587          * Update database-wide stats.
2588          */
2589         dbentry->n_xact_commit += (PgStat_Counter) (msg->m_xact_commit);
2590         dbentry->n_xact_rollback += (PgStat_Counter) (msg->m_xact_rollback);
2591
2592         /*
2593          * Process all table entries in the message.
2594          */
2595         for (i = 0; i < msg->m_nentries; i++)
2596         {
2597                 tabentry = (PgStat_StatTabEntry *) hash_search(dbentry->tables,
2598                                                                                                   (void *) &(tabmsg[i].t_id),
2599                                                                                                            HASH_ENTER, &found);
2600
2601                 if (!found)
2602                 {
2603                         /*
2604                          * If it's a new table entry, initialize counters to the values we
2605                          * just got.
2606                          */
2607                         tabentry->numscans = tabmsg[i].t_numscans;
2608                         tabentry->tuples_returned = tabmsg[i].t_tuples_returned;
2609                         tabentry->tuples_fetched = tabmsg[i].t_tuples_fetched;
2610                         tabentry->tuples_inserted = tabmsg[i].t_tuples_inserted;
2611                         tabentry->tuples_updated = tabmsg[i].t_tuples_updated;
2612                         tabentry->tuples_deleted = tabmsg[i].t_tuples_deleted;
2613
2614                         tabentry->n_live_tuples = tabmsg[i].t_tuples_inserted;
2615                         tabentry->n_dead_tuples = tabmsg[i].t_tuples_updated +
2616                                 tabmsg[i].t_tuples_deleted;
2617                         tabentry->last_anl_tuples = 0;
2618                         tabentry->vacuum_timestamp = 0;
2619                         tabentry->autovac_vacuum_timestamp = 0;
2620                         tabentry->analyze_timestamp = 0;
2621                         tabentry->autovac_analyze_timestamp = 0;
2622
2623                         tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched;
2624                         tabentry->blocks_hit = tabmsg[i].t_blocks_hit;
2625                 }
2626                 else
2627                 {
2628                         /*
2629                          * Otherwise add the values to the existing entry.
2630                          */
2631                         tabentry->numscans += tabmsg[i].t_numscans;
2632                         tabentry->tuples_returned += tabmsg[i].t_tuples_returned;
2633                         tabentry->tuples_fetched += tabmsg[i].t_tuples_fetched;
2634                         tabentry->tuples_inserted += tabmsg[i].t_tuples_inserted;
2635                         tabentry->tuples_updated += tabmsg[i].t_tuples_updated;
2636                         tabentry->tuples_deleted += tabmsg[i].t_tuples_deleted;
2637
2638                         tabentry->n_live_tuples += tabmsg[i].t_tuples_inserted -
2639                                 tabmsg[i].t_tuples_deleted;
2640                         tabentry->n_dead_tuples += tabmsg[i].t_tuples_updated +
2641                                 tabmsg[i].t_tuples_deleted;
2642
2643                         tabentry->blocks_fetched += tabmsg[i].t_blocks_fetched;
2644                         tabentry->blocks_hit += tabmsg[i].t_blocks_hit;
2645                 }
2646
2647                 /*
2648                  * And add the block IO to the database entry.
2649                  */
2650                 dbentry->n_blocks_fetched += tabmsg[i].t_blocks_fetched;
2651                 dbentry->n_blocks_hit += tabmsg[i].t_blocks_hit;
2652         }
2653 }
2654
2655
2656 /* ----------
2657  * pgstat_recv_tabpurge() -
2658  *
2659  *      Arrange for dead table removal.
2660  * ----------
2661  */
2662 static void
2663 pgstat_recv_tabpurge(PgStat_MsgTabpurge *msg, int len)
2664 {
2665         PgStat_StatDBEntry *dbentry;
2666         int                     i;
2667
2668         dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
2669
2670         /*
2671          * No need to purge if we don't even know the database.
2672          */
2673         if (!dbentry || !dbentry->tables)
2674                 return;
2675
2676         /*
2677          * Process all table entries in the message.
2678          */
2679         for (i = 0; i < msg->m_nentries; i++)
2680         {
2681                 /* Remove from hashtable if present; we don't care if it's not. */
2682                 (void) hash_search(dbentry->tables,
2683                                                    (void *) &(msg->m_tableid[i]),
2684                                                    HASH_REMOVE, NULL);
2685         }
2686 }
2687
2688
2689 /* ----------
2690  * pgstat_recv_dropdb() -
2691  *
2692  *      Arrange for dead database removal
2693  * ----------
2694  */
2695 static void
2696 pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
2697 {
2698         PgStat_StatDBEntry *dbentry;
2699
2700         /*
2701          * Lookup the database in the hashtable.
2702          */
2703         dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
2704
2705         /*
2706          * If found, remove it.
2707          */
2708         if (dbentry)
2709         {
2710                 if (dbentry->tables != NULL)
2711                         hash_destroy(dbentry->tables);
2712
2713                 if (hash_search(pgStatDBHash,
2714                                                 (void *) &(dbentry->databaseid),
2715                                                 HASH_REMOVE, NULL) == NULL)
2716                         ereport(ERROR,
2717                                         (errmsg("database hash table corrupted "
2718                                                         "during cleanup --- abort")));
2719         }
2720 }
2721
2722
2723 /* ----------
2724  * pgstat_recv_resetcounter() -
2725  *
2726  *      Reset the statistics for the specified database.
2727  * ----------
2728  */
2729 static void
2730 pgstat_recv_resetcounter(PgStat_MsgResetcounter *msg, int len)
2731 {
2732         HASHCTL         hash_ctl;
2733         PgStat_StatDBEntry *dbentry;
2734
2735         /*
2736          * Lookup the database in the hashtable.  Nothing to do if not there.
2737          */
2738         dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
2739
2740         if (!dbentry)
2741                 return;
2742
2743         /*
2744          * We simply throw away all the database's table entries by recreating a
2745          * new hash table for them.
2746          */
2747         if (dbentry->tables != NULL)
2748                 hash_destroy(dbentry->tables);
2749
2750         dbentry->tables = NULL;
2751         dbentry->n_xact_commit = 0;
2752         dbentry->n_xact_rollback = 0;
2753         dbentry->n_blocks_fetched = 0;
2754         dbentry->n_blocks_hit = 0;
2755
2756         memset(&hash_ctl, 0, sizeof(hash_ctl));
2757         hash_ctl.keysize = sizeof(Oid);
2758         hash_ctl.entrysize = sizeof(PgStat_StatTabEntry);
2759         hash_ctl.hash = oid_hash;
2760         dbentry->tables = hash_create("Per-database table",
2761                                                                   PGSTAT_TAB_HASH_SIZE,
2762                                                                   &hash_ctl,
2763                                                                   HASH_ELEM | HASH_FUNCTION);
2764 }
2765
2766 /* ----------
2767  * pgstat_recv_autovac() -
2768  *
2769  *      Process an autovacuum signalling message.
2770  * ----------
2771  */
2772 static void
2773 pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len)
2774 {
2775         PgStat_StatDBEntry *dbentry;
2776
2777         /*
2778          * Lookup the database in the hashtable.  Don't create the entry if it
2779          * doesn't exist, because autovacuum may be processing a template
2780          * database.  If this isn't the case, the database is most likely to have
2781          * an entry already.  (If it doesn't, not much harm is done anyway --
2782          * it'll get created as soon as somebody actually uses the database.)
2783          */
2784         dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
2785         if (dbentry == NULL)
2786                 return;
2787
2788         /*
2789          * Store the last autovacuum time in the database entry.
2790          */
2791         dbentry->last_autovac_time = msg->m_start_time;
2792 }
2793
2794 /* ----------
2795  * pgstat_recv_vacuum() -
2796  *
2797  *      Process a VACUUM message.
2798  * ----------
2799  */
2800 static void
2801 pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
2802 {
2803         PgStat_StatDBEntry *dbentry;
2804         PgStat_StatTabEntry *tabentry;
2805
2806         /*
2807          * Don't create either the database or table entry if it doesn't already
2808          * exist.  This avoids bloating the stats with entries for stuff that is
2809          * only touched by vacuum and not by live operations.
2810          */
2811         dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
2812         if (dbentry == NULL)
2813                 return;
2814
2815         tabentry = hash_search(dbentry->tables, &(msg->m_tableoid),
2816                                                    HASH_FIND, NULL);
2817         if (tabentry == NULL)
2818                 return;
2819
2820         if (msg->m_autovacuum)
2821                 tabentry->autovac_vacuum_timestamp = msg->m_vacuumtime;
2822         else
2823                 tabentry->vacuum_timestamp = msg->m_vacuumtime;
2824         tabentry->n_live_tuples = msg->m_tuples;
2825         tabentry->n_dead_tuples = 0;
2826         if (msg->m_analyze)
2827         {
2828                 tabentry->last_anl_tuples = msg->m_tuples;
2829                 if (msg->m_autovacuum)
2830                         tabentry->autovac_analyze_timestamp = msg->m_vacuumtime;
2831                 else
2832                         tabentry->analyze_timestamp = msg->m_vacuumtime;
2833         }
2834         else
2835         {
2836                 /* last_anl_tuples must never exceed n_live_tuples */
2837                 tabentry->last_anl_tuples = Min(tabentry->last_anl_tuples,
2838                                                                                 msg->m_tuples);
2839         }
2840 }
2841
2842 /* ----------
2843  * pgstat_recv_analyze() -
2844  *
2845  *      Process an ANALYZE message.
2846  * ----------
2847  */
2848 static void
2849 pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
2850 {
2851         PgStat_StatDBEntry *dbentry;
2852         PgStat_StatTabEntry *tabentry;
2853
2854         /*
2855          * Don't create either the database or table entry if it doesn't already
2856          * exist.  This avoids bloating the stats with entries for stuff that is
2857          * only touched by analyze and not by live operations.
2858          */
2859         dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
2860         if (dbentry == NULL)
2861                 return;
2862
2863         tabentry = hash_search(dbentry->tables, &(msg->m_tableoid),
2864                                                    HASH_FIND, NULL);
2865         if (tabentry == NULL)
2866                 return;
2867
2868         if (msg->m_autovacuum)
2869                 tabentry->autovac_analyze_timestamp = msg->m_analyzetime;
2870         else
2871                 tabentry->analyze_timestamp = msg->m_analyzetime;
2872         tabentry->n_live_tuples = msg->m_live_tuples;
2873         tabentry->n_dead_tuples = msg->m_dead_tuples;
2874         tabentry->last_anl_tuples = msg->m_live_tuples + msg->m_dead_tuples;
2875 }