]> granicus.if.org Git - pgbouncer/commitdiff
Track separate stats for queries and transactions
authorTomas Vondra <tomas.vondra@2ndquadrant.com>
Wed, 20 Sep 2017 19:00:17 +0000 (21:00 +0200)
committerTomas Vondra <tomas.vondra@2ndquadrant.com>
Sun, 24 Sep 2017 16:16:55 +0000 (18:16 +0200)
The documentation claimed avg_query tracks average query duration,
but it was only updated when switching into 'idle' state. So in
transaction or session pooling modes, it was actually the duration
of the whole transaction, including 'idle in transaction' time and
time spen executing (multiple) queries.

This was causing confusion, e.g. when the application was keeping
transactions open without executing any queries. In that case the
avg_query metric was increasing although the database was handling
queries just fine (without any slow-down).

Instead of just fixing the avg_query metric, it seems quite useful
to have timing data both for queries and transactions. So this
patch replaces

 - total_query_time
 - avg_query

with

 - total_xact_time
 - total_query_time
 - avg_xact_time
 - avg_query_time

This is also related to the definition of 'request' which was used
to compute the avg_query, and so it equal to a transaction. This
patch replaces the counter with two - one for transactions and one
for queries. As we also computed avg_req, the patch replaces

 - total_requests
 - avg_req

with

 - total_xact_count
 - total_query_time
 - avg_xact_count
 - avg_query_count

Note: This is likely to affect monitoring solutions, as the column
names in SHOW STATS change.

doc/usage.rst
include/bouncer.h
src/client.c
src/server.c
src/stats.c

index f29e9b858c40ac388b38ce3cc445b317a745aafd..6e3561adf5a8091ea482a84555f94869b4b042dc 100644 (file)
@@ -174,8 +174,11 @@ Shows statistics.
 database
     Statistics are presented per database.
 
-total_requests
-    Total number of SQL requests pooled by **pgbouncer**.
+total_xact_count
+    Total number of SQL transactions pooled by **pgbouncer**.
+
+total_query_count
+    Total number of SQL queries pooled by **pgbouncer**.
 
 total_received
     Total volume in bytes of network traffic received by **pgbouncer**.
@@ -183,12 +186,20 @@ total_received
 total_sent
     Total volume in bytes of network traffic sent by **pgbouncer**.
 
+total_xact_time
+    Total number of microseconds spent by **pgbouncer** when connected
+    to PostgreSQL in a transaction, either idle in transaction or
+    executing queries.
+
 total_query_time
     Total number of microseconds spent by **pgbouncer** when actively
-    connected to PostgreSQL.
+    connected to PostgreSQL, executing queries.
 
-avg_req
-    Average requests per second in last stat period.
+avg_xact_count
+    Average transactions per second in last stat period.
+
+avg_query_count
+    Average queries per second in last stat period.
 
 avg_recv
     Average received (from clients) bytes per second.
@@ -196,7 +207,10 @@ avg_recv
 avg_sent
     Average sent (to clients) bytes per second.
 
-avg_query
+avg_xact_time
+       Average transaction duration in microseconds.
+
+avg_query_time
     Average query duration in microseconds.
 
 SHOW SERVERS;
index 8990573df00735630b53619d766064bc6cc353ac..b792d0bc649d43f16908b2d34fa792923bdddec3 100644 (file)
@@ -188,10 +188,12 @@ int pga_cmp_addr(const PgAddr *a, const PgAddr *b);
  * Stats, kept per-pool.
  */
 struct PgStats {
-       uint64_t request_count;
+       uint64_t xact_count;
+       uint64_t query_count;
        uint64_t server_bytes;
        uint64_t client_bytes;
-       usec_t query_time;      /* total req time in us */
+       usec_t xact_time;       /* total transaction time in us */
+       usec_t query_time;      /* total query time in us */
 };
 
 /*
@@ -356,6 +358,7 @@ struct PgSocket {
        usec_t connect_time;    /* when connection was made */
        usec_t request_time;    /* last activity time */
        usec_t query_start;     /* query start moment */
+       usec_t xact_start;      /* xact start moment */
 
        uint8_t cancel_key[BACKENDKEY_LEN]; /* client: generated, server: remote */
        PgAddr remote_addr;     /* ip:port for remote endpoint */
index 0dfa102274dbb5eea1ad3cb5dbcc27ade3f4c291..245dc5c6047e972a75fad426b3a55e4d0d272190 100644 (file)
@@ -661,10 +661,16 @@ static bool handle_client_work(PgSocket *client, PktHdr *pkt)
 
        /* update stats */
        if (!client->query_start) {
-               client->pool->stats.request_count++;
+               client->pool->stats.query_count++;
                client->query_start = get_cached_time();
        }
 
+       /* remember timestamp of the first query in a transaction */
+       if (!client->xact_start) {
+               client->pool->stats.xact_count++;
+               client->xact_start = client->query_start;
+       }
+
        if (client->pool->db->admin)
                return admin_handle_client(client, pkt);
 
index 48ebe6d809d602b13dc7695bca228899e00e22e2..bf39f5cf23cf824c3e299b731d0231a10a15e1c4 100644 (file)
@@ -343,15 +343,28 @@ static bool handle_server_work(PgSocket *server, PktHdr *pkt)
                        return handle_auth_response(client, pkt);
                } else {
                        sbuf_prepare_send(sbuf, &client->sbuf, pkt->len);
-                       if (ready && client->query_start) {
+
+                       /* every statement (independent or in a transaction) counts as a query */
+                       if ((ready || idle_tx) && client->query_start) {
                                usec_t total;
                                total = get_cached_time() - client->query_start;
                                client->query_start = 0;
                                server->pool->stats.query_time += total;
                                slog_debug(client, "query time: %d us", (int)total);
-                       } else if (ready) {
+                       } else if (ready || idle_tx) {
                                slog_warning(client, "FIXME: query end, but query_start == 0");
                        }
+
+                       /* statement ending in "idle" ends a transaction */
+                       if (ready && client->xact_start) {
+                               usec_t total;
+                               total = get_cached_time() - client->xact_start;
+                               client->xact_start = 0;
+                               server->pool->stats.xact_time += total;
+                               slog_debug(client, "transaction time: %d us", (int)total);
+                       } else if (ready) {
+                               slog_warning(client, "FIXME: transaction end, but xact_start == 0");
+                       }
                }
        } else {
                if (server->state != SV_TESTED)
index c4e0027de36feac95ca3304e8e911ca3dc5c37a1..1ae702de900096138a4196c8f2263f78efe5b74d 100644 (file)
@@ -25,21 +25,27 @@ static void reset_stats(PgStats *stat)
 {
        stat->server_bytes = 0;
        stat->client_bytes = 0;
-       stat->request_count = 0;
+       stat->query_count = 0;
        stat->query_time = 0;
+       stat->xact_count = 0;
+       stat->xact_time = 0;
 }
 
 static void stat_add(PgStats *total, PgStats *stat)
 {
        total->server_bytes += stat->server_bytes;
        total->client_bytes += stat->client_bytes;
-       total->request_count += stat->request_count;
+       total->query_count += stat->query_count;
        total->query_time += stat->query_time;
+       total->xact_count += stat->xact_count;
+       total->xact_time += stat->xact_time;
 }
 
 static void calc_average(PgStats *avg, PgStats *cur, PgStats *old)
 {
-       uint64_t qcount;
+       uint64_t query_count;
+       uint64_t xact_count;
+
        usec_t dur = get_cached_time() - old_stamp;
 
        reset_stats(avg);
@@ -47,23 +53,33 @@ static void calc_average(PgStats *avg, PgStats *cur, PgStats *old)
        if (dur <= 0)
                return;
 
-       avg->request_count = USEC * (cur->request_count - old->request_count) / dur;
+       query_count = cur->query_count - old->query_count;
+       xact_count = cur->xact_count - old->xact_count;
+
+       avg->query_count = USEC * query_count / dur;
+       avg->xact_count = USEC * xact_count / dur;
+
        avg->client_bytes = USEC * (cur->client_bytes - old->client_bytes) / dur;
        avg->server_bytes = USEC * (cur->server_bytes - old->server_bytes) / dur;
-       qcount = cur->request_count - old->request_count;
-       if (qcount > 0)
-               avg->query_time = (cur->query_time - old->query_time) / qcount;
+
+       if (query_count > 0)
+               avg->query_time = (cur->query_time - old->query_time) / query_count;
+
+       if (xact_count > 0)
+               avg->xact_time = (cur->xact_time - old->xact_time) / xact_count;
 }
 
 static void write_stats(PktBuf *buf, PgStats *stat, PgStats *old, char *dbname)
 {
        PgStats avg;
        calc_average(&avg, stat, old);
-       pktbuf_write_DataRow(buf, "sqqqqqqqq", dbname,
-                            stat->request_count, stat->client_bytes,
-                            stat->server_bytes, stat->query_time,
-                            avg.request_count, avg.client_bytes,
-                            avg.server_bytes, avg.query_time);
+       pktbuf_write_DataRow(buf, "sqqqqqqqqqqqq", dbname,
+                            stat->xact_count, stat->query_count,
+                            stat->client_bytes, stat->server_bytes,
+                            stat->xact_time, stat->query_time,
+                            avg.xact_count, avg.query_count,
+                            avg.client_bytes, avg.server_bytes,
+                            avg.xact_time, avg.query_time);
 }
 
 bool admin_database_stats(PgSocket *client, struct StatList *pool_list)
@@ -86,11 +102,13 @@ bool admin_database_stats(PgSocket *client, struct StatList *pool_list)
                return true;
        }
 
-       pktbuf_write_RowDescription(buf, "sqqqqqqqq", "database",
-                                   "total_requests", "total_received",
-                                   "total_sent", "total_query_time",
-                                   "avg_req", "avg_recv", "avg_sent",
-                                   "avg_query");
+       pktbuf_write_RowDescription(buf, "sqqqqqqqqqqqq", "database",
+                                   "total_xact_count", "total_query_count",
+                                   "total_received", "total_sent",
+                                   "total_xact_time", "total_query_time",
+                                   "avg_xact_count", "avg_query_count",
+                                   "avg_recv", "avg_sent",
+                                   "avg_xact_time", "avg_query_time");
        statlist_for_each(item, pool_list) {
                pool = container_of(item, PgPool, head);
 
@@ -152,13 +170,17 @@ bool show_stat_totals(PgSocket *client, struct StatList *pool_list)
 #define WTOTAL(name) pktbuf_write_DataRow(buf, "sq", "total_" #name, st_total.name)
 #define WAVG(name) pktbuf_write_DataRow(buf, "sq", "avg_" #name, avg.name)
 
-       WTOTAL(request_count);
+       WTOTAL(xact_count);
+       WTOTAL(query_count);
        WTOTAL(client_bytes);
        WTOTAL(server_bytes);
+       WTOTAL(xact_time);
        WTOTAL(query_time);
-       WAVG(request_count);
+       WAVG(xact_count);
+       WAVG(query_count);
        WAVG(client_bytes);
        WAVG(server_bytes);
+       WAVG(xact_time);
        WAVG(query_time);
 
        admin_flush(client, buf, "SHOW");
@@ -188,12 +210,15 @@ static void refresh_stats(int s, short flags, void *arg)
        }
        calc_average(&avg, &cur_total, &old_total);
        /* send totals to logfile */
-       log_info("Stats: %" PRIu64 " req/s,"
+       log_info("Stats: %" PRIu64 " xacts/s,"
+                " %" PRIu64 " queries/s,"
                 " in %" PRIu64 " B/s,"
                 " out %" PRIu64 " B/s,"
+                " xact %" PRIu64 " us,"
                 " query %" PRIu64 " us",
-                avg.request_count, avg.client_bytes,
-                avg.server_bytes, avg.query_time);
+                avg.xact_count, avg.query_count,
+                avg.client_bytes, avg.server_bytes,
+                avg.xact_time, avg.query_time);
 
        safe_evtimer_add(&ev_stats, &period);
 }