From 28de1c6f93e7b225e7acacfcdd47c67c02832af2 Mon Sep 17 00:00:00 2001 From: Marco Slot Date: Tue, 25 Sep 2018 10:10:52 +0000 Subject: [PATCH] Do not wait server_login_retry for next connect if cancellation succeeds If postgres restarts while there are N cancellations in the queue, pgbouncer is currently unavailable for at least N*server_login_retry because it uses every new connection for one queued cancellation and then waits server_login_retry before opening a new connection because the last_connect_failed flag is still set to 1. This can lead to prolonged downtime. This change fixes the issue by introducing a last_login_failed flag. The last_connect_failed is now reset when a cancellation succeeds, such that launch_new_connection no longer waits if pgbouncer manages to connect, but has queued cancellations. The last_login_failed flag has the same semantics as the last_connect_failed flag had previously, such that check_fast_fail still rejects connections when there are no servers available and the last login failed. 
--- include/bouncer.h | 1 + src/objects.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/bouncer.h b/include/bouncer.h index 1fa6ab6..96eb66c 100644 --- a/include/bouncer.h +++ b/include/bouncer.h @@ -237,6 +237,7 @@ struct PgPool { /* if last connect failed, there should be delay before next */ usec_t last_connect_time; unsigned last_connect_failed:1; + unsigned last_login_failed:1; unsigned welcome_msg_ready:1; }; diff --git a/src/objects.c b/src/objects.c index 2035fdf..12272fc 100644 --- a/src/objects.c +++ b/src/objects.c @@ -592,8 +592,8 @@ bool check_fast_fail(PgSocket *client) int cnt; PgPool *pool = client->pool; - /* reject if no servers and last connect failed */ - if (!pool->last_connect_failed) + /* reject if no servers are available and the last login failed */ + if (!pool->last_login_failed) return true; cnt = pool_server_count(pool) - statlist_count(&pool->new_server_list); if (cnt) @@ -760,6 +760,7 @@ bool release_server(PgSocket *server) case SV_TESTED: break; case SV_LOGIN: + pool->last_login_failed = 0; pool->last_connect_failed = 0; break; default: @@ -832,9 +833,20 @@ void disconnect_server(PgSocket *server, bool notify, const char *reason, ...) * except when sending cancel packet */ if (!server->ready) + { + pool->last_login_failed = 1; pool->last_connect_failed = 1; + } else + { + /* + * We did manage to connect and used the connection for query + * cancellation, so to the best of our knowledge we can connect to + * the server, reset last_connect_failed accordingly. + */ + pool->last_connect_failed = 0; send_term = 0; + } break; default: fatal("disconnect_server: bad server state (%d)", server->state); -- 2.50.1