granicus.if.org Git - pgbouncer/commitdiff
Do not wait server_login_retry for next connect if cancellation succeeds
author: Marco Slot <marco.slot@gmail.com>
Tue, 25 Sep 2018 10:10:52 +0000 (10:10 +0000)
committer: Dimitri Fontaine <dim@tapoueh.org>
Thu, 8 Nov 2018 15:13:05 +0000 (16:13 +0100)
If postgres restarts while there are N cancellations in the queue,
pgbouncer is currently unavailable for at least N*server_login_retry
because it uses every new connection for one queued cancellation and
then waits server_login_retry before opening a new connection because
the last_connect_failed flag is still set to 1. This can lead to
prolonged downtime.

This change fixes the issue by introducing a last_login_failed flag.
The last_connect_failed flag is now reset when a cancellation succeeds, such
that launch_new_connection no longer waits if pgbouncer manages to
connect, but has queued cancellations. The last_login_failed flag has
the same semantics as the last_connect_failed flag had previously, such
that check_fast_fail still rejects connections when there are no servers
available and the last login failed.

include/bouncer.h
src/objects.c

index 1fa6ab61b28a16e65a5dbd399f034df39c7c6980..96eb66c1c3c31250e965558093b0c9f20851182d 100644 (file)
@@ -237,6 +237,7 @@ struct PgPool {
        /* if last connect failed, there should be delay before next */
        usec_t last_connect_time;
        unsigned last_connect_failed:1;
+       unsigned last_login_failed:1;
 
        unsigned welcome_msg_ready:1;
 };
index 2035fdfb4d073dc18ed64f2dd1c2624ec2dcb860..12272fcfe3f420ca6a3ebd6da360f24866724eea 100644 (file)
@@ -592,8 +592,8 @@ bool check_fast_fail(PgSocket *client)
        int cnt;
        PgPool *pool = client->pool;
 
-       /* reject if no servers and last connect failed */
-       if (!pool->last_connect_failed)
+       /* reject if no servers are available and the last login failed */
+       if (!pool->last_login_failed)
                return true;
        cnt = pool_server_count(pool) - statlist_count(&pool->new_server_list);
        if (cnt)
@@ -760,6 +760,7 @@ bool release_server(PgSocket *server)
        case SV_TESTED:
                break;
        case SV_LOGIN:
+               pool->last_login_failed = 0;
                pool->last_connect_failed = 0;
                break;
        default:
@@ -832,9 +833,20 @@ void disconnect_server(PgSocket *server, bool notify, const char *reason, ...)
                 * except when sending cancel packet
                 */
                if (!server->ready)
+               {
+                       pool->last_login_failed = 1;
                        pool->last_connect_failed = 1;
+               }
                else
+               {
+                       /*
+                        * We did manage to connect and used the connection for query
+                        * cancellation, so to the best of our knowledge we can connect to
+                        * the server, reset last_connect_failed accordingly.
+                        */
+                       pool->last_connect_failed = 0;
                        send_term = 0;
+               }
                break;
        default:
                fatal("disconnect_server: bad server state (%d)", server->state);