]> granicus.if.org Git - postgresql/commitdiff
Don't pay heed to wal_sender_timeout while creating a decoding slot.
author Andres Freund <andres@anarazel.de>
Wed, 28 May 2014 22:32:09 +0000 (00:32 +0200)
committer Andres Freund <andres@anarazel.de>
Wed, 28 May 2014 22:32:09 +0000 (00:32 +0200)
Sometimes CREATE_REPLICATION_SLOT ... LOGICAL ... needs to wait for
further WAL using WalSndWaitForWal(). That wait used to always respect
wal_sender_timeout and would kill the session if it lasted long enough,
because no feedback/ping messages can be sent while the slot is still
being created.
To avoid that problem, introduce the notion that last_reply_timestamp = 0
means that the walsender currently doesn't need timeout processing. Use
that notion for CREATE_REPLICATION_SLOT ... LOGICAL.

Bug report and initial patch by Steve Singer, revised by me.

src/backend/replication/walsender.c

index eb405cb616a93cc5758381b2e2cdbfaeeb597b9a..088ee2c0976e2091b8b47aaf4279bded2a379a61 100644 (file)
@@ -148,9 +148,10 @@ static StringInfoData reply_message;
 static StringInfoData tmpbuf;
 
 /*
- * Timestamp of the last receipt of the reply from the standby.
+ * Timestamp of the last receipt of the reply from the standby. Set to 0 if
+ * wal_sender_timeout doesn't need to be active.
  */
-static TimestampTz last_reply_timestamp;
+static TimestampTz last_reply_timestamp = 0;
 
 /* Have we sent a heartbeat message asking for reply, since last reply? */
 static bool waiting_for_ping_response = false;
@@ -796,6 +797,15 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
                                                                                logical_read_xlog_page,
                                                                                WalSndPrepareWrite, WalSndWriteData);
 
+               /*
+                * Signal that we don't need the timeout mechanism. We're just
+                * creating the replication slot and don't yet accept feedback
+                * messages or send keepalives. As we possibly need to wait for
+                * further WAL the walsender would otherwise possibly be killed too
+                * soon.
+                */
+               last_reply_timestamp = 0;
+
                /* build initial snapshot, might take a while */
                DecodingContextFindStartpoint(ctx);
 
@@ -1693,7 +1703,7 @@ WalSndComputeSleeptime(TimestampTz now)
 {
        long            sleeptime = 10000;              /* 10 s */
 
-       if (wal_sender_timeout > 0)
+       if (wal_sender_timeout > 0 && last_reply_timestamp > 0)
        {
                TimestampTz wakeup_time;
                long            sec_to_timeout;
@@ -1735,6 +1745,10 @@ WalSndCheckTimeOut(TimestampTz now)
 {
        TimestampTz timeout;
 
+       /* don't bail out if we're doing something that doesn't require timeouts */
+       if (last_reply_timestamp <= 0)
+               return;
+
        timeout = TimestampTzPlusMilliseconds(last_reply_timestamp,
                                                                                  wal_sender_timeout);
 
@@ -1764,7 +1778,10 @@ WalSndLoop(WalSndSendDataCallback send_data)
        initStringInfo(&reply_message);
        initStringInfo(&tmpbuf);
 
-       /* Initialize the last reply timestamp */
+       /*
+        * Initialize the last reply timestamp. That enables timeout processing
+        * from hereon.
+        */
        last_reply_timestamp = GetCurrentTimestamp();
        waiting_for_ping_response = false;
 
@@ -2879,7 +2896,11 @@ WalSndKeepaliveIfNecessary(TimestampTz now)
 {
        TimestampTz ping_time;
 
-       if (wal_sender_timeout <= 0)
+       /*
+        * Don't send keepalive messages if timeouts are globally disabled or
+        * we're doing something not partaking in timeouts.
+        */
+       if (wal_sender_timeout <= 0 || last_reply_timestamp <= 0)
                return;
 
        if (waiting_for_ping_response)