From 21d48d66c8be053ef5ce0474bb30f8a91b7c3dd6 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Thu, 29 May 2014 00:32:09 +0200 Subject: [PATCH] Don't pay heed to wal_sender_timeout while creating a decoding slot. Sometimes CREATE_REPLICATION_SLOT ... LOGICAL ... needs to wait for further WAL using WalSndWaitForWal(). That used to always respect wal_sender_timeout and kill the session after waiting long enough, because no feedback/ping messages can be sent while the slot is still being created. Introduce the notion that last_reply_timestamp = 0 means that the walsender currently doesn't need timeout processing to avoid that problem. Use that notion for CREATE_REPLICATION_SLOT ... LOGICAL. Bug report and initial patch by Steve Singer, revised by me. --- src/backend/replication/walsender.c | 31 ++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index eb405cb616..088ee2c097 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -148,9 +148,10 @@ static StringInfoData reply_message; static StringInfoData tmpbuf; /* - * Timestamp of the last receipt of the reply from the standby. + * Timestamp of the last receipt of the reply from the standby. Set to 0 if + * wal_sender_timeout doesn't need to be active. */ -static TimestampTz last_reply_timestamp; +static TimestampTz last_reply_timestamp = 0; /* Have we sent a heartbeat message asking for reply, since last reply? */ static bool waiting_for_ping_response = false; @@ -796,6 +797,15 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) logical_read_xlog_page, WalSndPrepareWrite, WalSndWriteData); + /* + * Signal that we don't need the timeout mechanism. We're just + * creating the replication slot and don't yet accept feedback + * messages or send keepalives. As we possibly need to wait for + * further WAL the walsender would otherwise possibly be killed too + * soon. 
+ */ + last_reply_timestamp = 0; + /* build initial snapshot, might take a while */ DecodingContextFindStartpoint(ctx); @@ -1693,7 +1703,7 @@ WalSndComputeSleeptime(TimestampTz now) { long sleeptime = 10000; /* 10 s */ - if (wal_sender_timeout > 0) + if (wal_sender_timeout > 0 && last_reply_timestamp > 0) { TimestampTz wakeup_time; long sec_to_timeout; @@ -1735,6 +1745,10 @@ WalSndCheckTimeOut(TimestampTz now) { TimestampTz timeout; + /* don't bail out if we're doing something that doesn't require timeouts */ + if (last_reply_timestamp <= 0) + return; + timeout = TimestampTzPlusMilliseconds(last_reply_timestamp, wal_sender_timeout); @@ -1764,7 +1778,10 @@ WalSndLoop(WalSndSendDataCallback send_data) initStringInfo(&reply_message); initStringInfo(&tmpbuf); - /* Initialize the last reply timestamp */ + /* + * Initialize the last reply timestamp. That enables timeout processing + * from hereon. + */ last_reply_timestamp = GetCurrentTimestamp(); waiting_for_ping_response = false; @@ -2879,7 +2896,11 @@ WalSndKeepaliveIfNecessary(TimestampTz now) { TimestampTz ping_time; - if (wal_sender_timeout <= 0) + /* + * Don't send keepalive messages if timeouts are globally disabled or + * we're doing something not partaking in timeouts. + */ + if (wal_sender_timeout <= 0 || last_reply_timestamp <= 0) return; if (waiting_for_ping_response) -- 2.40.0