</listitem>
</varlistentry>
- <varlistentry id="guc-replication-timeout" xreflabel="replication_timeout">
- <term><varname>replication_timeout</varname> (<type>integer</type>)</term>
+ <varlistentry id="guc-wal-sender-timeout" xreflabel="wal_sender_timeout">
+ <term><varname>wal_sender_timeout</varname> (<type>integer</type>)</term>
<indexterm>
- <primary><varname>replication_timeout</> configuration parameter</primary>
+ <primary><varname>wal_sender_timeout</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
the <filename>postgresql.conf</> file or on the server command line.
The default value is 60 seconds.
</para>
- <para>
- To prevent connections from being terminated prematurely,
- <xref linkend="guc-wal-receiver-status-interval">
- must be enabled on the standby, and its value must be less than the
- value of <varname>replication_timeout</>.
- </para>
</listitem>
</varlistentry>
the <filename>postgresql.conf</> file or on the server command line.
The default value is 10 seconds.
</para>
- <para>
- When <xref linkend="guc-replication-timeout"> is enabled on a sending server,
- <varname>wal_receiver_status_interval</> must be enabled, and its value
- must be less than the value of <varname>replication_timeout</>.
- </para>
</listitem>
</varlistentry>
</listitem>
</varlistentry>
+ <varlistentry id="guc-wal-receiver-timeout" xreflabel="wal_receiver_timeout">
+ <term><varname>wal_receiver_timeout</varname> (<type>integer</type>)</term>
+ <indexterm>
+ <primary><varname>wal_receiver_timeout</> configuration parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ Terminate replication connections that are inactive longer
+ than the specified number of milliseconds. This is useful for
+ the receiving standby server to detect a primary node crash or network
+ outage.
+ A value of zero disables the timeout mechanism. This parameter
+ can only be set in
+ the <filename>postgresql.conf</> file or on the server command line.
+ The default value is 60 seconds.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
</sect1>
<listitem>
<para>
Add
- <link linkend="guc-replication-timeout"><varname>replication_timeout</></link>
+ <varname>replication_timeout</>
setting (Fujii Masao, Heikki Linnakangas)
</para>
/* GUC variables */
int wal_receiver_status_interval;
+int wal_receiver_timeout;
bool hot_standby_feedback;
/* libpqreceiver hooks to these when loaded */
static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len);
static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr);
static void XLogWalRcvFlush(bool dying);
-static void XLogWalRcvSendReply(void);
+static void XLogWalRcvSendReply(bool force, bool requestReply);
static void XLogWalRcvSendHSFeedback(void);
static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime);
{
char conninfo[MAXCONNINFO];
XLogRecPtr startpoint;
-
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
+ TimestampTz last_recv_timestamp;
+ bool ping_sent;
/*
* WalRcv should be set up already (if we are a backend, we inherit this
MemSet(&reply_message, 0, sizeof(reply_message));
MemSet(&feedback_message, 0, sizeof(feedback_message));
+ /* Initialize the last recv timestamp */
+ last_recv_timestamp = GetCurrentTimestamp();
+ ping_sent = false;
+
/* Loop until end-of-streaming or error */
for (;;)
{
/* Wait a while for data to arrive */
if (walrcv_receive(NAPTIME_PER_CYCLE, &type, &buf, &len))
{
+ /* Something was received from master, so reset timeout */
+ last_recv_timestamp = GetCurrentTimestamp();
+ ping_sent = false;
+
/* Accept the received data, and process it */
XLogWalRcvProcessMsg(type, buf, len);
/* Receive any more data we can without sleeping */
while (walrcv_receive(0, &type, &buf, &len))
+ {
+ last_recv_timestamp = GetCurrentTimestamp();
+ ping_sent = false;
XLogWalRcvProcessMsg(type, buf, len);
+ }
/* Let the master know that we received some data. */
- XLogWalRcvSendReply();
+ XLogWalRcvSendReply(false, false);
/*
* If we've written some records, flush them to disk and let the
else
{
/*
- * We didn't receive anything new, but send a status update to the
- * master anyway, to report any progress in applying WAL.
+ * We didn't receive anything new. If we haven't heard anything
+ * from the server for more than wal_receiver_timeout / 2,
+ * ping the server. Also, if it's been longer than
+ * wal_receiver_status_interval since the last update we sent,
+ * send a status update to the master anyway, to report any
+ * progress in applying WAL.
+ */
+ bool requestReply = false;
+
+ /*
+ * Check if time since last receive from primary has reached the
+ * configured limit.
*/
- XLogWalRcvSendReply();
+ if (wal_receiver_timeout > 0)
+ {
+ TimestampTz now = GetCurrentTimestamp();
+ TimestampTz timeout;
+
+ timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
+ wal_receiver_timeout);
+
+ if (now >= timeout)
+ ereport(ERROR,
+ (errmsg("terminating walreceiver due to timeout")));
+
+ /*
+ * We haven't received anything new for half of
+ * wal_receiver_timeout. Ping the server.
+ */
+ if (!ping_sent)
+ {
+ timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
+ (wal_receiver_timeout/2));
+ if (now >= timeout)
+ {
+ requestReply = true;
+ ping_sent = true;
+ }
+ }
+ }
+
+ XLogWalRcvSendReply(requestReply, requestReply);
XLogWalRcvSendHSFeedback();
}
}
memcpy(&keepalive, buf, sizeof(PrimaryKeepaliveMessage));
ProcessWalSndrMessage(keepalive.walEnd, keepalive.sendTime);
+
+ /* If the primary requested a reply, send one immediately */
+ if (keepalive.replyRequested)
+ XLogWalRcvSendReply(true, false);
break;
}
default:
/* Also let the master know that we made some progress */
if (!dying)
- {
- XLogWalRcvSendReply();
- XLogWalRcvSendHSFeedback();
- }
+ XLogWalRcvSendReply(false, false);
}
}
/*
- * Send reply message to primary, indicating our current XLOG positions and
- * the current time.
+ * Send reply message to primary, indicating our current XLOG positions, oldest
+ * xmin and the current time.
+ *
+ * If 'force' is not set, the message is only sent if enough time has
+ * passed since last status update to reach wal_receiver_status_interval.
+ * If wal_receiver_status_interval is disabled altogether and 'force' is
+ * false, this is a no-op.
+ *
+ * If 'requestReply' is true, requests the server to reply immediately upon
+ * receiving this message. This is used for heartbeats, when approaching
+ * wal_receiver_timeout.
*/
static void
-XLogWalRcvSendReply(void)
+XLogWalRcvSendReply(bool force, bool requestReply)
{
char buf[sizeof(StandbyReplyMessage) + 1];
TimestampTz now;
* If the user doesn't want status to be reported to the master, be sure
* to exit before doing anything at all.
*/
- if (wal_receiver_status_interval <= 0)
+ if (!force && wal_receiver_status_interval <= 0)
return;
/* Get current timestamp. */
* this is only for reporting purposes and only on idle systems, that's
* probably OK.
*/
- if (XLByteEQ(reply_message.write, LogstreamResult.Write)
+ if (!force
+ && XLByteEQ(reply_message.write, LogstreamResult.Write)
&& XLByteEQ(reply_message.flush, LogstreamResult.Flush)
&& !TimestampDifferenceExceeds(reply_message.sendTime, now,
wal_receiver_status_interval * 1000))
reply_message.flush = LogstreamResult.Flush;
reply_message.apply = GetXLogReplayRecPtr(NULL);
reply_message.sendTime = now;
+ reply_message.replyRequested = requestReply;
elog(DEBUG2, "sending write %X/%X flush %X/%X apply %X/%X",
(uint32) (reply_message.write >> 32), (uint32) reply_message.write,
/* User-settable parameters for walsender */
int max_wal_senders = 0; /* the maximum number of concurrent walsenders */
-int replication_timeout = 60 * 1000; /* maximum time to send one
+int wal_sender_timeout = 60 * 1000; /* maximum time to send one
* WAL data message */
/*
* State for WalSndWakeupRequest
*/
static XLogRecPtr sentPtr = 0;
+/* Buffer for processing reply messages. */
+static StringInfoData reply_message;
/*
- * Buffer for processing reply messages.
+ * Buffer for constructing outgoing messages.
+ * (1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE bytes)
*/
-static StringInfoData reply_message;
+static char *output_message;
/*
* Timestamp of the last receipt of the reply from the standby.
*/
static TimestampTz last_reply_timestamp;
+/* Have we sent a heartbeat message asking for reply, since last reply? */
+static bool ping_sent = false;
/* Flags set by signal handlers for later service in main loop */
static volatile sig_atomic_t got_SIGHUP = false;
static void WalSndLoop(void) __attribute__((noreturn));
static void InitWalSenderSlot(void);
static void WalSndKill(int code, Datum arg);
-static void XLogSend(char *msgbuf, bool *caughtup);
+static void XLogSend(bool *caughtup);
static void IdentifySystem(void);
static void StartReplication(StartReplicationCmd *cmd);
static void ProcessStandbyMessage(void);
static void ProcessStandbyReplyMessage(void);
static void ProcessStandbyHSFeedbackMessage(void);
static void ProcessRepliesIfAny(void);
-static void WalSndKeepalive(char *msgbuf);
+static void WalSndKeepalive(bool requestReply);
/* Initialize walsender process before entering the main command loop */
* Save the last reply timestamp if we've received at least one reply.
*/
if (received)
+ {
last_reply_timestamp = GetCurrentTimestamp();
+ ping_sent = false;
+ }
}
/*
(uint32) (reply.flush >> 32), (uint32) reply.flush,
(uint32) (reply.apply >> 32), (uint32) reply.apply);
+ /* Send a reply if the standby requested one. */
+ if (reply.replyRequested)
+ WalSndKeepalive(false);
+
/*
* Update shared state for this WalSender process based on reply data from
* standby.
static void
WalSndLoop(void)
{
- char *output_message;
bool caughtup = false;
/*
/* Initialize the last reply timestamp */
last_reply_timestamp = GetCurrentTimestamp();
+ ping_sent = false;
/* Loop forever, unless we get an error */
for (;;)
* caught up.
*/
if (!pq_is_send_pending())
- XLogSend(output_message, &caughtup);
+ XLogSend(&caughtup);
else
caughtup = false;
if (walsender_ready_to_stop)
{
/* ... let's just be real sure we're caught up ... */
- XLogSend(output_message, &caughtup);
+ XLogSend(&caughtup);
if (caughtup && !pq_is_send_pending())
{
/* Inform the standby that XLOG streaming is done */
if (pq_is_send_pending())
wakeEvents |= WL_SOCKET_WRITEABLE;
- else if (MyWalSnd->sendKeepalive)
+ else if (wal_sender_timeout > 0 && !ping_sent)
{
- WalSndKeepalive(output_message);
- /* Try to flush pending output to the client */
- if (pq_flush_if_writable() != 0)
- break;
+ /*
+ * If half of wal_sender_timeout has elapsed without receiving
+ * any reply from standby, send a keep-alive message to standby
+ * requesting an immediate reply.
+ */
+ timeout = TimestampTzPlusMilliseconds(last_reply_timestamp,
+ wal_sender_timeout / 2);
+ if (GetCurrentTimestamp() >= timeout)
+ {
+ WalSndKeepalive(true);
+ ping_sent = true;
+ /* Try to flush pending output to the client */
+ if (pq_flush_if_writable() != 0)
+ break;
+ }
}
/* Determine time until replication timeout */
- if (replication_timeout > 0)
+ if (wal_sender_timeout > 0)
{
timeout = TimestampTzPlusMilliseconds(last_reply_timestamp,
- replication_timeout);
- sleeptime = 1 + (replication_timeout / 10);
+ wal_sender_timeout);
+ sleeptime = 1 + (wal_sender_timeout / 10);
}
- /* Sleep until something happens or replication timeout */
+ /* Sleep until something happens or we time out */
ImmediateInterruptOK = true;
CHECK_FOR_INTERRUPTS();
WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
* possibility that the client replied just as we reached the
* timeout ... he's supposed to reply *before* that.
*/
- if (replication_timeout > 0 &&
- GetCurrentTimestamp() >= timeout)
+ if (wal_sender_timeout > 0 && GetCurrentTimestamp() >= timeout)
{
/*
* Since typically expiration of replication timeout means
* but not yet sent to the client, and buffer it in the libpq output
* buffer.
*
- * msgbuf is a work area in which the output message is constructed. It's
- * passed in just so we can avoid re-palloc'ing the buffer on each cycle.
- * It must be of size 1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE.
- *
* If there is no unsent WAL remaining, *caughtup is set to true, otherwise
* *caughtup is set to false.
*/
static void
-XLogSend(char *msgbuf, bool *caughtup)
+XLogSend(bool *caughtup)
{
XLogRecPtr SendRqstPtr;
XLogRecPtr startptr;
/*
* OK to read and send the slice.
*/
- msgbuf[0] = 'w';
+ output_message[0] = 'w';
/*
* Read the log directly into the output buffer to avoid extra memcpy
* calls.
*/
- XLogRead(msgbuf + 1 + sizeof(WalDataMessageHeader), startptr, nbytes);
+ XLogRead(output_message + 1 + sizeof(WalDataMessageHeader), startptr, nbytes);
/*
* We fill the message header last so that the send timestamp is taken as
msghdr.walEnd = SendRqstPtr;
msghdr.sendTime = GetCurrentTimestamp();
- memcpy(msgbuf + 1, &msghdr, sizeof(WalDataMessageHeader));
+ memcpy(output_message + 1, &msghdr, sizeof(WalDataMessageHeader));
- pq_putmessage_noblock('d', msgbuf, 1 + sizeof(WalDataMessageHeader) + nbytes);
+ pq_putmessage_noblock('d', output_message, 1 + sizeof(WalDataMessageHeader) + nbytes);
sentPtr = endptr;
return (Datum) 0;
}
+/*
+ * This function is used to send a keepalive message to the standby.
+ * If requestReply is set, sets a flag in the message requesting the standby
+ * to send a message back to us, for heartbeat purposes.
+ */
static void
-WalSndKeepalive(char *msgbuf)
+WalSndKeepalive(bool requestReply)
{
PrimaryKeepaliveMessage keepalive_message;
/* Construct a new message */
keepalive_message.walEnd = sentPtr;
keepalive_message.sendTime = GetCurrentTimestamp();
+ keepalive_message.replyRequested = requestReply;
elog(DEBUG2, "sending replication keepalive");
/* Prepend with the message type and send it. */
- msgbuf[0] = 'k';
- memcpy(msgbuf + 1, &keepalive_message, sizeof(PrimaryKeepaliveMessage));
- pq_putmessage_noblock('d', msgbuf, sizeof(PrimaryKeepaliveMessage) + 1);
+ output_message[0] = 'k';
+ memcpy(output_message + 1, &keepalive_message, sizeof(PrimaryKeepaliveMessage));
+ pq_putmessage_noblock('d', output_message, sizeof(PrimaryKeepaliveMessage) + 1);
}
/*
NULL, NULL, NULL
},
+ {
+ {"wal_receiver_timeout", PGC_SIGHUP, REPLICATION_STANDBY,
+ gettext_noop("Sets the maximum wait time to receive data from master."),
+ NULL,
+ GUC_UNIT_MS
+ },
+ &wal_receiver_timeout,
+ 60 * 1000, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
{
{"max_connections", PGC_POSTMASTER, CONN_AUTH_SETTINGS,
gettext_noop("Sets the maximum number of concurrent connections."),
},
{
- {"replication_timeout", PGC_SIGHUP, REPLICATION_SENDING,
+ {"wal_sender_timeout", PGC_SIGHUP, REPLICATION_SENDING,
gettext_noop("Sets the maximum time to wait for WAL replication."),
NULL,
GUC_UNIT_MS
},
- &replication_timeout,
+ &wal_sender_timeout,
60 * 1000, 0, INT_MAX,
NULL, NULL, NULL
},
#max_wal_senders = 0 # max number of walsender processes
# (change requires restart)
#wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables
-#replication_timeout = 60s # in milliseconds; 0 disables
+#wal_sender_timeout = 60s # in milliseconds; 0 disables
# - Master Server -
# 0 disables
#hot_standby_feedback = off # send info from standby to prevent
# query conflicts
+#wal_receiver_timeout = 60s # time that receiver waits for
+ # communication from master
+ # in milliseconds; 0 disables
#------------------------------------------------------------------------------
/* Sender's system clock at the time of transmission */
TimestampTz sendTime;
+
+ /*
+ * If replyRequested is set, the client should reply immediately to this
+ * message, to avoid a timeout disconnect.
+ */
+ bool replyRequested;
} WalSndrMessage;
/* Sender's system clock at the time of transmission */
TimestampTz sendTime;
+
+ /*
+ * If replyRequested is set, the server should reply immediately to this
+ * message, to avoid a timeout disconnect.
+ */
+ bool replyRequested;
} StandbyReplyMessage;
/*
#include "storage/spin.h"
#include "pgtime.h"
+/* user-settable parameters */
extern int wal_receiver_status_interval;
+extern int wal_receiver_timeout;
extern bool hot_standby_feedback;
/*
/* user-settable parameters */
extern int max_wal_senders;
-extern int replication_timeout;
+extern int wal_sender_timeout;
extern void InitWalSender(void);
extern void exec_replication_command(const char *query_string);
XLogRecPtr sentPtr; /* WAL has been sent up to this point */
bool needreload; /* does currently-open file need to be
* reloaded? */
- bool sendKeepalive; /* do we send keepalives on this connection? */
/*
* The xlog locations that have been written, flushed, and applied by