</listitem>
</varlistentry>
+ <varlistentry id="min-recovery-apply-delay" xreflabel="min_recovery_apply_delay">
+ <term><varname>min_recovery_apply_delay</varname> (<type>integer</type>)</term>
+ <indexterm>
+ <primary><varname>min_recovery_apply_delay</> recovery parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ By default, a standby server keeps restoring WAL records from the
+ primary as soon as possible. It may be useful to have a time-delayed
+ copy of the data, offering various options to correct data loss errors.
+ This parameter allows you to delay recovery by a fixed period of time,
+ specified in milliseconds if no unit is specified. For example, if
+ you set this parameter to <literal>5min</literal>, the standby will
+ replay each transaction commit only when the system time on the standby
+ is at least five minutes past the commit time reported by the master.
+ </para>
+ <para>
+ It is possible that the replication delay between servers exceeds the
+ value of this parameter, in which case no delay is added.
+ Note that the delay is calculated between the WAL timestamp as written
+ on master and the time on the current standby. Delays
+ in transfer because of networks or cascading replication configurations
+ may reduce the actual wait time significantly. If the system
+ clocks on master and standby are not synchronised, this may lead to
+ recovery applying records earlier than expected but is not a major issue
+ because the useful settings of the parameter are much larger than
+ typical time deviation between the servers. Be careful to allow for
+ different timezone settings on master and standby.
+ </para>
+ <para>
+ The delay occurs only on WAL records for COMMIT and Restore Points.
+ Other records may be replayed earlier than the specified delay, which
+ is not an issue for MVCC though may potentially increase the number
+ of recovery conflicts generated.
+ </para>
+ <para>
+ The delay occurs until the standby is promoted or triggered. After that
+ the standby will end recovery without further waiting.
+ </para>
+ <para>
+ This parameter is intended for use with streaming replication deployments,
+ however, if the parameter is specified it will be honoured in all cases.
+ Synchronous replication is not affected by this setting because there is
+ not yet any setting to request synchronous apply of transaction commits.
+ <varname>hot_standby_feedback</> will be delayed by use of this feature
+ which could lead to bloat on the master; use both together with care.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect1>
static TransactionId recoveryTargetXid;
static TimestampTz recoveryTargetTime;
static char *recoveryTargetName;
+/*
+ * Delay, in milliseconds, to apply before replaying commit and restore
+ * point records; zero means no delay.  Read from recovery.conf.
+ */
+static int min_recovery_apply_delay = 0;
+/* Earliest time at which the record currently being examined may be replayed */
+static TimestampTz recoveryDelayUntilTime;
/* options taken from recovery.conf for XLOG streaming */
static bool StandbyModeRequested = false;
static void readRecoveryCommandFile(void);
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
-static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
+static bool recoveryStopsHere(XLogRecord *record, bool *includeThis, bool *delayThis);
static void recoveryPausesHere(void);
+static void recoveryApplyDelay(void);
+static bool SetRecoveryDelayUntilTime(TimestampTz xtime);
static void SetLatestXTime(TimestampTz xtime);
static void SetCurrentChunkStartTime(TimestampTz xtime);
static void CheckRequiredParameterValues(void);
(errmsg_internal("trigger_file = '%s'",
TriggerFile)));
}
+ else if (strcmp(item->name, "min_recovery_apply_delay") == 0)
+ {
+ const char *hintmsg;
+
+ if (!parse_int(item->value, &min_recovery_apply_delay, GUC_UNIT_MS,
+ &hintmsg))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("parameter \"%s\" requires a temporal value", "min_recovery_apply_delay"),
+ hintmsg ? errhint("%s", _(hintmsg)) : 0));
+ ereport(DEBUG2,
+ (errmsg("min_recovery_apply_delay = '%s'", item->value)));
+ }
else
ereport(FATAL,
(errmsg("unrecognized recovery parameter \"%s\"",
* We also track the timestamp of the latest applied COMMIT/ABORT
* record in XLogCtl->recoveryLastXTime, for logging purposes.
* Also, some information is saved in recoveryStopXid et al for use in
- * annotating the new timeline's history file.
+ * annotating the new timeline's history file; and recoveryDelayUntilTime
+ * is updated, for time-delayed standbys.
*/
static bool
-recoveryStopsHere(XLogRecord *record, bool *includeThis)
+recoveryStopsHere(XLogRecord *record, bool *includeThis, bool *delayThis)
{
bool stopsHere;
uint8 record_info;
recordXactCommitData = (xl_xact_commit_compact *) XLogRecGetData(record);
recordXtime = recordXactCommitData->xact_time;
+
+ *delayThis = SetRecoveryDelayUntilTime(recordXactCommitData->xact_time);
}
else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
{
recordXactCommitData = (xl_xact_commit *) XLogRecGetData(record);
recordXtime = recordXactCommitData->xact_time;
+
+ *delayThis = SetRecoveryDelayUntilTime(recordXactCommitData->xact_time);
}
else if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
{
recordXactAbortData = (xl_xact_abort *) XLogRecGetData(record);
recordXtime = recordXactAbortData->xact_time;
+
+ /*
+ * We deliberately choose not to delay aborts since they have no
+ * effect on MVCC. We already allow replay of records that don't
+ * have a timestamp, so there is already opportunity for issues
+ * caused by early conflicts on standbys.
+ */
}
else if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
{
recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
recordXtime = recordRestorePointData->rp_time;
strncpy(recordRPName, recordRestorePointData->rp_name, MAXFNAMELEN);
+
+ *delayThis = SetRecoveryDelayUntilTime(recordRestorePointData->rp_time);
}
else
return false;
SpinLockRelease(&xlogctl->info_lck);
}
+/*
+ * Work out when a record stamped 'xtime' becomes eligible for replay.
+ *
+ * If min_recovery_apply_delay is nonzero, recoveryDelayUntilTime is set to
+ * xtime plus that many milliseconds and true is returned.  Otherwise
+ * recoveryDelayUntilTime is left untouched and false is returned.
+ */
+static bool
+SetRecoveryDelayUntilTime(TimestampTz xtime)
+{
+	/* No delay configured: nothing to record. */
+	if (min_recovery_apply_delay == 0)
+		return false;
+
+	recoveryDelayUntilTime =
+		TimestampTzPlusMilliseconds(xtime, min_recovery_apply_delay);
+	return true;
+}
+/*
+ * When min_recovery_apply_delay is set, we wait long enough to make sure
+ * certain record types are applied at least that interval behind the master.
+ * See recoveryStopsHere().
+ *
+ * Note that the delay is calculated between the WAL record log time and
+ * the current time on standby. We would prefer to keep track of when this
+ * standby received each WAL record, which would allow a more consistent
+ * approach and one not affected by time synchronisation issues, but that
+ * is significantly more effort and complexity for little actual gain in
+ * usability.
+ *
+ * Sleeps on XLogCtl->recoveryWakeupLatch until the current time reaches
+ * recoveryDelayUntilTime, or until CheckForStandbyTrigger() reports that
+ * the standby has been triggered, whichever comes first.
+ */
+static void
+recoveryApplyDelay(void)
+{
+	while (true)
+	{
+		long		secs;
+		int			microsecs;
+
+		ResetLatch(&XLogCtl->recoveryWakeupLatch);
+
+		/* might change the trigger file's location */
+		HandleStartupProcInterrupts();
+
+		/* A triggered standby stops waiting straight away */
+		if (CheckForStandbyTrigger())
+			break;
+
+		/*
+		 * Wait for difference between GetCurrentTimestamp() and
+		 * recoveryDelayUntilTime
+		 */
+		TimestampDifference(GetCurrentTimestamp(), recoveryDelayUntilTime,
+							&secs, &microsecs);
+
+		/* Nothing left to wait for: the record may be applied now */
+		if (secs <= 0 && microsecs <= 0)
+			break;
+
+		elog(DEBUG2, "recovery apply delay %ld seconds, %d milliseconds",
+			 secs, microsecs / 1000);
+
+		/*
+		 * Round the sub-second part up to a whole millisecond.  Truncating
+		 * would turn a remaining delay of under 1ms into a zero timeout,
+		 * making this loop spin until the deadline passes.
+		 */
+		WaitLatch(&XLogCtl->recoveryWakeupLatch,
+				  WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+				  secs * 1000L + (microsecs + 999) / 1000);
+	}
+}
+
/*
* Save timestamp of latest processed commit/abort record.
*
{
bool recoveryContinue = true;
bool recoveryApply = true;
+ bool recoveryDelay = false;
ErrorContextCallback errcallback;
TimestampTz xtime;
/*
* Have we reached our recovery target?
*/
- if (recoveryStopsHere(record, &recoveryApply))
+ if (recoveryStopsHere(record, &recoveryApply, &recoveryDelay))
{
if (recoveryPauseAtTarget)
{
break;
}
+ /*
+ * If we've been asked to lag the master, wait on
+ * latch until enough time has passed.
+ */
+ if (recoveryDelay)
+ {
+ recoveryApplyDelay();
+
+ /*
+ * We test for paused recovery again here. If
+ * user sets delayed apply, it may be because
+ * they expect to pause recovery in case of
+ * problems, so we must test again here otherwise
+ * pausing during the delay-wait wouldn't work.
+ */
+ if (xlogctl->recoveryPause)
+ recoveryPausesHere();
+ }
+
/* Setup error traceback support for ereport() */
errcallback.callback = rm_redo_error_callback;
errcallback.arg = (void *) record;