From aedccb1f6fef988af1d1a25b78151f3773954b4c Mon Sep 17 00:00:00 2001 From: Simon Riggs Date: Tue, 25 Nov 2014 20:13:30 +0000 Subject: [PATCH] action_at_recovery_target recovery config option action_at_recovery_target = pause | promote | shutdown Petr Jelinek Reviewed by Muhammad Asif Naeem, Fujii Masao and Simon Riggs --- doc/src/sgml/recovery-config.sgml | 58 ++++++++++++++--- src/backend/access/transam/xlog.c | 98 +++++++++++++++++++++++++---- src/backend/postmaster/postmaster.c | 12 ++++ src/include/access/xlog_internal.h | 10 +++ 4 files changed, 159 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml index 0f1ff343a6..a145a3fee2 100644 --- a/doc/src/sgml/recovery-config.sgml +++ b/doc/src/sgml/recovery-config.sgml @@ -289,12 +289,39 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows - Specifies whether recovery should pause when the recovery target - is reached. The default is true. - This is intended to allow queries to be executed against the - database to check if this recovery target is the most desirable - point for recovery. The paused state can be resumed by using - pg_xlog_replay_resume() (See + Alias for action_at_recovery_target; true is the same as + action_at_recovery_target = pause and false + is the same as action_at_recovery_target = promote. + + + This setting has no effect if hot_standby is not + enabled, or if no recovery target is set. + + + + + + + + action_at_recovery_target (enum) + + action_at_recovery_target recovery parameter + + + + + Specifies what action the server should take once the recovery target is + reached. The default is pause, which means recovery will + be paused. promote means the recovery process will finish and + the server will start to accept connections. + Finally, shutdown will stop the server after reaching the + recovery target.
+ + The intended use of the pause setting is to allow queries to be + executed against the database to check if this recovery target is the + most desirable point for recovery. The paused state can be resumed by + using pg_xlog_replay_resume() (See ), which then causes recovery to end. If this recovery target is not the desired stopping point, then shutdown the server, change the @@ -302,8 +329,23 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows continue recovery. - This setting has no effect if is not - enabled, or if no recovery target is set. + The shutdown setting is useful to have the instance ready at + the exact replay point desired. + The instance will still be able to replay more WAL records (and in fact + will have to replay WAL records since the last checkpoint the next time it is + started). + + + Note that because recovery.conf will not be renamed when + action_at_recovery_target is set to shutdown, + any subsequent start will end with an immediate shutdown unless the + configuration is changed or the recovery.conf file is removed + manually. + + + This setting has no effect if no recovery target is set. + If hot_standby is not enabled, a setting of + pause will act the same as shutdown.
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 8e712b793f..0f661f5010 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -228,7 +228,7 @@ static char *recoveryEndCommand = NULL; static char *archiveCleanupCommand = NULL; static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET; static bool recoveryTargetInclusive = true; -static bool recoveryPauseAtTarget = true; +static RecoveryTargetAction actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PAUSE; static TransactionId recoveryTargetXid; static TimestampTz recoveryTargetTime; static char *recoveryTargetName; @@ -4647,6 +4647,9 @@ readRecoveryCommandFile(void) ConfigVariable *item, *head = NULL, *tail = NULL; + bool recoveryPauseAtTargetSet = false; + bool actionAtRecoveryTargetSet = false; + fd = AllocateFile(RECOVERY_COMMAND_FILE, "r"); if (fd == NULL) @@ -4692,13 +4695,43 @@ readRecoveryCommandFile(void) } else if (strcmp(item->name, "pause_at_recovery_target") == 0) { + bool recoveryPauseAtTarget; + if (!parse_bool(item->value, &recoveryPauseAtTarget)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("parameter \"%s\" requires a Boolean value", "pause_at_recovery_target"))); + ereport(DEBUG2, (errmsg_internal("pause_at_recovery_target = '%s'", item->value))); + + actionAtRecoveryTarget = recoveryPauseAtTarget ? 
+ RECOVERY_TARGET_ACTION_PAUSE : + RECOVERY_TARGET_ACTION_PROMOTE; + + recoveryPauseAtTargetSet = true; + } + else if (strcmp(item->name, "action_at_recovery_target") == 0) + { + if (strcmp(item->value, "pause") == 0) + actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PAUSE; + else if (strcmp(item->value, "promote") == 0) + actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PROMOTE; + else if (strcmp(item->value, "shutdown") == 0) + actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_SHUTDOWN; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for recovery parameter \"%s\"", + "action_at_recovery_target"), + errhint("The allowed values are \"pause\", \"promote\" and \"shutdown\"."))); + + ereport(DEBUG2, + (errmsg_internal("action_at_recovery_target = '%s'", + item->value))); + + actionAtRecoveryTargetSet = true; } else if (strcmp(item->name, "recovery_target_timeline") == 0) { @@ -4863,6 +4896,28 @@ readRecoveryCommandFile(void) RECOVERY_COMMAND_FILE))); } + /* + * Check for mutually exclusive parameters + */ + if (recoveryPauseAtTargetSet && actionAtRecoveryTargetSet) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("cannot set both \"%s\" and \"%s\" recovery parameters", + "pause_at_recovery_target", + "action_at_recovery_target"), + errhint("The \"pause_at_recovery_target\" is deprecated."))); + + + /* + * Override any inconsistent requests. Note that this is a change + * of behaviour in 9.5; prior to this we simply ignored a request + * to pause if hot_standby = off, which was surprising behaviour.
+ */ + if (actionAtRecoveryTarget == RECOVERY_TARGET_ACTION_PAUSE && + actionAtRecoveryTargetSet && + standbyState == STANDBY_DISABLED) + actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_SHUTDOWN; + /* Enable fetching from archive recovery area */ ArchiveRecoveryRequested = true; @@ -6415,10 +6470,37 @@ StartupXLOG(void) * end of main redo apply loop */ - if (recoveryPauseAtTarget && reachedStopPoint) + if (reachedStopPoint) { - SetRecoveryPause(true); - recoveryPausesHere(); + if (!reachedConsistency) + ereport(FATAL, + (errmsg("requested recovery stop point is before consistent recovery point"))); + + /* + * This is the last point where we can restart recovery with a + * new recovery target, if we shutdown and begin again. After + * this, Resource Managers may choose to do permanent corrective + * actions at end of recovery. + */ + switch (actionAtRecoveryTarget) + { + case RECOVERY_TARGET_ACTION_SHUTDOWN: + /* + * exit with special return code to request shutdown + * of postmaster. Log messages issued from + * postmaster. + */ + proc_exit(3); + + case RECOVERY_TARGET_ACTION_PAUSE: + SetRecoveryPause(true); + recoveryPausesHere(); + + /* drop into promote */ + + case RECOVERY_TARGET_ACTION_PROMOTE: + break; + } } /* Allow resource managers to do any required cleanup. */ @@ -6436,6 +6518,7 @@ StartupXLOG(void) ereport(LOG, (errmsg("last completed transaction was at log time %s", timestamptz_to_str(xtime)))); + InRedo = false; } else @@ -6496,13 +6579,6 @@ StartupXLOG(void) (EndOfLog < minRecoveryPoint || !XLogRecPtrIsInvalid(ControlFile->backupStartPoint))) { - if (reachedStopPoint) - { - /* stopped because of stop request */ - ereport(FATAL, - (errmsg("requested recovery stop point is before consistent recovery point"))); - } - /* * Ran off end of WAL before reaching end-of-backup WAL record, or * minRecoveryPoint. 
That's usually a bad sign, indicating that you diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 6220a8e6e4..5106f52e0e 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -509,6 +509,7 @@ static void ShmemBackendArrayRemove(Backend *bn); /* Macros to check exit status of a child process */ #define EXIT_STATUS_0(st) ((st) == 0) #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) +#define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3) #ifndef WIN32 /* @@ -2555,6 +2556,17 @@ reaper(SIGNAL_ARGS) continue; } + if (EXIT_STATUS_3(exitstatus)) + { + ereport(LOG, + (errmsg("shutdown at recovery target"))); + Shutdown = SmartShutdown; + TerminateChildren(SIGTERM); + pmState = PM_WAIT_BACKENDS; + /* PostmasterStateMachine logic does the rest */ + continue; + } + /* * Unexpected exit of startup process (including FATAL exit) * during PM_STARTUP is treated as catastrophic. There are no diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 423ef4d7fa..85b3fe76bb 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -214,6 +214,16 @@ typedef struct XLogRecData uint32 len; /* length of rmgr data to include */ } XLogRecData; +/* + * Recovery target action. + */ +typedef enum +{ + RECOVERY_TARGET_ACTION_PAUSE, + RECOVERY_TARGET_ACTION_PROMOTE, + RECOVERY_TARGET_ACTION_SHUTDOWN, +} RecoveryTargetAction; + /* * Method table for resource managers. * -- 2.40.0