From caa078353ecd1f3b3681c0d4fa95ad4bb8c2308a Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 7 Oct 2019 09:07:22 +0900 Subject: [PATCH] Improve handling and coverage of --no-ensure-shutdown in pg_rewind This includes a couple of changes around the new behavior of pg_rewind which enforces recovery to happen once on a cluster not shut down cleanly: - Some comments and documentation improvements. - Shutdown the cluster to rewind with immediate mode in all the tests, this allows to check after the forced recovery behavior which is wanted as new default. - Use -F for the forced recovery step, so as postgres does not use fsync. This was useless as a final sync is done once the tool is done. Author: Michael Paquier Reviewed-by: Alexey Kondratov Discussion: https://postgr.es/m/20191004083721.GA1829@paquier.xyz --- doc/src/sgml/ref/pg_rewind.sgml | 10 ++++++---- src/bin/pg_rewind/pg_rewind.c | 19 ++++++++++++------- src/bin/pg_rewind/t/005_same_timeline.pl | 4 ++++ src/bin/pg_rewind/t/RewindTest.pm | 20 ++++++++++++-------- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml index fbf454803b..42d29edd4e 100644 --- a/doc/src/sgml/ref/pg_rewind.sgml +++ b/doc/src/sgml/ref/pg_rewind.sgml @@ -169,12 +169,14 @@ PostgreSQL documentation - pg_rewind verifies that the target server - is cleanly shutdown before rewinding; by default, if it isn't, it - starts the server in single-user mode to complete crash recovery. + pg_rewind requires that the target server + is cleanly shut down before rewinding. By default, if the target server + is not shut down cleanly, pg_rewind starts + the target server in single-user mode to complete crash recovery first, + and stops it. By passing this option, pg_rewind skips this and errors out immediately if the server is not cleanly shut - down. Users are expected to handle the situation themselves in that + down. Users are expected to handle the situation themselves in that case. diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index fe1468b771..875a43b219 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -270,11 +270,12 @@ main(int argc, char **argv) pg_free(buffer); /* - * If the target instance was not cleanly shut down, run a single-user - * postgres session really quickly and reload the control file to get the - * new state. Note if no_ensure_shutdown is specified, pg_rewind won't do - * that automatically. That means users need to do themselves in advance, - * else pg_rewind will soon quit, see sanityChecks(). + * If the target instance was not cleanly shut down, start and stop the + * target cluster once in single-user mode to enforce recovery to finish, + * ensuring that the cluster can be used by pg_rewind. Note that if + * no_ensure_shutdown is specified, pg_rewind ignores this step, and users + * need to make sure by themselves that the target cluster is in a clean + * state. */ if (!no_ensure_shutdown && ControlFile_target.state != DB_SHUTDOWNED && @@ -847,8 +848,12 @@ ensureCleanShutdown(const char *argv0) if (dry_run) return; - /* finally run postgres in single-user mode */ - snprintf(cmd, MAXCMDLEN, "\"%s\" --single -D \"%s\" template1 < \"%s\"", + /* + * Finally run postgres in single-user mode. There is no need to use + * fsync here. This makes the recovery faster, and the target data folder + * is synced at the end anyway. + */ + snprintf(cmd, MAXCMDLEN, "\"%s\" --single -F -D \"%s\" template1 < \"%s\"", exec_path, datadir_target, DEVNULL); if (system(cmd) != 0) diff --git a/src/bin/pg_rewind/t/005_same_timeline.pl b/src/bin/pg_rewind/t/005_same_timeline.pl index 40dbc44caa..df469d3939 100644 --- a/src/bin/pg_rewind/t/005_same_timeline.pl +++ b/src/bin/pg_rewind/t/005_same_timeline.pl @@ -1,3 +1,7 @@ +# +# Test that running pg_rewind with the source and target clusters +# on the same timeline runs successfully. +# use strict; use warnings; use TestLib; diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm index c540722420..82fa220ac8 100644 --- a/src/bin/pg_rewind/t/RewindTest.pm +++ b/src/bin/pg_rewind/t/RewindTest.pm @@ -227,8 +227,10 @@ sub run_pg_rewind # Append the rewind-specific role to the connection string. $standby_connstr = "$standby_connstr user=rewind_user"; - # Stop the master and be ready to perform the rewind - $node_master->stop; + # Stop the master and be ready to perform the rewind. The cluster + # needs recovery to finish once, and pg_rewind makes sure that it + # happens automatically. + $node_master->stop('immediate'); # At this point, the rewind processing is ready to run. # We now have a very simple scenario with a few diverged WAL record. @@ -260,19 +262,21 @@ sub run_pg_rewind } elsif ($test_mode eq "remote") { - - # Do rewind using a remote connection as source + # Do rewind using a remote connection as source, generating + # recovery configuration automatically. command_ok( [ 'pg_rewind', "--debug", "--source-server", $standby_connstr, - "--target-pgdata=$master_pgdata", "-R", - "--no-sync" + "--target-pgdata=$master_pgdata", "--no-sync", + "--write-recovery-conf" ], 'pg_rewind remote'); - # Check that standby.signal has been created. - ok(-e "$master_pgdata/standby.signal"); + # Check that standby.signal is here as recovery configuration + # was requested. + ok( -e "$master_pgdata/standby.signal", + 'standby.signal created after pg_rewind'); # Now, when pg_rewind apparently succeeded with minimal permissions, # add REPLICATION privilege. So we could test that new standby -- 2.40.0