From: Heikki Linnakangas
Date: Wed, 22 Apr 2015 11:28:37 +0000 (+0300)
Subject: Make the pg_rewind regression tests more robust on slow systems.
X-Git-Tag: REL9_5_ALPHA1~417
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=54a16df0100da445be7c79eb81dfb96fd0685e6d;p=postgresql

Make the pg_rewind regression tests more robust on slow systems.

There were a couple of hard-coded sleeps in the tests: to wait for standby
to catch up with master, and to wait for promotion with "pg_ctl promote"
to complete. Instead of a fixed, hard-coded sleep, poll the server with a
query once a second. This isn't ideal either, and I wish we had a better
solution for real-world applications too, but this should fix the immediate
problem.

Patch by Michael Paquier, with some editing by me.
---

diff --git a/src/bin/pg_rewind/RewindTest.pm b/src/bin/pg_rewind/RewindTest.pm
index 84e0512143..a5f7d08bf7 100644
--- a/src/bin/pg_rewind/RewindTest.pm
+++ b/src/bin/pg_rewind/RewindTest.pm
@@ -125,6 +125,37 @@ sub check_query
 	}
 }
 
+# Run a query once a second, until it returns 't' (i.e. SQL boolean true).
+sub poll_query_until
+{
+	my ($query, $connstr) = @_;
+
+	my $max_attempts = 30;
+	my $attempts = 0;
+	my ($stdout, $stderr);
+
+	while ($attempts < $max_attempts)
+	{
+		my $cmd = ['psql', '-At', '-c', "$query", '-d', "$connstr" ];
+		my $result = run $cmd, '>', \$stdout, '2>', \$stderr;
+
+		chomp($stdout);
+		if ($stdout eq "t")
+		{
+			return 1;
+		}
+
+		# Wait a second before retrying.
+		sleep 1;
+		$attempts++;
+	}
+
+	# The query result didn't change in 30 seconds. Give up. Print the stderr
+	# from the last attempt, hopefully that's useful for debugging.
+	diag $stderr;
+	return 0;
+}
+
 sub append_to_file
 {
 	my($filename, $str) = @_;
@@ -185,7 +216,7 @@ sub create_standby
 	# Base backup is taken with xlog files included
 	system_or_bail("pg_basebackup -D $test_standby_datadir -p $port_master -x >>$log_path 2>&1");
 	append_to_file("$test_standby_datadir/recovery.conf", qq(
-primary_conninfo='$connstr_master'
+primary_conninfo='$connstr_master application_name=rewind_standby'
 standby_mode=on
 recovery_target_timeline='latest'
 ));
@@ -193,8 +224,11 @@ recovery_target_timeline='latest'
 	# Start standby
 	system_or_bail("pg_ctl -w -D $test_standby_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_standby\" start >>$log_path 2>&1");
 
-	# sleep a bit to make sure the standby has caught up.
-	sleep 1;
+	# Wait until the standby has caught up with the primary, by polling
+	# pg_stat_replication.
+	my $caughtup_query = "SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
+	poll_query_until($caughtup_query, $connstr_master)
+		or die "Timed out while waiting for standby to catch up";
 }
 
 sub promote_standby
@@ -203,9 +237,11 @@ sub promote_standby
 	# up standby
 
 	# Now promote slave and insert some new data on master, this will put
-	# the master out-of-sync with the standby.
+	# the master out-of-sync with the standby. Wait until the standby is
+	# out of recovery mode, and is ready to accept read-write connections.
 	system_or_bail("pg_ctl -w -D $test_standby_datadir promote >>$log_path 2>&1");
-	sleep 2;
+	poll_query_until("SELECT NOT pg_is_in_recovery()", $connstr_standby)
+		or die "Timed out while waiting for promotion of standby";
 }
 
 sub run_pg_rewind