Make the pg_rewind regression tests more robust on slow systems.

author Heikki Linnakangas <heikki.linnakangas@iki.fi>

Wed, 22 Apr 2015 11:28:37 +0000 (14:28 +0300)

committer Heikki Linnakangas <heikki.linnakangas@iki.fi>

Wed, 22 Apr 2015 11:33:57 +0000 (14:33 +0300)
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 22 Apr 2015 11:28:37 +0000 (14:28 +0300)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Wed, 22 Apr 2015 11:33:57 +0000 (14:33 +0300)
diff --git a/src/bin/pg_rewind/RewindTest.pm b/src/bin/pg_rewind/RewindTest.pm

index 84e0512143a34a007f84ea72074663e888baa3be..a5f7d08bf72afd6d8bfdc3398f703eaa989b2913 100644 (file)
--- a/src/bin/pg_rewind/RewindTest.pm
+++ b/src/bin/pg_rewind/RewindTest.pm
@@ -125,6 +125,37 @@ sub check_query
         }
  }
  
+# Run a query once a second, until it returns 't' (i.e. SQL boolean true).
+sub poll_query_until
+{
+       my ($query, $connstr) = @_;
+
+       my $max_attempts = 30;
+       my $attempts = 0;
+       my ($stdout, $stderr);
+
+       while ($attempts < $max_attempts)
+       {
+               my $cmd = ['psql', '-At', '-c', "$query", '-d', "$connstr" ];
+               my $result = run $cmd, '>', \$stdout, '2>', \$stderr;
+
+               chomp($stdout);
+               if ($stdout eq "t")
+               {
+                       return 1;
+               }
+
+               # Wait a second before retrying.
+               sleep 1;
+               $attempts++;
+       }
+
+       # The query result didn't change in 30 seconds. Give up. Print the stderr
+       # from the last attempt, hopefully that's useful for debugging.
+       diag $stderr;
+       return 0;
+}
+
  sub append_to_file
  {
         my($filename, $str) = @_;
@@ -185,7 +216,7 @@ sub create_standby
         # Base backup is taken with xlog files included
         system_or_bail("pg_basebackup -D $test_standby_datadir -p $port_master -x >>$log_path 2>&1");
         append_to_file("$test_standby_datadir/recovery.conf", qq(
-primary_conninfo='$connstr_master'
+primary_conninfo='$connstr_master application_name=rewind_standby'
  standby_mode=on
  recovery_target_timeline='latest'
  ));
@@ -193,8 +224,11 @@ recovery_target_timeline='latest'
         # Start standby
         system_or_bail("pg_ctl -w -D $test_standby_datadir -o \"-k $tempdir_short --listen-addresses='' -p $port_standby\" start >>$log_path 2>&1");
  
-       # sleep a bit to make sure the standby has caught up.
-       sleep 1;
+       # Wait until the standby has caught up with the primary, by polling
+       # pg_stat_replication.
+       my $caughtup_query = "SELECT pg_current_xlog_location() = replay_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
+       poll_query_until($caughtup_query, $connstr_master)
+               or die "Timed out while waiting for standby to catch up";
  }
  
  sub promote_standby
@@ -203,9 +237,11 @@ sub promote_standby
         # up standby
  
         # Now promote slave and insert some new data on master, this will put
-       # the master out-of-sync with the standby.
+       # the master out-of-sync with the standby. Wait until the standby is
+       # out of recovery mode, and is ready to accept read-write connections.
         system_or_bail("pg_ctl -w -D $test_standby_datadir promote >>$log_path 2>&1");
-       sleep 2;
+       poll_query_until("SELECT NOT pg_is_in_recovery()", $connstr_standby)
+               or die "Timed out while waiting for promotion of standby";
  }
  
  sub run_pg_rewind
author	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Wed, 22 Apr 2015 11:28:37 +0000 (14:28 +0300)
committer	Heikki Linnakangas <heikki.linnakangas@iki.fi>
	Wed, 22 Apr 2015 11:33:57 +0000 (14:33 +0300)