3 # Test driver for pg_rewind. Each test consists of a cycle where a new cluster
4 # is first created with initdb, and a streaming replication standby is set up
5 # to follow the master. Then the master is shut down and the standby is
6 # promoted, and finally pg_rewind is used to rewind the old master, using the
7 # standby as the source.
9 # To run a test, the test script (in t/ subdirectory) calls the functions
10 # in this module. These functions should be called in this sequence:
12 # 1. init_rewind_test - sets up log file etc.
14 # 2. setup_cluster - creates a PostgreSQL cluster that runs as the master
16 # 3. start_master - starts the master server
18 # 4. create_standby - runs pg_basebackup to initialize a standby server, and
19 # sets it up to follow the master.
21 # 5. promote_standby - runs "pg_ctl promote" to promote the standby server.
22 # The old master keeps running.
24 # 6. run_pg_rewind - stops the old master (if it's still running) and runs
25 # pg_rewind to synchronize it with the now-promoted standby server.
27 # 7. clean_rewind_test - stops both servers used in the test, if they're
30 # The test script can use the helper functions master_psql and standby_psql
31 # to run psql against the master and standby servers, respectively. The
32 # test script can also use the $connstr_master and $connstr_standby global
33 # variables, which contain libpq connection strings for connecting to the
34 # master and standby servers. The data directories are also available
35 # in paths $test_master_datadir and $test_standby_datadir
45 use File::Path qw(rmtree);
46 use IPC::Run qw(run start);
48 use Exporter 'import';
69 our $test_master_datadir = "$tmp_check/data_master";
70 our $test_standby_datadir = "$tmp_check/data_standby";
72 # Define non-conflicting ports for both nodes.
73 my $port_master = $ENV{PGPORT};
74 my $port_standby = $port_master + 1;
76 my $connstr_master = "port=$port_master";
77 my $connstr_standby = "port=$port_standby";
79 $ENV{PGDATABASE} = "postgres";
85 system_or_bail 'psql', '-q', '--no-psqlrc', '-d', $connstr_master,
93 system_or_bail 'psql', '-q', '--no-psqlrc', '-d', $connstr_standby,
97 # Run a query against the master, and check that the output matches what's
101 my ($query, $expected_stdout, $test_name) = @_;
102 my ($stdout, $stderr);
104 # we want just the output, no formatting
106 'psql', '-q', '-A', '-t', '--no-psqlrc', '-d',
107 $connstr_master, '-c', $query ],
108 '>', \$stdout, '2>', \$stderr;
110 # We don't use ok() for the exit code and stderr, because we want this
111 # check to be just a single test.
114 fail("$test_name: psql exit code");
116 elsif ($stderr ne '')
119 fail("$test_name: psql no stderr");
123 $stdout =~ s/\r//g if $Config{osname} eq 'msys';
124 is($stdout, $expected_stdout, "$test_name: query result matches");
128 # Run a query once a second, until it returns 't' (i.e. SQL boolean true).
131 my ($query, $connstr) = @_;
133 my $max_attempts = 30;
135 my ($stdout, $stderr);
137 while ($attempts < $max_attempts)
139 my $cmd = [ 'psql', '-At', '-c', "$query", '-d', "$connstr" ];
140 my $result = run $cmd, '>', \$stdout, '2>', \$stderr;
143 $stdout =~ s/\r//g if $Config{osname} eq 'msys';
149 # Wait a second before retrying.
154 # The query result didn't change in 30 seconds. Give up. Print the stderr
155 # from the last attempt, hopefully that's useful for debugging.
162 my ($filename, $str) = @_;
164 open my $fh, ">>", $filename or die "could not open file $filename";
171 # Initialize master, data checksums are mandatory
172 rmtree($test_master_datadir);
173 standard_initdb($test_master_datadir);
175 # Custom parameters for master's postgresql.conf
177 "$test_master_datadir/postgresql.conf", qq(
178 wal_level = hot_standby
180 wal_keep_segments = 20
189 # Accept replication connections on master
190 configure_hba_for_replication $test_master_datadir;
195 system_or_bail('pg_ctl' , '-w',
196 '-D' , $test_master_datadir,
197 '-l', "$log_path/master.log",
198 "-o", "-p $port_master", 'start');
200 #### Now run the test-specific parts to initialize the master before setting
207 # Set up standby with the necessary parameters
208 rmtree $test_standby_datadir;
210 # Base backup is taken with xlog files included
211 system_or_bail('pg_basebackup', '-D', $test_standby_datadir,
212 '-p', $port_master, '-x');
214 "$test_standby_datadir/recovery.conf", qq(
215 primary_conninfo='$connstr_master application_name=rewind_standby'
217 recovery_target_timeline='latest'
221 system_or_bail('pg_ctl', '-w', '-D', $test_standby_datadir,
222 '-l', "$log_path/standby.log",
223 '-o', "-p $port_standby", 'start');
225 # The standby may have WAL to apply before it matches the primary. That
226 # is fine, because no test examines the standby before promotion.
231 #### Now run the test-specific parts to run after standby has been started
234 # Wait for the standby to receive and write all WAL.
235 my $wal_received_query =
236 "SELECT pg_current_xlog_location() = write_location FROM pg_stat_replication WHERE application_name = 'rewind_standby';";
237 poll_query_until($wal_received_query, $connstr_master)
238 or die "Timed out while waiting for standby to receive and write WAL";
240 # Now promote the standby and insert some new data on the master; this will
241 # put the master out-of-sync with the standby. Wait until the standby is
242 # out of recovery mode, and is ready to accept read-write connections.
243 system_or_bail('pg_ctl', '-w', '-D', $test_standby_datadir, 'promote');
244 poll_query_until("SELECT NOT pg_is_in_recovery()", $connstr_standby)
245 or die "Timed out while waiting for promotion of standby";
247 # Force a checkpoint after the promotion. pg_rewind looks at the control
248 # file to determine what timeline the server is on, and that isn't updated
249 # immediately at promotion, but only at the next checkpoint. When running
250 # pg_rewind in remote mode, it's possible that we complete the test steps
251 # after promotion so quickly that when pg_rewind runs, the standby has not
252 # performed a checkpoint after promotion yet.
253 standby_psql("checkpoint");
258 my $test_mode = shift;
260 # Stop the master and be ready to perform the rewind
261 system_or_bail('pg_ctl', '-D', $test_master_datadir, '-m', 'fast', 'stop');
263 # At this point, the rewind processing is ready to run.
264 # We now have a very simple scenario with a few diverged WAL records.
265 # The real testing begins now, with a bifurcation of the possible
266 # scenarios that pg_rewind supports.
268 # Keep a temporary postgresql.conf for master node or it would be
269 # overwritten during the rewind.
270 copy("$test_master_datadir/postgresql.conf",
271 "$tmp_check/master-postgresql.conf.tmp");
274 if ($test_mode eq "local")
276 # Do rewind using a local pgdata as source
277 # Stop the standby so that its data directory can be used as the source
278 system_or_bail('pg_ctl', '-D', $test_standby_datadir,
279 '-m', 'fast', 'stop');
280 command_ok(['pg_rewind',
282 "--source-pgdata=$test_standby_datadir",
283 "--target-pgdata=$test_master_datadir"],
286 elsif ($test_mode eq "remote")
288 # Do rewind using a remote connection as source
289 command_ok(['pg_rewind',
292 "port=$port_standby dbname=postgres",
293 "--target-pgdata=$test_master_datadir"],
299 # Cannot come here normally
300 die("Incorrect test mode specified");
303 # Now move back postgresql.conf with old settings
304 move("$tmp_check/master-postgresql.conf.tmp",
305 "$test_master_datadir/postgresql.conf");
307 # Plug-in rewound node to the now-promoted standby node
309 "$test_master_datadir/recovery.conf", qq(
310 primary_conninfo='port=$port_standby'
312 recovery_target_timeline='latest'
315 # Restart the master to check that rewind went correctly
316 system_or_bail('pg_ctl', '-w', '-D', $test_master_datadir,
317 '-l', "$log_path/master.log",
318 '-o', "-p $port_master", 'start');
320 #### Now run the test-specific parts to check the result
323 # Clean up after the test. Stop both servers, if they're still running.
324 sub clean_rewind_test
326 if ($test_master_datadir)
329 'pg_ctl', '-D', $test_master_datadir, '-m', 'immediate', 'stop';
331 if ($test_standby_datadir)
334 'pg_ctl', '-D', $test_standby_datadir, '-m', 'immediate', 'stop';
338 # Stop the test servers, just in case they're still running.