From 647675228f2b18964d8ade8a1061a719e527acfb Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 2 Jul 2017 22:01:19 -0400
Subject: [PATCH] Fix race condition in recovery/t/009_twophase.pl test.

Since reducing pg_ctl's reaction time in commit c61559ec3, some
slower buildfarm members have shown erratic failures in this test.
The reason turns out to be that the test assumes synchronous
replication (because it does not provide any lag time for a commit
to replicate before shutting down the servers), but it had only
enabled sync rep in one direction. The observed symptoms correspond
to failure to replicate the last committed transaction in the other
direction, which can be expected to happen if the shutdown command
is issued soon enough and we are providing no synchronous-commit
guarantees.

Fix that, and add a bit more paranoid state checking at the bottom
of the script.

Michael Paquier and myself

Discussion: https://postgr.es/m/908.1498965681@sss.pgh.pa.us
---
 src/test/recovery/t/009_twophase.pl | 53 +++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
index 93d0e974c4..2922c763b3 100644
--- a/src/test/recovery/t/009_twophase.pl
+++ b/src/test/recovery/t/009_twophase.pl
@@ -4,11 +4,25 @@ use warnings;
 
 use PostgresNode;
 use TestLib;
-use Test::More tests => 17;
+use Test::More tests => 20;
 
 my $psql_out = '';
 my $psql_rc = '';
 
+sub configure_and_reload
+{
+	my ($node, $parameter) = @_;
+	my $name = $node->name;
+
+	$node->append_conf(
+		'postgresql.conf', qq(
+		$parameter
+	));
+	$node->psql('postgres', "SELECT pg_reload_conf()",
+		stdout => \$psql_out);
+	is($psql_out, 't', "reload node $name with $parameter");
+}
+
 # Set up two nodes, which will alternately be master and replication slave.
 
 # Setup london node
@@ -28,15 +42,11 @@ $node_paris->init_from_backup($node_london, 'london_backup',
 	has_streaming => 1);
 $node_paris->start;
 
-# Switch to synchronous replication
-$node_london->append_conf(
-	'postgresql.conf', qq(
-	synchronous_standby_names = '*'
-));
-$node_london->psql('postgres', "SELECT pg_reload_conf()",
-	stdout => \$psql_out);
-is($psql_out, 't', 'Enable synchronous replication');
+# Switch to synchronous replication in both directions
+configure_and_reload($node_london, "synchronous_standby_names = 'paris'");
+configure_and_reload($node_paris, "synchronous_standby_names = 'london'");
 
+# Set up nonce names for current master and slave nodes
 note "Initially, london is master and paris is slave";
 my ($cur_master, $cur_slave) = ($node_london, $node_paris);
 my $cur_master_name = $cur_master->name;
@@ -213,7 +223,10 @@ note "Now paris is master and london is slave";
 ($cur_master, $cur_slave) = ($node_paris, $node_london);
 $cur_master_name = $cur_master->name;
 
-$psql_rc = $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_10'");
+# because london is not running at this point, we can't use syncrep commit
+# on this command
+$psql_rc = $cur_master->psql('postgres',
+	"SET synchronous_commit = off; COMMIT PREPARED 'xact_009_10'");
 is($psql_rc, '0', "Restore of prepared transaction on promoted slave");
 
 # restart old master as new slave
@@ -309,8 +322,8 @@ $cur_slave->start;
 $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_12'");
 
 ###############################################################################
-# Check for a lock conflict between prepared transaction with DDL inside and replay of
-# XLOG_STANDBY_LOCK wal record.
+# Check for a lock conflict between prepared transaction with DDL inside and
+# replay of XLOG_STANDBY_LOCK wal record.
 ###############################################################################
 
 $cur_master->psql(
@@ -327,14 +340,20 @@ $cur_master->psql(
 
 $cur_slave->psql(
 	'postgres',
-	"SELECT count(*) FROM pg_prepared_xacts",
+	"SELECT count(*) FROM t_009_tbl2",
 	stdout => \$psql_out);
-is($psql_out, '0', "Replay prepared transaction with DDL");
+is($psql_out, '1', "Replay prepared transaction with DDL");
 
 ###############################################################################
 # Verify expected data appears on both servers.
 ###############################################################################
 
+$cur_master->psql(
+	'postgres',
+	"SELECT count(*) FROM pg_prepared_xacts",
+	stdout => \$psql_out);
+is($psql_out, '0', "No uncommitted prepared transactions on master");
+
 $cur_master->psql(
 	'postgres',
 	"SELECT * FROM t_009_tbl ORDER BY id",
@@ -370,6 +389,12 @@ $cur_master->psql(
 is($psql_out, qq{27|issued to paris},
 	"Check expected t_009_tbl2 data on master");
 
+$cur_slave->psql(
+	'postgres',
+	"SELECT count(*) FROM pg_prepared_xacts",
+	stdout => \$psql_out);
+is($psql_out, '0', "No uncommitted prepared transactions on slave");
+
 $cur_slave->psql(
 	'postgres',
 	"SELECT * FROM t_009_tbl ORDER BY id",
-- 
2.40.0