Make deadlock-parallel isolation test more robust.

author Tom Lane <tgl@sss.pgh.pa.us>

Sat, 17 Aug 2019 22:15:38 +0000 (18:15 -0400)

committer Tom Lane <tgl@sss.pgh.pa.us>

Sat, 17 Aug 2019 22:15:38 +0000 (18:15 -0400)
author Tom Lane <tgl@sss.pgh.pa.us>
Sat, 17 Aug 2019 22:15:38 +0000 (18:15 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sat, 17 Aug 2019 22:15:38 +0000 (18:15 -0400)
diff --git a/src/test/isolation/expected/deadlock-parallel.out b/src/test/isolation/expected/deadlock-parallel.out

index 871a80c4e321f84778d55867a0e2b046cf953e94..cf4d07e61567bfafd556e25ad9eefe57b2e08257 100644 (file)
--- a/src/test/isolation/expected/deadlock-parallel.out
+++ b/src/test/isolation/expected/deadlock-parallel.out
@@ -1,10 +1,10 @@
  Parsed test spec with 4 sessions
  
  starting permutation: d1a1 d2a2 e1l e2l d1a2 d2a1 d1c e1c d2c e2c
-step d1a1: SELECT lock_share(1,x) FROM bigt LIMIT 1;
-lock_share     
+step d1a1: SELECT lock_share(1,x), lock_excl(3,x) FROM bigt LIMIT 1;
+lock_share     lock_excl      
  
-1              
+1              1              
  step d2a2: select lock_share(2,x) FROM bigt LIMIT 1;
  lock_share     
  
@@ -16,15 +16,19 @@ step d1a2: SET force_parallel_mode = on;
                           SET parallel_tuple_cost = 0;
                           SET min_parallel_table_scan_size = 0;
                           SET parallel_leader_participation = off;
-                         SET max_parallel_workers_per_gather = 4;
+                         SET max_parallel_workers_per_gather = 3;
                           SELECT sum(lock_share(2,x)) FROM bigt; <waiting ...>
  step d2a1: SET force_parallel_mode = on;
                           SET parallel_setup_cost = 0;
                           SET parallel_tuple_cost = 0;
                           SET min_parallel_table_scan_size = 0;
                           SET parallel_leader_participation = off;
-                         SET max_parallel_workers_per_gather = 4;
-                         SELECT sum(lock_share(1,x)) FROM bigt; <waiting ...>
+                         SET max_parallel_workers_per_gather = 3;
+                         SELECT sum(lock_share(1,x)) FROM bigt;
+                         SET force_parallel_mode = off;
+                         RESET parallel_setup_cost;
+                         RESET parallel_tuple_cost;
+                         SELECT lock_share(3,x) FROM bigt LIMIT 1; <waiting ...>
  step d1a2: <... completed>
  sum            
  
@@ -38,6 +42,9 @@ step d2a1: <... completed>
  sum            
  
  10000          
+lock_share     
+
+1              
  step e1c: COMMIT;
  step d2c: COMMIT;
  step e2l: <... completed>
diff --git a/src/test/isolation/specs/deadlock-parallel.spec b/src/test/isolation/specs/deadlock-parallel.spec

index aa4a0847e0ec29bca36c0ba14e4a16e974ac29d6..7ad290c0bddf8b6612bf34a01db75978d9b66937 100644 (file)
--- a/src/test/isolation/specs/deadlock-parallel.spec
+++ b/src/test/isolation/specs/deadlock-parallel.spec
@@ -15,6 +15,25 @@
  # The deadlock detector resolves the deadlock by reversing the d1-e2 edge,
  # unblocking d1.
  
+# However ... it's not actually that well-defined whether the deadlock
+# detector will prefer to unblock d1 or d2.  It depends on which backend
+# is first to run DeadLockCheck after the deadlock condition is created:
+# that backend will search outwards from its own wait condition, and will
+# first find a loop involving the *other* lock.  We encourage that to be
+# one of the d2a1 parallel workers, which will therefore unblock d1a2
+# workers, by setting a shorter deadlock_timeout in session d2.  But on
+# slow machines, one or more d1a2 workers may not yet have reached their
+# lock waits, so that they're not unblocked by the first DeadLockCheck.
+# The next DeadLockCheck may choose to unblock the d2a1 workers instead,
+# which would allow d2a1 to complete before d1a2, causing the test to
+# freeze up because isolationtester isn't expecting that completion order.
+# (In effect, we have an undetectable deadlock because d2 is waiting for
+# d1's completion, but on the client side.)  To fix this, introduce an
+# additional lock (advisory lock 3), which is initially taken by d1 and
+# then d2a1 will wait for it after completing the main part of the test.
+# In this way, the deadlock detector can see that d1 must be completed
+# first, regardless of timing.
+
  setup
  {
    create function lock_share(int,int) returns int language sql as
@@ -39,15 +58,15 @@ setup               { BEGIN isolation level repeatable read;
                           SET force_parallel_mode = off;
                           SET deadlock_timeout = '10s';
  }
-# this lock will be taken in the leader, so it will persist:
-step "d1a1"    { SELECT lock_share(1,x) FROM bigt LIMIT 1; }
+# these locks will be taken in the leader, so they will persist:
+step "d1a1"    { SELECT lock_share(1,x), lock_excl(3,x) FROM bigt LIMIT 1; }
  # this causes all the parallel workers to take locks:
  step "d1a2"    { SET force_parallel_mode = on;
                           SET parallel_setup_cost = 0;
                           SET parallel_tuple_cost = 0;
                           SET min_parallel_table_scan_size = 0;
                           SET parallel_leader_participation = off;
-                         SET max_parallel_workers_per_gather = 4;
+                         SET max_parallel_workers_per_gather = 3;
                           SELECT sum(lock_share(2,x)) FROM bigt; }
  step "d1c"     { COMMIT; }
  
@@ -58,14 +77,19 @@ setup               { BEGIN isolation level repeatable read;
  }
  # this lock will be taken in the leader, so it will persist:
  step "d2a2"    { select lock_share(2,x) FROM bigt LIMIT 1; }
-# this causes all the parallel workers to take locks:
+# this causes all the parallel workers to take locks;
+# after which, make the leader take lock 3 to prevent client-driven deadlock
  step "d2a1"    { SET force_parallel_mode = on;
                           SET parallel_setup_cost = 0;
                           SET parallel_tuple_cost = 0;
                           SET min_parallel_table_scan_size = 0;
                           SET parallel_leader_participation = off;
-                         SET max_parallel_workers_per_gather = 4;
-                         SELECT sum(lock_share(1,x)) FROM bigt; }
+                         SET max_parallel_workers_per_gather = 3;
+                         SELECT sum(lock_share(1,x)) FROM bigt;
+                         SET force_parallel_mode = off;
+                         RESET parallel_setup_cost;
+                         RESET parallel_tuple_cost;
+                         SELECT lock_share(3,x) FROM bigt LIMIT 1; }
  step "d2c"     { COMMIT; }
  
  session "e1"
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 17 Aug 2019 22:15:38 +0000 (18:15 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sat, 17 Aug 2019 22:15:38 +0000 (18:15 -0400)
src/test/isolation/expected/deadlock-parallel.out		patch \| blob \| history
src/test/isolation/specs/deadlock-parallel.spec		patch \| blob \| history