From: Tom Lane Date: Tue, 2 Aug 2011 19:16:29 +0000 (-0400) Subject: Move CheckRecoveryConflictDeadlock() call to a safer place. X-Git-Tag: REL9_2_BETA1~1336 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ac36e6f71f197540b8ee83c97f338ae5e5163f30;p=postgresql Move CheckRecoveryConflictDeadlock() call to a safer place. This kluge was inserted in a spot apparently chosen at random: the lock manager's state is not yet fully set up for the wait, and in particular LockWaitCancel hasn't been armed by setting lockAwaited, so the ProcLock will not get cleaned up if the ereport is thrown. This seems to not cause any observable problem in trivial test cases, because LockReleaseAll will silently clean up the debris; but I was able to cause failures with tests involving subtransactions. Fixes breakage induced by commit c85c941470efc44494fd7a5f426ee85fc65c268c. Back-patch to all affected branches. --- diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 75b5ab458a..bf92d25995 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -459,24 +459,25 @@ SendRecoveryConflictWithBufferPin(ProcSignalReason reason) /* * In Hot Standby perform early deadlock detection. We abort the lock - * wait if are about to sleep while holding the buffer pin that Startup - * process is waiting for. The deadlock occurs because we can only be - * waiting behind an AccessExclusiveLock, which can only clear when a - * transaction completion record is replayed, which can only occur when - * Startup process is not waiting. So if Startup process is waiting we - * never will clear that lock, so if we wait we cause deadlock. If we - * are the Startup process then no need to check for deadlocks. + * wait if we are about to sleep while holding the buffer pin that Startup + * process is waiting for. + * + * Note: this code is pessimistic, because there is no way for it to + * determine whether an actual deadlock condition is present: the lock we + * need to wait for might be unrelated to any held by the Startup process. + * Sooner or later, this mechanism should get ripped out in favor of somehow + * accounting for buffer locks in DeadLockCheck(). However, errors here + * seem to be very low-probability in practice, so for now it's not worth + * the trouble. */ void -CheckRecoveryConflictDeadlock(LWLockId partitionLock) +CheckRecoveryConflictDeadlock(void) { - Assert(!InRecovery); + Assert(!InRecovery); /* do not call in Startup process */ if (!HoldingBufferPinThatDelaysRecovery()) return; - LWLockRelease(partitionLock); - /* * Error message should match ProcessInterrupts() but we avoid calling * that because we aren't handling an interrupt at this point. Note that diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 41e9a1532b..416f5aae99 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -844,13 +844,6 @@ LockAcquireExtended(const LOCKTAG *locktag, return LOCKACQUIRE_NOT_AVAIL; } - /* - * In Hot Standby perform early deadlock detection in normal backends. - * If deadlock found we release partition lock but do not return. - */ - if (RecoveryInProgress() && !InRecovery) - CheckRecoveryConflictDeadlock(partitionLock); - /* * Set bitmask of locks this process already holds on this object. */ diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 106625d87a..5d67b93c5d 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -934,6 +934,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) */ LWLockRelease(partitionLock); + /* + * Also, now that we will successfully clean up after an ereport, it's + * safe to check to see if there's a buffer pin deadlock against the + * Startup process. Of course, that's only necessary if we're doing + * Hot Standby and are not the Startup process ourselves. + */ + if (RecoveryInProgress() && !InRecovery) + CheckRecoveryConflictDeadlock(); + /* Reset deadlock_state before enabling the signal handler */ deadlock_state = DS_NOT_YET_CHECKED; diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 690b4070af..6ebac62db5 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -35,7 +35,7 @@ extern void ResolveRecoveryConflictWithDatabase(Oid dbid); extern void ResolveRecoveryConflictWithBufferPin(void); extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason); -extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock); +extern void CheckRecoveryConflictDeadlock(void); /* * Standby Rmgr (RM_STANDBY_ID)