From b95a720a487b5027af1b9e4a12542800598ff5de Mon Sep 17 00:00:00 2001 From: Simon Riggs Date: Sat, 13 Feb 2010 01:32:20 +0000 Subject: [PATCH] Re-enable max_standby_delay = -1 using deadlock detection on startup process. If startup waits on a buffer pin we send a request to all backends to cancel themselves if they are holding the buffer pin required and they are also waiting on a lock. If not, startup waits until max_standby_delay before cancelling any backend waiting for the requested buffer pin. --- src/backend/storage/ipc/procsignal.c | 5 ++- src/backend/storage/ipc/standby.c | 62 +++++++++++++++++++++------- src/backend/storage/lmgr/proc.c | 14 ++++++- src/backend/tcop/postgres.c | 16 ++++++- src/backend/utils/misc/guc.c | 4 +- src/include/storage/proc.h | 3 +- src/include/storage/procsignal.h | 3 +- src/include/storage/standby.h | 5 ++- 8 files changed, 86 insertions(+), 26 deletions(-) diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 6c38d423f2..03f61b20ee 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.4 2010/01/23 16:37:12 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/procsignal.c,v 1.5 2010/02/13 01:32:19 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -272,6 +272,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_SNAPSHOT); + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK)) + RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + if (CheckProcSignal(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN)) RecoveryConflictInterrupt(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 3b3534886c..fc4295465a 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.11 2010/02/11 19:35:22 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.12 2010/02/13 01:32:19 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -127,6 +127,9 @@ WaitExceedsMaxStandbyDelay(void) long delay_secs; int delay_usecs; + if (MaxStandbyDelay == -1) + return false; + /* Are we past max_standby_delay? */ TimestampDifference(GetLatestXLogTime(), GetCurrentTimestamp(), &delay_secs, &delay_usecs); @@ -351,15 +354,15 @@ ResolveRecoveryConflictWithLock(Oid dbOid, Oid relOid) * they hold one of the buffer pins that is blocking Startup process. If so, * backends will take an appropriate error action, ERROR or FATAL. * - * A secondary purpose of this is to avoid deadlocks that might occur between - * the Startup process and lock waiters. Deadlocks occur because if queries + * We also check for deadlocks before we wait, though applications that cause + * these will be extremely rare. Deadlocks occur because if queries * wait on a lock, that must be behind an AccessExclusiveLock, which can only - * be clared if the Startup process replays a transaction completion record. - * If Startup process is waiting then that is a deadlock. If we allowed a - * setting of max_standby_delay that meant "wait forever" we would then need - * special code to protect against deadlock. Such deadlocks are rare, so the - * code would be almost certainly buggy, so we avoid both long waits and - * deadlocks using the same mechanism. + * be cleared if the Startup process replays a transaction completion record. + * If Startup process is also waiting then that is a deadlock. The deadlock + * can occur if the query is waiting and then the Startup sleeps, or if + * Startup is sleeping and the the query waits on a lock. We protect against + * only the former sequence here, the latter sequence is checked prior to + * the query sleeping, in CheckRecoveryConflictDeadlock(). */ void ResolveRecoveryConflictWithBufferPin(void) @@ -368,11 +371,23 @@ ResolveRecoveryConflictWithBufferPin(void) Assert(InHotStandby); - /* - * Signal immediately or set alarm for later. - */ if (MaxStandbyDelay == 0) - SendRecoveryConflictWithBufferPin(); + { + /* + * We don't want to wait, so just tell everybody holding the pin to + * get out of town. + */ + SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + } + else if (MaxStandbyDelay == -1) + { + /* + * Send out a request to check for buffer pin deadlocks before we wait. + * This is fairly cheap, so no need to wait for deadlock timeout before + * trying to send it out. + */ + SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + } else { TimestampTz now; @@ -386,13 +401,25 @@ ResolveRecoveryConflictWithBufferPin(void) &standby_delay_secs, &standby_delay_usecs); if (standby_delay_secs >= MaxStandbyDelay) - SendRecoveryConflictWithBufferPin(); + { + /* + * We're already behind, so clear a path as quickly as possible. + */ + SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); + } else { TimestampTz fin_time; /* Expected wake-up time by timer */ long timer_delay_secs; /* Amount of time we set timer for */ int timer_delay_usecs = 0; + /* + * Send out a request to check for buffer pin deadlocks before we wait. + * This is fairly cheap, so no need to wait for deadlock timeout before + * trying to send it out. + */ + SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + /* * How much longer we should wait? */ @@ -435,15 +462,18 @@ ResolveRecoveryConflictWithBufferPin(void) } void -SendRecoveryConflictWithBufferPin(void) +SendRecoveryConflictWithBufferPin(ProcSignalReason reason) { + Assert(reason == PROCSIG_RECOVERY_CONFLICT_BUFFERPIN || + reason == PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK); + /* * We send signal to all backends to ask them if they are holding * the buffer pin which is delaying the Startup process. We must * not set the conflict flag yet, since most backends will be innocent. * Let the SIGUSR1 handling in each backend decide their own fate. */ - CancelDBBackends(InvalidOid, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, false); + CancelDBBackends(InvalidOid, reason, false); } /* diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index a96a558da3..1e103743b2 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.215 2010/02/08 04:33:54 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.216 2010/02/13 01:32:19 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -45,6 +45,7 @@ #include "storage/pmsignal.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/procsignal.h" #include "storage/spin.h" @@ -556,6 +557,15 @@ HaveNFreeProcs(int n) return (n <= 0); } +bool +IsWaitingForLock(void) +{ + if (lockAwaited == NULL) + return false; + + return true; +} + /* * Cancel any pending wait for lock, when aborting a transaction. * @@ -1670,7 +1680,7 @@ CheckStandbyTimeout(void) now = GetCurrentTimestamp(); if (now >= statement_fin_time) - SendRecoveryConflictWithBufferPin(); + SendRecoveryConflictWithBufferPin(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN); else { /* Not time yet, so (re)schedule the interrupt */ diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 35a4d087ce..8d33000e61 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.587 2010/01/23 17:04:05 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.588 2010/02/13 01:32:19 sriggs Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -2278,6 +2278,9 @@ errdetail_recovery_conflict(void) case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: errdetail("User query might have needed to see row versions that must be removed."); break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + errdetail("User transaction caused buffer deadlock with recovery."); + break; case PROCSIG_RECOVERY_CONFLICT_DATABASE: errdetail("User was connected to a database that must be dropped."); break; @@ -2754,6 +2757,15 @@ RecoveryConflictInterrupt(ProcSignalReason reason) RecoveryConflictReason = reason; switch (reason) { + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + /* + * If we aren't waiting for a lock we can never deadlock. + */ + if (!IsWaitingForLock()) + return; + + /* Intentional drop through to check wait for pin */ + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: /* * If we aren't blocking the Startup process there is @@ -2819,6 +2831,8 @@ RecoveryConflictInterrupt(ProcSignalReason reason) elog(FATAL, "Unknown conflict mode"); } + Assert(RecoveryConflictPending && (QueryCancelPending || ProcDiePending)); + /* * If it's safe to interrupt, and we're waiting for input or a lock, * service the interrupt immediately diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 642fc6796a..fd533a1e58 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.538 2010/02/01 13:40:28 sriggs Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.539 2010/02/13 01:32:20 sriggs Exp $ * *-------------------------------------------------------------------- */ @@ -1381,7 +1381,7 @@ static struct config_int ConfigureNamesInt[] = NULL }, &MaxStandbyDelay, - 30, 0, INT_MAX, NULL, NULL + 30, -1, INT_MAX, NULL, NULL }, { diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 5a4421bfa5..b1fc78d3ed 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.119 2010/01/23 16:37:12 sriggs Exp $ + * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.120 2010/02/13 01:32:20 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -189,6 +189,7 @@ extern void ProcQueueInit(PROC_QUEUE *queue); extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable); extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus); extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); +extern bool IsWaitingForLock(void); extern void LockWaitCancel(void); extern void ProcWaitForSignal(void); diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 2a67a62a92..0b5316e691 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.4 2010/01/23 16:37:12 sriggs Exp $ + * $PostgreSQL: pgsql/src/include/storage/procsignal.h,v 1.5 2010/02/13 01:32:20 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -38,6 +38,7 @@ typedef enum PROCSIG_RECOVERY_CONFLICT_LOCK, PROCSIG_RECOVERY_CONFLICT_SNAPSHOT, PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, + PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK, NUM_PROCSIGNALS /* Must be last! */ } ProcSignalReason; diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 2a0b56ff0b..081fa51ba0 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.7 2010/01/31 19:01:11 sriggs Exp $ + * $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.8 2010/02/13 01:32:20 sriggs Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "access/xlog.h" #include "storage/lock.h" +#include "storage/procsignal.h" #include "storage/relfilenode.h" extern int vacuum_defer_cleanup_age; @@ -30,7 +31,7 @@ extern void ResolveRecoveryConflictWithTablespace(Oid tsid); extern void ResolveRecoveryConflictWithDatabase(Oid dbid); extern void ResolveRecoveryConflictWithBufferPin(void); -extern void SendRecoveryConflictWithBufferPin(void); +extern void SendRecoveryConflictWithBufferPin(ProcSignalReason reason); extern void CheckRecoveryConflictDeadlock(LWLockId partitionLock); /* -- 2.40.0