/*-------------------------------------------------------------------------
 *
 * lwlock.c
 *	  Lightweight lock manager
 *
 * Lightweight locks are intended primarily to provide mutual exclusion of
 * access to shared-memory data structures.  Therefore, they offer both
 * exclusive and shared lock modes (to support read/write and read-only
 * access to a shared object).  There are few other frammishes.  User-level
 * locking should be done with the full lock manager --- which depends on
 * LWLocks to protect its shared state.
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.53 2009/01/01 17:23:48 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/clog.h"
#include "access/multixact.h"
#include "access/subtrans.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/spin.h"
/* We use the ShmemLock spinlock to protect LWLockAssign */
extern slock_t *ShmemLock;

typedef struct LWLock
{
	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
	bool		releaseOK;		/* T if ok to release waiters */
	char		exclusive;		/* # of exclusive holders (0 or 1) */
	int			shared;			/* # of shared holders (0..MaxBackends) */
	PGPROC	   *head;			/* head of list of waiting PGPROCs */
	PGPROC	   *tail;			/* tail of list of waiting PGPROCs */
	/* tail is undefined when head is NULL */
} LWLock;
/*
 * All the LWLock structs are allocated as an array in shared memory.
 * (LWLockIds are indexes into the array.)  We force the array stride to
 * be a power of 2, which saves a few cycles in indexing, but more
 * importantly also ensures that individual LWLocks don't cross cache line
 * boundaries.  This reduces cache contention problems, especially on AMD
 * Opterons.  (Of course, we have to also ensure that the array start
 * address is suitably aligned.)
 *
 * LWLock is between 16 and 32 bytes on all known platforms, so these two
 * cases are sufficient.
 */
#define LWLOCK_PADDED_SIZE	(sizeof(LWLock) <= 16 ? 16 : 32)
typedef union LWLockPadded
{
	LWLock		lock;
	char		pad[LWLOCK_PADDED_SIZE];
} LWLockPadded;
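
/*
 * Illustrative sketch (not part of the original file): with a power-of-2
 * stride, indexing the array needs only a shift, not a multiply.  For
 * example, if LWLOCK_PADDED_SIZE is 32, the compiler can reduce
 *
 *		lock = &LWLockArray[lockid].lock;
 *
 * to the equivalent of
 *
 *		lock = (LWLock *) ((char *) LWLockArray + ((Size) lockid << 5));
 *
 * and, given a suitably aligned array start, no LWLock straddles a
 * cache-line boundary.
 */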
/*
 * This points to the array of LWLocks in shared memory.  Backends inherit
 * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
 * where we have special measures to pass it down).
 */
NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL;
/*
 * We use this structure to keep track of locked LWLocks for release
 * during error recovery.  The maximum size could be determined at runtime
 * if necessary, but it seems unlikely that more than a few locks could
 * ever be held simultaneously.
 */
#define MAX_SIMUL_LWLOCKS	100

static int	num_held_lwlocks = 0;
static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS];
static int	lock_addin_request = 0;
static bool lock_addin_request_allowed = true;
#ifdef LWLOCK_STATS
static int	counts_for_pid = 0;
static int *sh_acquire_counts;
static int *ex_acquire_counts;
static int *block_counts;
#endif

#ifdef LOCK_DEBUG
bool		Trace_lwlocks = false;
inline static void
PRINT_LWDEBUG(const char *where, LWLockId lockid, const volatile LWLock *lock)
{
	if (Trace_lwlocks)
		elog(LOG, "%s(%d): excl %d shared %d head %p rOK %d",
			 where, (int) lockid,
			 (int) lock->exclusive, lock->shared, lock->head,
			 (int) lock->releaseOK);
}

inline static void
LOG_LWDEBUG(const char *where, LWLockId lockid, const char *msg)
{
	if (Trace_lwlocks)
		elog(LOG, "%s(%d): %s", where, (int) lockid, msg);
}
#else							/* not LOCK_DEBUG */
#define PRINT_LWDEBUG(a,b,c)
#define LOG_LWDEBUG(a,b,c)
#endif   /* LOCK_DEBUG */
#ifdef LWLOCK_STATS
static void
print_lwlock_stats(int code, Datum arg)
{
	int			i;
	int		   *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
	int			numLocks = LWLockCounter[1];

	/* Grab an LWLock to keep different backends from mixing reports */
	LWLockAcquire(0, LW_EXCLUSIVE);

	for (i = 0; i < numLocks; i++)
	{
		if (sh_acquire_counts[i] || ex_acquire_counts[i] || block_counts[i])
			fprintf(stderr, "PID %d lwlock %d: shacq %u exacq %u blk %u\n",
					MyProcPid, i, sh_acquire_counts[i], ex_acquire_counts[i],
					block_counts[i]);
	}

	LWLockRelease(0);
}
#endif   /* LWLOCK_STATS */
/*
 * Compute number of LWLocks to allocate.
 *
 * Possibly this logic should be spread out among the affected modules,
 * the same way that shmem space estimation is done.  But for now, there
 * are few enough users of LWLocks that we can get away with just keeping
 * the knowledge here.
 */
int
NumLWLocks(void)
{
	int			numLocks;

	/* Predefined LWLocks */
	numLocks = (int) NumFixedLWLocks;

	/* bufmgr.c needs two for each shared buffer */
	numLocks += 2 * NBuffers;

	/* clog.c needs one per CLOG buffer */
	numLocks += NUM_CLOG_BUFFERS;

	/* subtrans.c needs one per SubTrans buffer */
	numLocks += NUM_SUBTRANS_BUFFERS;

	/* multixact.c needs two SLRU areas */
	numLocks += NUM_MXACTOFFSET_BUFFERS + NUM_MXACTMEMBER_BUFFERS;

	/*
	 * Add any requested by loadable modules; for backwards-compatibility
	 * reasons, allocate at least NUM_USER_DEFINED_LWLOCKS of them even if
	 * there are no explicit requests.
	 */
	lock_addin_request_allowed = false;
	numLocks += Max(lock_addin_request, NUM_USER_DEFINED_LWLOCKS);

	return numLocks;
}
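
/*
 * Worked example (illustrative only; the constants below are assumed
 * values for the sake of arithmetic, not definitions from this tree):
 * with NBuffers = 4096, NUM_CLOG_BUFFERS = 8, NUM_SUBTRANS_BUFFERS = 32,
 * and 8 + 16 multixact buffers, the dynamic part contributes
 *
 *		2 * 4096 + 8 + 32 + 8 + 16 = 8256
 *
 * locks on top of NumFixedLWLocks and any add-in requests.
 */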
/*
 * RequestAddinLWLocks
 *		Request that extra LWLocks be allocated for use by
 *		a loadable module.
 *
 * This is only useful if called from the _PG_init hook of a library that
 * is loaded into the postmaster via shared_preload_libraries.  Once
 * shared memory has been allocated, calls will be ignored.  (We could
 * raise an error, but it seems better to make it a no-op, so that
 * libraries containing such calls can be reloaded if needed.)
 */
void
RequestAddinLWLocks(int n)
{
	if (IsUnderPostmaster || !lock_addin_request_allowed)
		return;					/* too late */
	lock_addin_request += n;
}
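
/*
 * Usage sketch (hypothetical; my_shmem_size is an invented helper): a
 * module loaded via shared_preload_libraries would reserve its locks from
 * its _PG_init hook, alongside its shared-memory request:
 *
 *		void
 *		_PG_init(void)
 *		{
 *			RequestAddinShmemSpace(my_shmem_size());
 *			RequestAddinLWLocks(1);
 *		}
 *
 * Calls made after shared memory is set up are silently ignored, per the
 * comment above.
 */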
/*
 * Compute shmem space needed for LWLocks.
 */
Size
LWLockShmemSize(void)
{
	Size		size;
	int			numLocks = NumLWLocks();

	/* Space for the LWLock array. */
	size = mul_size(numLocks, sizeof(LWLockPadded));

	/* Space for dynamic allocation counter, plus room for alignment. */
	size = add_size(size, 2 * sizeof(int) + LWLOCK_PADDED_SIZE);

	return size;
}
/*
 * Allocate shmem space for LWLocks and initialize the locks.
 */
void
CreateLWLocks(void)
{
	int			numLocks = NumLWLocks();
	Size		spaceLocks = LWLockShmemSize();
	LWLockPadded *lock;
	int		   *LWLockCounter;
	char	   *ptr;
	int			id;

	/* Allocate space */
	ptr = (char *) ShmemAlloc(spaceLocks);

	/* Leave room for dynamic allocation counter */
	ptr += 2 * sizeof(int);

	/* Ensure desired alignment of LWLock array */
	ptr += LWLOCK_PADDED_SIZE - ((unsigned long) ptr) % LWLOCK_PADDED_SIZE;

	LWLockArray = (LWLockPadded *) ptr;

	/*
	 * Initialize all LWLocks to "unlocked" state
	 */
	for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++)
	{
		SpinLockInit(&lock->lock.mutex);
		lock->lock.releaseOK = true;
		lock->lock.exclusive = 0;
		lock->lock.shared = 0;
		lock->lock.head = NULL;
		lock->lock.tail = NULL;
	}

	/*
	 * Initialize the dynamic-allocation counter, which is stored just before
	 * the array of LWLocks.
	 */
	LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
	LWLockCounter[0] = (int) NumFixedLWLocks;
	LWLockCounter[1] = numLocks;
}
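
/*
 * Layout sketch (illustrative): after CreateLWLocks, the allocation looks
 * like
 *
 *		base -> [alignment pad, if any][next free id][total count][lock 0][lock 1]...
 *
 * Both counters are always addressed relative to the aligned array start,
 * as ((int *) ((char *) LWLockArray - 2 * sizeof(int))), so any alignment
 * padding sits before them, never between them and the array.
 */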
/*
 * LWLockAssign - assign a dynamically-allocated LWLock number
 *
 * We interlock this using the same spinlock that is used to protect
 * ShmemAlloc().  Interlocking is not really necessary during postmaster
 * startup, but it is needed if any user-defined code tries to allocate
 * LWLocks after startup.
 */
LWLockId
LWLockAssign(void)
{
	LWLockId	result;

	/* use volatile pointer to prevent code rearrangement */
	volatile int *LWLockCounter;

	LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
	SpinLockAcquire(ShmemLock);
	if (LWLockCounter[0] >= LWLockCounter[1])
	{
		SpinLockRelease(ShmemLock);
		elog(ERROR, "no more LWLockIds available");
	}
	result = (LWLockId) (LWLockCounter[0]++);
	SpinLockRelease(ShmemLock);
	return result;
}
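
/*
 * Usage sketch (hypothetical names): a module's shared-memory startup code
 * would typically capture its assigned lock id once, e.g.
 *
 *		my_state = ShmemInitStruct("my module state", size, &found);
 *		if (!found)
 *			my_state->lockid = LWLockAssign();
 *
 * The ShmemLock interlock above makes this safe even for allocations made
 * after postmaster startup.
 */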
/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
	volatile LWLock *lock = &(LWLockArray[lockid].lock);
	PGPROC	   *proc = MyProc;
	bool		retry = false;
	int			extraWaits = 0;

	PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
#ifdef LWLOCK_STATS
	/* Set up local count state first time through in a given process */
	if (counts_for_pid != MyProcPid)
	{
		int		   *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
		int			numLocks = LWLockCounter[1];

		sh_acquire_counts = calloc(numLocks, sizeof(int));
		ex_acquire_counts = calloc(numLocks, sizeof(int));
		block_counts = calloc(numLocks, sizeof(int));
		counts_for_pid = MyProcPid;
		on_shmem_exit(print_lwlock_stats, 0);
	}

	/* Count lock acquisition attempts */
	if (mode == LW_EXCLUSIVE)
		ex_acquire_counts[lockid]++;
	else
		sh_acquire_counts[lockid]++;
#endif   /* LWLOCK_STATS */
	/*
	 * We can't wait if we haven't got a PGPROC.  This should only occur
	 * during bootstrap or shared memory initialization.  Put an Assert here
	 * to catch unsafe coding practices.
	 */
	Assert(!(proc == NULL && IsUnderPostmaster));

	/* Ensure we will have room to remember the lock */
	if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
		elog(ERROR, "too many LWLocks taken");

	/*
	 * Lock out cancel/die interrupts until we exit the code section protected
	 * by the LWLock.  This ensures that interrupts will not interfere with
	 * manipulations of data structures in shared memory.
	 */
	HOLD_INTERRUPTS();
	/*
	 * Loop here to try to acquire lock after each time we are signaled by
	 * LWLockRelease.
	 *
	 * NOTE: it might seem better to have LWLockRelease actually grant us the
	 * lock, rather than retrying and possibly having to go back to sleep. But
	 * in practice that is no good because it means a process swap for every
	 * lock acquisition when two or more processes are contending for the same
	 * lock.  Since LWLocks are normally used to protect not-very-long
	 * sections of computation, a process needs to be able to acquire and
	 * release the same lock many times during a single CPU time slice, even
	 * in the presence of contention.  The efficiency of being able to do that
	 * outweighs the inefficiency of sometimes wasting a process dispatch
	 * cycle because the lock is not free when a released waiter finally gets
	 * to run.  See pgsql-hackers archives for 29-Dec-01.
	 */
	for (;;)
	{
		bool		mustwait;

		/* Acquire mutex.  Time spent holding mutex should be short! */
		SpinLockAcquire(&lock->mutex);

		/* If retrying, allow LWLockRelease to release waiters again */
		if (retry)
			lock->releaseOK = true;

		/* If I can get the lock, do so quickly. */
		if (mode == LW_EXCLUSIVE)
		{
			if (lock->exclusive == 0 && lock->shared == 0)
			{
				lock->exclusive++;
				mustwait = false;
			}
			else
				mustwait = true;
		}
		else
		{
			if (lock->exclusive == 0)
			{
				lock->shared++;
				mustwait = false;
			}
			else
				mustwait = true;
		}

		if (!mustwait)
			break;				/* got the lock */
		/*
		 * Add myself to wait queue.
		 *
		 * If we don't have a PGPROC structure, there's no way to wait. This
		 * should never occur, since MyProc should only be null during shared
		 * memory initialization.
		 */
		if (proc == NULL)
			elog(PANIC, "cannot wait without a PGPROC structure");

		proc->lwWaiting = true;
		proc->lwExclusive = (mode == LW_EXCLUSIVE);
		proc->lwWaitLink = NULL;
		if (lock->head == NULL)
			lock->head = proc;
		else
			lock->tail->lwWaitLink = proc;
		lock->tail = proc;

		/* Can release the mutex now */
		SpinLockRelease(&lock->mutex);
		/*
		 * Wait until awakened.
		 *
		 * Since we share the process wait semaphore with the regular lock
		 * manager and ProcWaitForSignal, and we may need to acquire an LWLock
		 * while one of those is pending, it is possible that we get awakened
		 * for a reason other than being signaled by LWLockRelease.  If so,
		 * loop back and wait again.  Once we've gotten the LWLock,
		 * re-increment the sema by the number of additional signals received,
		 * so that the lock manager or signal manager will see the received
		 * signal when it next waits.
		 */
		LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");

#ifdef LWLOCK_STATS
		block_counts[lockid]++;
#endif

		TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);

		for (;;)
		{
			/* "false" means cannot accept cancel/die interrupt here. */
			PGSemaphoreLock(&proc->sem, false);
			if (!proc->lwWaiting)
				break;
			extraWaits++;
		}

		TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);

		LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");

		/* Now loop back and try to acquire lock again. */
		retry = true;
	}
	/* We are done updating shared state of the lock itself. */
	SpinLockRelease(&lock->mutex);

	TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);

	/* Add lock to list of locks held by this backend */
	held_lwlocks[num_held_lwlocks++] = lockid;

	/*
	 * Fix the process wait semaphore's count for any absorbed wakeups.
	 */
	while (extraWaits-- > 0)
		PGSemaphoreUnlock(&proc->sem);
}
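
/*
 * Usage sketch (illustrative; MyModuleLock and shared_struct are
 * hypothetical): the canonical pattern brackets a short shared-memory
 * manipulation between acquire and release:
 *
 *		LWLockAcquire(MyModuleLock, LW_EXCLUSIVE);
 *		shared_struct->counter++;
 *		LWLockRelease(MyModuleLock);
 *
 * Readers that only inspect the structure would pass LW_SHARED instead.
 */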
/*
 * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, return FALSE with no side-effects.
 *
 * If successful, cancel/die interrupts are held off until lock release.
 */
bool
LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
{
	volatile LWLock *lock = &(LWLockArray[lockid].lock);
	bool		mustwait;

	PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
	/* Ensure we will have room to remember the lock */
	if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
		elog(ERROR, "too many LWLocks taken");

	/*
	 * Lock out cancel/die interrupts until we exit the code section protected
	 * by the LWLock.  This ensures that interrupts will not interfere with
	 * manipulations of data structures in shared memory.
	 */
	HOLD_INTERRUPTS();
	/* Acquire mutex.  Time spent holding mutex should be short! */
	SpinLockAcquire(&lock->mutex);

	/* If I can get the lock, do so quickly. */
	if (mode == LW_EXCLUSIVE)
	{
		if (lock->exclusive == 0 && lock->shared == 0)
		{
			lock->exclusive++;
			mustwait = false;
		}
		else
			mustwait = true;
	}
	else
	{
		if (lock->exclusive == 0)
		{
			lock->shared++;
			mustwait = false;
		}
		else
			mustwait = true;
	}

	/* We are done updating shared state of the lock itself. */
	SpinLockRelease(&lock->mutex);
	if (mustwait)
	{
		/* Failed to get lock, so release interrupt holdoff */
		RESUME_INTERRUPTS();
		LOG_LWDEBUG("LWLockConditionalAcquire", lockid, "failed");
		TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(lockid, mode);
	}
	else
	{
		/* Add lock to list of locks held by this backend */
		held_lwlocks[num_held_lwlocks++] = lockid;
		TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(lockid, mode);
	}

	return !mustwait;
}
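
/*
 * Usage sketch (illustrative; the lock and helpers are hypothetical):
 * callers with useful fallback work use the conditional form to avoid
 * sleeping:
 *
 *		if (LWLockConditionalAcquire(MyModuleLock, LW_EXCLUSIVE))
 *		{
 *			do_protected_work();
 *			LWLockRelease(MyModuleLock);
 *		}
 *		else
 *			do_fallback_work();
 */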
/*
 * LWLockRelease - release a previously acquired lock
 */
void
LWLockRelease(LWLockId lockid)
{
	volatile LWLock *lock = &(LWLockArray[lockid].lock);
	PGPROC	   *head;
	PGPROC	   *proc;
	int			i;

	PRINT_LWDEBUG("LWLockRelease", lockid, lock);

	/*
	 * Remove lock from list of locks held.  Usually, but not always, it will
	 * be the latest-acquired lock; so search array backwards.
	 */
	for (i = num_held_lwlocks; --i >= 0;)
	{
		if (lockid == held_lwlocks[i])
			break;
	}
	if (i < 0)
		elog(ERROR, "lock %d is not held", (int) lockid);
	num_held_lwlocks--;
	for (; i < num_held_lwlocks; i++)
		held_lwlocks[i] = held_lwlocks[i + 1];
	/* Acquire mutex.  Time spent holding mutex should be short! */
	SpinLockAcquire(&lock->mutex);

	/* Release my hold on lock */
	if (lock->exclusive > 0)
		lock->exclusive--;
	else
	{
		Assert(lock->shared > 0);
		lock->shared--;
	}
	/*
	 * See if I need to awaken any waiters.  If I released a non-last shared
	 * hold, there cannot be anything to do.  Also, do not awaken any waiters
	 * if someone has already awakened waiters that haven't yet acquired the
	 * lock.
	 */
	head = lock->head;
	if (head != NULL)
	{
		if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK)
		{
			/*
			 * Remove the to-be-awakened PGPROCs from the queue.  If the
			 * front waiter wants exclusive lock, awaken him only; otherwise
			 * awaken as many waiters as want shared access.
			 */
			proc = head;
			if (!proc->lwExclusive)
			{
				while (proc->lwWaitLink != NULL &&
					   !proc->lwWaitLink->lwExclusive)
					proc = proc->lwWaitLink;
			}
			/* proc is now the last PGPROC to be released */
			lock->head = proc->lwWaitLink;
			proc->lwWaitLink = NULL;
			/* prevent additional wakeups until retryer gets to run */
			lock->releaseOK = false;
		}
		else
		{
			/* lock is still held, can't awaken anything */
			head = NULL;
		}
	}

	/* We are done updating shared state of the lock itself. */
	SpinLockRelease(&lock->mutex);

	TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid);
	/*
	 * Awaken any waiters I removed from the queue.
	 */
	while (head != NULL)
	{
		LOG_LWDEBUG("LWLockRelease", lockid, "release waiter");
		proc = head;
		head = proc->lwWaitLink;
		proc->lwWaitLink = NULL;
		proc->lwWaiting = false;
		PGSemaphoreUnlock(&proc->sem);
	}

	/*
	 * Now okay to allow cancel/die interrupts.
	 */
	RESUME_INTERRUPTS();
}
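
/*
 * Worked example (illustrative): suppose the wait queue holds S1, S2, X1,
 * S3 (shared, shared, exclusive, shared) when the last hold is released.
 * The code above wakes S1 and S2 only; X1 and S3 stay queued.  Clearing
 * releaseOK then stops further releases from waking X1 before S1 and S2
 * have had a chance to retry and actually acquire the lock.
 */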
/*
 * LWLockReleaseAll - release all currently-held locks
 *
 * Used to clean up after ereport(ERROR).  An important difference between
 * this function and retail LWLockRelease calls is that InterruptHoldoffCount
 * is unchanged by this operation.  This is necessary since
 * InterruptHoldoffCount has been set to an appropriate level earlier in
 * error recovery.  We could decrement it below zero if we allowed it to drop
 * for each released lock!
 */
void
LWLockReleaseAll(void)
{
	while (num_held_lwlocks > 0)
	{
		HOLD_INTERRUPTS();		/* match the upcoming RESUME_INTERRUPTS */

		LWLockRelease(held_lwlocks[num_held_lwlocks - 1]);
	}
}
/*
 * LWLockHeldByMe - test whether my process currently holds a lock
 *
 * This is meant as debug support only.  We do not distinguish whether the
 * lock is held shared or exclusive.
 */
bool
LWLockHeldByMe(LWLockId lockid)
{
	int			i;

	for (i = 0; i < num_held_lwlocks; i++)
	{
		if (held_lwlocks[i] == lockid)
			return true;
	}
	return false;
}
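
/*
 * Usage sketch (illustrative): this is typically used in assertions that
 * document a function's locking requirements, e.g.
 *
 *		Assert(LWLockHeldByMe(ControlFileLock));
 *
 * at the top of code that must run with that lock already held.
 */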