1 /*-------------------------------------------------------------------------
4 * routines to manage per-process shared memory data structure
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.51 1999/02/21 01:41:45 tgl Exp $
12 *-------------------------------------------------------------------------
15 * Each postgres backend gets one of these. We'll use it to
16 * clean up after the process should the process suddenly die.
20 * ProcSleep(), ProcWakeup(), ProcWakeupNext(),
21 * ProcQueueAlloc() -- create a shm queue for sleeping processes
22 * ProcQueueInit() -- create a queue without allocing memory
24 * Locking and waiting for buffers can cause the backend to be
25 * put to sleep. Whoever releases the lock, etc. wakes the
26 * process up again (and gives it an error code so it knows
27 * whether it was awoken on an error condition).
31 * ProcReleaseLocks -- frees the locks associated with this process,
32 * ProcKill -- destroys the shared memory state (and locks)
33 * associated with the process.
35 * 5/15/91 -- removed the buffer pool based lock chain in favor
36 * of a shared memory lock chain. The write-protection is
37 * more expensive if the lock chain is in the buffer pool.
38 * The only reason I kept the lock chain in the buffer pool
39 * in the first place was to allow the lock table to grow larger
40 * than available shared memory and that isn't going to work
41 * without a lot of unimplemented support anyway.
43 * 4/7/95 -- instead of allocating a set of 1 semaphore per process, we
44 * allocate a semaphore from a set of PROC_NSEMS_PER_SET semaphores
45 * shared among backends (we keep a few sets of semaphores around).
46 * This is so that we can support more backends. (system-wide semaphore
47 * sets run out pretty fast.) -ay 4/95
49 * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.51 1999/02/21 01:41:45 tgl Exp $
55 #include <sys/types.h>
57 #if defined(solaris_sparc)
63 #include "miscadmin.h"
64 #include "libpq/pqsignal.h"
66 #include "access/xact.h"
67 #include "utils/hsearch.h"
69 #include "storage/ipc.h"
70 /* In Ultrix, sem.h must be included after ipc.h */
72 #include "storage/buf.h"
73 #include "storage/lock.h"
74 #include "storage/lmgr.h"
75 #include "storage/shmem.h"
76 #include "storage/spin.h"
77 #include "storage/proc.h"
78 #include "utils/trace.h"
80 static void HandleDeadLock(int sig);
81 static PROC *ProcWakeup(PROC *proc, int errType);
82 static void ProcFreeAllSemaphores(void);
84 #define DeadlockCheckTimer pg_options[OPT_DEADLOCKTIMEOUT]
86 /* --------------------
87 * Spin lock for manipulating the shared process data structure:
88 * ProcGlobal.... Adding an extra spin lock seemed like the smallest
89 * hack to get around reading and updating this structure in shared
90 * memory. -mer 17 July 1991
91 * --------------------
93 SPINLOCK ProcStructLock;
96 * For cleanup routines. Don't cleanup if the initialization
99 static bool ProcInitialized = FALSE;
101 static PROC_HDR *ProcGlobal = NULL;
105 static void ProcKill(int exitStatus, int pid);
106 static void ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum);
107 static void ProcFreeSem(IpcSemaphoreKey semKey, int semNum);
111 * initializes the global process table. We put it here so that
112 * the postmaster can do this initialization. (ProcFreeAllSemaphores needs
113 * to read this table on exiting the postmaster. If we have the first
114 * backend do this, starting up and killing the postmaster without
115 * starting any backends will be a problem.)
117 * We also allocate all the per-process semaphores we will need to support
118 * the requested number of backends. We used to allocate semaphores
119 * only when backends were actually started up, but that is bad because
120 * it lets Postgres fail under load --- a lot of Unix systems are
121 * (mis)configured with small limits on the number of semaphores, and
122 * running out when trying to start another backend is a common failure.
123 * So, now we grab enough semaphores to support the desired max number
124 * of backends immediately at initialization --- if the sysadmin has set
125 * MaxBackends higher than his kernel will support, he'll find out sooner
129 InitProcGlobal(IPCKey key, int maxBackends)
133 /* attach to the free list */
134 ProcGlobal = (PROC_HDR *)
135 ShmemInitStruct("Proc Header", (unsigned) sizeof(PROC_HDR), &found);
137 /* --------------------
138 * We're the first - initialize.
139 * XXX if found should ever be true, it is a sign of impending doom ...
140 * ought to complain if so?
141 * --------------------
147 ProcGlobal->freeProcs = INVALID_OFFSET;
148 ProcGlobal->currKey = IPCGetProcessSemaphoreInitKey(key);
149 for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++)
150 ProcGlobal->freeSemMap[i] = 0;
152 /* Arrange to delete semas on exit --- set this up now so that
153 * we will clean up if pre-allocation fails...
155 on_shmem_exit(ProcFreeAllSemaphores, NULL);
157 /* Pre-create the semaphores for the first maxBackends processes,
158 * unless we are running as a standalone backend.
160 if (key != PrivateIPCKey)
163 i < (maxBackends+PROC_NSEMS_PER_SET-1) / PROC_NSEMS_PER_SET;
166 IPCKey semKey = ProcGlobal->currKey + i;
170 semId = IpcSemaphoreCreate(semKey,
173 IpcSemaphoreDefaultStartValue,
176 /* mark this sema set allocated */
177 ProcGlobal->freeSemMap[i] = (1 << PROC_NSEMS_PER_SET);
183 /* ------------------------
184 * InitProcess -- create a per-process data structure for this process
185 * used by the lock manager on semaphore queues.
186 * ------------------------
189 InitProcess(IPCKey key)
193 unsigned long location,
196 /* ------------------
197 * Routine called if deadlock timer goes off. See ProcSleep()
200 pqsignal(SIGALRM, HandleDeadLock);
202 SpinAcquire(ProcStructLock);
204 /* attach to the free list */
205 ProcGlobal = (PROC_HDR *)
206 ShmemInitStruct("Proc Header", (unsigned) sizeof(PROC_HDR), &found);
209 /* this should not happen. InitProcGlobal() is called before this. */
210 elog(ERROR, "InitProcess: Proc Header uninitialized");
215 SpinRelease(ProcStructLock);
216 elog(ERROR, "ProcInit: you already exist");
220 /* try to get a proc from the free list first */
222 myOffset = ProcGlobal->freeProcs;
224 if (myOffset != INVALID_OFFSET)
226 MyProc = (PROC *) MAKE_PTR(myOffset);
227 ProcGlobal->freeProcs = MyProc->links.next;
233 * have to allocate one. We can't use the normal shmem index
234 * table mechanism because the proc structure is stored by PID
235 * instead of by a global name (need to look it up by PID when we
236 * cleanup dead processes).
239 MyProc = (PROC *) ShmemAlloc((unsigned) sizeof(PROC));
242 SpinRelease(ProcStructLock);
243 elog(FATAL, "cannot create new proc: out of memory");
246 /* this cannot be initialized until after the buffer pool */
247 SHMQueueInit(&(MyProc->lockQueue));
251 * zero out the spin lock counts and set the sLocks field for
252 * ProcStructLock to 1 as we have acquired this spinlock above but
253 * didn't record it since we didn't have MyProc until now.
255 MemSet(MyProc->sLocks, 0, sizeof(MyProc->sLocks));
256 MyProc->sLocks[ProcStructLock] = 1;
259 if (IsUnderPostmaster)
266 ProcGetNewSemKeyAndNum(&semKey, &semNum);
268 /* Note: because of the pre-allocation done in InitProcGlobal,
269 * this call should always attach to an existing semaphore.
270 * It will (try to) create a new group of semaphores only if
271 * the postmaster tries to start more backends than it said it would.
273 semId = IpcSemaphoreCreate(semKey,
276 IpcSemaphoreDefaultStartValue,
281 * we might be reusing a semaphore that belongs to a dead backend.
282 * So be careful and reinitialize its value here.
284 semun.val = IpcSemaphoreDefaultStartValue;
285 semctl(semId, semNum, SETVAL, semun);
287 IpcSemaphoreLock(semId, semNum, IpcExclusiveLock);
288 MyProc->sem.semId = semId;
289 MyProc->sem.semNum = semNum;
290 MyProc->sem.semKey = semKey;
293 MyProc->sem.semId = -1;
295 /* ----------------------
297 * ----------------------
299 SpinRelease(ProcStructLock);
301 MyProc->pid = MyProcPid;
302 MyProc->xid = InvalidTransactionId;
303 #ifdef LowLevelLocking
304 MyProc->xmin = InvalidTransactionId;
308 * Start keeping spin lock stats from here on. Any botch before
309 * this initialization is forever botched
312 MemSet(MyProc->sLocks, 0, MAX_SPINS * sizeof(*MyProc->sLocks));
314 /* -------------------------
315 * Install ourselves in the shmem index table. The name to
316 * use is determined by the OS-assigned process id. That
317 * allows the cleanup process to find us after any untimely
319 * -------------------------
321 location = MAKE_OFFSET(MyProc);
322 if ((!ShmemPIDLookup(MyProcPid, &location)) || (location != MAKE_OFFSET(MyProc)))
323 elog(FATAL, "InitProc: ShmemPID table broken");
325 MyProc->errType = NO_ERROR;
326 SHMQueueElemInit(&(MyProc->links));
328 on_shmem_exit(ProcKill, (caddr_t) MyProcPid);
330 ProcInitialized = TRUE;
334 * ProcReleaseLocks() -- release all locks associated with this process
342 LockReleaseAll(1, &MyProc->lockQueue);
347 * used by the postmaster to clean up the global tables. This also frees
348 * up the semaphore used for the lmgr of the process. (We have to do
349 * this in the postmaster instead of doing an IpcSemaphoreKill on exiting
350 * the process because the semaphore set is shared among backends and
351 * we don't want to remove others' semaphores on exit.)
356 SHMEM_OFFSET location;
359 location = INVALID_OFFSET;
361 location = ShmemPIDDestroy(pid);
362 if (location == INVALID_OFFSET)
364 proc = (PROC *) MAKE_PTR(location);
366 SpinAcquire(ProcStructLock);
368 ProcFreeSem(proc->sem.semKey, proc->sem.semNum);
370 proc->links.next = ProcGlobal->freeProcs;
371 ProcGlobal->freeProcs = MAKE_OFFSET(proc);
373 SpinRelease(ProcStructLock);
379 * ProcKill() -- Destroy the per-proc data structure for
380 * this process. Release any of its held spin locks.
383 ProcKill(int exitStatus, int pid)
386 SHMEM_OFFSET location;
388 /* --------------------
389 * If this is a FATAL exit the postmaster will have to kill all the
390 * existing backends and reinitialize shared memory. So we don't
391 * need to do anything here.
392 * --------------------
397 ShmemPIDLookup(MyProcPid, &location);
398 if (location == INVALID_OFFSET)
401 proc = (PROC *) MAKE_PTR(location);
403 Assert(proc == MyProc || pid != MyProcPid);
408 * Assume one lock table.
411 ProcReleaseSpins(proc);
412 LockReleaseAll(DEFAULT_LOCKMETHOD, &proc->lockQueue);
417 * Assume we have a second lock table.
419 LockReleaseAll(USER_LOCKMETHOD, &proc->lockQueue);
423 * get off the wait queue
427 if (proc->links.next != INVALID_OFFSET)
429 Assert(proc->waitLock->waitProcs.size > 0);
430 SHMQueueDelete(&(proc->links));
431 --proc->waitLock->waitProcs.size;
433 SHMQueueElemInit(&(proc->links));
440 * ProcQueue package: routines for putting processes to sleep
445 * ProcQueueAlloc -- alloc/attach to a shared memory process queue
447 * Returns: a pointer to the queue or NULL
448 * Side Effects: Initializes the queue if we allocated one
452 ProcQueueAlloc(char *name)
455 PROC_QUEUE *queue = (PROC_QUEUE *)
456 ShmemInitStruct(name, (unsigned) sizeof(PROC_QUEUE), &found);
461 ProcQueueInit(queue);
468 * ProcQueueInit -- initialize a shared memory process queue
471 ProcQueueInit(PROC_QUEUE *queue)
473 SHMQueueInit(&(queue->links));
480 * ProcSleep -- put a process to sleep
482 * P() on the semaphore should put us to sleep. The process
483 * semaphore is cleared by default, so the first time we try
484 * to acquire it, we sleep.
486 * ASSUME: that no one will fiddle with the queue until after
487 * we release the spin lock.
489 * NOTES: The process queue is now a priority queue for locking.
492 ProcSleep(PROC_QUEUE *waitQueue,/* lock->waitProcs */
494 int token, /* lockmode */
497 TransactionId xid) /* needed by user locks, see below */
501 bool deadlock_checked = false;
502 struct itimerval timeval,
506 * If the first entries in the waitQueue have a greater priority than
507 * we have, we must be a reader, and they must be writers, and we
508 * must be here because the current holder is a writer or a reader but
509 * we don't share shared locks if a writer is waiting. We put
510 * ourselves after the writers. This way, we have a FIFO, but keep
511 * the readers together to give them decent priority, and no one
512 * starves. Because we group all readers together, a non-empty queue
513 * only has a few possible configurations:
515 * [readers] [writers] [readers][writers] [writers][readers]
516 * [writers][readers][writers]
518 * In a full queue, we would have a reader holding a lock, then a writer
519 * gets the lock, then a bunch of readers, made up of readers who
520 * could not share the first readlock because a writer was waiting,
521 * and new readers arriving while the writer had the lock.
524 proc = (PROC *) MAKE_PTR(waitQueue->links.prev);
526 /* If we are a reader, and they are writers, skip past them */
527 for (i = 0; i < waitQueue->size && proc->prio > prio; i++)
528 proc = (PROC *) MAKE_PTR(proc->links.prev);
530 /* The rest of the queue is FIFO, with readers first, writers last */
531 for (; i < waitQueue->size && proc->prio <= prio; i++)
532 proc = (PROC *) MAKE_PTR(proc->links.prev);
535 MyProc->token = token;
536 MyProc->waitLock = lock;
539 /* -------------------
540 * Currently, we only need this for the ProcWakeup routines.
541 * This must be 0 for user lock, so we can't just use the value
542 * from GetCurrentTransactionId().
543 * -------------------
545 TransactionIdStore(xid, &MyProc->xid);
547 #ifndef LowLevelLocking
548 /* -------------------
549 * currently, we only need this for the ProcWakeup routines
550 * -------------------
552 TransactionIdStore((TransactionId) GetCurrentTransactionId(), &MyProc->xid);
556 /* -------------------
557 * assume that these two operations are atomic (because
559 * -------------------
561 SHMQueueInsertTL(&(proc->links), &(MyProc->links));
564 SpinRelease(spinlock);
567 * We set this so we can wake up periodically and check for a deadlock.
568 * If a deadlock is detected, the handler releases the process's
569 * semaphore and aborts the current transaction.
571 * Need to zero out struct to set the interval and the microseconds fields
575 MemSet(&timeval, 0, sizeof(struct itimerval));
576 timeval.it_value.tv_sec = \
577 (DeadlockCheckTimer ? DeadlockCheckTimer : DEADLOCK_CHECK_TIMER);
581 MyProc->errType = NO_ERROR; /* reset flag after deadlock check */
583 if (!deadlock_checked)
584 if (setitimer(ITIMER_REAL, &timeval, &dummy))
585 elog(FATAL, "ProcSleep: Unable to set timer for process wakeup");
586 deadlock_checked = true;
589 * if someone wakes us between SpinRelease and IpcSemaphoreLock,
590 * IpcSemaphoreLock will not block. The wakeup is "saved" by
591 * the semaphore implementation.
594 IpcSemaphoreLock(MyProc->sem.semId, MyProc->sem.semNum,
596 } while (MyProc->errType == STATUS_NOT_FOUND); /* sleep after deadlock
600 * We were awoken before a timeout - now disable the timer
603 timeval.it_value.tv_sec = 0;
604 if (setitimer(ITIMER_REAL, &timeval, &dummy))
605 elog(FATAL, "ProcSleep: Unable to diable timer for process wakeup");
608 * We were assumed to be in a critical section when we went
612 SpinAcquire(spinlock);
614 #ifdef LOCK_MGR_DEBUG
615 /* Just to get meaningful debug messages from DumpLocks() */
616 MyProc->waitLock = (LOCK *) NULL;
619 return MyProc->errType;
624 * ProcWakeup -- wake up a process by releasing its private semaphore.
626 * remove the process from the wait queue and set its links invalid.
627 * RETURN: the next process in the wait queue.
630 ProcWakeup(PROC *proc, int errType)
634 /* assume that spinlock has been acquired */
636 if (proc->links.prev == INVALID_OFFSET ||
637 proc->links.next == INVALID_OFFSET)
638 return (PROC *) NULL;
640 retProc = (PROC *) MAKE_PTR(proc->links.prev);
642 /* you have to update waitLock->waitProcs.size yourself */
643 SHMQueueDelete(&(proc->links));
644 SHMQueueElemInit(&(proc->links));
646 proc->errType = errType;
648 IpcSemaphoreUnlock(proc->sem.semId, proc->sem.semNum, IpcExclusiveLock);
654 * ProcLockWakeup -- routine for waking up processes when a lock is
658 ProcLockWakeup(PROC_QUEUE *queue, LOCKMETHOD lockmethod, LOCK *lock)
663 int last_locktype = -1;
664 int queue_size = queue->size;
666 Assert(queue->size >= 0);
669 return STATUS_NOT_FOUND;
671 proc = (PROC *) MAKE_PTR(queue->links.prev);
673 while ((queue_size--) && (proc))
677 * This proc will conflict as the previous one did, don't even
680 if (proc->token == last_locktype)
684 * This proc conflicts with locks held by others, ignored.
686 if (LockResolveConflicts(lockmethod,
690 (XIDLookupEnt *) NULL) != STATUS_OK)
692 last_locktype = proc->token;
697 * there was a waiting process, grant it the lock before waking it
698 * up. This will prevent another process from seizing the lock
699 * between the time we release the lock master (spinlock) and the
700 * time that the awoken process begins executing again.
702 GrantLock(lock, proc->token);
705 * ProcWakeup removes proc from the lock waiting process queue and
706 * returns the next proc in chain.
711 proc = ProcWakeup(proc, NO_ERROR);
714 Assert(queue->size >= 0);
720 /* Something is still blocking us. May have deadlocked. */
721 trace_flag = (lock->tag.lockmethod == USER_LOCKMETHOD) ? \
722 TRACE_USERLOCKS : TRACE_LOCKS;
724 "ProcLockWakeup: lock(%x) can't wake up any process",
726 #ifdef DEADLOCK_DEBUG
727 if (pg_options[trace_flag] >= 2)
730 return STATUS_NOT_FOUND;
735 ProcAddLock(SHM_QUEUE *elem)
737 SHMQueueInsertTL(&MyProc->lockQueue, elem);
740 /* --------------------
741 * We only get to this routine if we got SIGALRM after DEADLOCK_CHECK_TIMER
742 * while waiting for a lock to be released by some other process. If we have
743 * a real deadlock, we must also indicate that I'm no longer waiting
744 * on a lock so that other processes don't try to wake me up and screw
746 * --------------------
749 HandleDeadLock(int sig)
755 /* ---------------------
756 * Check to see if we've been awoken by anyone in the interim.
758 * If we have we can return and resume our transaction -- happy day.
759 * Before we are awoken the process releasing the lock grants it to
760 * us so we know that we don't have to wait anymore.
762 * Damn these names are LONG! -mer
763 * ---------------------
765 if (IpcSemaphoreGetCount(MyProc->sem.semId, MyProc->sem.semNum) ==
766 IpcSemaphoreDefaultStartValue)
773 * you would think this would be unnecessary, but...
775 * this also means we've been removed already. in some ports (e.g.,
776 * sparc and aix) the semop(2) implementation is such that we can
777 * actually end up in this handler after someone has removed us from
778 * the queue and bopped the semaphore *but the test above fails to
779 * detect the semaphore update* (presumably something weird having to
780 * do with the order in which the semaphore wakeup signal and SIGALRM
783 if (MyProc->links.prev == INVALID_OFFSET ||
784 MyProc->links.next == INVALID_OFFSET)
790 #ifdef DEADLOCK_DEBUG
794 if (!DeadLockCheck(&(MyProc->lockQueue), MyProc->waitLock, true))
797 MyProc->errType = STATUS_NOT_FOUND;
801 mywaitlock = MyProc->waitLock;
803 /* ------------------------
804 * Get this process off the lock's wait queue
805 * ------------------------
807 Assert(mywaitlock->waitProcs.size > 0);
808 --mywaitlock->waitProcs.size;
809 SHMQueueDelete(&(MyProc->links));
810 SHMQueueElemInit(&(MyProc->links));
812 /* ------------------
813 * Unlock my semaphore so that the count is right for next time.
814 * I was awoken by a signal, not by someone unlocking my semaphore.
817 IpcSemaphoreUnlock(MyProc->sem.semId, MyProc->sem.semNum,
821 * Set MyProc->errType to STATUS_ERROR so that we abort after
822 * returning from this handler.
825 MyProc->errType = STATUS_ERROR;
828 * if this doesn't follow the IpcSemaphoreUnlock then we get lock
829 * table corruption ("LockReplace: xid table corrupted") due to race
830 * conditions. i don't claim to understand this...
834 elog(NOTICE, "Deadlock detected -- See the lock(l) manual page for a possible cause.");
839 ProcReleaseSpins(PROC *proc)
848 for (i = 0; i < (int) MAX_SPINS; i++)
852 Assert(proc->sLocks[i] == 1);
858 /*****************************************************************************
860 *****************************************************************************/
863 * ProcGetNewSemKeyAndNum -
864 * scan the free semaphore bitmap and allocate a single semaphore from
865 * a semaphore set. (If the semaphore set doesn't exist yet,
866 * IpcSemaphoreCreate will create it. Otherwise, we use the existing
870 ProcGetNewSemKeyAndNum(IPCKey *key, int *semNum)
873 int32 *freeSemMap = ProcGlobal->freeSemMap;
874 int32 fullmask = (1 << (PROC_NSEMS_PER_SET+1)) - 1;
877 * we hold ProcStructLock when entering this routine. We scan through
878 * the bitmap to look for a free semaphore.
881 for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++)
886 if (freeSemMap[i] == fullmask)
887 continue; /* this set is fully allocated */
889 for (j = 0; j < PROC_NSEMS_PER_SET; j++)
891 if ((freeSemMap[i] & mask) == 0)
895 * a free semaphore found. Mark it as allocated.
896 * Also set the bit indicating whole set is allocated.
898 freeSemMap[i] |= mask + (1 << PROC_NSEMS_PER_SET);
900 *key = ProcGlobal->currKey + i;
908 /* if we reach here, all the semaphores are in use. */
909 elog(ERROR, "InitProc: cannot allocate a free semaphore");
914 * free up our semaphore in the semaphore set.
917 ProcFreeSem(IpcSemaphoreKey semKey, int semNum)
921 int32 *freeSemMap = ProcGlobal->freeSemMap;
923 i = semKey - ProcGlobal->currKey;
924 mask = ~(1 << semNum);
925 freeSemMap[i] &= mask;
927 /* Formerly we'd release a semaphore set if it was now completely unused,
928 * but now we keep the semaphores to ensure we won't run out when
929 * starting new backends --- cf. InitProcGlobal. Note that the
930 * PROC_NSEMS_PER_SET+1'st bit of the freeSemMap entry remains set to
931 * indicate it is still allocated; ProcFreeAllSemaphores() needs that.
936 * ProcFreeAllSemaphores -
937 * called at shmem_exit time, i.e. when exiting the postmaster or
938 * destroying shared state for a failed set of backends.
939 * Free up all the semaphores allocated to the lmgrs of the backends.
942 ProcFreeAllSemaphores()
945 int32 *freeSemMap = ProcGlobal->freeSemMap;
947 for (i = 0; i < MAX_PROC_SEMS / PROC_NSEMS_PER_SET; i++)
949 if (freeSemMap[i] != 0)
950 IpcSemaphoreKill(ProcGlobal->currKey + i);