1 /*-------------------------------------------------------------------------
4 * Implement PGSemaphores using SysV semaphore facilities
7 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * src/backend/port/sysv_sema.c
13 *-------------------------------------------------------------------------
26 #ifdef HAVE_KERNEL_OS_H
27 #include <kernel/OS.h>
30 #include "miscadmin.h"
31 #include "storage/ipc.h"
32 #include "storage/pg_sema.h"
/*
 * Fallback for platforms whose headers do not supply union semun: the
 * declaration is compiled only when HAVE_UNION_SEMUN is undefined.
 * NOTE(review): only one member line of the union is visible in this
 * excerpt -- confirm the full declaration against the complete file.
 */
35 #ifndef HAVE_UNION_SEMUN
40 unsigned short *array;
44 typedef key_t IpcSemaphoreKey; /* semaphore key passed to semget(2) */
45 typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */
48 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
49 * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores
50 * per set) parameter, which is often around 25. (Less than, because we
51 * allocate one extra sema in each set for identification purposes.)
53 #define SEMAS_PER_SET 16
55 #define IPCProtection (0600) /* access/modify by user only */
57 #define PGSemaMagic 537 /* must be less than SEMVMX */
/*
 * File-local bookkeeping.  mySemaSets is allocated in PGReserveSemaphores
 * with room for maxSemaSets entries, filled in by PGSemaphoreCreate, and
 * walked by ReleaseSemaphores.
 */
60 static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
61 static int numSemaSets; /* number of sema sets acquired so far */
62 static int maxSemaSets; /* allocated size of mySemaSets array */
63 static IpcSemaphoreKey nextSemaKey; /* next key to try using */
64 static int nextSemaNumber; /* next free sem num in last sema set */
/* Prototypes for the static helper routines defined further down. */
67 static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
69 static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
71 static void IpcSemaphoreKill(IpcSemaphoreId semId);
72 static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
73 static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
74 static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
75 static void ReleaseSemaphores(int status, Datum arg);
79 * InternalIpcSemaphoreCreate
81 * Attempt to create a new semaphore set with the specified key.
82 * Will fail (return -1) if such a set already exists.
84 * If we fail with a failure code other than collision-with-existing-set,
85 * print out an error and abort. Other types of errors suggest nonrecoverable problems.
/*
 * Parameters:
 *   semKey  - key handed to semget().
 *   numSems - number of semaphores requested for the new set.
 *
 * semget() is called with IPC_CREAT | IPC_EXCL | IPCProtection, so a set
 * that already exists under semKey is never opened by this routine.  At
 * least EEXIST and EACCES are treated as a key collision; any other
 * failure is reported with the exact semget() arguments in the detail
 * message.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 * Presumably the new set ID is returned on success and -1 on collision --
 * confirm against the complete file.
 */
89 InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
93 semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
98 * Fail quietly if error indicates a collision with existing set. One
99 * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
100 * could get a permission violation instead? Also, EIDRM might occur
101 * if an old set is slated for destruction but not gone yet.
103 if (errno == EEXIST || errno == EACCES
111 * Else complain and abort
114 (errmsg("could not create semaphores: %m"),
115 errdetail("Failed system call was semget(%lu, %d, 0%o).",
116 (unsigned long) semKey, numSems,
117 IPC_CREAT | IPC_EXCL | IPCProtection),
/*
 * NOTE(review): the trailing ": 0" shows that this hint is one arm of a
 * conditional expression; its controlling condition is not visible in
 * this excerpt.
 */
119 errhint("This error does *not* mean that you have run out of disk space. "
120 "It occurs when either the system limit for the maximum number of "
121 "semaphore sets (SEMMNI), or the system wide maximum number of "
122 "semaphores (SEMMNS), would be exceeded. You need to raise the "
123 "respective kernel parameter. Alternatively, reduce PostgreSQL's "
124 "consumption of semaphores by reducing its max_connections parameter.\n"
125 "The PostgreSQL documentation contains more information about "
126 "configuring your system for PostgreSQL.") : 0));
133 * Initialize a semaphore to the specified value.
/*
 * Parameters:
 *   semId  - ID of the semaphore set.
 *   semNum - index of the semaphore within that set.
 *   value  - count to assign.
 *
 * The value is applied with semctl(SETVAL).  A failed call is reported
 * with the attempted arguments and a hint about the SEMVMX kernel limit.
 *
 * NOTE(review): the lines that load the semun argument and select the
 * error level are not visible in this excerpt.
 */
136 IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
141 if (semctl(semId, semNum, SETVAL, semun) < 0)
143 (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
144 semId, semNum, value),
146 errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
147 "%d. Look into the PostgreSQL documentation for details.",
152 * IpcSemaphoreKill(semId) - removes a semaphore set
/*
 * Parameters:
 *   semId - ID of the semaphore set to remove.
 *
 * Removal is requested with semctl(IPC_RMID).  A failure is only written
 * to the log at LOG level, so the caller carries on regardless.
 */
155 IpcSemaphoreKill(IpcSemaphoreId semId)
159 semun.val = 0; /* unused, but keep compiler quiet */
/* Semaphore number 0 is passed, but IPC_RMID acts on the whole set. */
161 if (semctl(semId, 0, IPC_RMID, semun) < 0)
162 elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
165 /* Get the current value (semval) of the semaphore */
/*
 * Parameters:
 *   semId  - ID of the semaphore set.
 *   semNum - index of the semaphore within that set.
 *
 * Returns the result of semctl(GETVAL) unchanged.  No error is raised
 * here, so a negative result from a failed call is passed straight back
 * to the caller.
 */
167 IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
169 union semun dummy; /* for Solaris */
171 dummy.val = 0; /* unused */
173 return semctl(semId, semNum, GETVAL, dummy);
176 /* Get the PID of the last process to do semop() on the semaphore */
/*
 * Parameters:
 *   semId  - ID of the semaphore set.
 *   semNum - index of the semaphore within that set.
 *
 * Returns the result of semctl(GETPID) unchanged.  No error is raised
 * here, so the caller has to cope with a failure result itself.
 */
178 IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
180 union semun dummy; /* for Solaris */
182 dummy.val = 0; /* unused */
184 return semctl(semId, semNum, GETPID, dummy);
189 * Create a semaphore set with the given number of useful semaphores
190 * (an additional sema is actually allocated to serve as identifier).
191 * Dead Postgres sema sets are recycled if found, but we do not fail
192 * upon collision with non-Postgres sema sets.
194 * The idea here is to detect and re-use keys that may have been assigned
195 * by a crashed postmaster or backend.
/*
 * Parameters:
 *   numSems - number of usable semaphores wanted; numSems + 1 are
 *             requested so that index numSems can serve as a marker.
 *
 * Outline of the key search:
 *   1. Try an exclusive create at nextSemaKey.
 *   2. On collision, open the existing set and check its marker semaphore
 *      for PGSemaMagic.
 *   3. Check whether the last process to operate on the marker is this
 *      process or no longer exists.
 *   4. If so, remove the stale set and retry the create once.
 *   5. Otherwise move on to the next key.
 * After a successful create the marker semaphore is stamped with
 * PGSemaMagic and this process's PID.
 *
 * NOTE(review): several guarding tests and the final return are not
 * visible in this excerpt.  Presumably the new set ID is returned --
 * confirm against the complete file.
 */
197 static IpcSemaphoreId
198 IpcSemaphoreCreate(int numSems)
200 IpcSemaphoreId semId;
202 PGSemaphoreData mysema;
204 /* Loop till we find a free IPC key */
/*
 * nextSemaKey is advanced before the first attempt as well, so every call
 * starts at the key following the last one tried.
 */
205 for (nextSemaKey++;; nextSemaKey++)
209 /* Try to create new semaphore set */
210 semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
212 break; /* successful create */
214 /* See if it looks to be leftover from a dead Postgres process */
215 semId = semget(nextSemaKey, numSems + 1, 0);
217 continue; /* failed: must be some other app's */
/* Index numSems is the extra marker semaphore of the set. */
218 if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
219 continue; /* sema belongs to a non-Postgres app */
222 * If the creator PID is my own PID or does not belong to any extant
223 * process, it's safe to zap it.
225 creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
227 continue; /* oops, GETPID failed */
228 if (creatorPID != getpid())
/*
 * Signal 0 only probes for the process.  Any outcome other than a failure
 * with ESRCH is taken to mean the process still exists.
 */
230 if (kill(creatorPID, 0) == 0 || errno != ESRCH)
231 continue; /* sema belongs to a live process */
235 * The sema set appears to be from a dead Postgres process, or from a
236 * previous cycle of life in this same process. Zap it, if possible.
237 * This probably shouldn't fail, but if it does, assume the sema set
238 * belongs to someone else after all, and continue quietly.
240 semun.val = 0; /* unused, but keep compiler quiet */
241 if (semctl(semId, 0, IPC_RMID, semun) < 0)
245 * Now try again to create the sema set.
247 semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
249 break; /* successful create */
252 * Can only get here if some other process managed to create the same
253 * sema key before we did. Let him have that one, loop around to try
259 * OK, we created a new sema set. Mark it as created by this process. We
260 * do this by setting the spare semaphore to PGSemaMagic-1 and then
261 * incrementing it with semop(). That leaves it with value PGSemaMagic
262 * and sempid referencing this process.
264 IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
/* mysema is a throwaway handle that points PGSemaphoreUnlock at the marker. */
265 mysema.semId = semId;
266 mysema.semNum = numSems;
267 PGSemaphoreUnlock(&mysema);
274 * PGReserveSemaphores --- initialize semaphore support
276 * This is called during postmaster start or shared memory reinitialization.
277 * It should do whatever is needed to be able to support up to maxSemas
278 * subsequent PGSemaphoreCreate calls. Also, if any system resources
279 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
280 * callback to release them.
282 * The port number is passed for possible use as a key (for SysV, we use
283 * it to generate the starting semaphore key). In a standalone backend,
284 * zero will be passed.
286 * In the SysV implementation, we acquire semaphore sets on-demand; the
287 * maxSemas parameter is just used to size the array that keeps track of
288 * acquired sets for subsequent releasing.
/*
 * Parameters:
 *   maxSemas - upper bound on later PGSemaphoreCreate calls; used only to
 *              size the mySemaSets array.
 *   port     - port number; used to derive the starting semaphore key.
 *
 * No semaphore set is created here.  The routine allocates the tracking
 * array, primes the key and slot counters, and registers
 * ReleaseSemaphores as an on_shmem_exit callback.  An allocation failure
 * is reported at PANIC level.
 */
291 PGReserveSemaphores(int maxSemas, int port)
/* Ceiling division: enough sets of SEMAS_PER_SET to cover maxSemas. */
293 maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
294 mySemaSets = (IpcSemaphoreId *)
295 malloc(maxSemaSets * sizeof(IpcSemaphoreId));
296 if (mySemaSets == NULL)
297 elog(PANIC, "out of memory");
/*
 * IpcSemaphoreCreate increments nextSemaKey before its first attempt, so
 * the first key actually tried is port * 1000 + 1.
 */
299 nextSemaKey = port * 1000;
300 nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
302 on_shmem_exit(ReleaseSemaphores, 0);
306 * Release semaphores at shutdown or shmem reinitialization
308 * (called as an on_shmem_exit callback, hence funny argument list)
/*
 * Parameters:
 *   status - supplied by the callback mechanism; not used in the visible
 *            lines.
 *   arg    - supplied by the callback mechanism; not used in the visible
 *            lines.
 *
 * Calls IpcSemaphoreKill on each of the numSemaSets IDs recorded in
 * mySemaSets.
 */
311 ReleaseSemaphores(int status, Datum arg)
315 for (i = 0; i < numSemaSets; i++)
316 IpcSemaphoreKill(mySemaSets[i]);
323 * Initialize a PGSemaphore structure to represent a sema with count 1
/*
 * Parameters:
 *   sema - structure to fill in; receives the set ID and the semaphore
 *          number assigned to it.
 *
 * Semaphores are handed out one at a time from the most recently created
 * set.  A new set of SEMAS_PER_SET semaphores is created once the current
 * one is used up.  Exceeding maxSemaSets is reported at PANIC level.  The
 * assigned semaphore is initialized to a count of 1.
 *
 * NOTE(review): the counter updates that follow the creation of a new set
 * are not visible in this excerpt.  The code below reads
 * mySemaSets[numSemaSets - 1], so numSemaSets is presumably incremented
 * and nextSemaNumber reset there -- confirm against the complete file.
 */
326 PGSemaphoreCreate(PGSemaphore sema)
328 /* Can't do this in a backend, because static state is postmaster's */
329 Assert(!IsUnderPostmaster);
331 if (nextSemaNumber >= SEMAS_PER_SET)
333 /* Time to allocate another semaphore set */
334 if (numSemaSets >= maxSemaSets)
335 elog(PANIC, "too many semaphores created");
336 mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
340 /* Assign the next free semaphore in the current set */
341 sema->semId = mySemaSets[numSemaSets - 1];
342 sema->semNum = nextSemaNumber++;
343 /* Initialize it to count 1 */
344 IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
350 * Reset a previously-initialized PGSemaphore to have count 0
/*
 * Parameters:
 *   sema - semaphore to reset; its semId and semNum fields select the
 *          underlying semaphore.
 *
 * The count is forced to 0 through IpcSemaphoreInitialize.
 */
353 PGSemaphoreReset(PGSemaphore sema)
355 IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
361 * Lock a semaphore (decrement count), blocking if count would be < 0
/*
 * Parameters:
 *   sema        - semaphore to lock; its semId and semNum fields select
 *                 the underlying semaphore.
 *   interruptOK - copied into ImmediateInterruptOK around each semop()
 *                 attempt; the long comment below explains when passing
 *                 true is safe.
 *
 * A semop() with sem_op = -1 is issued and repeated for as long as it
 * fails with EINTR.  CHECK_FOR_INTERRUPTS runs before every attempt, and
 * ImmediateInterruptOK is set back to false after every attempt.  The
 * FATAL report at the end covers any other failure.
 *
 * NOTE(review): the test guarding the final elog and the setting of
 * sops.sem_flg are not visible in this excerpt.
 */
364 PGSemaphoreLock(PGSemaphore sema, bool interruptOK)
369 sops.sem_op = -1; /* decrement */
371 sops.sem_num = sema->semNum;
374 * Note: if errStatus is -1 and errno == EINTR then it means we returned
375 * from the operation prematurely because we were sent a signal. So we
376 * try and lock the semaphore again.
378 * Each time around the loop, we check for a cancel/die interrupt. On
379 * some platforms, if such an interrupt comes in while we are waiting, it
380 * will cause the semop() call to exit with errno == EINTR, allowing us to
381 * service the interrupt (if not in a critical section already) during the
382 * next loop iteration.
384 * Once we acquire the lock, we do NOT check for an interrupt before
385 * returning. The caller needs to be able to record ownership of the lock
386 * before any interrupt can be accepted.
388 * There is a window of a few instructions between CHECK_FOR_INTERRUPTS
389 * and entering the semop() call. If a cancel/die interrupt occurs in
390 * that window, we would fail to notice it until after we acquire the lock
391 * (or get another interrupt to escape the semop()). We can avoid this
392 * problem by temporarily setting ImmediateInterruptOK to true before we
393 * do CHECK_FOR_INTERRUPTS; then, a die() interrupt in this interval will
394 * execute directly. However, there is a huge pitfall: there is another
395 * window of a few instructions after the semop() before we are able to
396 * reset ImmediateInterruptOK. If an interrupt occurs then, we'll lose
397 * control, which means that the lock has been acquired but our caller did
398 * not get a chance to record the fact. Therefore, we only set
399 * ImmediateInterruptOK if the caller tells us it's OK to do so, ie, the
400 * caller does not need to record acquiring the lock. (This is currently
401 * true for lockmanager locks, since the process that granted us the lock
402 * did all the necessary state updates. It's not true for SysV semaphores
403 * used to implement LW locks or emulate spinlocks --- but the wait time
404 * for such locks should not be very long, anyway.)
406 * On some platforms, signals marked SA_RESTART (which is most, for us)
407 * will not interrupt the semop(); it will just keep waiting. Therefore
408 * it's necessary for cancel/die interrupts to be serviced directly by the
409 * signal handler. On these platforms the behavior is really the same
410 * whether the signal arrives just before the semop() begins, or while it
411 * is waiting. The loop on EINTR is thus important only for other types
416 ImmediateInterruptOK = interruptOK;
417 CHECK_FOR_INTERRUPTS();
418 errStatus = semop(sema->semId, &sops, 1);
419 ImmediateInterruptOK = false;
420 } while (errStatus < 0 && errno == EINTR);
423 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
429 * Unlock a semaphore (increment count)
/*
 * Parameters:
 *   sema - semaphore to unlock; its semId and semNum fields select the
 *          underlying semaphore.
 *
 * A semop() with sem_op = 1 is issued and repeated for as long as it
 * fails with EINTR.  The FATAL report at the end covers any other
 * failure.
 *
 * NOTE(review): the test guarding the final elog and the setting of
 * sops.sem_flg are not visible in this excerpt.
 */
432 PGSemaphoreUnlock(PGSemaphore sema)
437 sops.sem_op = 1; /* increment */
439 sops.sem_num = sema->semNum;
442 * Note: if errStatus is -1 and errno == EINTR then it means we returned
443 * from the operation prematurely because we were sent a signal. So we
444 * try and unlock the semaphore again. Not clear this can really happen,
445 * but might as well cope.
449 errStatus = semop(sema->semId, &sops, 1);
450 } while (errStatus < 0 && errno == EINTR);
453 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
459 * Lock a semaphore only if able to do so without blocking
/*
 * Parameters:
 *   sema - semaphore to try to lock; its semId and semNum fields select
 *          the underlying semaphore.
 *
 * A semop() with sem_op = -1 and IPC_NOWAIT is issued and repeated for as
 * long as it fails with EINTR.  Returns false when the semaphore could
 * not be taken without blocking.  Any other failure is reported at FATAL
 * level.
 *
 * NOTE(review): the success return is not visible in this excerpt;
 * presumably true is returned once the decrement succeeds -- confirm
 * against the complete file.
 */
462 PGSemaphoreTryLock(PGSemaphore sema)
467 sops.sem_op = -1; /* decrement */
468 sops.sem_flg = IPC_NOWAIT; /* but don't block */
469 sops.sem_num = sema->semNum;
472 * Note: if errStatus is -1 and errno == EINTR then it means we returned
473 * from the operation prematurely because we were sent a signal. So we
474 * try and lock the semaphore again.
478 errStatus = semop(sema->semId, &sops, 1);
479 } while (errStatus < 0 && errno == EINTR);
483 /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
486 return false; /* failed to lock it */
/*
 * EWOULDBLOCK gets its own test only when it is defined and is either
 * distinct from EAGAIN or EAGAIN is not defined at all.
 */
488 #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
489 if (errno == EWOULDBLOCK)
490 return false; /* failed to lock it */
492 /* Otherwise we got trouble */
493 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);