1 /*-------------------------------------------------------------------------
4 * Implement shared memory using SysV facilities
6 * These routines represent a fairly thin layer on top of SysV shared
7 * memory functionality.
9 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
10 * Portions Copyright (c) 1994, Regents of the University of California
13 * src/backend/port/sysv_shmem.c
15 *-------------------------------------------------------------------------
29 #ifdef HAVE_KERNEL_OS_H
30 #include <kernel/OS.h>
33 #include "miscadmin.h"
34 #include "storage/ipc.h"
35 #include "storage/pg_shmem.h"
38 typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
39 typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
41 #define IPCProtection (0600) /* access/modify by user only */
43 #ifdef SHM_SHARE_MMU /* use intimate shared memory on Solaris */
44 #define PG_SHMAT_FLAGS SHM_SHARE_MMU
46 #define PG_SHMAT_FLAGS 0
/*
 * Identity of the segment this process is using.  PGSharedMemoryCreate
 * stores the IPC key it settled on in UsedShmemSegID and the attach
 * address in UsedShmemSegAddr.  PGSharedMemoryAttach passes
 * UsedShmemSegAddr to shmat() as the requested address, and
 * PGSharedMemoryDetach resets it to NULL.
 */
50 unsigned long UsedShmemSegID = 0;
51 void *UsedShmemSegAddr = NULL;
/* Forward declarations of helpers private to this file. */
53 static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
54 static void IpcMemoryDetach(int status, Datum shmaddr);
55 static void IpcMemoryDelete(int status, Datum shmId);
56 static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
61 * InternalIpcMemoryCreate(memKey, size)
63 * Attempt to create a new shared memory segment with the specified key.
64 * Will fail (return NULL) if such a segment already exists. If successful,
65 * attach the segment to the current process and return its attached address.
66 * On success, callbacks are registered with on_shmem_exit to detach and
67 * delete the segment when on_shmem_exit is called.
69 * If we fail with a failure code other than collision-with-existing-segment,
70 * print out an error and abort. Other types of errors are not recoverable.
73 InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size)
/*
 * IPC_CREAT | IPC_EXCL asks shmget() to create a brand-new segment and to
 * fail, rather than hand back an existing one, if the key is already taken.
 */
78 shmid = shmget(memKey, size, IPC_CREAT | IPC_EXCL | IPCProtection);
83 * Fail quietly if error indicates a collision with existing segment.
84 * One would expect EEXIST, given that we said IPC_EXCL, but perhaps
85 * we could get a permission violation instead? Also, EIDRM might
86 * occur if an old seg is slated for destruction but not gone yet.
88 if (errno == EEXIST || errno == EACCES
96 * Some BSD-derived kernels are known to return EINVAL, not EEXIST, if
97 * there is an existing segment but it's smaller than "size" (this is
98 * a result of poorly-thought-out ordering of error tests). To
99 * distinguish between collision and invalid size in such cases, we
100 * make a second try with size = 0. These kernels do not test size
101 * against SHMMIN in the preexisting-segment case, so we will not get
102 * EINVAL a second time if there is such a segment.
/*
 * Keep the errno of the first failure: the size-0 probe below overwrites
 * errno, and the original error is the one to report if the probe does not
 * confirm a collision.
 */
106 int save_errno = errno;
108 shmid = shmget(memKey, 0, IPC_CREAT | IPC_EXCL | IPCProtection);
112 /* As above, fail quietly if we verify a collision */
113 if (errno == EEXIST || errno == EACCES
119 /* Otherwise, fall through to report the original error */
124 * On most platforms we cannot get here because SHMMIN is
125 * greater than zero. However, if we do succeed in creating a
126 * zero-size segment, free it and then fall through to report
127 * the original error.
129 if (shmctl(shmid, IPC_RMID, NULL) < 0)
130 elog(LOG, "shmctl(%d, %d, 0) failed: %m",
131 (int) shmid, IPC_RMID);
138 * Else complain and abort.
140 * Note: at this point EINVAL should mean that either SHMMIN or SHMMAX
141 * is violated. SHMALL violation might be reported as either ENOMEM
142 * (BSDen) or ENOSPC (Linux); the Single Unix Spec fails to say which
143 * it should be. SHMMNI violation is ENOSPC, per spec. Just plain
144 * not-enough-RAM is ENOMEM.
147 (errmsg("could not create shared memory segment: %m"),
148 errdetail("Failed system call was shmget(key=%lu, size=%lu, 0%o).",
149 (unsigned long) memKey, (unsigned long) size,
150 IPC_CREAT | IPC_EXCL | IPCProtection),
152 errhint("This error usually means that PostgreSQL's request for a shared memory "
153 "segment exceeded your kernel's SHMMAX parameter. You can either "
154 "reduce the request size or reconfigure the kernel with larger SHMMAX. "
155 "To reduce the request size (currently %lu bytes), reduce "
156 "PostgreSQL's shared memory usage, perhaps by reducing shared_buffers "
157 "or max_connections.\n"
158 "If the request size is already small, it's possible that it is less than "
159 "your kernel's SHMMIN parameter, in which case raising the request size or "
160 "reconfiguring SHMMIN is called for.\n"
161 "The PostgreSQL documentation contains more information about shared "
162 "memory configuration.",
163 (unsigned long) size) : 0,
165 errhint("This error usually means that PostgreSQL's request for a shared "
166 "memory segment exceeded available memory or swap space, "
167 "or exceeded your kernel's SHMALL parameter. You can either "
168 "reduce the request size or reconfigure the kernel with larger SHMALL. "
169 "To reduce the request size (currently %lu bytes), reduce "
170 "PostgreSQL's shared memory usage, perhaps by reducing shared_buffers "
171 "or max_connections.\n"
172 "The PostgreSQL documentation contains more information about shared "
173 "memory configuration.",
174 (unsigned long) size) : 0,
176 errhint("This error does *not* mean that you have run out of disk space. "
177 "It occurs either if all available shared memory IDs have been taken, "
178 "in which case you need to raise the SHMMNI parameter in your kernel, "
179 "or because the system's overall limit for shared memory has been "
180 "reached. If you cannot increase the shared memory limit, "
181 "reduce PostgreSQL's shared memory request (currently %lu bytes), "
182 "perhaps by reducing shared_buffers or max_connections.\n"
183 "The PostgreSQL documentation contains more information about shared "
184 "memory configuration.",
185 (unsigned long) size) : 0));
/*
 * The delete callback is registered before shmat() is attempted, so it is
 * already in place if the attach below ends in elog(FATAL).
 * NOTE(review): the "detach ... before deleting" remark further down
 * suggests on_shmem_exit callbacks run in reverse registration order --
 * confirm against the on_shmem_exit implementation.
 */
188 /* Register on-exit routine to delete the new segment */
189 on_shmem_exit(IpcMemoryDelete, Int32GetDatum(shmid));
191 /* OK, should be able to attach to the segment */
192 memAddress = shmat(shmid, NULL, PG_SHMAT_FLAGS);
194 if (memAddress == (void *) -1)
195 elog(FATAL, "shmat(id=%d) failed: %m", shmid);
197 /* Register on-exit routine to detach new segment before deleting */
198 on_shmem_exit(IpcMemoryDetach, PointerGetDatum(memAddress));
201 * Store shmem key and ID in data directory lockfile. Format to try to
202 * keep it the same length always (trailing junk in the lockfile won't
203 * hurt, but might confuse humans).
/* Two fields, key first and then segment ID, each padded to width 9. */
208 sprintf(line, "%9lu %9lu",
209 (unsigned long) memKey, (unsigned long) shmid);
210 AddToDataDirLockFile(LOCK_FILE_LINE_SHMEM_KEY, line);
216 /****************************************************************************/
217 /* IpcMemoryDetach(status, shmaddr) removes a shared memory segment */
218 /* from process' address space */
219 /* (called as an on_shmem_exit callback, hence funny argument list) */
220 /****************************************************************************/
222 IpcMemoryDetach(int status, Datum shmaddr)
/*
 * shmaddr carries the attach address as a pointer Datum; status is not
 * consulted.  A failed shmdt() is only reported at LOG level.
 */
224 if (shmdt(DatumGetPointer(shmaddr)) < 0)
225 elog(LOG, "shmdt(%p) failed: %m", DatumGetPointer(shmaddr));
228 /****************************************************************************/
229 /* IpcMemoryDelete(status, shmId) deletes a shared memory segment */
230 /* (called as an on_shmem_exit callback, hence funny argument list) */
231 /****************************************************************************/
233 IpcMemoryDelete(int status, Datum shmId)
/*
 * shmId carries the segment ID as an int32 Datum; status is not consulted.
 * The segment is removed with IPC_RMID, and a failure is only reported at
 * LOG level.
 */
235 if (shmctl(DatumGetInt32(shmId), IPC_RMID, NULL) < 0)
236 elog(LOG, "shmctl(%d, %d, 0) failed: %m",
237 DatumGetInt32(shmId), IPC_RMID);
241 * PGSharedMemoryIsInUse
243 * Is a previously-existing shmem segment still existing and in use?
245 * The point of this exercise is to detect the case where a prior postmaster
246 * crashed, but it left child backends that are still running. Therefore
247 * we only care about shmem segments that are associated with the intended
248 * DataDir. This is an important consideration since accidental matches of
249 * shmem segment IDs are reasonably common.
252 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
/*
 * id2 is interpreted as the SysV shared memory ID of the old segment.
 * NOTE(review): id1 is presumably the matching IPC key (the lockfile line
 * written at segment creation stores key, then ID) -- confirm with callers.
 */
254 IpcMemoryId shmId = (IpcMemoryId) id2;
255 struct shmid_ds shmStat;
260 * We detect whether a shared memory segment is in use by seeing whether
261 * it (a) exists and (b) has any processes attached to it.
263 if (shmctl(shmId, IPC_STAT, &shmStat) < 0)
266 * EINVAL actually has multiple possible causes documented in the
267 * shmctl man page, but we assume it must mean the segment no longer
274 * EACCES implies that the segment belongs to some other userid, which
275 * means it is not a Postgres shmem segment (or at least, not one that
276 * is relevant to our data directory).
282 * Some Linux kernel versions (in fact, all of them as of July 2007)
283 * sometimes return EIDRM when EINVAL is correct. The Linux kernel
284 * actually does not have any internal state that would justify
285 * returning EIDRM, so we can get away with assuming that EIDRM is
286 * equivalent to EINVAL on that platform.
288 #ifdef HAVE_LINUX_EIDRM_BUG
294 * Otherwise, we had better assume that the segment is in use. The
295 * only likely case is EIDRM, which implies that the segment has been
296 * IPC_RMID'd but there are still processes attached to it.
301 /* If it has no attached processes, it's not in use */
302 if (shmStat.shm_nattch == 0)
306 * Try to attach to the segment and see if it matches our data directory.
307 * This avoids shmid-conflict problems on machines that are running
308 * several postmasters under the same userid.
310 if (stat(DataDir, &statbuf) < 0)
311 return true; /* if can't stat, be conservative */
/* Attach at a kernel-chosen address (NULL), purely to read the header. */
313 hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS);
315 if (hdr == (PGShmemHeader *) -1)
316 return true; /* if can't attach, be conservative */
/*
 * The header's device and inode fields are filled in from stat(DataDir) by
 * PGSharedMemoryCreate, so comparing them with our own stat() result tells
 * whether the segment was created for this data directory.
 */
318 if (hdr->magic != PGShmemMagic ||
319 hdr->device != statbuf.st_dev ||
320 hdr->inode != statbuf.st_ino)
323 * It's either not a Postgres segment, or not one for my data
324 * directory. In either case it poses no threat.
330 /* Trouble --- looks a lot like there's still live backends */
338 * PGSharedMemoryCreate
340 * Create a shared memory segment of the given size and initialize its
341 * standard header. Also, register an on_shmem_exit callback to release the segment.
344 * Dead Postgres segments are recycled if found, but we do not fail upon
345 * collision with non-Postgres shmem segments. The idea here is to detect and
346 * re-use keys that may have been assigned by a crashed postmaster or backend.
348 * makePrivate means to always create a new segment, rather than attach to
349 * or recycle any existing segment.
351 * The port number is passed for possible use as a key (for SysV, we use
352 * it to generate the starting shmem key). In a standalone backend,
353 * zero will be passed.
356 PGSharedMemoryCreate(Size size, bool makePrivate, int port)
358 IpcMemoryKey NextShmemSegID;
364 /* Room for a header? */
365 Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
367 /* Make sure PGSharedMemoryAttach doesn't fail without need */
/*
 * (PGSharedMemoryAttach hands UsedShmemSegAddr to shmat() as the requested
 * attach address, so a stale value could make the attach fail.)
 */
368 UsedShmemSegAddr = NULL;
370 /* Loop till we find a free IPC key */
/*
 * The first key tried is port * 1000 + 1: the for-loop's init clause
 * increments before the first attempt.
 */
371 NextShmemSegID = port * 1000;
373 for (NextShmemSegID++;; NextShmemSegID++)
375 /* Try to create new segment */
376 memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);
378 break; /* successful create and attach */
380 /* Check shared memory and possibly remove and recreate */
382 if (makePrivate) /* a standalone backend shouldn't do this */
385 if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL)
386 continue; /* can't attach, not one of mine */
389 * If I am not the creator and it belongs to an extant process,
392 hdr = (PGShmemHeader *) memAddress;
393 if (hdr->creatorPID != getpid())
/*
 * kill() with signal 0 delivers nothing and only probes for the process.
 * Any outcome other than failure with ESRCH is treated as "the creator may
 * still be alive".
 */
395 if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH)
398 continue; /* segment belongs to a live process */
403 * The segment appears to be from a dead Postgres process, or from a
404 * previous cycle of life in this same process. Zap it, if possible.
405 * This probably shouldn't fail, but if it does, assume the segment
406 * belongs to someone else after all, and continue quietly.
409 if (shmctl(shmid, IPC_RMID, NULL) < 0)
413 * Now try again to create the segment.
415 memAddress = InternalIpcMemoryCreate(NextShmemSegID, size);
417 break; /* successful create and attach */
420 * Can only get here if some other process managed to create the same
421 * shmem key before we did. Let him have that one, loop around to try
427 * OK, we created a new segment. Mark it as created by this process. The
428 * order of assignments here is critical so that another Postgres process
429 * can't see the header as valid but belonging to an invalid PID!
431 hdr = (PGShmemHeader *) memAddress;
/*
 * creatorPID is stored before magic: PGSharedMemoryAttach and
 * PGSharedMemoryIsInUse accept a header as a Postgres one only once magic
 * matches PGShmemMagic.
 */
432 hdr->creatorPID = getpid();
433 hdr->magic = PGShmemMagic;
435 /* Fill in the data directory ID info, too */
436 if (stat(DataDir, &statbuf) < 0)
438 (errcode_for_file_access(),
439 errmsg("could not stat data directory \"%s\": %m",
441 hdr->device = statbuf.st_dev;
442 hdr->inode = statbuf.st_ino;
445 * Initialize space allocation status for segment.
/* The first free byte lies just past the MAXALIGN'd header. */
447 hdr->totalsize = size;
448 hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
450 /* Save info for possible future use */
451 UsedShmemSegAddr = memAddress;
452 UsedShmemSegID = (unsigned long) NextShmemSegID;
460 * PGSharedMemoryReAttach
462 * Re-attach to an already existing shared memory segment. In the non
463 * EXEC_BACKEND case this is not used, because postmaster children inherit
464 * the shared memory segment attachment via fork().
466 * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
467 * routine. The caller must have already restored them to the postmaster's values.
471 PGSharedMemoryReAttach(void)
/*
 * Remember the expected attach address up front: PGSharedMemoryDetach()
 * resets UsedShmemSegAddr to NULL, and the value is also needed afterwards
 * to verify where the segment actually got mapped.
 */
475 void *origUsedShmemSegAddr = UsedShmemSegAddr;
477 Assert(UsedShmemSegAddr != NULL);
478 Assert(IsUnderPostmaster);
481 /* cygipc (currently) appears to not detach on exec. */
482 PGSharedMemoryDetach();
483 UsedShmemSegAddr = origUsedShmemSegAddr;
486 elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
/*
 * PGSharedMemoryAttach() passes UsedShmemSegAddr to shmat() as the requested
 * address, so the global must hold the expected address at this point.
 */
487 hdr = (void *) PGSharedMemoryAttach((IpcMemoryKey) UsedShmemSegID, &shmid);
489 elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
490 (int) UsedShmemSegID, UsedShmemSegAddr);
/* An attach at any other address than the expected one is fatal. */
491 if (hdr != origUsedShmemSegAddr)
492 elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
493 hdr, origUsedShmemSegAddr);
495 UsedShmemSegAddr = hdr; /* probably redundant */
497 #endif /* EXEC_BACKEND */
500 * PGSharedMemoryDetach
502 * Detach from the shared memory segment, if still attached. This is not
503 * intended for use by the process that originally created the segment
504 * (it will have an on_shmem_exit callback registered to do that). Rather,
505 * this is for subprocesses that have inherited an attachment and want to detach from it.
509 PGSharedMemoryDetach(void)
/* Nothing to do when no attachment is recorded (UsedShmemSegAddr is NULL). */
511 if (UsedShmemSegAddr != NULL)
513 if ((shmdt(UsedShmemSegAddr) < 0)
514 #if defined(EXEC_BACKEND) && defined(__CYGWIN__)
515 /* Work-around for cygipc exec bug */
519 elog(LOG, "shmdt(%p) failed: %m", UsedShmemSegAddr);
/* Forget the address so a later call skips the shmdt(). */
520 UsedShmemSegAddr = NULL;
526 * Attach to shared memory and make sure it has a Postgres header
528 * Returns attach address if OK, else NULL
530 static PGShmemHeader *
531 PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid)
/*
 * Look up an existing segment by key (flags 0, so nothing is created) and
 * hand its ID back to the caller through *shmid.
 */
535 if ((*shmid = shmget(key, sizeof(PGShmemHeader), 0)) < 0)
/*
 * The global UsedShmemSegAddr is used as the requested attach address; when
 * it is NULL the kernel chooses where to map the segment.
 */
538 hdr = (PGShmemHeader *) shmat(*shmid, UsedShmemSegAddr, PG_SHMAT_FLAGS);
540 if (hdr == (PGShmemHeader *) -1)
541 return NULL; /* failed: must be some other app's */
/* Only a header carrying PGShmemMagic is accepted as a Postgres segment. */
543 if (hdr->magic != PGShmemMagic)
546 return NULL; /* segment belongs to a non-Postgres app */