From: Noah Misch Date: Sat, 13 Apr 2019 05:36:38 +0000 (-0700) Subject: Consistently test for in-use shared memory. X-Git-Tag: REL9_4_22~19 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3ef5e16c618177160bc5f95216dccab3588defdc;p=postgresql Consistently test for in-use shared memory. postmaster startup scrutinizes any shared memory segment recorded in postmaster.pid, exiting if that segment matches the current data directory and has an attached process. When the postmaster.pid file was missing, a starting postmaster used weaker checks. Change to use the same checks in both scenarios. This increases the chance of a startup failure, in lieu of data corruption, if the DBA does "kill -9 `head -n1 postmaster.pid` && rm postmaster.pid && pg_ctl -w start". A postmaster will no longer stop if shmat() of an old segment fails with EACCES. A postmaster will no longer recycle segments pertaining to other data directories. That's good for production, but it's bad for integration tests that crash a postmaster and immediately delete its data directory. Such a test now leaks a segment indefinitely. No "make check-world" test does that. win32_shmem.c already avoided all these problems. In 9.6 and later, enhance PostgresNode to facilitate testing. Back-patch to 9.4 (all supported versions). Reviewed (in earlier versions) by Daniel Gustafsson and Kyotaro HORIGUCHI. Discussion: https://postgr.es/m/20190408064141.GA2016666@rfd.leadboat.com --- diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 1eb3ae5091..7695aff7b6 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -70,6 +70,26 @@ typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ +/* + * How does a given IpcMemoryId relate to this PostgreSQL process? 
+ * + * One could recycle unattached segments of different data directories if we + * distinguished that case from other SHMSTATE_FOREIGN cases. Doing so would + * cause us to visit less of the key space, making us less likely to detect a + * SHMSTATE_ATTACHED key. It would also complicate the concurrency analysis, + * in that postmasters of different data directories could simultaneously + * attempt to recycle a given key. We'll waste keys longer in some cases, but + * avoiding the problems of the alternative justifies that loss. + */ +typedef enum +{ + SHMSTATE_ANALYSIS_FAILURE, /* unexpected failure to analyze the ID */ + SHMSTATE_ATTACHED, /* pertinent to DataDir, has attached PIDs */ + SHMSTATE_ENOENT, /* no segment of that ID */ + SHMSTATE_FOREIGN, /* exists, but not pertinent to DataDir */ + SHMSTATE_UNATTACHED /* pertinent to DataDir, no attached PIDs */ +} IpcMemoryState; + unsigned long UsedShmemSegID = 0; void *UsedShmemSegAddr = NULL; @@ -82,8 +102,8 @@ static void *AnonymousShmem = NULL; static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size); static void IpcMemoryDetach(int status, Datum shmaddr); static void IpcMemoryDelete(int status, Datum shmId); -static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key, - IpcMemoryId *shmid); +static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId, + PGShmemHeader **addr); /* @@ -287,11 +307,36 @@ IpcMemoryDelete(int status, Datum shmId) bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) { - IpcMemoryId shmId = (IpcMemoryId) id2; + PGShmemHeader *memAddress; + IpcMemoryState state; + + state = PGSharedMemoryAttach((IpcMemoryId) id2, &memAddress); + if (memAddress && shmdt(memAddress) < 0) + elog(LOG, "shmdt(%p) failed: %m", memAddress); + switch (state) + { + case SHMSTATE_ENOENT: + case SHMSTATE_FOREIGN: + case SHMSTATE_UNATTACHED: + return false; + case SHMSTATE_ANALYSIS_FAILURE: + case SHMSTATE_ATTACHED: + return true; + } + return true; +} + +/* See comment at 
IpcMemoryState. */ +static IpcMemoryState +PGSharedMemoryAttach(IpcMemoryId shmId, + PGShmemHeader **addr) +{ struct shmid_ds shmStat; struct stat statbuf; PGShmemHeader *hdr; + *addr = NULL; + /* * We detect whether a shared memory segment is in use by seeing whether * it (a) exists and (b) has any processes attached to it. @@ -304,15 +349,15 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) * exists. */ if (errno == EINVAL) - return false; + return SHMSTATE_ENOENT; /* - * EACCES implies that the segment belongs to some other userid, which - * means it is not a Postgres shmem segment (or at least, not one that - * is relevant to our data directory). + * EACCES implies we have no read permission, which means it is not a + * Postgres shmem segment (or at least, not one that is relevant to + * our data directory). */ if (errno == EACCES) - return false; + return SHMSTATE_FOREIGN; /* * Some Linux kernel versions (in fact, all of them as of July 2007) @@ -323,7 +368,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) */ #ifdef HAVE_LINUX_EIDRM_BUG if (errno == EIDRM) - return false; + return SHMSTATE_ENOENT; #endif /* @@ -331,25 +376,32 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) * only likely case is EIDRM, which implies that the segment has been * IPC_RMID'd but there are still processes attached to it. */ - return true; + return SHMSTATE_ANALYSIS_FAILURE; } - /* If it has no attached processes, it's not in use */ - if (shmStat.shm_nattch == 0) - return false; - /* * Try to attach to the segment and see if it matches our data directory. * This avoids shmid-conflict problems on machines that are running * several postmasters under the same userid. */ if (stat(DataDir, &statbuf) < 0) - return true; /* if can't stat, be conservative */ - - hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS); + return SHMSTATE_ANALYSIS_FAILURE; /* can't stat; be conservative */ + /* + * Attachment fails if we have no write permission. 
Since that will never + * happen with Postgres IPCProtection, such a failure shows the segment is + * not a Postgres segment. If attachment fails for some other reason, be + * conservative. + */ + hdr = (PGShmemHeader *) shmat(shmId, UsedShmemSegAddr, PG_SHMAT_FLAGS); if (hdr == (PGShmemHeader *) -1) - return true; /* if can't attach, be conservative */ + { + if (errno == EACCES) + return SHMSTATE_FOREIGN; + else + return SHMSTATE_ANALYSIS_FAILURE; + } + *addr = hdr; if (hdr->magic != PGShmemMagic || hdr->device != statbuf.st_dev || @@ -357,16 +409,12 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) { /* * It's either not a Postgres segment, or not one for my data - * directory. In either case it poses no threat. + * directory. */ - shmdt((void *) hdr); - return false; + return SHMSTATE_FOREIGN; } - /* Trouble --- looks a lot like there's still live backends */ - shmdt((void *) hdr); - - return true; + return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED; } #ifdef USE_ANONYMOUS_SHMEM @@ -542,25 +590,21 @@ AnonymousShmemDetach(int status, Datum arg) * standard header. Also, register an on_shmem_exit callback to release * the storage. * - * Dead Postgres segments are recycled if found, but we do not fail upon - * collision with non-Postgres shmem segments. The idea here is to detect and - * re-use keys that may have been assigned by a crashed postmaster or backend. - * - * makePrivate means to always create a new segment, rather than attach to - * or recycle any existing segment. + * Dead Postgres segments pertinent to this DataDir are recycled if found, but + * we do not fail upon collision with foreign shmem segments. The idea here + * is to detect and re-use keys that may have been assigned by a crashed + * postmaster or backend. * * The port number is passed for possible use as a key (for SysV, we use - * it to generate the starting shmem key). In a standalone backend, - * zero will be passed. 
+ * it to generate the starting shmem key). */ PGShmemHeader * -PGSharedMemoryCreate(Size size, bool makePrivate, int port, +PGSharedMemoryCreate(Size size, int port, PGShmemHeader **shim) { IpcMemoryKey NextShmemSegID; void *memAddress; PGShmemHeader *hdr; - IpcMemoryId shmid; struct stat statbuf; Size sysvsize; @@ -591,11 +635,20 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port, /* Make sure PGSharedMemoryAttach doesn't fail without need */ UsedShmemSegAddr = NULL; - /* Loop till we find a free IPC key */ - NextShmemSegID = port * 1000; + /* + * Loop till we find a free IPC key. Trust CreateDataDirLockFile() to + * ensure no more than one postmaster per data directory can enter this + * loop simultaneously. (CreateDataDirLockFile() does not ensure that, + * but prefer fixing it over coping here.) + */ + NextShmemSegID = 1 + port * 1000; - for (NextShmemSegID++;; NextShmemSegID++) + for (;;) { + IpcMemoryId shmid; + PGShmemHeader *oldhdr; + IpcMemoryState state; + /* Try to create new segment */ memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize); if (memAddress) @@ -603,58 +656,71 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port, /* Check shared memory and possibly remove and recreate */ - if (makePrivate) /* a standalone backend shouldn't do this */ - continue; - - if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL) - continue; /* can't attach, not one of mine */ - /* - * If I am not the creator and it belongs to an extant process, - * continue. + * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN. + * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can + * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN. 
+ */ - hdr = (PGShmemHeader *) memAddress; - if (hdr->creatorPID != getpid()) + shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0); + if (shmid < 0) { - if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH) - { - shmdt(memAddress); - continue; /* segment belongs to a live process */ - } + oldhdr = NULL; + state = SHMSTATE_FOREIGN; } + else + state = PGSharedMemoryAttach(shmid, &oldhdr); - /* - * The segment appears to be from a dead Postgres process, or from a - * previous cycle of life in this same process. Zap it, if possible, - * and any associated dynamic shared memory segments, as well. This - * probably shouldn't fail, but if it does, assume the segment belongs - * to someone else after all, and continue quietly. - */ - if (hdr->dsm_control != 0) - dsm_cleanup_using_control_segment(hdr->dsm_control); - shmdt(memAddress); - if (shmctl(shmid, IPC_RMID, NULL) < 0) - continue; + switch (state) + { + case SHMSTATE_ANALYSIS_FAILURE: + case SHMSTATE_ATTACHED: + ereport(FATAL, + (errcode(ERRCODE_LOCK_FILE_EXISTS), + errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use", + (unsigned long) NextShmemSegID, + (unsigned long) shmid), + errhint("Terminate any old server processes associated with data directory \"%s\".", + DataDir))); + break; + case SHMSTATE_ENOENT: - /* - * Now try again to create the segment. - */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize); - if (memAddress) - break; /* successful create and attach */ + /* + * To our surprise, some other process deleted the segment since our last + * InternalIpcMemoryCreate(). Moments earlier, we would have + * seen SHMSTATE_FOREIGN. Try that same ID again. 
+ */ + elog(LOG, + "shared memory block (key %lu, ID %lu) deleted during startup", + (unsigned long) NextShmemSegID, + (unsigned long) shmid); + break; + case SHMSTATE_FOREIGN: + NextShmemSegID++; + break; + case SHMSTATE_UNATTACHED: - /* - * Can only get here if some other process managed to create the same - * shmem key before we did. Let him have that one, loop around to try - * next key. - */ + /* + * The segment pertains to DataDir, and every process that had + * used it has died or detached. Zap it, if possible, and any + * associated dynamic shared memory segments, as well. This + * shouldn't fail, but if it does, assume the segment belongs + * to someone else after all, and try the next candidate. + * Otherwise, try again to create the segment. That may fail + * if some other process creates the same shmem key before we + * do, in which case we'll try the next key. + */ + if (oldhdr->dsm_control != 0) + dsm_cleanup_using_control_segment(oldhdr->dsm_control); + if (shmctl(shmid, IPC_RMID, NULL) < 0) + NextShmemSegID++; + break; + } + + if (oldhdr && shmdt(oldhdr) < 0) + elog(LOG, "shmdt(%p) failed: %m", oldhdr); } - /* - * OK, we created a new segment. Mark it as created by this process. The - * order of assignments here is critical so that another Postgres process - * can't see the header as valid but belonging to an invalid PID! - */ + /* Initialize new segment. 
*/ hdr = (PGShmemHeader *) memAddress; hdr->creatorPID = getpid(); hdr->magic = PGShmemMagic; @@ -714,7 +780,8 @@ void PGSharedMemoryReAttach(void) { IpcMemoryId shmid; - void *hdr; + PGShmemHeader *hdr; + IpcMemoryState state; void *origUsedShmemSegAddr = UsedShmemSegAddr; Assert(UsedShmemSegAddr != NULL); @@ -727,14 +794,18 @@ PGSharedMemoryReAttach(void) #endif elog(DEBUG3, "attaching to %p", UsedShmemSegAddr); - hdr = (void *) PGSharedMemoryAttach((IpcMemoryKey) UsedShmemSegID, &shmid); - if (hdr == NULL) + shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0); + if (shmid < 0) + state = SHMSTATE_FOREIGN; + else + state = PGSharedMemoryAttach(shmid, &hdr); + if (state != SHMSTATE_ATTACHED) elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m", (int) UsedShmemSegID, UsedShmemSegAddr); if (hdr != origUsedShmemSegAddr) elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)", hdr, origUsedShmemSegAddr); - dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control); + dsm_set_control_handle(hdr->dsm_control); UsedShmemSegAddr = hdr; /* probably redundant */ } @@ -810,31 +881,3 @@ PGSharedMemoryDetach(void) } #endif } - - -/* - * Attach to shared memory and make sure it has a Postgres header - * - * Returns attach address if OK, else NULL - */ -static PGShmemHeader * -PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid) -{ - PGShmemHeader *hdr; - - if ((*shmid = shmget(key, sizeof(PGShmemHeader), 0)) < 0) - return NULL; - - hdr = (PGShmemHeader *) shmat(*shmid, UsedShmemSegAddr, PG_SHMAT_FLAGS); - - if (hdr == (PGShmemHeader *) -1) - return NULL; /* failed: must be some other app's */ - - if (hdr->magic != PGShmemMagic) - { - shmdt((void *) hdr); - return NULL; /* segment belongs to a non-Postgres app */ - } - - return hdr; -} diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c index 9df1506835..90442a068f 100644 --- a/src/backend/port/win32_shmem.c +++ 
b/src/backend/port/win32_shmem.c @@ -131,14 +131,9 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) * * Create a shared memory segment of the given size and initialize its * standard header. - * - * makePrivate means to always create a new segment, rather than attach to - * or recycle any existing segment. On win32, we always create a new segment, - * since there is no need for recycling (segments go away automatically - * when the last backend exits) */ PGShmemHeader * -PGSharedMemoryCreate(Size size, bool makePrivate, int port, +PGSharedMemoryCreate(Size size, int port, PGShmemHeader **shim) { void *memAddress; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index c2b91790bd..474fb91d58 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2511,7 +2511,7 @@ reset_shared(int port) * determine IPC keys. This helps ensure that we will clean up dead IPC * objects if the postmaster crashes and is restarted. 
*/ - CreateSharedMemoryAndSemaphores(false, port); + CreateSharedMemoryAndSemaphores(port); } @@ -4868,7 +4868,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); /* And run the backend */ BackendRun(&port); /* does not return */ @@ -4882,7 +4882,7 @@ SubPostmasterMain(int argc, char *argv[]) InitAuxiliaryProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */ } @@ -4895,7 +4895,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */ } @@ -4908,7 +4908,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */ } @@ -4926,7 +4926,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); shmem_slot = atoi(argv[1] + 15); MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot); diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 1d04c5508a..0a2ed2ecd9 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -83,12 +83,9 @@ RequestAddinShmemSpace(Size size) * through the same code as before. (Note that the called routines mostly * check IsUnderPostmaster, rather than EXEC_BACKEND, to detect this case. * This is a bit code-wasteful and could be cleaned up.) 
- * - * If "makePrivate" is true then we only need private memory, not shared - * memory. This is true for a standalone backend, false for a postmaster. */ void -CreateSharedMemoryAndSemaphores(bool makePrivate, int port) +CreateSharedMemoryAndSemaphores(int port) { PGShmemHeader *shim = NULL; @@ -151,7 +148,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) /* * Create the shmem segment */ - seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim); + seghdr = PGSharedMemoryCreate(size, port, &shim); InitShmemAccess(seghdr); @@ -166,12 +163,9 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) { /* * We are reattaching to an existing shared memory segment. This - * should only be reached in the EXEC_BACKEND case, and even then only - * with makePrivate == false. + * should only be reached in the EXEC_BACKEND case. */ -#ifdef EXEC_BACKEND - Assert(!makePrivate); -#else +#ifndef EXEC_BACKEND elog(PANIC, "should be attached to shared memory already"); #endif } diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index afaedf6a95..5d9ccc663d 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -399,9 +399,11 @@ InitCommunication(void) { /* * We're running a postgres bootstrap process or a standalone backend. - * Create private "shmem" and semaphores. + * Though we won't listen on PostPortNumber, use it to select a shmem + * key. This increases the chance of detecting a leftover live + * backend of this DataDir. 
*/ - CreateSharedMemoryAndSemaphores(true, 0); + CreateSharedMemoryAndSemaphores(PostPortNumber); } } diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h index 52aff5bbe5..be915337f7 100644 --- a/src/include/storage/ipc.h +++ b/src/include/storage/ipc.h @@ -75,6 +75,6 @@ extern void on_exit_reset(void); /* ipci.c */ extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook; -extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port); +extern void CreateSharedMemoryAndSemaphores(int port); #endif /* IPC_H */ diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index 0fa45b33cb..163192454d 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -30,7 +30,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */ { int32 magic; /* magic # to identify Postgres segments */ #define PGShmemMagic 679834894 - pid_t creatorPID; /* PID of creating process */ + pid_t creatorPID; /* PID of creating process (set but unread) */ Size totalsize; /* total size of segment */ Size freeoffset; /* offset to first free space */ dsm_handle dsm_control; /* ID of dynamic shared memory control seg */ @@ -65,8 +65,8 @@ extern void PGSharedMemoryReAttach(void); extern void PGSharedMemoryNoReAttach(void); #endif -extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate, - int port, PGShmemHeader **shim); +extern PGShmemHeader *PGSharedMemoryCreate(Size size, int port, + PGShmemHeader **shim); extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2); extern void PGSharedMemoryDetach(void);