From c098509927f9a49ebceb301a2cb6a477ecd4ac3c Mon Sep 17 00:00:00 2001 From: Noah Misch Date: Fri, 12 Apr 2019 22:36:38 -0700 Subject: [PATCH] Consistently test for in-use shared memory. postmaster startup scrutinizes any shared memory segment recorded in postmaster.pid, exiting if that segment matches the current data directory and has an attached process. When the postmaster.pid file was missing, a starting postmaster used weaker checks. Change to use the same checks in both scenarios. This increases the chance of a startup failure, in lieu of data corruption, if the DBA does "kill -9 `head -n1 postmaster.pid` && rm postmaster.pid && pg_ctl -w start". A postmaster will no longer stop if shmat() of an old segment fails with EACCES. A postmaster will no longer recycle segments pertaining to other data directories. That's good for production, but it's bad for integration tests that crash a postmaster and immediately delete its data directory. Such a test now leaks a segment indefinitely. No "make check-world" test does that. win32_shmem.c already avoided all these problems. In 9.6 and later, enhance PostgresNode to facilitate testing. Back-patch to 9.4 (all supported versions). Reviewed (in earlier versions) by Daniel Gustafsson and Kyotaro HORIGUCHI. Discussion: https://postgr.es/m/20190408064141.GA2016666@rfd.leadboat.com --- src/Makefile.global.in | 4 +- src/backend/port/sysv_shmem.c | 269 ++++++++++++++++------------ src/backend/port/win32_shmem.c | 7 +- src/backend/postmaster/postmaster.c | 12 +- src/backend/storage/ipc/ipci.c | 14 +- src/backend/utils/init/postinit.c | 6 +- src/include/storage/ipc.h | 2 +- src/include/storage/pg_shmem.h | 6 +- src/test/perl/PostgresNode.pm | 184 +++++++++++++++---- src/test/recovery/t/017_shm.pl | 200 +++++++++++++++++++++ src/tools/msvc/vcregress.pl | 1 + 11 files changed, 525 insertions(+), 180 deletions(-) create mode 100644 src/test/recovery/t/017_shm.pl diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 6b98d53d06..b9d86acaa9 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -430,13 +430,13 @@ ifeq ($(enable_tap_tests),yes) define prove_installcheck rm -rf '$(CURDIR)'/tmp_check $(MKDIR_P) '$(CURDIR)'/tmp_check -cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl) +cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl) endef define prove_check rm -rf '$(CURDIR)'/tmp_check $(MKDIR_P) '$(CURDIR)'/tmp_check -cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) PGPORT='6$(DEF_PGPORT)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl) +cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) PGPORT='6$(DEF_PGPORT)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl) endef else diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 1511a61bc5..a9d7bf9a14 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -72,6 +72,26 @@ typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */ typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */ +/* + * How does a given IpcMemoryId relate to this PostgreSQL process? + * + * One could recycle unattached segments of different data directories if we + * distinguished that case from other SHMSTATE_FOREIGN cases. Doing so would + * cause us to visit less of the key space, making us less likely to detect a + * SHMSTATE_ATTACHED key. It would also complicate the concurrency analysis, + * in that postmasters of different data directories could simultaneously + * attempt to recycle a given key. We'll waste keys longer in some cases, but + * avoiding the problems of the alternative justifies that loss. + */ +typedef enum +{ + SHMSTATE_ANALYSIS_FAILURE, /* unexpected failure to analyze the ID */ + SHMSTATE_ATTACHED, /* pertinent to DataDir, has attached PIDs */ + SHMSTATE_ENOENT, /* no segment of that ID */ + SHMSTATE_FOREIGN, /* exists, but not pertinent to DataDir */ + SHMSTATE_UNATTACHED /* pertinent to DataDir, no attached PIDs */ +} IpcMemoryState; + unsigned long UsedShmemSegID = 0; void *UsedShmemSegAddr = NULL; @@ -82,8 +102,8 @@ static void *AnonymousShmem = NULL; static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size); static void IpcMemoryDetach(int status, Datum shmaddr); static void IpcMemoryDelete(int status, Datum shmId); -static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key, - IpcMemoryId *shmid); +static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId, + PGShmemHeader **addr); /* @@ -287,11 +307,36 @@ IpcMemoryDelete(int status, Datum shmId) bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) { - IpcMemoryId shmId = (IpcMemoryId) id2; + PGShmemHeader *memAddress; + IpcMemoryState state; + + state = PGSharedMemoryAttach((IpcMemoryId) id2, &memAddress); + if (memAddress && shmdt(memAddress) < 0) + elog(LOG, "shmdt(%p) failed: %m", memAddress); + switch (state) + { + case SHMSTATE_ENOENT: + case SHMSTATE_FOREIGN: + case SHMSTATE_UNATTACHED: + return false; + case SHMSTATE_ANALYSIS_FAILURE: + case SHMSTATE_ATTACHED: + return true; + } + return true; +} + +/* See comment at IpcMemoryState. */ +static IpcMemoryState +PGSharedMemoryAttach(IpcMemoryId shmId, + PGShmemHeader **addr) +{ struct shmid_ds shmStat; struct stat statbuf; PGShmemHeader *hdr; + *addr = NULL; + /* * We detect whether a shared memory segment is in use by seeing whether * it (a) exists and (b) has any processes attached to it. @@ -304,15 +349,15 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) * exists. */ if (errno == EINVAL) - return false; + return SHMSTATE_ENOENT; /* - * EACCES implies that the segment belongs to some other userid, which - * means it is not a Postgres shmem segment (or at least, not one that - * is relevant to our data directory). + * EACCES implies we have no read permission, which means it is not a + * Postgres shmem segment (or at least, not one that is relevant to + * our data directory). */ if (errno == EACCES) - return false; + return SHMSTATE_FOREIGN; /* * Some Linux kernel versions (in fact, all of them as of July 2007) @@ -323,7 +368,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) */ #ifdef HAVE_LINUX_EIDRM_BUG if (errno == EIDRM) - return false; + return SHMSTATE_ENOENT; #endif /* @@ -331,25 +376,32 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) * only likely case is EIDRM, which implies that the segment has been * IPC_RMID'd but there are still processes attached to it. */ - return true; + return SHMSTATE_ANALYSIS_FAILURE; } - /* If it has no attached processes, it's not in use */ - if (shmStat.shm_nattch == 0) - return false; - /* * Try to attach to the segment and see if it matches our data directory. * This avoids shmid-conflict problems on machines that are running * several postmasters under the same userid. */ if (stat(DataDir, &statbuf) < 0) - return true; /* if can't stat, be conservative */ - - hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS); + return SHMSTATE_ANALYSIS_FAILURE; /* can't stat; be conservative */ + /* + * Attachment fails if we have no write permission. Since that will never + * happen with Postgres IPCProtection, such a failure shows the segment is + * not a Postgres segment. If attachment fails for some other reason, be + * conservative. + */ + hdr = (PGShmemHeader *) shmat(shmId, UsedShmemSegAddr, PG_SHMAT_FLAGS); if (hdr == (PGShmemHeader *) -1) - return true; /* if can't attach, be conservative */ + { + if (errno == EACCES) + return SHMSTATE_FOREIGN; + else + return SHMSTATE_ANALYSIS_FAILURE; + } + *addr = hdr; if (hdr->magic != PGShmemMagic || hdr->device != statbuf.st_dev || @@ -357,16 +409,12 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2) { /* * It's either not a Postgres segment, or not one for my data - * directory. In either case it poses no threat. + * directory. */ - shmdt((void *) hdr); - return false; + return SHMSTATE_FOREIGN; } - /* Trouble --- looks a lot like there's still live backends */ - shmdt((void *) hdr); - - return true; + return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED; } #ifdef MAP_HUGETLB @@ -538,25 +586,21 @@ AnonymousShmemDetach(int status, Datum arg) * standard header. Also, register an on_shmem_exit callback to release * the storage. * - * Dead Postgres segments are recycled if found, but we do not fail upon - * collision with non-Postgres shmem segments. The idea here is to detect and - * re-use keys that may have been assigned by a crashed postmaster or backend. - * - * makePrivate means to always create a new segment, rather than attach to - * or recycle any existing segment. + * Dead Postgres segments pertinent to this DataDir are recycled if found, but + * we do not fail upon collision with foreign shmem segments. The idea here + * is to detect and re-use keys that may have been assigned by a crashed + * postmaster or backend. * * The port number is passed for possible use as a key (for SysV, we use - * it to generate the starting shmem key). In a standalone backend, - * zero will be passed. + * it to generate the starting shmem key). */ PGShmemHeader * -PGSharedMemoryCreate(Size size, bool makePrivate, int port, +PGSharedMemoryCreate(Size size, int port, PGShmemHeader **shim) { IpcMemoryKey NextShmemSegID; void *memAddress; PGShmemHeader *hdr; - IpcMemoryId shmid; struct stat statbuf; Size sysvsize; @@ -588,11 +632,20 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port, /* Make sure PGSharedMemoryAttach doesn't fail without need */ UsedShmemSegAddr = NULL; - /* Loop till we find a free IPC key */ - NextShmemSegID = port * 1000; + /* + * Loop till we find a free IPC key. Trust CreateDataDirLockFile() to + * ensure no more than one postmaster per data directory can enter this + * loop simultaneously. (CreateDataDirLockFile() does not ensure that, + * but prefer fixing it over coping here.) + */ + NextShmemSegID = 1 + port * 1000; - for (NextShmemSegID++;; NextShmemSegID++) + for (;;) { + IpcMemoryId shmid; + PGShmemHeader *oldhdr; + IpcMemoryState state; + /* Try to create new segment */ memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize); if (memAddress) @@ -600,58 +653,71 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port, /* Check shared memory and possibly remove and recreate */ - if (makePrivate) /* a standalone backend shouldn't do this */ - continue; - - if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL) - continue; /* can't attach, not one of mine */ - /* - * If I am not the creator and it belongs to an extant process, - * continue. + * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN. + * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can + * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN. */ - hdr = (PGShmemHeader *) memAddress; - if (hdr->creatorPID != getpid()) + shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0); + if (shmid < 0) { - if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH) - { - shmdt(memAddress); - continue; /* segment belongs to a live process */ - } + oldhdr = NULL; + state = SHMSTATE_FOREIGN; } + else + state = PGSharedMemoryAttach(shmid, &oldhdr); - /* - * The segment appears to be from a dead Postgres process, or from a - * previous cycle of life in this same process. Zap it, if possible, - * and any associated dynamic shared memory segments, as well. This - * probably shouldn't fail, but if it does, assume the segment belongs - * to someone else after all, and continue quietly. - */ - if (hdr->dsm_control != 0) - dsm_cleanup_using_control_segment(hdr->dsm_control); - shmdt(memAddress); - if (shmctl(shmid, IPC_RMID, NULL) < 0) - continue; + switch (state) + { + case SHMSTATE_ANALYSIS_FAILURE: + case SHMSTATE_ATTACHED: + ereport(FATAL, + (errcode(ERRCODE_LOCK_FILE_EXISTS), + errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use", + (unsigned long) NextShmemSegID, + (unsigned long) shmid), + errhint("Terminate any old server processes associated with data directory \"%s\".", + DataDir))); + break; + case SHMSTATE_ENOENT: - /* - * Now try again to create the segment. - */ - memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize); - if (memAddress) - break; /* successful create and attach */ + /* + * To our surprise, some other process deleted since our last + * InternalIpcMemoryCreate(). Moments earlier, we would have + * seen SHMSTATE_FOREIGN. Try that same ID again. + */ + elog(LOG, + "shared memory block (key %lu, ID %lu) deleted during startup", + (unsigned long) NextShmemSegID, + (unsigned long) shmid); + break; + case SHMSTATE_FOREIGN: + NextShmemSegID++; + break; + case SHMSTATE_UNATTACHED: - /* - * Can only get here if some other process managed to create the same - * shmem key before we did. Let him have that one, loop around to try - * next key. - */ + /* + * The segment pertains to DataDir, and every process that had + * used it has died or detached. Zap it, if possible, and any + * associated dynamic shared memory segments, as well. This + * shouldn't fail, but if it does, assume the segment belongs + * to someone else after all, and try the next candidate. + * Otherwise, try again to create the segment. That may fail + * if some other process creates the same shmem key before we + * do, in which case we'll try the next key. + */ + if (oldhdr->dsm_control != 0) + dsm_cleanup_using_control_segment(oldhdr->dsm_control); + if (shmctl(shmid, IPC_RMID, NULL) < 0) + NextShmemSegID++; + break; + } + + if (oldhdr && shmdt(oldhdr) < 0) + elog(LOG, "shmdt(%p) failed: %m", oldhdr); } - /* - * OK, we created a new segment. Mark it as created by this process. The - * order of assignments here is critical so that another Postgres process - * can't see the header as valid but belonging to an invalid PID! - */ + /* Initialize new segment. */ hdr = (PGShmemHeader *) memAddress; hdr->creatorPID = getpid(); hdr->magic = PGShmemMagic; @@ -707,7 +773,8 @@ void PGSharedMemoryReAttach(void) { IpcMemoryId shmid; - void *hdr; + PGShmemHeader *hdr; + IpcMemoryState state; void *origUsedShmemSegAddr = UsedShmemSegAddr; Assert(UsedShmemSegAddr != NULL); @@ -720,14 +787,18 @@ PGSharedMemoryReAttach(void) #endif elog(DEBUG3, "attaching to %p", UsedShmemSegAddr); - hdr = (void *) PGSharedMemoryAttach((IpcMemoryKey) UsedShmemSegID, &shmid); - if (hdr == NULL) + shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0); + if (shmid < 0) + state = SHMSTATE_FOREIGN; + else + state = PGSharedMemoryAttach(shmid, &hdr); + if (state != SHMSTATE_ATTACHED) elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m", (int) UsedShmemSegID, UsedShmemSegAddr); if (hdr != origUsedShmemSegAddr) elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)", hdr, origUsedShmemSegAddr); - dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control); + dsm_set_control_handle(hdr->dsm_control); UsedShmemSegAddr = hdr; /* probably redundant */ } @@ -801,31 +872,3 @@ PGSharedMemoryDetach(void) AnonymousShmem = NULL; } } - - -/* - * Attach to shared memory and make sure it has a Postgres header - * - * Returns attach address if OK, else NULL - */ -static PGShmemHeader * -PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid) -{ - PGShmemHeader *hdr; - - if ((*shmid = shmget(key, sizeof(PGShmemHeader), 0)) < 0) - return NULL; - - hdr = (PGShmemHeader *) shmat(*shmid, UsedShmemSegAddr, PG_SHMAT_FLAGS); - - if (hdr == (PGShmemHeader *) -1) - return NULL; /* failed: must be some other app's */ - - if (hdr->magic != PGShmemMagic) - { - shmdt((void *) hdr); - return NULL; /* segment belongs to a non-Postgres app */ - } - - return hdr; -} diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c index eed17c5e21..ccd7b6b5e3 100644 --- a/src/backend/port/win32_shmem.c +++ b/src/backend/port/win32_shmem.c @@ -192,14 +192,9 @@ EnableLockPagesPrivilege(int elevel) * * Create a shared memory segment of the given size and initialize its * standard header. - * - * makePrivate means to always create a new segment, rather than attach to - * or recycle any existing segment. On win32, we always create a new segment, - * since there is no need for recycling (segments go away automatically - * when the last backend exits) */ PGShmemHeader * -PGSharedMemoryCreate(Size size, bool makePrivate, int port, +PGSharedMemoryCreate(Size size, int port, PGShmemHeader **shim) { void *memAddress; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index ad87b7c9ee..60d29a2089 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2605,7 +2605,7 @@ reset_shared(int port) * determine IPC keys. This helps ensure that we will clean up dead IPC * objects if the postmaster crashes and is restarted. */ - CreateSharedMemoryAndSemaphores(false, port); + CreateSharedMemoryAndSemaphores(port); } @@ -4946,7 +4946,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); /* And run the backend */ BackendRun(&port); /* does not return */ @@ -4960,7 +4960,7 @@ SubPostmasterMain(int argc, char *argv[]) InitAuxiliaryProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */ } @@ -4973,7 +4973,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */ } @@ -4986,7 +4986,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */ } @@ -5004,7 +5004,7 @@ SubPostmasterMain(int argc, char *argv[]) InitProcess(); /* Attach process to shared data structures */ - CreateSharedMemoryAndSemaphores(false, 0); + CreateSharedMemoryAndSemaphores(0); /* Fetch MyBgworkerEntry from shared memory */ shmem_slot = atoi(argv[1] + 15); diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 5965d3620f..d7d733530f 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -89,12 +89,9 @@ RequestAddinShmemSpace(Size size) * through the same code as before. (Note that the called routines mostly * check IsUnderPostmaster, rather than EXEC_BACKEND, to detect this case. * This is a bit code-wasteful and could be cleaned up.) - * - * If "makePrivate" is true then we only need private memory, not shared - * memory. This is true for a standalone backend, false for a postmaster. */ void -CreateSharedMemoryAndSemaphores(bool makePrivate, int port) +CreateSharedMemoryAndSemaphores(int port) { PGShmemHeader *shim = NULL; @@ -166,7 +163,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) /* * Create the shmem segment */ - seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim); + seghdr = PGSharedMemoryCreate(size, port, &shim); InitShmemAccess(seghdr); @@ -187,12 +184,9 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) { /* * We are reattaching to an existing shared memory segment. This - * should only be reached in the EXEC_BACKEND case, and even then only - * with makePrivate == false. + * should only be reached in the EXEC_BACKEND case. */ -#ifdef EXEC_BACKEND - Assert(!makePrivate); -#else +#ifndef EXEC_BACKEND elog(PANIC, "should be attached to shared memory already"); #endif } diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 1c2a99c9c8..e9f72b5069 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -446,9 +446,11 @@ InitCommunication(void) { /* * We're running a postgres bootstrap process or a standalone backend. - * Create private "shmem" and semaphores. + * Though we won't listen on PostPortNumber, use it to select a shmem + * key. This increases the chance of detecting a leftover live + * backend of this DataDir. */ - CreateSharedMemoryAndSemaphores(true, 0); + CreateSharedMemoryAndSemaphores(PostPortNumber); } } diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h index 0660252faf..e9b243fe7d 100644 --- a/src/include/storage/ipc.h +++ b/src/include/storage/ipc.h @@ -76,6 +76,6 @@ extern void on_exit_reset(void); /* ipci.c */ extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook; -extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port); +extern void CreateSharedMemoryAndSemaphores(int port); #endif /* IPC_H */ diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index 4e1e13dcb1..0535d4545c 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -30,7 +30,7 @@ typedef struct PGShmemHeader /* standard header for all Postgres shmem */ { int32 magic; /* magic # to identify Postgres segments */ #define PGShmemMagic 679834894 - pid_t creatorPID; /* PID of creating process */ + pid_t creatorPID; /* PID of creating process (set but unread) */ Size totalsize; /* total size of segment */ Size freeoffset; /* offset to first free space */ dsm_handle dsm_control; /* ID of dynamic shared memory control seg */ @@ -82,8 +82,8 @@ extern void PGSharedMemoryReAttach(void); extern void PGSharedMemoryNoReAttach(void); #endif -extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate, - int port, PGShmemHeader **shim); +extern PGShmemHeader *PGSharedMemoryCreate(Size size, int port, + PGShmemHeader **shim); extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2); extern void PGSharedMemoryDetach(void); diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 81deed9843..f6570639e0 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -104,7 +104,8 @@ our @EXPORT = qw( get_new_node ); -our ($test_localhost, $test_pghost, $last_port_assigned, @all_nodes, $died); +our ($use_tcp, $test_localhost, $test_pghost, $last_host_assigned, + $last_port_assigned, @all_nodes, $died); # Windows path to virtual file system root @@ -118,13 +119,14 @@ if ($Config{osname} eq 'msys') INIT { - # PGHOST is set once and for all through a single series of tests when - # this module is loaded. - $test_localhost = "127.0.0.1"; - $test_pghost = - $TestLib::windows_os ? $test_localhost : TestLib::tempdir_short; - $ENV{PGHOST} = $test_pghost; - $ENV{PGDATABASE} = 'postgres'; + # Set PGHOST for backward compatibility. This doesn't work for own_host + # nodes, so prefer to not rely on this when writing new tests. + $use_tcp = $TestLib::windows_os; + $test_localhost = "127.0.0.1"; + $last_host_assigned = 1; + $test_pghost = $use_tcp ? $test_localhost : TestLib::tempdir_short; + $ENV{PGHOST} = $test_pghost; + $ENV{PGDATABASE} = 'postgres'; # Tracking of last port value assigned to accelerate free port lookup. $last_port_assigned = int(rand() * 16384) + 49152; @@ -155,7 +157,9 @@ sub new _host => $pghost, _basedir => "$TestLib::tmp_check/t_${testname}_${name}_data", _name => $name, - _logfile => "$TestLib::log_path/${testname}_${name}.log" + _logfile_generation => 0, + _logfile_base => "$TestLib::log_path/${testname}_${name}", + _logfile => "$TestLib::log_path/${testname}_${name}.log" }; bless $self, $class; @@ -473,8 +477,9 @@ sub init print $conf "max_wal_senders = 0\n"; } - if ($TestLib::windows_os) + if ($use_tcp) { + print $conf "unix_socket_directories = ''\n"; print $conf "listen_addresses = '$host'\n"; } else @@ -536,12 +541,11 @@ sub backup { my ($self, $backup_name) = @_; my $backup_path = $self->backup_dir . '/' . $backup_name; - my $port = $self->port; my $name = $self->name; print "# Taking pg_basebackup $backup_name from node \"$name\"\n"; - TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-p', $port, - '--no-sync'); + TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-h', + $self->host, '-p', $self->port, '--no-sync'); print "# Backup finished\n"; return; } @@ -651,6 +655,7 @@ sub init_from_backup { my ($self, $root_node, $backup_name, %params) = @_; my $backup_path = $root_node->backup_dir . '/' . $backup_name; + my $host = $self->host; my $port = $self->port; my $node_name = $self->name; my $root_name = $root_node->name; @@ -677,6 +682,15 @@ sub init_from_backup qq( port = $port )); + if ($use_tcp) + { + $self->append_conf('postgresql.conf', "listen_addresses = '$host'"); + } + else + { + $self->append_conf('postgresql.conf', + "unix_socket_directories = '$host'"); + } $self->enable_streaming($root_node) if $params{has_streaming}; $self->enable_restoring($root_node) if $params{has_restoring}; return; @@ -684,17 +698,45 @@ port = $port =pod -=item $node->start() +=item $node->rotate_logfile() + +Switch to a new PostgreSQL log file. This does not alter any running +PostgreSQL process. Subsequent method calls, including pg_ctl invocations, +will use the new name. Return the new name. + +=cut + +sub rotate_logfile +{ + my ($self) = @_; + $self->{_logfile} = sprintf('%s_%d.log', + $self->{_logfile_base}, + ++$self->{_logfile_generation}); + return $self->{_logfile}; +} + +=pod + +=item $node->start(%params) => success_or_failure Wrapper for pg_ctl start Start the node and wait until it is ready to accept connections. +=over + +=item fail_ok => 1 + +By default, failure terminates the entire F invocation. If given, +instead return a true or false value to indicate success or failure. + +=back + =cut sub start { - my ($self) = @_; + my ($self, %params) = @_; my $port = $self->port; my $pgdata = $self->data_dir; my $name = $self->name; @@ -721,10 +763,34 @@ sub start { print "# pg_ctl start failed; logfile:\n"; print TestLib::slurp_file($self->logfile); - BAIL_OUT("pg_ctl start failed"); + BAIL_OUT("pg_ctl start failed") unless $params{fail_ok}; + return 0; } $self->_update_pid(1); + return 1; +} + +=pod + +=item $node->kill9() + +Send SIGKILL (signal 9) to the postmaster. + +Note: if the node is already known stopped, this does nothing. +However, if we think it's running and it's not, it's important for +this to fail. Otherwise, tests might fail to detect server crashes. + +=cut + +sub kill9 +{ + my ($self) = @_; + my $name = $self->name; + return unless defined $self->{_pid}; + print "### Killing node \"$name\" using signal 9\n"; + kill(9, $self->{_pid}) or BAIL_OUT("kill(9, $self->{_pid}) failed"); + $self->{_pid} = undef; return; } @@ -965,7 +1031,7 @@ sub _update_pid =pod -=item PostgresNode->get_new_node(node_name) +=item PostgresNode->get_new_node(node_name, %params) Build a new object of class C (or of a subclass, if you have one), assigning a free port number. Remembers the node, to prevent its port @@ -974,6 +1040,22 @@ shut down when the test script exits. You should generally use this instead of C. +=over + +=item port => [1,65535] + +By default, this function assigns a port number to each node. Specify this to +force a particular port number. The caller is responsible for evaluating +potential conflicts and privilege requirements. + +=item own_host => 1 + +By default, all nodes use the same PGHOST value. If specified, generate a +PGHOST specific to this node. This allows multiple nodes to use the same +port. + +=back + For backwards compatibility, it is also exported as a standalone function, which can only create objects of class C. @@ -982,10 +1064,11 @@ which can only create objects of class C. sub get_new_node { my $class = 'PostgresNode'; - $class = shift if 1 < scalar @_; - my $name = shift; - my $found = 0; - my $port = $last_port_assigned; + $class = shift if scalar(@_) % 2 != 1; + my ($name, %params) = @_; + my $port_is_forced = defined $params{port}; + my $found = $port_is_forced; + my $port = $port_is_forced ? $params{port} : $last_port_assigned; while ($found == 0) { @@ -1002,13 +1085,15 @@ sub get_new_node $found = 0 if ($node->port == $port); } - # Check to see if anything else is listening on this TCP port. - # This is *necessary* on Windows, and seems like a good idea - # on Unixen as well, even though we don't ask the postmaster - # to open a TCP port on Unix. + # Check to see if anything else is listening on this TCP port. Accept + # only ports available for all possible listen_addresses values, so + # the caller can harness this port for the widest range of purposes. + # This is *necessary* on Windows, and seems like a good idea on Unixen + # as well, even though we don't ask the postmaster to open a TCP port + # on Unix. if ($found == 1) { - my $iaddr = inet_aton($test_localhost); + my $iaddr = inet_aton('0.0.0.0'); my $paddr = sockaddr_in($port, $iaddr); my $proto = getprotobyname("tcp"); @@ -1024,16 +1109,35 @@ sub get_new_node } } - print "# Found free port $port\n"; + print "# Found port $port\n"; + + # Select a host. + my $host = $test_pghost; + if ($params{own_host}) + { + if ($use_tcp) + { + # This assumes $use_tcp platforms treat every address in + # 127.0.0.1/24, not just 127.0.0.1, as a usable loopback. + $last_host_assigned++; + $last_host_assigned > 254 and BAIL_OUT("too many own_host nodes"); + $host = '127.0.0.' . $last_host_assigned; + } + else + { + $host = "$test_pghost/$name"; # Assume $name =~ /^[-_a-zA-Z0-9]+$/ + mkdir $host; + } + } # Lock port number found by creating a new node - my $node = $class->new($name, $test_pghost, $port); + my $node = $class->new($name, $host, $port); # Add node to list of nodes push(@all_nodes, $node); # And update port for next time - $last_port_assigned = $port; + $port_is_forced or $last_port_assigned = $port; return $node; } @@ -1424,9 +1528,8 @@ $stderr); =item $node->command_ok(...) -Runs a shell command like TestLib::command_ok, but with PGPORT -set so that the command will default to connecting to this -PostgresNode. +Runs a shell command like TestLib::command_ok, but with PGHOST and PGPORT set +so that the command will default to connecting to this PostgresNode. =cut @@ -1436,6 +1539,7 @@ sub command_ok my $self = shift; + local $ENV{PGHOST} = $self->host; local $ENV{PGPORT} = $self->port; TestLib::command_ok(@_); @@ -1446,7 +1550,7 @@ sub command_ok =item $node->command_fails(...) -TestLib::command_fails with our PGPORT. See command_ok(...) +TestLib::command_fails with our connection parameters. See command_ok(...) =cut @@ -1456,6 +1560,7 @@ sub command_fails my $self = shift; + local $ENV{PGHOST} = $self->host; local $ENV{PGPORT} = $self->port; TestLib::command_fails(@_); @@ -1466,7 +1571,7 @@ sub command_fails =item $node->command_like(...) -TestLib::command_like with our PGPORT. See command_ok(...) +TestLib::command_like with our connection parameters. See command_ok(...) =cut @@ -1476,6 +1581,7 @@ sub command_like my $self = shift; + local $ENV{PGHOST} = $self->host; local $ENV{PGPORT} = $self->port; TestLib::command_like(@_); @@ -1486,7 +1592,8 @@ sub command_like =item $node->command_checks_all(...) -TestLib::command_checks_all with our PGPORT. See command_ok(...) +TestLib::command_checks_all with our connection parameters. See +command_ok(...) =cut @@ -1496,6 +1603,7 @@ sub command_checks_all my $self = shift; + local $ENV{PGHOST} = $self->host; local $ENV{PGPORT} = $self->port; TestLib::command_checks_all(@_); @@ -1520,6 +1628,7 @@ sub issues_sql_like my ($self, $cmd, $expected_sql, $test_name) = @_; + local $ENV{PGHOST} = $self->host; local $ENV{PGPORT} = $self->port; truncate $self->logfile, 0; @@ -1534,8 +1643,8 @@ sub issues_sql_like =item $node->run_log(...) -Runs a shell command like TestLib::run_log, but with PGPORT set so -that the command will default to connecting to this PostgresNode. +Runs a shell command like TestLib::run_log, but with connection parameters set +so that the command will default to connecting to this PostgresNode. =cut @@ -1543,6 +1652,7 @@ sub run_log { my $self = shift; + local $ENV{PGHOST} = $self->host; local $ENV{PGPORT} = $self->port; TestLib::run_log(@_); diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl new file mode 100644 index 0000000000..3cbe938ddd --- /dev/null +++ b/src/test/recovery/t/017_shm.pl @@ -0,0 +1,200 @@ +# +# Tests of pg_shmem.h functions +# +use strict; +use warnings; +use IPC::Run 'run'; +use PostgresNode; +use Test::More; +use TestLib; +use Time::HiRes qw(usleep); + +plan tests => 5; + +my $tempdir = TestLib::tempdir; +my $port; + +# Log "ipcs" diffs on a best-effort basis, swallowing any error. +my $ipcs_before = "$tempdir/ipcs_before"; +eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; }; + +sub log_ipcs +{ + eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] }; + return; +} + +# These tests need a $port such that nothing creates or removes a segment in +# $port's IpcMemoryKey range while this test script runs. While there's no +# way to ensure that in general, we do ensure that if PostgreSQL tests are the +# only actors. With TCP, the first get_new_node picks a port number. With +# Unix sockets, use a postmaster, $port_holder, to represent a key space +# reservation. $port_holder holds a reservation on the key space of port +# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's +# key space. If multiple copies of this test script run concurrently, they +# will pick different ports. $port_holder postmasters use odd-numbered ports, +# and tests use even-numbered ports. In the absence of collisions from other +# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts +# with key 0x7d002 (512002). +my $port_holder; +if (!$PostgresNode::use_tcp) +{ + my $lock_port; + for ($lock_port = 511; $lock_port < 711; $lock_port += 2) + { + $port_holder = PostgresNode->get_new_node( + "port${lock_port}_holder", + port => $lock_port, + own_host => 1); + $port_holder->init; + $port_holder->append_conf('postgresql.conf', 'max_connections = 5'); + $port_holder->start; + # Match the AddToDataDirLockFile() call in sysv_shmem.c. Assume all + # systems not using sysv_shmem.c do use TCP. + my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000); + last + if slurp_file($port_holder->data_dir . '/postmaster.pid') =~ + /^$shmem_key_line_prefix/m; + $port_holder->stop; + } + $port = $lock_port + 1; +} + +# Node setup. +sub init_start +{ + my $name = shift; + my $ret = PostgresNode->get_new_node($name, port => $port, own_host => 1); + defined($port) or $port = $ret->port; # same port for all nodes + $ret->init; + # Limit semaphore consumption, since we run several nodes concurrently. + $ret->append_conf('postgresql.conf', 'max_connections = 5'); + $ret->start; + log_ipcs(); + return $ret; +} +my $gnat = init_start 'gnat'; +my $flea = init_start 'flea'; + +# Upon postmaster death, postmaster children exit automatically. +$gnat->kill9; +log_ipcs(); +$flea->restart; # flea ignores the shm key gnat abandoned. +log_ipcs(); +poll_start($gnat); # gnat recycles its former shm key. +log_ipcs(); + +# After clean shutdown, the nodes swap shm keys. +$gnat->stop; +$flea->restart; +log_ipcs(); +$gnat->start; +log_ipcs(); + +# Scenarios involving no postmaster.pid, dead postmaster, and a live backend. +# Use a regress.c function to emulate the responsiveness of a backend working +# through a CPU-intensive task. +$gnat->safe_psql('postgres', <connstr('postgres'), + '-c', $slow_query + ], + '<', + \undef, + '>', + \$stdout, + '2>', + \$stderr, + IPC::Run::timeout(900)); # five times the poll_query_until timeout +ok( $gnat->poll_query_until( + 'postgres', + "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'), + 'slow query started'); +my $slow_pid = $gnat->safe_psql('postgres', + "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'"); +$gnat->kill9; +unlink($gnat->data_dir . '/postmaster.pid'); +$gnat->rotate_logfile; # on Windows, can't open old log for writing +log_ipcs(); +# Reject ordinary startup. Retry for the same reasons poll_start() does. +my $pre_existing_msg = qr/pre-existing shared memory block/; +{ + my $max_attempts = 180 * 10; # Retry every 0.1s for at least 180s. + my $attempts = 0; + while ($attempts < $max_attempts) + { + last + if $gnat->start(fail_ok => 1) + || slurp_file($gnat->logfile) =~ $pre_existing_msg; + usleep(100_000); + $attempts++; + } +} +like(slurp_file($gnat->logfile), + $pre_existing_msg, 'detected live backend via shared memory'); +# Reject single-user startup. +my $single_stderr; +ok( !run_log( + [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ], + '<', \('SELECT 1 + 1'), '2>', \$single_stderr), + 'live query blocks --single'); +print STDERR $single_stderr; +like($single_stderr, $pre_existing_msg, + 'single-user mode detected live backend via shared memory'); +log_ipcs(); +# Fail to reject startup if shm key N has become available and we crash while +# using key N+1. This is unwanted, but expected. Windows is immune, because +# its GetSharedMemName() use DataDir strings, not numeric keys. +$flea->stop; # release first key +is( $gnat->start(fail_ok => 1), + $TestLib::windows_os ? 0 : 1, + 'key turnover fools only sysv_shmem.c'); +$gnat->stop; # release first key (no-op on $TestLib::windows_os) +$flea->start; # grab first key +# cleanup +TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid); +$slow_client->finish; # client has detected backend termination +log_ipcs(); +poll_start($gnat); # recycle second key + +$gnat->stop; +$flea->stop; +$port_holder->stop if $port_holder; +log_ipcs(); + + +# We may need retries to start a new postmaster. Causes: +# - kernel is slow to deliver SIGKILL +# - postmaster parent is slow to waitpid() +# - postmaster child is slow to exit in response to SIGQUIT +# - postmaster child is slow to exit after postmaster death +sub poll_start +{ + my ($node) = @_; + + my $max_attempts = 180 * 10; + my $attempts = 0; + + while ($attempts < $max_attempts) + { + $node->start(fail_ok => 1) && return 1; + + # Wait 0.1 second before retrying. + usleep(100_000); + + $attempts++; + } + + # No success within 180 seconds. Try one last time without fail_ok, which + # will BAIL_OUT unless it succeeds. + $node->start && return 1; + return 0; +} diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl index d0e8c6849b..2aa29ab008 100644 --- a/src/tools/msvc/vcregress.pl +++ b/src/tools/msvc/vcregress.pl @@ -205,6 +205,7 @@ sub tap_check local %ENV = %ENV; $ENV{PERL5LIB} = "$topdir/src/test/perl;$ENV{PERL5LIB}"; $ENV{PG_REGRESS} = "$topdir/$Config/pg_regress/pg_regress"; + $ENV{REGRESS_SHLIB} = "$topdir/src/test/regress/regress.dll"; $ENV{TESTDIR} = "$dir"; -- 2.40.0