]> granicus.if.org Git - postgresql/commitdiff
Consistently test for in-use shared memory.
authorNoah Misch <noah@leadboat.com>
Sat, 13 Apr 2019 05:36:38 +0000 (22:36 -0700)
committerNoah Misch <noah@leadboat.com>
Sat, 13 Apr 2019 05:36:38 +0000 (22:36 -0700)
postmaster startup scrutinizes any shared memory segment recorded in
postmaster.pid, exiting if that segment matches the current data
directory and has an attached process.  When the postmaster.pid file was
missing, a starting postmaster used weaker checks.  Change to use the
same checks in both scenarios.  This increases the chance of a startup
failure, in lieu of data corruption, if the DBA does "kill -9 `head -n1
postmaster.pid` && rm postmaster.pid && pg_ctl -w start".  A postmaster
will no longer stop if shmat() of an old segment fails with EACCES.  A
postmaster will no longer recycle segments pertaining to other data
directories.  That's good for production, but it's bad for integration
tests that crash a postmaster and immediately delete its data directory.
Such a test now leaks a segment indefinitely.  No "make check-world"
test does that.  win32_shmem.c already avoided all these problems.  In
9.6 and later, enhance PostgresNode to facilitate testing.  Back-patch
to 9.4 (all supported versions).

Reviewed (in earlier versions) by Daniel Gustafsson and Kyotaro HORIGUCHI.

Discussion: https://postgr.es/m/20190408064141.GA2016666@rfd.leadboat.com

src/Makefile.global.in
src/backend/port/sysv_shmem.c
src/backend/port/win32_shmem.c
src/backend/postmaster/postmaster.c
src/backend/storage/ipc/ipci.c
src/backend/utils/init/postinit.c
src/include/storage/ipc.h
src/include/storage/pg_shmem.h
src/test/perl/PostgresNode.pm
src/test/recovery/t/017_shm.pl [new file with mode: 0644]
src/tools/msvc/vcregress.pl

index 6b98d53d06dc2447ff7ebc861df7cb6efe4ae8f2..b9d86acaa948605d7630182dfa66bf80490f6300 100644 (file)
@@ -430,13 +430,13 @@ ifeq ($(enable_tap_tests),yes)
 define prove_installcheck
 rm -rf '$(CURDIR)'/tmp_check
 $(MKDIR_P) '$(CURDIR)'/tmp_check
-cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
+cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
 endef
 
 define prove_check
 rm -rf '$(CURDIR)'/tmp_check
 $(MKDIR_P) '$(CURDIR)'/tmp_check
-cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) PGPORT='6$(DEF_PGPORT)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
+cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) PGPORT='6$(DEF_PGPORT)' PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' $(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
 endef
 
 else
index 1511a61bc569dc2a44fd520b440a902ab86b185d..a9d7bf9a146fb82579f2c713941712e9c54e2c28 100644 (file)
 typedef key_t IpcMemoryKey;            /* shared memory key passed to shmget(2) */
 typedef int IpcMemoryId;               /* shared memory ID returned by shmget(2) */
 
+/*
+ * How does a given IpcMemoryId relate to this PostgreSQL process?
+ *
+ * One could recycle unattached segments of different data directories if we
+ * distinguished that case from other SHMSTATE_FOREIGN cases.  Doing so would
+ * cause us to visit less of the key space, making us less likely to detect a
+ * SHMSTATE_ATTACHED key.  It would also complicate the concurrency analysis,
+ * in that postmasters of different data directories could simultaneously
+ * attempt to recycle a given key.  We'll waste keys longer in some cases, but
+ * avoiding the problems of the alternative justifies that loss.
+ */
+typedef enum
+{
+       SHMSTATE_ANALYSIS_FAILURE,      /* unexpected failure to analyze the ID */
+       SHMSTATE_ATTACHED,                      /* pertinent to DataDir, has attached PIDs */
+       SHMSTATE_ENOENT,                        /* no segment of that ID */
+       SHMSTATE_FOREIGN,                       /* exists, but not pertinent to DataDir */
+       SHMSTATE_UNATTACHED                     /* pertinent to DataDir, no attached PIDs */
+} IpcMemoryState;
+
 
 unsigned long UsedShmemSegID = 0;
 void      *UsedShmemSegAddr = NULL;
@@ -82,8 +102,8 @@ static void *AnonymousShmem = NULL;
 static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
 static void IpcMemoryDetach(int status, Datum shmaddr);
 static void IpcMemoryDelete(int status, Datum shmId);
-static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
-                                        IpcMemoryId *shmid);
+static IpcMemoryState PGSharedMemoryAttach(IpcMemoryId shmId,
+                                        PGShmemHeader **addr);
 
 
 /*
@@ -287,11 +307,36 @@ IpcMemoryDelete(int status, Datum shmId)
 bool
 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
 {
-       IpcMemoryId shmId = (IpcMemoryId) id2;
+       PGShmemHeader *memAddress;
+       IpcMemoryState state;
+
+       state = PGSharedMemoryAttach((IpcMemoryId) id2, &memAddress);
+       if (memAddress && shmdt(memAddress) < 0)
+               elog(LOG, "shmdt(%p) failed: %m", memAddress);
+       switch (state)
+       {
+               case SHMSTATE_ENOENT:
+               case SHMSTATE_FOREIGN:
+               case SHMSTATE_UNATTACHED:
+                       return false;
+               case SHMSTATE_ANALYSIS_FAILURE:
+               case SHMSTATE_ATTACHED:
+                       return true;
+       }
+       return true;
+}
+
+/* See comment at IpcMemoryState. */
+static IpcMemoryState
+PGSharedMemoryAttach(IpcMemoryId shmId,
+                                        PGShmemHeader **addr)
+{
        struct shmid_ds shmStat;
        struct stat statbuf;
        PGShmemHeader *hdr;
 
+       *addr = NULL;
+
        /*
         * We detect whether a shared memory segment is in use by seeing whether
         * it (a) exists and (b) has any processes attached to it.
@@ -304,15 +349,15 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
                 * exists.
                 */
                if (errno == EINVAL)
-                       return false;
+                       return SHMSTATE_ENOENT;
 
                /*
-                * EACCES implies that the segment belongs to some other userid, which
-                * means it is not a Postgres shmem segment (or at least, not one that
-                * is relevant to our data directory).
+                * EACCES implies we have no read permission, which means it is not a
+                * Postgres shmem segment (or at least, not one that is relevant to
+                * our data directory).
                 */
                if (errno == EACCES)
-                       return false;
+                       return SHMSTATE_FOREIGN;
 
                /*
                 * Some Linux kernel versions (in fact, all of them as of July 2007)
@@ -323,7 +368,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
                 */
 #ifdef HAVE_LINUX_EIDRM_BUG
                if (errno == EIDRM)
-                       return false;
+                       return SHMSTATE_ENOENT;
 #endif
 
                /*
@@ -331,25 +376,32 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
                 * only likely case is EIDRM, which implies that the segment has been
                 * IPC_RMID'd but there are still processes attached to it.
                 */
-               return true;
+               return SHMSTATE_ANALYSIS_FAILURE;
        }
 
-       /* If it has no attached processes, it's not in use */
-       if (shmStat.shm_nattch == 0)
-               return false;
-
        /*
         * Try to attach to the segment and see if it matches our data directory.
         * This avoids shmid-conflict problems on machines that are running
         * several postmasters under the same userid.
         */
        if (stat(DataDir, &statbuf) < 0)
-               return true;                    /* if can't stat, be conservative */
-
-       hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS);
+               return SHMSTATE_ANALYSIS_FAILURE;       /* can't stat; be conservative */
 
+       /*
+        * Attachment fails if we have no write permission.  Since that will never
+        * happen with Postgres IPCProtection, such a failure shows the segment is
+        * not a Postgres segment.  If attachment fails for some other reason, be
+        * conservative.
+        */
+       hdr = (PGShmemHeader *) shmat(shmId, UsedShmemSegAddr, PG_SHMAT_FLAGS);
        if (hdr == (PGShmemHeader *) -1)
-               return true;                    /* if can't attach, be conservative */
+       {
+               if (errno == EACCES)
+                       return SHMSTATE_FOREIGN;
+               else
+                       return SHMSTATE_ANALYSIS_FAILURE;
+       }
+       *addr = hdr;
 
        if (hdr->magic != PGShmemMagic ||
                hdr->device != statbuf.st_dev ||
@@ -357,16 +409,12 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
        {
                /*
                 * It's either not a Postgres segment, or not one for my data
-                * directory.  In either case it poses no threat.
+                * directory.
                 */
-               shmdt((void *) hdr);
-               return false;
+               return SHMSTATE_FOREIGN;
        }
 
-       /* Trouble --- looks a lot like there's still live backends */
-       shmdt((void *) hdr);
-
-       return true;
+       return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : SHMSTATE_ATTACHED;
 }
 
 #ifdef MAP_HUGETLB
@@ -538,25 +586,21 @@ AnonymousShmemDetach(int status, Datum arg)
  * standard header.  Also, register an on_shmem_exit callback to release
  * the storage.
  *
- * Dead Postgres segments are recycled if found, but we do not fail upon
- * collision with non-Postgres shmem segments.  The idea here is to detect and
- * re-use keys that may have been assigned by a crashed postmaster or backend.
- *
- * makePrivate means to always create a new segment, rather than attach to
- * or recycle any existing segment.
+ * Dead Postgres segments pertinent to this DataDir are recycled if found, but
+ * we do not fail upon collision with foreign shmem segments.  The idea here
+ * is to detect and re-use keys that may have been assigned by a crashed
+ * postmaster or backend.
  *
  * The port number is passed for possible use as a key (for SysV, we use
- * it to generate the starting shmem key).  In a standalone backend,
- * zero will be passed.
+ * it to generate the starting shmem key).
  */
 PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+PGSharedMemoryCreate(Size size, int port,
                                         PGShmemHeader **shim)
 {
        IpcMemoryKey NextShmemSegID;
        void       *memAddress;
        PGShmemHeader *hdr;
-       IpcMemoryId shmid;
        struct stat statbuf;
        Size            sysvsize;
 
@@ -588,11 +632,20 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
        /* Make sure PGSharedMemoryAttach doesn't fail without need */
        UsedShmemSegAddr = NULL;
 
-       /* Loop till we find a free IPC key */
-       NextShmemSegID = port * 1000;
+       /*
+        * Loop till we find a free IPC key.  Trust CreateDataDirLockFile() to
+        * ensure no more than one postmaster per data directory can enter this
+        * loop simultaneously.  (CreateDataDirLockFile() does not ensure that,
+        * but prefer fixing it over coping here.)
+        */
+       NextShmemSegID = 1 + port * 1000;
 
-       for (NextShmemSegID++;; NextShmemSegID++)
+       for (;;)
        {
+               IpcMemoryId shmid;
+               PGShmemHeader *oldhdr;
+               IpcMemoryState state;
+
                /* Try to create new segment */
                memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
                if (memAddress)
@@ -600,58 +653,71 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
 
                /* Check shared memory and possibly remove and recreate */
 
-               if (makePrivate)                /* a standalone backend shouldn't do this */
-                       continue;
-
-               if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) == NULL)
-                       continue;                       /* can't attach, not one of mine */
-
                /*
-                * If I am not the creator and it belongs to an extant process,
-                * continue.
+                * shmget() failure is typically EACCES, hence SHMSTATE_FOREIGN.
+                * ENOENT, a narrow possibility, implies SHMSTATE_ENOENT, but one can
+                * safely treat SHMSTATE_ENOENT like SHMSTATE_FOREIGN.
                 */
-               hdr = (PGShmemHeader *) memAddress;
-               if (hdr->creatorPID != getpid())
+               shmid = shmget(NextShmemSegID, sizeof(PGShmemHeader), 0);
+               if (shmid < 0)
                {
-                       if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH)
-                       {
-                               shmdt(memAddress);
-                               continue;               /* segment belongs to a live process */
-                       }
+                       oldhdr = NULL;
+                       state = SHMSTATE_FOREIGN;
                }
+               else
+                       state = PGSharedMemoryAttach(shmid, &oldhdr);
 
-               /*
-                * The segment appears to be from a dead Postgres process, or from a
-                * previous cycle of life in this same process.  Zap it, if possible,
-                * and any associated dynamic shared memory segments, as well. This
-                * probably shouldn't fail, but if it does, assume the segment belongs
-                * to someone else after all, and continue quietly.
-                */
-               if (hdr->dsm_control != 0)
-                       dsm_cleanup_using_control_segment(hdr->dsm_control);
-               shmdt(memAddress);
-               if (shmctl(shmid, IPC_RMID, NULL) < 0)
-                       continue;
+               switch (state)
+               {
+                       case SHMSTATE_ANALYSIS_FAILURE:
+                       case SHMSTATE_ATTACHED:
+                               ereport(FATAL,
+                                               (errcode(ERRCODE_LOCK_FILE_EXISTS),
+                                                errmsg("pre-existing shared memory block (key %lu, ID %lu) is still in use",
+                                                               (unsigned long) NextShmemSegID,
+                                                               (unsigned long) shmid),
+                                                errhint("Terminate any old server processes associated with data directory \"%s\".",
+                                                                DataDir)));
+                               break;
+                       case SHMSTATE_ENOENT:
 
-               /*
-                * Now try again to create the segment.
-                */
-               memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
-               if (memAddress)
-                       break;                          /* successful create and attach */
+                               /*
+                                * To our surprise, some other process deleted since our last
+                                * InternalIpcMemoryCreate().  Moments earlier, we would have
+                                * seen SHMSTATE_FOREIGN.  Try that same ID again.
+                                */
+                               elog(LOG,
+                                        "shared memory block (key %lu, ID %lu) deleted during startup",
+                                        (unsigned long) NextShmemSegID,
+                                        (unsigned long) shmid);
+                               break;
+                       case SHMSTATE_FOREIGN:
+                               NextShmemSegID++;
+                               break;
+                       case SHMSTATE_UNATTACHED:
 
-               /*
-                * Can only get here if some other process managed to create the same
-                * shmem key before we did.  Let him have that one, loop around to try
-                * next key.
-                */
+                               /*
+                                * The segment pertains to DataDir, and every process that had
+                                * used it has died or detached.  Zap it, if possible, and any
+                                * associated dynamic shared memory segments, as well.  This
+                                * shouldn't fail, but if it does, assume the segment belongs
+                                * to someone else after all, and try the next candidate.
+                                * Otherwise, try again to create the segment.  That may fail
+                                * if some other process creates the same shmem key before we
+                                * do, in which case we'll try the next key.
+                                */
+                               if (oldhdr->dsm_control != 0)
+                                       dsm_cleanup_using_control_segment(oldhdr->dsm_control);
+                               if (shmctl(shmid, IPC_RMID, NULL) < 0)
+                                       NextShmemSegID++;
+                               break;
+               }
+
+               if (oldhdr && shmdt(oldhdr) < 0)
+                       elog(LOG, "shmdt(%p) failed: %m", oldhdr);
        }
 
-       /*
-        * OK, we created a new segment.  Mark it as created by this process. The
-        * order of assignments here is critical so that another Postgres process
-        * can't see the header as valid but belonging to an invalid PID!
-        */
+       /* Initialize new segment. */
        hdr = (PGShmemHeader *) memAddress;
        hdr->creatorPID = getpid();
        hdr->magic = PGShmemMagic;
@@ -707,7 +773,8 @@ void
 PGSharedMemoryReAttach(void)
 {
        IpcMemoryId shmid;
-       void       *hdr;
+       PGShmemHeader *hdr;
+       IpcMemoryState state;
        void       *origUsedShmemSegAddr = UsedShmemSegAddr;
 
        Assert(UsedShmemSegAddr != NULL);
@@ -720,14 +787,18 @@ PGSharedMemoryReAttach(void)
 #endif
 
        elog(DEBUG3, "attaching to %p", UsedShmemSegAddr);
-       hdr = (void *) PGSharedMemoryAttach((IpcMemoryKey) UsedShmemSegID, &shmid);
-       if (hdr == NULL)
+       shmid = shmget(UsedShmemSegID, sizeof(PGShmemHeader), 0);
+       if (shmid < 0)
+               state = SHMSTATE_FOREIGN;
+       else
+               state = PGSharedMemoryAttach(shmid, &hdr);
+       if (state != SHMSTATE_ATTACHED)
                elog(FATAL, "could not reattach to shared memory (key=%d, addr=%p): %m",
                         (int) UsedShmemSegID, UsedShmemSegAddr);
        if (hdr != origUsedShmemSegAddr)
                elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
                         hdr, origUsedShmemSegAddr);
-       dsm_set_control_handle(((PGShmemHeader *) hdr)->dsm_control);
+       dsm_set_control_handle(hdr->dsm_control);
 
        UsedShmemSegAddr = hdr;         /* probably redundant */
 }
@@ -801,31 +872,3 @@ PGSharedMemoryDetach(void)
                AnonymousShmem = NULL;
        }
 }
-
-
-/*
- * Attach to shared memory and make sure it has a Postgres header
- *
- * Returns attach address if OK, else NULL
- */
-static PGShmemHeader *
-PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid)
-{
-       PGShmemHeader *hdr;
-
-       if ((*shmid = shmget(key, sizeof(PGShmemHeader), 0)) < 0)
-               return NULL;
-
-       hdr = (PGShmemHeader *) shmat(*shmid, UsedShmemSegAddr, PG_SHMAT_FLAGS);
-
-       if (hdr == (PGShmemHeader *) -1)
-               return NULL;                    /* failed: must be some other app's */
-
-       if (hdr->magic != PGShmemMagic)
-       {
-               shmdt((void *) hdr);
-               return NULL;                    /* segment belongs to a non-Postgres app */
-       }
-
-       return hdr;
-}
index eed17c5e2140a5faa1fdf053f7b585667193b3e5..ccd7b6b5e33ec16f3137fa168394c492bf36850c 100644 (file)
@@ -192,14 +192,9 @@ EnableLockPagesPrivilege(int elevel)
  *
  * Create a shared memory segment of the given size and initialize its
  * standard header.
- *
- * makePrivate means to always create a new segment, rather than attach to
- * or recycle any existing segment. On win32, we always create a new segment,
- * since there is no need for recycling (segments go away automatically
- * when the last backend exits)
  */
 PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+PGSharedMemoryCreate(Size size, int port,
                                         PGShmemHeader **shim)
 {
        void       *memAddress;
index ad87b7c9ee0fbd3ac18a216269c85ee28508f040..60d29a2089af0ef4286153cd02fa34c3036e8b2d 100644 (file)
@@ -2605,7 +2605,7 @@ reset_shared(int port)
         * determine IPC keys.  This helps ensure that we will clean up dead IPC
         * objects if the postmaster crashes and is restarted.
         */
-       CreateSharedMemoryAndSemaphores(false, port);
+       CreateSharedMemoryAndSemaphores(port);
 }
 
 
@@ -4946,7 +4946,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                /* And run the backend */
                BackendRun(&port);              /* does not return */
@@ -4960,7 +4960,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitAuxiliaryProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                AuxiliaryProcessMain(argc - 2, argv + 2);       /* does not return */
        }
@@ -4973,7 +4973,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                AutoVacLauncherMain(argc - 2, argv + 2);        /* does not return */
        }
@@ -4986,7 +4986,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                AutoVacWorkerMain(argc - 2, argv + 2);  /* does not return */
        }
@@ -5004,7 +5004,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                /* Fetch MyBgworkerEntry from shared memory */
                shmem_slot = atoi(argv[1] + 15);
index 5965d3620f76c7224b345ac8a772b6fa291f756e..d7d733530ff740216ab9c27f50c5a218b61d4536 100644 (file)
@@ -89,12 +89,9 @@ RequestAddinShmemSpace(Size size)
  * through the same code as before.  (Note that the called routines mostly
  * check IsUnderPostmaster, rather than EXEC_BACKEND, to detect this case.
  * This is a bit code-wasteful and could be cleaned up.)
- *
- * If "makePrivate" is true then we only need private memory, not shared
- * memory.  This is true for a standalone backend, false for a postmaster.
  */
 void
-CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
+CreateSharedMemoryAndSemaphores(int port)
 {
        PGShmemHeader *shim = NULL;
 
@@ -166,7 +163,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
                /*
                 * Create the shmem segment
                 */
-               seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim);
+               seghdr = PGSharedMemoryCreate(size, port, &shim);
 
                InitShmemAccess(seghdr);
 
@@ -187,12 +184,9 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
        {
                /*
                 * We are reattaching to an existing shared memory segment. This
-                * should only be reached in the EXEC_BACKEND case, and even then only
-                * with makePrivate == false.
+                * should only be reached in the EXEC_BACKEND case.
                 */
-#ifdef EXEC_BACKEND
-               Assert(!makePrivate);
-#else
+#ifndef EXEC_BACKEND
                elog(PANIC, "should be attached to shared memory already");
 #endif
        }
index 1c2a99c9c8cc2d5a8be8b7ce2770473ee5b5ffbb..e9f72b506921fe957367df515dd0dddab92c3c44 100644 (file)
@@ -446,9 +446,11 @@ InitCommunication(void)
        {
                /*
                 * We're running a postgres bootstrap process or a standalone backend.
-                * Create private "shmem" and semaphores.
+                * Though we won't listen on PostPortNumber, use it to select a shmem
+                * key.  This increases the chance of detecting a leftover live
+                * backend of this DataDir.
                 */
-               CreateSharedMemoryAndSemaphores(true, 0);
+               CreateSharedMemoryAndSemaphores(PostPortNumber);
        }
 }
 
index 0660252faf24eff7820cda6d6a072935bc42d574..e9b243fe7d4fb586f3d94e42fe0d30107dc077be 100644 (file)
@@ -76,6 +76,6 @@ extern void on_exit_reset(void);
 /* ipci.c */
 extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook;
 
-extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port);
+extern void CreateSharedMemoryAndSemaphores(int port);
 
 #endif                                                 /* IPC_H */
index 4e1e13dcb1f89bad9fd0accc87add8c751bcb102..0535d4545c9c76302fccdf8564c25554fe8bb693 100644 (file)
@@ -30,7 +30,7 @@ typedef struct PGShmemHeader  /* standard header for all Postgres shmem */
 {
        int32           magic;                  /* magic # to identify Postgres segments */
 #define PGShmemMagic  679834894
-       pid_t           creatorPID;             /* PID of creating process */
+       pid_t           creatorPID;             /* PID of creating process (set but unread) */
        Size            totalsize;              /* total size of segment */
        Size            freeoffset;             /* offset to first free space */
        dsm_handle      dsm_control;    /* ID of dynamic shared memory control seg */
@@ -82,8 +82,8 @@ extern void PGSharedMemoryReAttach(void);
 extern void PGSharedMemoryNoReAttach(void);
 #endif
 
-extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate,
-                                        int port, PGShmemHeader **shim);
+extern PGShmemHeader *PGSharedMemoryCreate(Size size, int port,
+                                        PGShmemHeader **shim);
 extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
 extern void PGSharedMemoryDetach(void);
 
index 81deed98430e73cd8112fdf63824c7b9cfc85107..f6570639e00dc57b6d0f7e1074e48e1bd1a7c27b 100644 (file)
@@ -104,7 +104,8 @@ our @EXPORT = qw(
   get_new_node
 );
 
-our ($test_localhost, $test_pghost, $last_port_assigned, @all_nodes, $died);
+our ($use_tcp, $test_localhost, $test_pghost, $last_host_assigned,
+       $last_port_assigned, @all_nodes, $died);
 
 # Windows path to virtual file system root
 
@@ -118,13 +119,14 @@ if ($Config{osname} eq 'msys')
 INIT
 {
 
-       # PGHOST is set once and for all through a single series of tests when
-       # this module is loaded.
-       $test_localhost = "127.0.0.1";
-       $test_pghost =
-         $TestLib::windows_os ? $test_localhost : TestLib::tempdir_short;
-       $ENV{PGHOST}     = $test_pghost;
-       $ENV{PGDATABASE} = 'postgres';
+       # Set PGHOST for backward compatibility.  This doesn't work for own_host
+       # nodes, so prefer to not rely on this when writing new tests.
+       $use_tcp            = $TestLib::windows_os;
+       $test_localhost     = "127.0.0.1";
+       $last_host_assigned = 1;
+       $test_pghost        = $use_tcp ? $test_localhost : TestLib::tempdir_short;
+       $ENV{PGHOST}        = $test_pghost;
+       $ENV{PGDATABASE}    = 'postgres';
 
        # Tracking of last port value assigned to accelerate free port lookup.
        $last_port_assigned = int(rand() * 16384) + 49152;
@@ -155,7 +157,9 @@ sub new
                _host    => $pghost,
                _basedir => "$TestLib::tmp_check/t_${testname}_${name}_data",
                _name    => $name,
-               _logfile => "$TestLib::log_path/${testname}_${name}.log"
+               _logfile_generation => 0,
+               _logfile_base       => "$TestLib::log_path/${testname}_${name}",
+               _logfile            => "$TestLib::log_path/${testname}_${name}.log"
        };
 
        bless $self, $class;
@@ -473,8 +477,9 @@ sub init
                print $conf "max_wal_senders = 0\n";
        }
 
-       if ($TestLib::windows_os)
+       if ($use_tcp)
        {
+               print $conf "unix_socket_directories = ''\n";
                print $conf "listen_addresses = '$host'\n";
        }
        else
@@ -536,12 +541,11 @@ sub backup
 {
        my ($self, $backup_name) = @_;
        my $backup_path = $self->backup_dir . '/' . $backup_name;
-       my $port        = $self->port;
        my $name        = $self->name;
 
        print "# Taking pg_basebackup $backup_name from node \"$name\"\n";
-       TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-p', $port,
-               '--no-sync');
+       TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-h',
+               $self->host, '-p', $self->port, '--no-sync');
        print "# Backup finished\n";
        return;
 }
@@ -651,6 +655,7 @@ sub init_from_backup
 {
        my ($self, $root_node, $backup_name, %params) = @_;
        my $backup_path = $root_node->backup_dir . '/' . $backup_name;
+       my $host        = $self->host;
        my $port        = $self->port;
        my $node_name   = $self->name;
        my $root_name   = $root_node->name;
@@ -677,6 +682,15 @@ sub init_from_backup
                qq(
 port = $port
 ));
+       if ($use_tcp)
+       {
+               $self->append_conf('postgresql.conf', "listen_addresses = '$host'");
+       }
+       else
+       {
+               $self->append_conf('postgresql.conf',
+                       "unix_socket_directories = '$host'");
+       }
        $self->enable_streaming($root_node) if $params{has_streaming};
        $self->enable_restoring($root_node) if $params{has_restoring};
        return;
@@ -684,17 +698,45 @@ port = $port
 
 =pod
 
-=item $node->start()
+=item $node->rotate_logfile()
+
+Switch to a new PostgreSQL log file.  This does not alter any running
+PostgreSQL process.  Subsequent method calls, including pg_ctl invocations,
+will use the new name.  Return the new name.
+
+=cut
+
+sub rotate_logfile
+{
+       my ($self) = @_;
+       $self->{_logfile} = sprintf('%s_%d.log',
+               $self->{_logfile_base},
+               ++$self->{_logfile_generation});
+       return $self->{_logfile};
+}
+
+=pod
+
+=item $node->start(%params) => success_or_failure
 
 Wrapper for pg_ctl start
 
 Start the node and wait until it is ready to accept connections.
 
+=over
+
+=item fail_ok => 1
+
+By default, failure terminates the entire F<prove> invocation.  If given,
+instead return a true or false value to indicate success or failure.
+
+=back
+
 =cut
 
 sub start
 {
-       my ($self) = @_;
+       my ($self, %params) = @_;
        my $port   = $self->port;
        my $pgdata = $self->data_dir;
        my $name   = $self->name;
@@ -721,10 +763,34 @@ sub start
        {
                print "# pg_ctl start failed; logfile:\n";
                print TestLib::slurp_file($self->logfile);
-               BAIL_OUT("pg_ctl start failed");
+               BAIL_OUT("pg_ctl start failed") unless $params{fail_ok};
+               return 0;
        }
 
        $self->_update_pid(1);
+       return 1;
+}
+
+=pod
+
+=item $node->kill9()
+
+Send SIGKILL (signal 9) to the postmaster.
+
+Note: if the node is already known stopped, this does nothing.
+However, if we think it's running and it's not, it's important for
+this to fail.  Otherwise, tests might fail to detect server crashes.
+
+=cut
+
+sub kill9
+{
+       my ($self) = @_;
+       my $name = $self->name;
+       return unless defined $self->{_pid};
+       print "### Killing node \"$name\" using signal 9\n";
+       kill(9, $self->{_pid}) or BAIL_OUT("kill(9, $self->{_pid}) failed");
+       $self->{_pid} = undef;
        return;
 }
 
@@ -965,7 +1031,7 @@ sub _update_pid
 
 =pod
 
-=item PostgresNode->get_new_node(node_name)
+=item PostgresNode->get_new_node(node_name, %params)
 
 Build a new object of class C<PostgresNode> (or of a subclass, if you have
 one), assigning a free port number.  Remembers the node, to prevent its port
@@ -974,6 +1040,22 @@ shut down when the test script exits.
 
 You should generally use this instead of C<PostgresNode::new(...)>.
 
+=over
+
+=item port => [1,65535]
+
+By default, this function assigns a port number to each node.  Specify this to
+force a particular port number.  The caller is responsible for evaluating
+potential conflicts and privilege requirements.
+
+=item own_host => 1
+
+By default, all nodes use the same PGHOST value.  If specified, generate a
+PGHOST specific to this node.  This allows multiple nodes to use the same
+port.
+
+=back
+
 For backwards compatibility, it is also exported as a standalone function,
 which can only create objects of class C<PostgresNode>.
 
@@ -982,10 +1064,11 @@ which can only create objects of class C<PostgresNode>.
 sub get_new_node
 {
        my $class = 'PostgresNode';
-       $class = shift if 1 < scalar @_;
-       my $name  = shift;
-       my $found = 0;
-       my $port  = $last_port_assigned;
+       $class = shift if scalar(@_) % 2 != 1;
+       my ($name, %params) = @_;
+       my $port_is_forced = defined $params{port};
+       my $found          = $port_is_forced;
+       my $port = $port_is_forced ? $params{port} : $last_port_assigned;
 
        while ($found == 0)
        {
@@ -1002,13 +1085,15 @@ sub get_new_node
                        $found = 0 if ($node->port == $port);
                }
 
-               # Check to see if anything else is listening on this TCP port.
-               # This is *necessary* on Windows, and seems like a good idea
-               # on Unixen as well, even though we don't ask the postmaster
-               # to open a TCP port on Unix.
+               # Check to see if anything else is listening on this TCP port.  Accept
+               # only ports available for all possible listen_addresses values, so
+               # the caller can harness this port for the widest range of purposes.
+               # This is *necessary* on Windows, and seems like a good idea on Unixen
+               # as well, even though we don't ask the postmaster to open a TCP port
+               # on Unix.
                if ($found == 1)
                {
-                       my $iaddr = inet_aton($test_localhost);
+                       my $iaddr = inet_aton('0.0.0.0');
                        my $paddr = sockaddr_in($port, $iaddr);
                        my $proto = getprotobyname("tcp");
 
@@ -1024,16 +1109,35 @@ sub get_new_node
                }
        }
 
-       print "# Found free port $port\n";
+       print "# Found port $port\n";
+
+       # Select a host.
+       my $host = $test_pghost;
+       if ($params{own_host})
+       {
+               if ($use_tcp)
+               {
+                       # This assumes $use_tcp platforms treat every address in
+                       # 127.0.0.1/24, not just 127.0.0.1, as a usable loopback.
+                       $last_host_assigned++;
+                       $last_host_assigned > 254 and BAIL_OUT("too many own_host nodes");
+                       $host = '127.0.0.' . $last_host_assigned;
+               }
+               else
+               {
+                       $host = "$test_pghost/$name"; # Assume $name =~ /^[-_a-zA-Z0-9]+$/
+                       mkdir $host;
+               }
+       }
 
        # Lock port number found by creating a new node
-       my $node = $class->new($name, $test_pghost, $port);
+       my $node = $class->new($name, $host, $port);
 
        # Add node to list of nodes
        push(@all_nodes, $node);
 
        # And update port for next time
-       $last_port_assigned = $port;
+       $port_is_forced or $last_port_assigned = $port;
 
        return $node;
 }
@@ -1424,9 +1528,8 @@ $stderr);
 
 =item $node->command_ok(...)
 
-Runs a shell command like TestLib::command_ok, but with PGPORT
-set so that the command will default to connecting to this
-PostgresNode.
+Runs a shell command like TestLib::command_ok, but with PGHOST and PGPORT set
+so that the command will default to connecting to this PostgresNode.
 
 =cut
 
@@ -1436,6 +1539,7 @@ sub command_ok
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_ok(@_);
@@ -1446,7 +1550,7 @@ sub command_ok
 
 =item $node->command_fails(...)
 
-TestLib::command_fails with our PGPORT. See command_ok(...)
+TestLib::command_fails with our connection parameters. See command_ok(...)
 
 =cut
 
@@ -1456,6 +1560,7 @@ sub command_fails
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_fails(@_);
@@ -1466,7 +1571,7 @@ sub command_fails
 
 =item $node->command_like(...)
 
-TestLib::command_like with our PGPORT. See command_ok(...)
+TestLib::command_like with our connection parameters. See command_ok(...)
 
 =cut
 
@@ -1476,6 +1581,7 @@ sub command_like
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_like(@_);
@@ -1486,7 +1592,8 @@ sub command_like
 
 =item $node->command_checks_all(...)
 
-TestLib::command_checks_all with our PGPORT. See command_ok(...)
+TestLib::command_checks_all with our connection parameters. See
+command_ok(...)
 
 =cut
 
@@ -1496,6 +1603,7 @@ sub command_checks_all
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_checks_all(@_);
@@ -1520,6 +1628,7 @@ sub issues_sql_like
 
        my ($self, $cmd, $expected_sql, $test_name) = @_;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        truncate $self->logfile, 0;
@@ -1534,8 +1643,8 @@ sub issues_sql_like
 
 =item $node->run_log(...)
 
-Runs a shell command like TestLib::run_log, but with PGPORT set so
-that the command will default to connecting to this PostgresNode.
+Runs a shell command like TestLib::run_log, but with connection parameters set
+so that the command will default to connecting to this PostgresNode.
 
 =cut
 
@@ -1543,6 +1652,7 @@ sub run_log
 {
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::run_log(@_);
diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl
new file mode 100644 (file)
index 0000000..3cbe938
--- /dev/null
@@ -0,0 +1,200 @@
+#
+# Tests of pg_shmem.h functions
+#
+use strict;
+use warnings;
+use IPC::Run 'run';
+use PostgresNode;
+use Test::More;
+use TestLib;
+use Time::HiRes qw(usleep);
+
+plan tests => 5;
+
+my $tempdir = TestLib::tempdir;
+my $port;
+
+# Log "ipcs" diffs on a best-effort basis, swallowing any error.
+my $ipcs_before = "$tempdir/ipcs_before";
+eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; };
+
+sub log_ipcs
+{
+       eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] };
+       return;
+}
+
+# These tests need a $port such that nothing creates or removes a segment in
+# $port's IpcMemoryKey range while this test script runs.  While there's no
+# way to ensure that in general, we do ensure that if PostgreSQL tests are the
+# only actors.  With TCP, the first get_new_node picks a port number.  With
+# Unix sockets, use a postmaster, $port_holder, to represent a key space
+# reservation.  $port_holder holds a reservation on the key space of port
+# 1+$port_holder->port if it created the first IpcMemoryKey of its own port's
+# key space.  If multiple copies of this test script run concurrently, they
+# will pick different ports.  $port_holder postmasters use odd-numbered ports,
+# and tests use even-numbered ports.  In the absence of collisions from other
+# shmget() activity, gnat starts with key 0x7d001 (512001), and flea starts
+# with key 0x7d002 (512002).
+my $port_holder;
+if (!$PostgresNode::use_tcp)
+{
+       my $lock_port;
+       for ($lock_port = 511; $lock_port < 711; $lock_port += 2)
+       {
+               $port_holder = PostgresNode->get_new_node(
+                       "port${lock_port}_holder",
+                       port     => $lock_port,
+                       own_host => 1);
+               $port_holder->init;
+               $port_holder->append_conf('postgresql.conf', 'max_connections = 5');
+               $port_holder->start;
+               # Match the AddToDataDirLockFile() call in sysv_shmem.c.  Assume all
+               # systems not using sysv_shmem.c do use TCP.
+               my $shmem_key_line_prefix = sprintf("%9lu ", 1 + $lock_port * 1000);
+               last
+                 if slurp_file($port_holder->data_dir . '/postmaster.pid') =~
+                 /^$shmem_key_line_prefix/m;
+               $port_holder->stop;
+       }
+       $port = $lock_port + 1;
+}
+
+# Node setup.
+sub init_start
+{
+       my $name = shift;
+       my $ret = PostgresNode->get_new_node($name, port => $port, own_host => 1);
+       defined($port) or $port = $ret->port;    # same port for all nodes
+       $ret->init;
+       # Limit semaphore consumption, since we run several nodes concurrently.
+       $ret->append_conf('postgresql.conf', 'max_connections = 5');
+       $ret->start;
+       log_ipcs();
+       return $ret;
+}
+my $gnat = init_start 'gnat';
+my $flea = init_start 'flea';
+
+# Upon postmaster death, postmaster children exit automatically.
+$gnat->kill9;
+log_ipcs();
+$flea->restart;       # flea ignores the shm key gnat abandoned.
+log_ipcs();
+poll_start($gnat);    # gnat recycles its former shm key.
+log_ipcs();
+
+# After clean shutdown, the nodes swap shm keys.
+$gnat->stop;
+$flea->restart;
+log_ipcs();
+$gnat->start;
+log_ipcs();
+
+# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
+# Use a regress.c function to emulate the responsiveness of a backend working
+# through a CPU-intensive task.
+$gnat->safe_psql('postgres', <<EOSQL);
+CREATE FUNCTION wait_pid(int)
+   RETURNS void
+   AS '$ENV{REGRESS_SHLIB}'
+   LANGUAGE C STRICT;
+EOSQL
+my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
+my ($stdout, $stderr);
+my $slow_client = IPC::Run::start(
+       [
+               'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
+               '-c', $slow_query
+       ],
+       '<',
+       \undef,
+       '>',
+       \$stdout,
+       '2>',
+       \$stderr,
+       IPC::Run::timeout(900));    # five times the poll_query_until timeout
+ok( $gnat->poll_query_until(
+               'postgres',
+               "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'),
+       'slow query started');
+my $slow_pid = $gnat->safe_psql('postgres',
+       "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
+$gnat->kill9;
+unlink($gnat->data_dir . '/postmaster.pid');
+$gnat->rotate_logfile;    # on Windows, can't open old log for writing
+log_ipcs();
+# Reject ordinary startup.  Retry for the same reasons poll_start() does.
+my $pre_existing_msg = qr/pre-existing shared memory block/;
+{
+       my $max_attempts = 180 * 10;    # Retry every 0.1s for at least 180s.
+       my $attempts     = 0;
+       while ($attempts < $max_attempts)
+       {
+               last
+                 if $gnat->start(fail_ok => 1)
+                 || slurp_file($gnat->logfile) =~ $pre_existing_msg;
+               usleep(100_000);
+               $attempts++;
+       }
+}
+like(slurp_file($gnat->logfile),
+       $pre_existing_msg, 'detected live backend via shared memory');
+# Reject single-user startup.
+my $single_stderr;
+ok( !run_log(
+               [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
+               '<', \('SELECT 1 + 1'), '2>', \$single_stderr),
+       'live query blocks --single');
+print STDERR $single_stderr;
+like($single_stderr, $pre_existing_msg,
+       'single-user mode detected live backend via shared memory');
+log_ipcs();
+# Fail to reject startup if shm key N has become available and we crash while
+# using key N+1.  This is unwanted, but expected.  Windows is immune, because
+# its GetSharedMemName() use DataDir strings, not numeric keys.
+$flea->stop;    # release first key
+is( $gnat->start(fail_ok => 1),
+       $TestLib::windows_os ? 0 : 1,
+       'key turnover fools only sysv_shmem.c');
+$gnat->stop;     # release first key (no-op on $TestLib::windows_os)
+$flea->start;    # grab first key
+# cleanup
+TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
+$slow_client->finish;    # client has detected backend termination
+log_ipcs();
+poll_start($gnat);       # recycle second key
+
+$gnat->stop;
+$flea->stop;
+$port_holder->stop if $port_holder;
+log_ipcs();
+
+
+# We may need retries to start a new postmaster.  Causes:
+# - kernel is slow to deliver SIGKILL
+# - postmaster parent is slow to waitpid()
+# - postmaster child is slow to exit in response to SIGQUIT
+# - postmaster child is slow to exit after postmaster death
+sub poll_start
+{
+       my ($node) = @_;
+
+       my $max_attempts = 180 * 10;
+       my $attempts     = 0;
+
+       while ($attempts < $max_attempts)
+       {
+               $node->start(fail_ok => 1) && return 1;
+
+               # Wait 0.1 second before retrying.
+               usleep(100_000);
+
+               $attempts++;
+       }
+
+       # No success within 180 seconds.  Try one last time without fail_ok, which
+       # will BAIL_OUT unless it succeeds.
+       $node->start && return 1;
+       return 0;
+}
index d0e8c6849b5bd7796775e0ff08eac6a8216aaede..2aa29ab008c88a7411bdc57ad6fbae6785d6c1d3 100644 (file)
@@ -205,6 +205,7 @@ sub tap_check
        local %ENV = %ENV;
        $ENV{PERL5LIB}   = "$topdir/src/test/perl;$ENV{PERL5LIB}";
        $ENV{PG_REGRESS} = "$topdir/$Config/pg_regress/pg_regress";
+       $ENV{REGRESS_SHLIB} = "$topdir/src/test/regress/regress.dll";
 
        $ENV{TESTDIR} = "$dir";