]> granicus.if.org Git - postgresql/commitdiff
Fix and enhance the assertion of no palloc's in a critical section.
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 30 Jun 2014 07:13:48 +0000 (10:13 +0300)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 30 Jun 2014 07:26:00 +0000 (10:26 +0300)
The assertion failed if WAL_DEBUG or LWLOCK_STATS was enabled; fix that by
using separate memory contexts for the allocations made within those code
blocks.

This patch introduces a mechanism for marking any memory context as allowed
in a critical section. Previously ErrorContext was exempt as a special case.

Instead of a blanket exception of the checkpointer process, only exempt the
memory context used for the pending ops hash table.

src/backend/access/transam/xlog.c
src/backend/postmaster/checkpointer.c
src/backend/storage/lmgr/lwlock.c
src/backend/storage/lmgr/proc.c
src/backend/storage/smgr/md.c
src/backend/utils/mmgr/mcxt.c
src/include/nodes/memnodes.h
src/include/storage/lwlock.h
src/include/utils/memutils.h

index e5640793eb8e09355b09d13cbdb68d3a9773ae6b..9f417dee2983f5ce43a2a0f27eb9baca3d0eee0d 100644 (file)
@@ -60,6 +60,7 @@
 #include "storage/spin.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
+#include "utils/memutils.h"
 #include "utils/ps_status.h"
 #include "utils/relmapper.h"
 #include "utils/snapmgr.h"
@@ -736,6 +737,10 @@ static bool bgwriterLaunched = false;
 static int     MyLockNo = 0;
 static bool holdingAllLocks = false;
 
+#ifdef WAL_DEBUG
+static MemoryContext walDebugCxt = NULL;
+#endif
+
 static void readRecoveryCommandFile(void);
 static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
 static bool recoveryStopsBefore(XLogRecord *record);
@@ -1258,6 +1263,7 @@ begin:;
        if (XLOG_DEBUG)
        {
                StringInfoData buf;
+               MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
 
                initStringInfo(&buf);
                appendStringInfo(&buf, "INSERT @ %X/%X: ",
@@ -1282,10 +1288,11 @@ begin:;
 
                        appendStringInfoString(&buf, " - ");
                        RmgrTable[rechdr->xl_rmid].rm_desc(&buf, (XLogRecord *) recordbuf.data);
-                       pfree(recordbuf.data);
                }
                elog(LOG, "%s", buf.data);
-               pfree(buf.data);
+
+               MemoryContextSwitchTo(oldCxt);
+               MemoryContextReset(walDebugCxt);
        }
 #endif
 
@@ -4807,6 +4814,24 @@ XLOGShmemInit(void)
        char       *allocptr;
        int                     i;
 
+#ifdef WAL_DEBUG
+       /*
+        * Create a memory context for WAL debugging that's exempt from the
+        * normal "no pallocs in critical section" rule. Yes, that can lead to a
+        * PANIC if an allocation fails, but wal_debug is not for production use
+        * anyway.
+        */
+       if (walDebugCxt == NULL)
+       {
+               walDebugCxt = AllocSetContextCreate(TopMemoryContext,
+                                                                                       "WAL Debug",
+                                                                                       ALLOCSET_DEFAULT_MINSIZE,
+                                                                                       ALLOCSET_DEFAULT_INITSIZE,
+                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
+               MemoryContextAllowInCriticalSection(walDebugCxt, true);
+       }
+#endif
+
        ControlFile = (ControlFileData *)
                ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
        XLogCtl = (XLogCtlData *)
index 2ac3061d9749e030406a72c730094956f4d3604d..6c814ba0be8fc732de2ba3093d4e6b64843437de 100644 (file)
@@ -1305,19 +1305,6 @@ AbsorbFsyncRequests(void)
        if (!AmCheckpointerProcess())
                return;
 
-       /*
-        * We have to PANIC if we fail to absorb all the pending requests (eg,
-        * because our hashtable runs out of memory).  This is because the system
-        * cannot run safely if we are unable to fsync what we have been told to
-        * fsync.  Fortunately, the hashtable is so small that the problem is
-        * quite unlikely to arise in practice.
-        */
-       START_CRIT_SECTION();
-
-       /*
-        * We try to avoid holding the lock for a long time by copying the request
-        * array.
-        */
        LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
 
        /* Transfer stats counts into pending pgstats message */
@@ -1327,12 +1314,25 @@ AbsorbFsyncRequests(void)
        CheckpointerShmem->num_backend_writes = 0;
        CheckpointerShmem->num_backend_fsync = 0;
 
+       /*
+        * We try to avoid holding the lock for a long time by copying the request
+        * array, and processing the requests after releasing the lock.
+        *
+        * Once we have cleared the requests from shared memory, we have to PANIC
+        * if we then fail to absorb them (eg, because our hashtable runs out of
+        * memory).  This is because the system cannot run safely if we are unable
+        * to fsync what we have been told to fsync.  Fortunately, the hashtable
+        * is so small that the problem is quite unlikely to arise in practice.
+        */
        n = CheckpointerShmem->num_requests;
        if (n > 0)
        {
                requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
                memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
        }
+
+       START_CRIT_SECTION();
+
        CheckpointerShmem->num_requests = 0;
 
        LWLockRelease(CheckpointerCommLock);
@@ -1340,10 +1340,10 @@ AbsorbFsyncRequests(void)
        for (request = requests; n > 0; request++, n--)
                RememberFsyncRequest(request->rnode, request->forknum, request->segno);
 
+       END_CRIT_SECTION();
+
        if (requests)
                pfree(requests);
-
-       END_CRIT_SECTION();
 }
 
 /*
index d23ac62bf8489264e193cbc6c8ec4b4198d9b795..a62af27d22ca643efdbc09028c1a32a65f3b69dd 100644 (file)
@@ -104,8 +104,8 @@ typedef struct lwlock_stats
        int                     spin_delay_count;
 }      lwlock_stats;
 
-static int     counts_for_pid = 0;
 static HTAB *lwlock_stats_htab;
+static lwlock_stats lwlock_stats_dummy;
 #endif
 
 #ifdef LOCK_DEBUG
@@ -142,21 +142,39 @@ static void
 init_lwlock_stats(void)
 {
        HASHCTL         ctl;
+       static MemoryContext lwlock_stats_cxt = NULL;
+       static bool exit_registered = false;
 
-       if (lwlock_stats_htab != NULL)
-       {
-               hash_destroy(lwlock_stats_htab);
-               lwlock_stats_htab = NULL;
-       }
+       if (lwlock_stats_cxt != NULL)
+               MemoryContextDelete(lwlock_stats_cxt);
+
+       /*
+        * The LWLock stats will be updated within a critical section, which
+        * requires allocating new hash entries. Allocations within a critical
+        * section are normally not allowed because running out of memory would
+        * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
+        * turned on in production, so that's an acceptable risk. The hash entries
+        * are small, so the risk of running out of memory is minimal in practice.
+        */
+       lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
+                                                                                        "LWLock stats",
+                                                                                        ALLOCSET_DEFAULT_MINSIZE,
+                                                                                        ALLOCSET_DEFAULT_INITSIZE,
+                                                                                        ALLOCSET_DEFAULT_MAXSIZE);
+       MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
 
        MemSet(&ctl, 0, sizeof(ctl));
        ctl.keysize = sizeof(lwlock_stats_key);
        ctl.entrysize = sizeof(lwlock_stats);
        ctl.hash = tag_hash;
+       ctl.hcxt = lwlock_stats_cxt;
        lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
-                                                                       HASH_ELEM | HASH_FUNCTION);
-       counts_for_pid = MyProcPid;
-       on_shmem_exit(print_lwlock_stats, 0);
+                                                                       HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+       if (!exit_registered)
+       {
+               on_shmem_exit(print_lwlock_stats, 0);
+               exit_registered = true;
+       }
 }
 
 static void
@@ -190,9 +208,13 @@ get_lwlock_stats_entry(LWLock *lock)
        lwlock_stats *lwstats;
        bool            found;
 
-       /* Set up local count state first time through in a given process */
-       if (counts_for_pid != MyProcPid)
-               init_lwlock_stats();
+       /*
+        * During shared memory initialization, the hash table doesn't exist yet.
+        * Stats of that phase aren't very interesting, so just collect operations
+        * on all locks in a single dummy entry.
+        */
+       if (lwlock_stats_htab == NULL)
+               return &lwlock_stats_dummy;
 
        /* Fetch or create the entry. */
        key.tranche = lock->tranche;
@@ -361,6 +383,16 @@ CreateLWLocks(void)
        LWLockRegisterTranche(0, &MainLWLockTranche);
 }
 
+/*
+ * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
+ */
+void
+InitLWLockAccess(void)
+{
+#ifdef LWLOCK_STATS
+       init_lwlock_stats();
+#endif
+}
 
 /*
  * LWLockAssign - assign a dynamically-allocated LWLock number
index dfaf10e4b5c58c0dde687a8a1ce3106dab381f6a..ea88a241443f393995fc1a316f13cc41ae270af3 100644 (file)
@@ -411,8 +411,9 @@ InitProcess(void)
 
        /*
         * Now that we have a PGPROC, we could try to acquire locks, so initialize
-        * the deadlock checker.
+        * local state needed for LWLocks, and the deadlock checker.
         */
+       InitLWLockAccess();
        InitDeadLockChecking();
 }
 
index 3c1c81a72956fec6970a18e7e3ce4771349e74bd..167d61c1afecc655860d3f49aeeb6d67fc003a62 100644 (file)
@@ -115,7 +115,7 @@ typedef struct _MdfdVec
        struct _MdfdVec *mdfd_chain;    /* next segment, or NULL */
 } MdfdVec;
 
-static MemoryContext MdCxt;            /* context for all md.c allocations */
+static MemoryContext MdCxt;            /* context for all MdfdVec objects */
 
 
 /*
@@ -157,6 +157,7 @@ typedef struct
 
 static HTAB *pendingOpsTable = NULL;
 static List *pendingUnlinks = NIL;
+static MemoryContext pendingOpsCxt;            /* context for the above  */
 
 static CycleCtr mdsync_cycle_ctr = 0;
 static CycleCtr mdckpt_cycle_ctr = 0;
@@ -209,11 +210,27 @@ mdinit(void)
        {
                HASHCTL         hash_ctl;
 
+               /*
+                * XXX: The checkpointer needs to add entries to the pending ops table
+                * when absorbing fsync requests.  That is done within a critical
+                * section, which isn't usually allowed, but we make an exception.
+                * It means that there's a theoretical possibility that you run out of
+                * memory while absorbing fsync requests, which leads to a PANIC.
+                * Fortunately the hash table is small so that's unlikely to happen in
+                * practice.
+                */
+               pendingOpsCxt = AllocSetContextCreate(MdCxt,
+                                                                                         "Pending Ops Context",
+                                                                                         ALLOCSET_DEFAULT_MINSIZE,
+                                                                                         ALLOCSET_DEFAULT_INITSIZE,
+                                                                                         ALLOCSET_DEFAULT_MAXSIZE);
+               MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
+
                MemSet(&hash_ctl, 0, sizeof(hash_ctl));
                hash_ctl.keysize = sizeof(RelFileNode);
                hash_ctl.entrysize = sizeof(PendingOperationEntry);
                hash_ctl.hash = tag_hash;
-               hash_ctl.hcxt = MdCxt;
+               hash_ctl.hcxt = pendingOpsCxt;
                pendingOpsTable = hash_create("Pending Ops Table",
                                                                          100L,
                                                                          &hash_ctl,
@@ -1516,7 +1533,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
        else if (segno == UNLINK_RELATION_REQUEST)
        {
                /* Unlink request: put it in the linked list */
-               MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+               MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
                PendingUnlinkEntry *entry;
 
                /* PendingUnlinkEntry doesn't store forknum, since it's always MAIN */
@@ -1533,7 +1550,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
        else
        {
                /* Normal case: enter a request to fsync this segment */
-               MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+               MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
                PendingOperationEntry *entry;
                bool            found;
 
index e83e76dc0f38ed13f2932a0337641a48e007bd59..4185a03e9ff1969704c854c4faefc2906414ad00 100644 (file)
@@ -60,15 +60,9 @@ static void MemoryContextStatsInternal(MemoryContext context, int level);
  * You should not do memory allocations within a critical section, because
  * an out-of-memory error will be escalated to a PANIC. To enforce that
  * rule, the allocation functions Assert that.
- *
- * There are a two exceptions: 1) error recovery uses ErrorContext, which
- * has some memory set aside so that you don't run out. And 2) checkpointer
- * currently just hopes for the best, which is wrong and ought to be fixed,
- * but it's a known issue so let's not complain about in the meanwhile.
  */
 #define AssertNotInCriticalSection(context) \
-       Assert(CritSectionCount == 0 || (context) == ErrorContext || \
-                  AmCheckpointerProcess())
+       Assert(CritSectionCount == 0 || (context)->allowInCritSection)
 
 /*****************************************************************************
  *       EXPORTED ROUTINES                                                                                                              *
@@ -120,7 +114,10 @@ MemoryContextInit(void)
         * require it to contain at least 8K at all times. This is the only case
         * where retained memory in a context is *essential* --- we want to be
         * sure ErrorContext still has some memory even if we've run out
-        * elsewhere!
+        * elsewhere! Also, allow allocations in ErrorContext within a critical
+        * section. Otherwise a PANIC will cause an assertion failure in the
+        * error reporting code, before printing out the real cause of the
+        * failure.
         *
         * This should be the last step in this function, as elog.c assumes memory
         * management works once ErrorContext is non-null.
@@ -130,6 +127,7 @@ MemoryContextInit(void)
                                                                                 8 * 1024,
                                                                                 8 * 1024,
                                                                                 8 * 1024);
+       MemoryContextAllowInCriticalSection(ErrorContext, true);
 }
 
 /*
@@ -305,6 +303,26 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
        }
 }
 
+/*
+ * MemoryContextAllowInCriticalSection
+ *             Allow/disallow allocations in this memory context within a critical
+ *             section.
+ *
+ * Normally, memory allocations are not allowed within a critical section,
+ * because a failure would lead to PANIC.  There are a few exceptions to
+ * that, like allocations related to debugging code that is not supposed to
+ * be enabled in production.  This function can be used to exempt specific
+ * memory contexts from the assertion in palloc().
+ */
+void
+MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
+{
+       AssertArg(MemoryContextIsValid(context));
+#ifdef USE_ASSERT_CHECKING
+       context->allowInCritSection = allow;
+#endif
+}
+
 /*
  * GetMemoryChunkSpace
  *             Given a currently-allocated chunk, determine the total space
@@ -533,6 +551,7 @@ MemoryContextCreate(NodeTag tag, Size size,
        MemoryContext node;
        Size            needed = size + strlen(name) + 1;
 
+       /* creating new memory contexts is not allowed in a critical section */
        Assert(CritSectionCount == 0);
 
        /* Get space for node and name */
@@ -570,6 +589,11 @@ MemoryContextCreate(NodeTag tag, Size size,
                node->parent = parent;
                node->nextchild = parent->firstchild;
                parent->firstchild = node;
+
+#ifdef USE_ASSERT_CHECKING
+               /* inherit allowInCritSection flag from parent */
+               node->allowInCritSection = parent->allowInCritSection;
+#endif
        }
 
        VALGRIND_CREATE_MEMPOOL(node, 0, false);
index f79ebd423fadcaf37fdb8d37e61591af62f7095e..ad77509b0ca23f02386d85320a043b6c53beb04a 100644 (file)
@@ -60,6 +60,9 @@ typedef struct MemoryContextData
        MemoryContext nextchild;        /* next child of same parent */
        char       *name;                       /* context name (just for debugging) */
        bool            isReset;                /* T = no space alloced since last reset */
+#ifdef USE_ASSERT_CHECKING
+       bool            allowInCritSection;     /* allow palloc in critical section */
+#endif
 } MemoryContextData;
 
 /* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */
index d588b1434e27d2736b8f31260acabf25951083d0..1d90b9f8586c7689fff4c2aec09842d1cd1e601c 100644 (file)
@@ -182,6 +182,7 @@ extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
 
 extern Size LWLockShmemSize(void);
 extern void CreateLWLocks(void);
+extern void InitLWLockAccess(void);
 
 /*
  * The traditional method for obtaining an lwlock for use by an extension is
index 59d0aecfbbcd62aaa0c1b19962422b45e63783e2..2fede860276ec6510a93ca7478b47a783cfe5e3f 100644 (file)
@@ -101,6 +101,8 @@ extern MemoryContext GetMemoryChunkContext(void *pointer);
 extern MemoryContext MemoryContextGetParent(MemoryContext context);
 extern bool MemoryContextIsEmpty(MemoryContext context);
 extern void MemoryContextStats(MemoryContext context);
+extern void MemoryContextAllowInCriticalSection(MemoryContext context,
+                                                                       bool allow);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 extern void MemoryContextCheck(MemoryContext context);