Add "Slab" MemoryContext implementation for efficient equal-sized allocations.
author     Andres Freund <andres@anarazel.de>
           Mon, 27 Feb 2017 11:41:44 +0000 (03:41 -0800)
committer  Andres Freund <andres@anarazel.de>
           Mon, 27 Feb 2017 11:41:44 +0000 (03:41 -0800)
The default general purpose aset.c style memory context is not a great
choice for allocations that are all going to be evenly sized,
especially when those objects aren't small and have varying lifetimes.
There tends to be a lot of fragmentation, and larger allocations always
go directly to libc rather than having their cost amortized over
several pallocs.

These problems led to the introduction of ad-hoc slab allocators in
reorderbuffer.c. But it turns out that the simplistic implementation
leads to problems when a lot of objects are allocated and freed, as
aset.c is still the underlying implementation. Freeing in particular
can easily run into O(n^2) behavior in aset.c.

While the O(n^2) behavior in aset.c can, and probably will, be
addressed, custom allocators for this usage pattern are more efficient
in both space and time.

This allocator is for evenly sized allocations, and supports both
cheap allocation and freeing, without fragmenting significantly.  It
does so by allocating evenly sized blocks via malloc() and carving
them into chunks that can be handed out as allocations.  In order to
release blocks to the OS as early as possible, chunks are allocated
from the fullest block that still has free objects, increasing the
likelihood of a block becoming entirely unused.
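
As a sketch of the intended usage (hypothetical names: "change data"
and MyChange stand in for whatever fixed-size object a caller
allocates):

    MemoryContext slab;
    MyChange   *change;

    slab = SlabContextCreate(CurrentMemoryContext, "change data",
                             SLAB_DEFAULT_BLOCK_SIZE, sizeof(MyChange));

    change = (MyChange *) MemoryContextAlloc(slab, sizeof(MyChange));
    /* ... use it; requests must be exactly sizeof(MyChange) ... */
    pfree(change);               /* chunk goes back to its block's freelist */
    MemoryContextDelete(slab);   /* releases all remaining blocks */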

A subsequent commit uses this in reorderbuffer.c, but a further
allocator is needed to resolve the performance problems triggering
this work.

There are likely further potential uses of this allocator besides
reorderbuffer.c.

There are potential further optimizations of the new slab.c; in
particular, the array of freelists could be replaced by a more
intelligent structure - but for now this looks more than good enough.

Author: Tomas Vondra, editorialized by Andres Freund
Reviewed-By: Andres Freund, Petr Jelinek, Robert Haas, Jim Nasby
Discussion: https://postgr.es/m/d15dff83-0b37-28ed-0809-95a5cc7292ad@2ndquadrant.com

src/backend/utils/mmgr/Makefile
src/backend/utils/mmgr/slab.c [new file with mode: 0644]
src/include/nodes/memnodes.h
src/include/nodes/nodes.h
src/include/utils/memutils.h
src/tools/pgindent/typedefs.list

index fc5f793b7f92771da7399cba1252ab87aae29840..cd0e803253b7895e73a4e9b3d26fe233ea9477cb 100644 (file)
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o
+OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
new file mode 100644 (file)
index 0000000..a5e140e
--- /dev/null
@@ -0,0 +1,790 @@
+/*-------------------------------------------------------------------------
+ *
+ * slab.c
+ *       SLAB allocator definitions.
+ *
+ * SLAB is a MemoryContext implementation designed for cases where large
+ * numbers of equally-sized objects are allocated (and freed).
+ *
+ *
+ * Portions Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       src/backend/utils/mmgr/slab.c
+ *
+ *
+ * NOTE:
+ *     The constant allocation size allows significant simplification and various
+ *     optimizations over more general purpose allocators. The blocks are carved
+ *     into chunks of exactly the right size (plus alignment), not wasting any
+ *     memory.
+ *
+ *     The information about free chunks is maintained both at the block level and
+ *     global (context) level. This is possible as the chunk size (and thus also
+ *     the number of chunks per block) is fixed.
+ *
+ *     Within each block, free chunks are tracked in a simple linked list: the
+ *     contents of each free chunk are replaced with the index of the next
+ *     free chunk in the same block. Each block also contains a counter of
+ *     free chunks. Combined with the block-level freelist, this makes it
+ *     trivial to eventually free the whole block.
+ *
+ *     At the context level, we use 'freelist' - an array of doubly-linked
+ *     lists - to group blocks by the number of free chunks they contain:
+ *     freelist[i] holds blocks with exactly i free chunks, so completely full
+ *     blocks sit at freelist[0] and entirely free blocks at
+ *     freelist[chunksPerBlock].
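+ *
+ *     For example (illustrative numbers): with chunksPerBlock = 8, a
+ *     completely full block sits in freelist[0], a block with exactly three
+ *     free chunks in freelist[3], and an entirely free block in freelist[8].
+ *     SlabAlloc() hands out a chunk from the head block of
+ *     freelist[minFreeChunks].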
+ *
+ *     This allows various optimizations - for example when searching for a
+ *     free chunk, the allocator reuses space from the fullest blocks first, in
+ *     the hope that some of the less full blocks will become completely empty
+ *     (and be returned to the OS).
+ *
+ *     For each block, we maintain a pointer to the first free chunk - this is
+ *     quite cheap and allows us to skip all the preceding used chunks,
+ *     eliminating a significant number of lookups in many common usage
+ *     patterns. In the worst case this performs as if the pointer were not
+ *     maintained.
+ *
+ *     We cache the freelist index for the blocks with the fewest free chunks
+ *     (minFreeChunks), so that we don't have to search the freelist on every
+ *     SlabAlloc() call, which is quite expensive.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "lib/ilist.h"
+
+
+#define SLAB_CHUNKHDRSZ MAXALIGN(sizeof(SlabChunk))
+
+/* Portion of SLAB_CHUNKHDRSZ excluding trailing padding. */
+#define SLAB_CHUNK_USED \
+       (offsetof(SlabChunk, header) + sizeof(StandardChunkHeader))
+
+/*
+ * SlabContext is a specialized implementation of MemoryContext.
+ */
+typedef struct SlabContext
+{
+       MemoryContextData header;       /* Standard memory-context fields */
+       /* Allocation parameters for this context: */
+       Size            chunkSize;              /* chunk size */
+       Size            fullChunkSize;  /* chunk size including header and alignment */
+       Size            blockSize;              /* block size */
+       int                     chunksPerBlock; /* number of chunks per block */
+       int                     minFreeChunks;  /* min number of free chunks in any block */
+       int                     nblocks;                /* number of blocks allocated */
+       /* blocks with free space, grouped by number of free chunks: */
+       dlist_head      freelist[FLEXIBLE_ARRAY_MEMBER];
+} SlabContext;
+
+/*
+ * SlabBlock
+ *             Structure of a single block in SLAB allocator.
+ *
+ * node: doubly-linked list of blocks in global freelist
+ * nfree: number of free chunks in this block
+ * firstFreeChunk: index of the first free chunk
+ */
+typedef struct SlabBlock
+{
+       dlist_node      node;                   /* doubly-linked list */
+       int                     nfree;                  /* number of free chunks */
+       int                     firstFreeChunk; /* index of the first free chunk in the block */
+} SlabBlock;
+
+/*
+ * SlabChunk
+ *             The prefix of each piece of memory in a SlabBlock
+ */
+typedef struct SlabChunk
+{
+       /* block owning this chunk */
+       void       *block;
+
+       /* include StandardChunkHeader because mcxt.c expects that */
+       StandardChunkHeader header;
+
+} SlabChunk;
+
+
+#define SlabPointerGetChunk(ptr)       \
+       ((SlabChunk *)(((char *)(ptr)) - SLAB_CHUNKHDRSZ))
+#define SlabChunkGetPointer(chk)       \
+       ((void *)(((char *)(chk)) + SLAB_CHUNKHDRSZ))
+#define SlabBlockGetChunk(slab, block, idx) \
+       ((SlabChunk *) ((char *) (block) + sizeof(SlabBlock)    \
+                                       + ((idx) * (slab)->fullChunkSize)))
+#define SlabBlockStart(block)  \
+       ((char *) (block) + sizeof(SlabBlock))
+#define SlabChunkIndex(slab, block, chunk)     \
+       (((char *) (chunk) - SlabBlockStart(block)) / (slab)->fullChunkSize)
+
+/*
+ * These functions implement the MemoryContext API for Slab contexts.
+ */
+static void *SlabAlloc(MemoryContext context, Size size);
+static void SlabFree(MemoryContext context, void *pointer);
+static void *SlabRealloc(MemoryContext context, void *pointer, Size size);
+static void SlabInit(MemoryContext context);
+static void SlabReset(MemoryContext context);
+static void SlabDelete(MemoryContext context);
+static Size SlabGetChunkSpace(MemoryContext context, void *pointer);
+static bool SlabIsEmpty(MemoryContext context);
+static void SlabStats(MemoryContext context, int level, bool print,
+                 MemoryContextCounters *totals);
+#ifdef MEMORY_CONTEXT_CHECKING
+static void SlabCheck(MemoryContext context);
+#endif
+
+/*
+ * This is the virtual function table for Slab contexts.
+ */
+static MemoryContextMethods SlabMethods = {
+       SlabAlloc,
+       SlabFree,
+       SlabRealloc,
+       SlabInit,
+       SlabReset,
+       SlabDelete,
+       SlabGetChunkSpace,
+       SlabIsEmpty,
+       SlabStats
+#ifdef MEMORY_CONTEXT_CHECKING
+       ,SlabCheck
+#endif
+};
+
+/* ----------
+ * Debug macros
+ * ----------
+ */
+#ifdef HAVE_ALLOCINFO
+#define SlabFreeInfo(_cxt, _chunk) \
+                       fprintf(stderr, "SlabFree: %s: %p, %zu\n", \
+                               (_cxt)->header.name, (_chunk), (_chunk)->header.size)
+#define SlabAllocInfo(_cxt, _chunk) \
+                       fprintf(stderr, "SlabAlloc: %s: %p, %zu\n", \
+                               (_cxt)->header.name, (_chunk), (_chunk)->header.size)
+#else
+#define SlabFreeInfo(_cxt, _chunk)
+#define SlabAllocInfo(_cxt, _chunk)
+#endif
+
+
+/*
+ * SlabContextCreate
+ *             Create a new Slab context.
+ *
+ * parent: parent context, or NULL if top-level context
+ * name: name of context (for debugging --- string will be copied)
+ * blockSize: allocation block size
+ * chunkSize: allocation chunk size
+ *
+ * The chunkSize may not exceed:
+ *             MAXALIGN_DOWN(SIZE_MAX) - MAXALIGN(sizeof(SlabBlock)) - SLAB_CHUNKHDRSZ
+ *
+ */
+MemoryContext
+SlabContextCreate(MemoryContext parent,
+                                 const char *name,
+                                 Size blockSize,
+                                 Size chunkSize)
+{
+       int                     chunksPerBlock;
+       Size            fullChunkSize;
+       Size            freelistSize;
+       SlabContext *slab;
+
+       /* otherwise the linked list inside freed chunk isn't guaranteed to fit */
+       StaticAssertStmt(MAXIMUM_ALIGNOF >= sizeof(int),
+                                        "MAXALIGN too small to fit int32");
+
+       /* chunk, including SLAB header (both addresses nicely aligned) */
+       fullChunkSize = MAXALIGN(sizeof(SlabChunk) + MAXALIGN(chunkSize));
+
+       /* Make sure the block can store at least one chunk. */
+       if (blockSize - sizeof(SlabBlock) < fullChunkSize)
+               elog(ERROR, "block size %zu for slab is too small for chunks of size %zu",
+                        blockSize, chunkSize);
+
+       /* Compute maximum number of chunks per block */
+       chunksPerBlock = (blockSize - sizeof(SlabBlock)) / fullChunkSize;
+
+       /* the freelist is indexed by free-chunk count, 0 through chunksPerBlock */
+       freelistSize = sizeof(dlist_head) * (chunksPerBlock + 1);
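+
+       /*
+        * For example, on a typical 64-bit build (sizeof(SlabBlock) = 24,
+        * sizeof(dlist_head) = 16), blockSize = 8192 and fullChunkSize = 64
+        * give chunksPerBlock = (8192 - 24) / 64 = 127 and freelistSize =
+        * 16 * 128 = 2048 bytes.
+        */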
+
+       /* if we can't fit at least one chunk into the block, we're hosed */
+       Assert(chunksPerBlock > 0);
+
+       /* make sure the chunks actually fit on the block       */
+       Assert((fullChunkSize * chunksPerBlock) + sizeof(SlabBlock) <= blockSize);
+
+       /* Do the type-independent part of context creation */
+       slab = (SlabContext *)
+               MemoryContextCreate(T_SlabContext,
+                                                       offsetof(SlabContext, freelist) + freelistSize,
+                                                       &SlabMethods,
+                                                       parent,
+                                                       name);
+
+       slab->blockSize = blockSize;
+       slab->chunkSize = chunkSize;
+       slab->fullChunkSize = fullChunkSize;
+       slab->chunksPerBlock = chunksPerBlock;
+       slab->nblocks = 0;
+       slab->minFreeChunks = 0;
+
+       return (MemoryContext) slab;
+}
+
+/*
+ * SlabInit
+ *             Context-type-specific initialization routine.
+ */
+static void
+SlabInit(MemoryContext context)
+{
+       int                     i;
+       SlabContext *slab = castNode(SlabContext, context);
+
+       Assert(slab);
+
+       /* initialize the freelist slots */
+       for (i = 0; i < (slab->chunksPerBlock + 1); i++)
+               dlist_init(&slab->freelist[i]);
+}
+
+/*
+ * SlabReset
+ *             Frees all memory which is allocated in the given set.
+ *
+ * The code simply frees all the blocks in the context - we don't keep any
+ * keeper blocks or anything like that.
+ */
+static void
+SlabReset(MemoryContext context)
+{
+       int                     i;
+       SlabContext *slab = castNode(SlabContext, context);
+
+       Assert(slab);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       /* Check for corruption and leaks before freeing */
+       SlabCheck(context);
+#endif
+
+       /* walk over freelists and free the blocks */
+       for (i = 0; i <= slab->chunksPerBlock; i++)
+       {
+               dlist_mutable_iter miter;
+
+               dlist_foreach_modify(miter, &slab->freelist[i])
+               {
+                       SlabBlock  *block = dlist_container(SlabBlock, node, miter.cur);
+
+                       dlist_delete(miter.cur);
+
+#ifdef CLOBBER_FREED_MEMORY
+                       wipe_mem(block, slab->blockSize);
+#endif
+                       free(block);
+                       slab->nblocks--;
+               }
+       }
+
+       slab->minFreeChunks = 0;
+
+       Assert(slab->nblocks == 0);
+}
+
+/*
+ * SlabDelete
+ *             Frees all memory which is allocated in the given slab, in preparation
+ *             for deletion of the slab. We simply call SlabReset().
+ */
+static void
+SlabDelete(MemoryContext context)
+{
+       /* just reset the context */
+       SlabReset(context);
+}
+
+/*
+ * SlabAlloc
+ *             Returns pointer to allocated memory of given size or NULL if
+ *             request could not be completed; memory is added to the slab.
+ */
+static void *
+SlabAlloc(MemoryContext context, Size size)
+{
+       SlabContext *slab = castNode(SlabContext, context);
+       SlabBlock  *block;
+       SlabChunk  *chunk;
+       int                     idx;
+
+       Assert(slab);
+
+       Assert((slab->minFreeChunks >= 0) &&
+                  (slab->minFreeChunks < slab->chunksPerBlock));
+
+       /* make sure we only allow correct request size */
+       if (size != slab->chunkSize)
+               elog(ERROR, "unexpected alloc chunk size %zu (expected %zu)",
+                        size, slab->chunkSize);
+
+       /*
+        * If there are no free chunks in any existing block, create a new block
+        * and put it to the last freelist bucket.
+        *
+        * slab->minFreeChunks == 0 means there are no blocks with free chunks,
+        * thanks to how minFreeChunks is updated at the end of SlabAlloc().
+        */
+       if (slab->minFreeChunks == 0)
+       {
+               block = (SlabBlock *) malloc(slab->blockSize);
+
+               if (block == NULL)
+                       return NULL;
+
+               block->nfree = slab->chunksPerBlock;
+               block->firstFreeChunk = 0;
+
+               /*
+                * Put all the chunks on a freelist. Walk the chunks and point each
+                * one to the next one.
+                */
+               for (idx = 0; idx < slab->chunksPerBlock; idx++)
+               {
+                       chunk = SlabBlockGetChunk(slab, block, idx);
+                       *(int32 *) SlabChunkGetPointer(chunk) = (idx + 1);
+               }
+
+               /*
+                * And add it to the last freelist with all chunks empty.
+                *
+                * We know there are no blocks in the freelist, otherwise we wouldn't
+                * need a new block.
+                */
+               Assert(dlist_is_empty(&slab->freelist[slab->chunksPerBlock]));
+
+               dlist_push_head(&slab->freelist[slab->chunksPerBlock], &block->node);
+
+               slab->minFreeChunks = slab->chunksPerBlock;
+               slab->nblocks += 1;
+       }
+
+       /* grab the block from the freelist (even the new block is there) */
+       block = dlist_head_element(SlabBlock, node,
+                                                          &slab->freelist[slab->minFreeChunks]);
+
+       /* make sure we actually got a valid block, with matching nfree */
+       Assert(block != NULL);
+       Assert(slab->minFreeChunks == block->nfree);
+       Assert(block->nfree > 0);
+
+       /* we know the index of the first free chunk in the block */
+       idx = block->firstFreeChunk;
+
+       /* make sure the chunk index is valid */
+       Assert((idx >= 0) && (idx < slab->chunksPerBlock));
+
+       /* compute the chunk location within the block (after the block header) */
+       chunk = SlabBlockGetChunk(slab, block, idx);
+
+       /*
+        * Update the block nfree count, and also the minFreeChunks as we've
+        * decreased nfree for a block with the minimum number of free chunks
+        * (because that's how we chose the block).
+        */
+       block->nfree--;
+       slab->minFreeChunks = block->nfree;
+
+       /*
+        * Remove the chunk from the freelist head. The index of the next free
+        * chunk is stored in the chunk itself.
+        */
+       VALGRIND_MAKE_MEM_DEFINED(SlabChunkGetPointer(chunk), sizeof(int32));
+       block->firstFreeChunk = *(int32 *) SlabChunkGetPointer(chunk);
+
+       Assert(block->firstFreeChunk >= 0);
+       Assert(block->firstFreeChunk <= slab->chunksPerBlock);
+
+       Assert((block->nfree != 0 &&
+                       block->firstFreeChunk < slab->chunksPerBlock) ||
+                  (block->nfree == 0 &&
+                       block->firstFreeChunk == slab->chunksPerBlock));
+
+       /* move the whole block to the right place in the freelist */
+       dlist_delete(&block->node);
+       dlist_push_head(&slab->freelist[block->nfree], &block->node);
+
+       /*
+        * And finally update minFreeChunks, i.e. the minimum number of free
+        * chunks in any block (and thus the freelist index to allocate from
+        * next). We only need to do that when the block became full (otherwise
+        * we know the current block is the right one). We'll simply walk the
+        * freelist until we find a non-empty entry.
+        */
+       if (slab->minFreeChunks == 0)
+       {
+               for (idx = 1; idx <= slab->chunksPerBlock; idx++)
+               {
+                       if (dlist_is_empty(&slab->freelist[idx]))
+                               continue;
+
+                       /* found a non-empty freelist */
+                       slab->minFreeChunks = idx;
+                       break;
+               }
+       }
+
+       if (slab->minFreeChunks == slab->chunksPerBlock)
+               slab->minFreeChunks = 0;
+
+       /* Prepare to initialize the chunk header. */
+       VALGRIND_MAKE_MEM_UNDEFINED(chunk, SLAB_CHUNK_USED);
+
+       chunk->block = (void *) block;
+
+       chunk->header.context = (MemoryContext) slab;
+       chunk->header.size = MAXALIGN(size);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       chunk->header.requested_size = size;
+       VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+                                                          sizeof(chunk->header.requested_size));
+       /* set mark to catch clobber of "unused" space */
+       if (size < chunk->header.size)
+               set_sentinel(SlabChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+       /* fill the allocated space with junk */
+       randomize_mem((char *) SlabChunkGetPointer(chunk), size);
+#endif
+
+       SlabAllocInfo(slab, chunk);
+       return SlabChunkGetPointer(chunk);
+}
+
+/*
+ * SlabFree
+ *             Frees allocated memory; memory is removed from the slab.
+ */
+static void
+SlabFree(MemoryContext context, void *pointer)
+{
+       int                     idx;
+       SlabContext *slab = castNode(SlabContext, context);
+       SlabChunk  *chunk = SlabPointerGetChunk(pointer);
+       SlabBlock  *block = chunk->block;
+
+       SlabFreeInfo(slab, chunk);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+                                                         sizeof(chunk->header.requested_size));
+       /* Test for someone scribbling on unused space in chunk */
+       if (chunk->header.requested_size < chunk->header.size)
+               if (!sentinel_ok(pointer, chunk->header.requested_size))
+                       elog(WARNING, "detected write past chunk end in %s %p",
+                                slab->header.name, chunk);
+#endif
+
+       /* compute index of the chunk with respect to block start */
+       idx = SlabChunkIndex(slab, block, chunk);
+
+       /* add chunk to freelist, and update block nfree count */
+       *(int32 *) pointer = block->firstFreeChunk;
+       block->firstFreeChunk = idx;
+       block->nfree++;
+
+       Assert(block->nfree > 0);
+       Assert(block->nfree <= slab->chunksPerBlock);
+
+#ifdef CLOBBER_FREED_MEMORY
+       /* XXX don't wipe the int32 index, used for block-level freelist */
+       wipe_mem((char *) pointer + sizeof(int32),
+                        chunk->header.size - sizeof(int32));
+#endif
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       /* Reset requested_size to 0 in chunks that are on freelist */
+       chunk->header.requested_size = 0;
+#endif
+
+       /* remove the block from a freelist */
+       dlist_delete(&block->node);
+
+       /*
+        * See if we need to update the minFreeChunks field for the slab - we
+        * only need to do that if the block previously had exactly minFreeChunks
+        * free chunks (i.e. we just freed a chunk from one of the fullest
+        * blocks). In that case, check whether the original freelist still
+        * contains blocks; if so, keep the current value, otherwise increment
+        * it by one (the freed-from block is now the one with the minimum
+        * number of free chunks).
+        *
+        * The one exception is when the block becomes completely free - in that
+        * case we will free it, so we can't use it for minFreeChunks. It
+        * however means there are no more blocks with free chunks.
+        */
+       if (slab->minFreeChunks == (block->nfree - 1))
+       {
+               /* Have we removed the last block from this freelist? */
+               if (dlist_is_empty(&slab->freelist[slab->minFreeChunks]))
+               {
+                       /* but if we made the block entirely free, we'll free it */
+                       if (block->nfree == slab->chunksPerBlock)
+                               slab->minFreeChunks = 0;
+                       else
+                               slab->minFreeChunks++;
+               }
+       }
+
+       /* If the block is now completely empty, free it. */
+       if (block->nfree == slab->chunksPerBlock)
+       {
+               free(block);
+               slab->nblocks--;
+       }
+       else
+               dlist_push_head(&slab->freelist[block->nfree], &block->node);
+
+       Assert(slab->nblocks >= 0);
+}
+
+/*
+ * SlabRealloc
+ *             As Slab is designed for allocating equally-sized chunks of memory, it
+ *             can't really do an actual realloc.
+ *
+ * We try to be gentle and allow calls with exactly the same size, as then we
+ * can simply return the same chunk. When the size differs, we fail with
+ * elog(ERROR).
+ *
+ * We could even support cases with (size < chunkSize). That however seems
+ * rather pointless - Slab is meant for chunks of constant size, and moreover
+ * realloc is usually used to enlarge the chunk.
+ */
+static void *
+SlabRealloc(MemoryContext context, void *pointer, Size size)
+{
+       SlabContext *slab = castNode(SlabContext, context);
+
+       Assert(slab);
+
+       /* can't do actual realloc with slab, but let's try to be gentle */
+       if (size == slab->chunkSize)
+               return pointer;
+
+       elog(ERROR, "slab allocator does not support realloc()");
+
+       return NULL;                            /* keep compiler quiet */
+}
+
+/*
+ * SlabGetChunkSpace
+ *             Given a currently-allocated chunk, determine the total space
+ *             it occupies (including all memory-allocation overhead).
+ */
+static Size
+SlabGetChunkSpace(MemoryContext context, void *pointer)
+{
+       SlabChunk  *chunk = SlabPointerGetChunk(pointer);
+
+       return chunk->header.size + SLAB_CHUNKHDRSZ;
+}
+
+/*
+ * SlabIsEmpty
+ *             Is a Slab context empty of any allocated space?
+ */
+static bool
+SlabIsEmpty(MemoryContext context)
+{
+       SlabContext *slab = castNode(SlabContext, context);
+
+       Assert(slab);
+
+       return (slab->nblocks == 0);
+}
+
+/*
+ * SlabStats
+ *             Compute stats about memory consumption of a Slab context.
+ *
+ * level: recursion level (0 at top level); used for print indentation.
+ * print: true to print stats to stderr.
+ * totals: if not NULL, add stats about this Slab into *totals.
+ */
+static void
+SlabStats(MemoryContext context, int level, bool print,
+                 MemoryContextCounters *totals)
+{
+       SlabContext *slab = castNode(SlabContext, context);
+       Size            nblocks = 0;
+       Size            freechunks = 0;
+       Size            totalspace = 0;
+       Size            freespace = 0;
+       int                     i;
+
+       Assert(slab);
+
+       for (i = 0; i <= slab->chunksPerBlock; i++)
+       {
+               dlist_iter      iter;
+
+               dlist_foreach(iter, &slab->freelist[i])
+               {
+                       SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
+
+                       nblocks++;
+                       totalspace += slab->blockSize;
+                       freespace += slab->fullChunkSize * block->nfree;
+                       freechunks += block->nfree;
+               }
+       }
+
+       if (print)
+       {
+               for (i = 0; i < level; i++)
+                       fprintf(stderr, "  ");
+               fprintf(stderr,
+                               "Slab: %s: %zu total in %zu blocks; %zu free (%zu chunks); %zu used\n",
+                               slab->header.name, totalspace, nblocks, freespace, freechunks,
+                               totalspace - freespace);
+       }
+
+       if (totals)
+       {
+               totals->nblocks += nblocks;
+               totals->freechunks += freechunks;
+               totals->totalspace += totalspace;
+               totals->freespace += freespace;
+       }
+}
+
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+/*
+ * SlabCheck
+ *             Walk through chunks and check consistency of memory.
+ *
+ * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
+ * find yourself in an infinite loop when trouble occurs, because this
+ * routine will be entered again when elog cleanup tries to release memory!
+ */
+static void
+SlabCheck(MemoryContext context)
+{
+       int                     i;
+       SlabContext *slab = castNode(SlabContext, context);
+       char       *name = slab->header.name;
+       bool       *freechunks;
+
+       Assert(slab);
+       Assert(slab->chunksPerBlock > 0);
+
+       /* bitmap of free chunks on a block */
+       freechunks = palloc(slab->chunksPerBlock * sizeof(bool));
+
+       /* walk all the freelists */
+       for (i = 0; i <= slab->chunksPerBlock; i++)
+       {
+               int                     j,
+                                       nfree;
+               dlist_iter      iter;
+
+               /* walk all blocks on this freelist */
+               dlist_foreach(iter, &slab->freelist[i])
+               {
+                       int                     idx;
+                       SlabBlock  *block = dlist_container(SlabBlock, node, iter.cur);
+
+                       /*
+                        * Make sure the number of free chunks (in the block header)
+                        * matches position in the freelist.
+                        */
+                       if (block->nfree != i)
+                               elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match freelist %d",
+                                        name, block->nfree, block, i);
+
+                       /* reset the bitmap of free chunks for this block */
+                       memset(freechunks, 0, (slab->chunksPerBlock * sizeof(bool)));
+                       idx = block->firstFreeChunk;
+
+                       /*
+                        * Now walk through the chunks, count the free ones and also
+                        * perform some additional checks for the used ones. As the chunk
+                        * freelist is stored within the chunks themselves, we have to
+                        * walk through the chunks and construct our own bitmap.
+                        */
+
+                       nfree = 0;
+                       while (idx < slab->chunksPerBlock)
+                       {
+                               SlabChunk  *chunk;
+
+                               /* count the chunk as free, add it to the bitmap */
+                               nfree++;
+                               freechunks[idx] = true;
+
+                               /* read index of the next free chunk */
+                               chunk = SlabBlockGetChunk(slab, block, idx);
+                               idx = *(int32 *) SlabChunkGetPointer(chunk);
+                       }
+
+                       for (j = 0; j < slab->chunksPerBlock; j++)
+                       {
+                               /* a zero in the bitmap means the chunk is in use */
+                               if (!freechunks[j])
+                               {
+                                       SlabChunk  *chunk = SlabBlockGetChunk(slab, block, j);
+
+                                       /* temporarily mark requested_size readable for the checks */
+                                       VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+                                                                          sizeof(chunk->header.requested_size));
+
+                                       /* chunks have both block and slab pointers, so check both */
+                                       if (chunk->block != block)
+                                               elog(WARNING, "problem in slab %s: bogus block link in block %p, chunk %p",
+                                                        name, block, chunk);
+
+                                       if (chunk->header.context != (MemoryContext) slab)
+                                               elog(WARNING, "problem in slab %s: bogus slab link in block %p, chunk %p",
+                                                        name, block, chunk);
+
+                                       /* now make sure the chunk size is correct */
+                                       if (chunk->header.size != MAXALIGN(slab->chunkSize))
+                                               elog(WARNING, "problem in slab %s: bogus chunk size in block %p, chunk %p",
+                                                        name, block, chunk);
+
+                                       /* now make sure the requested size is correct */
+                                       if (chunk->header.requested_size != slab->chunkSize)
+                                               elog(WARNING, "problem in slab %s: bogus chunk requested size in block %p, chunk %p",
+                                                        name, block, chunk);
+
+                                       /* there might be a sentinel (thanks to alignment) */
+                                       if (chunk->header.requested_size < chunk->header.size &&
+                                               !sentinel_ok(chunk, SLAB_CHUNKHDRSZ + chunk->header.requested_size))
+                                               elog(WARNING, "problem in slab %s: detected write past chunk end in block %p, chunk %p",
+                                                        name, block, chunk);
+
+                                       /* restore the no-access state of requested_size */
+                                       VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+                                                                          sizeof(chunk->header.requested_size));
+                               }
+                       }
+
+                       /*
+                        * Make sure we got the expected number of free chunks (as tracked
+                        * in the block header).
+                        */
+                       if (nfree != block->nfree)
+                               elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match bitmap %d",
+                                        name, block->nfree, block, nfree);
+               }
+       }
+
+       pfree(freechunks);
+}
+
+#endif   /* MEMORY_CONTEXT_CHECKING */
index e487d172fcd083e206553529d254b106e62c939f..fe6bc903b36bc903e5d3b9fcc07b3c31c8ae6f64 100644 (file)
@@ -96,6 +96,6 @@ typedef struct MemoryContextData
  */
 #define MemoryContextIsValid(context) \
        ((context) != NULL && \
-        (IsA((context), AllocSetContext)))
+        (IsA((context), AllocSetContext) || IsA((context), SlabContext)))
 
 #endif   /* MEMNODES_H */
index 95dd8baadd4a8466b9e3be674ea8d337f401e4c2..28aca928a85c972230bc509bbb1e7527a9c95e5f 100644 (file)
@@ -278,6 +278,7 @@ typedef enum NodeTag
         */
        T_MemoryContext,
        T_AllocSetContext,
+       T_SlabContext,
 
        /*
         * TAGS FOR VALUE NODES (value.h)
index 1d1035e374bbd164f45b49f53576eef08147b7b7..5223a4da3952f1c1851e69b7f47430f4c5b33e91 100644 (file)
@@ -135,6 +135,12 @@ extern MemoryContext AllocSetContextCreate(MemoryContext parent,
                                          Size initBlockSize,
                                          Size maxBlockSize);
 
+/* slab.c */
+extern MemoryContext SlabContextCreate(MemoryContext parent,
+                                 const char *name,
+                                 Size blockSize,
+                                 Size chunkSize);
+
 /*
  * Recommended default alloc parameters, suitable for "ordinary" contexts
  * that might hold quite a lot of data.
@@ -171,4 +177,7 @@ extern MemoryContext AllocSetContextCreate(MemoryContext parent,
  */
 #define ALLOCSET_SEPARATE_THRESHOLD  8192
 
+#define SLAB_DEFAULT_BLOCK_SIZE                (8 * 1024)
+#define SLAB_LARGE_BLOCK_SIZE          (8 * 1024 * 1024)
+
 #endif   /* MEMUTILS_H */
index 9f876ae264f5afbd36d182eeecc3ce0e0a3daa22..1fd7ec4256ae6bad400492513cac5be41f8a84d2 100644 (file)
@@ -1941,6 +1941,9 @@ SimpleStringList
 SimpleStringListCell
 SingleBoundSortItem
 Size
+SlabBlock
+SlabContext
+SlabChunk
 SlabSlot
 SlotNumber
 SlruCtl