granicus.if.org Git - postgresql/commitdiff
Generational memory allocator
author    Simon Riggs <simon@2ndQuadrant.com>
          Wed, 22 Nov 2017 18:45:07 +0000 (05:45 +1100)
committer Simon Riggs <simon@2ndQuadrant.com>
          Wed, 22 Nov 2017 18:45:07 +0000 (05:45 +1100)
Add a new style of memory allocator, known as Generational,
appropriate for use in cases where memory is allocated
and then freed in roughly oldest-first order (FIFO).

Use new allocator for logical decoding’s reorderbuffer
to significantly reduce memory usage and improve performance.

Author: Tomas Vondra
Reviewed-by: Simon Riggs
src/backend/replication/logical/reorderbuffer.c
src/backend/utils/mmgr/Makefile
src/backend/utils/mmgr/README
src/backend/utils/mmgr/generation.c [new file with mode: 0644]
src/include/nodes/memnodes.h
src/include/nodes/nodes.h
src/include/replication/reorderbuffer.h
src/include/utils/memutils.h
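
To illustrate the intended usage pattern, a minimal sketch of a caller
driving the new context type (not part of the commit; the parent context,
name and request size here are illustrative, and SLAB_LARGE_BLOCK_SIZE is
the block size reorderbuffer.c passes below):

    MemoryContext gen_ctx;
    char       *buf;

    /* create a generational context as a child of an existing context */
    gen_ctx = GenerationContextCreate(TopMemoryContext,
                                      "example",
                                      SLAB_LARGE_BLOCK_SIZE);

    /* FIFO-ish usage: allocate, consume and free in roughly the same order */
    buf = MemoryContextAlloc(gen_ctx, 1024);
    /* ... fill and process buf ... */
    pfree(buf);                 /* the block goes back to malloc() once all
                                 * of its chunks have been freed */

    MemoryContextDelete(gen_ctx);   /* or drop the context and all its memory */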

index 0f607bab7030a93e6efc709a82a216e616ab85a6..dc0ad5b0e72557cc5d91474c02cc4a887fd3c8ec 100644 (file)
  *       transaction there will be no other data carrying records between a row's
  *       toast chunks and the row data itself. See ReorderBufferToast* for
  *       details.
+ *
+ *       ReorderBuffer uses two special memory context types - SlabContext for
+ *       allocations of fixed-length structures (changes and transactions), and
+ *       GenerationContext for the variable-length transaction data (allocated
+ *       and freed in groups with similar lifespan).
+ *
  * -------------------------------------------------------------------------
  */
 #include "postgres.h"
@@ -150,15 +156,6 @@ typedef struct ReorderBufferDiskChange
  */
 static const Size max_changes_in_memory = 4096;
 
-/*
- * We use a very simple form of a slab allocator for frequently allocated
- * objects, simply keeping a fixed number in a linked list when unused,
- * instead pfree()ing them. Without that in many workloads aset.c becomes a
- * major bottleneck, especially when spilling to disk while decoding batch
- * workloads.
- */
-static const Size max_cached_tuplebufs = 4096 * 2;     /* ~8MB */
-
 /* ---------------------------------------
  * primary reorderbuffer support routines
  * ---------------------------------------
@@ -248,6 +245,10 @@ ReorderBufferAllocate(void)
                                                                                        SLAB_DEFAULT_BLOCK_SIZE,
                                                                                        sizeof(ReorderBufferTXN));
 
+       buffer->tup_context = GenerationContextCreate(new_ctx,
+                                                                                  "Tuples",
+                                                                                  SLAB_LARGE_BLOCK_SIZE);
+
        hash_ctl.keysize = sizeof(TransactionId);
        hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
        hash_ctl.hcxt = buffer->context;
@@ -258,15 +259,12 @@ ReorderBufferAllocate(void)
        buffer->by_txn_last_xid = InvalidTransactionId;
        buffer->by_txn_last_txn = NULL;
 
-       buffer->nr_cached_tuplebufs = 0;
-
        buffer->outbuf = NULL;
        buffer->outbufsize = 0;
 
        buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
 
        dlist_init(&buffer->toplevel_by_lsn);
-       slist_init(&buffer->cached_tuplebufs);
 
        return buffer;
 }
@@ -419,42 +417,12 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 
        alloc_len = tuple_len + SizeofHeapTupleHeader;
 
-       /*
-        * Most tuples are below MaxHeapTupleSize, so we use a slab allocator for
-        * those. Thus always allocate at least MaxHeapTupleSize. Note that tuples
-        * generated for oldtuples can be bigger, as they don't have out-of-line
-        * toast columns.
-        */
-       if (alloc_len < MaxHeapTupleSize)
-               alloc_len = MaxHeapTupleSize;
-
-
-       /* if small enough, check the slab cache */
-       if (alloc_len <= MaxHeapTupleSize && rb->nr_cached_tuplebufs)
-       {
-               rb->nr_cached_tuplebufs--;
-               tuple = slist_container(ReorderBufferTupleBuf, node,
-                                                               slist_pop_head_node(&rb->cached_tuplebufs));
-               Assert(tuple->alloc_tuple_size == MaxHeapTupleSize);
-#ifdef USE_ASSERT_CHECKING
-               memset(&tuple->tuple, 0xa9, sizeof(HeapTupleData));
-               VALGRIND_MAKE_MEM_UNDEFINED(&tuple->tuple, sizeof(HeapTupleData));
-#endif
-               tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-#ifdef USE_ASSERT_CHECKING
-               memset(tuple->tuple.t_data, 0xa8, tuple->alloc_tuple_size);
-               VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-#endif
-       }
-       else
-       {
-               tuple = (ReorderBufferTupleBuf *)
-                       MemoryContextAlloc(rb->context,
-                                                          sizeof(ReorderBufferTupleBuf) +
-                                                          MAXIMUM_ALIGNOF + alloc_len);
-               tuple->alloc_tuple_size = alloc_len;
-               tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-       }
+       tuple = (ReorderBufferTupleBuf *)
+               MemoryContextAlloc(rb->tup_context,
+                                                  sizeof(ReorderBufferTupleBuf) +
+                                                  MAXIMUM_ALIGNOF + alloc_len);
+       tuple->alloc_tuple_size = alloc_len;
+       tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
 
        return tuple;
 }
@@ -468,21 +436,7 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 void
 ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple)
 {
-       /* check whether to put into the slab cache, oversized tuples never are */
-       if (tuple->alloc_tuple_size == MaxHeapTupleSize &&
-               rb->nr_cached_tuplebufs < max_cached_tuplebufs)
-       {
-               rb->nr_cached_tuplebufs++;
-               slist_push_head(&rb->cached_tuplebufs, &tuple->node);
-               VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-               VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf));
-               VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node));
-               VALGRIND_MAKE_MEM_DEFINED(&tuple->alloc_tuple_size, sizeof(tuple->alloc_tuple_size));
-       }
-       else
-       {
-               pfree(tuple);
-       }
+       pfree(tuple);
 }
 
 /*
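
With the hand-rolled slab cache gone, a caller's tuple-buffer round trip
reduces to plain context allocation. A hedged sketch of that round trip
(the caller, tuple length and processing step are assumed, not part of this
patch):

    ReorderBuffer *rb = ReorderBufferAllocate();
    Size        tuple_len = 64;     /* illustrative */
    ReorderBufferTupleBuf *tup;

    tup = ReorderBufferGetTupleBuf(rb, tuple_len);  /* alloc from tup_context */
    /* ... decode the change into tup->tuple ... */
    ReorderBufferReturnTupleBuf(rb, tup);           /* now simply pfree(tup) */

    ReorderBufferFree(rb);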
index cd0e803253b7895e73a4e9b3d26fe233ea9477cb..f644c40c46757c4d466c34fe9bc2b8de31afc9b2 100644 (file)
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
+OBJS = aset.o dsa.o freepage.o generation.o mcxt.o memdebug.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk
index 0ab81bd80ffc451238020343242c056966ce5987..296fa198dc9a8bbb48ab9f157cc120760784dc76 100644 (file)
@@ -431,3 +431,26 @@ will not allocate very much space per tuple cycle.  To make this usage
 pattern cheap, the first block allocated in a context is not given
 back to malloc() during reset, but just cleared.  This avoids malloc
 thrashing.
+
+
+Alternative Memory Context Implementations
+------------------------------------------
+
+aset.c is our default general-purpose implementation, working fine
+in most situations. We also have two implementations optimized for
+special use cases, providing either better performance or lower memory
+usage compared to aset.c (or both).
+
+* slab.c (SlabContext) is designed for allocations of fixed-length
+  chunks, and does not allow allocations of chunks with different size.
+
+* generation.c (GenerationContext) is designed for cases when chunks
+  are allocated in groups with similar lifespan (generations), or
+  roughly in FIFO order.
+
+Both memory contexts aim to free memory back to the operating system
+(unlike aset.c, which keeps the freed chunks in a freelist, and only
+returns the memory when reset/deleted).
+
+These memory contexts were initially developed for ReorderBuffer, but
+may be useful elsewhere as long as the allocation patterns match.
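
As a side-by-side sketch of the two creation APIs (signatures as declared in
memutils.h below; the parent, context names and chunk size merely mirror
what reorderbuffer.c uses and are illustrative):

    /* fixed-length chunks: every allocation must be exactly chunkSize bytes */
    MemoryContext slab = SlabContextCreate(parent, "Change",
                                           SLAB_DEFAULT_BLOCK_SIZE,
                                           sizeof(ReorderBufferChange));

    /* variable-length chunks, allocated and freed in roughly FIFO order */
    MemoryContext gen = GenerationContextCreate(parent, "Tuples",
                                                SLAB_LARGE_BLOCK_SIZE);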
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
new file mode 100644 (file)
index 0000000..11a6a37
--- /dev/null
@@ -0,0 +1,768 @@
+/*-------------------------------------------------------------------------
+ *
+ * generation.c
+ *       Generational allocator definitions.
+ *
+ * Generation is a custom MemoryContext implementation designed for cases
+ * where chunks have a similar lifespan.
+ *
+ * Portions Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       src/backend/utils/mmgr/generation.c
+ *
+ *
+ *     This memory context is based on the assumption that the chunks are freed
+ *     roughly in the same order as they were allocated (FIFO), or in groups with
+ *     similar lifespan (generations - hence the name of the context). This is
+ *     typical for various queue-like use cases, e.g. when tuples are constructed,
+ *     processed and then thrown away.
+ *
+ *     The memory context uses a very simple approach to free space management.
+ *     Instead of a complex global freelist, each block tracks a number
+ *     of allocated and freed chunks. Freed chunks are not reused, and once all
+ *     chunks on a block are freed, the whole block is thrown away. When the
+ *     chunks allocated on the same block have similar lifespan, this works
+ *     very well and is very cheap.
+ *
+ *     The current implementation uses only a fixed block size - perhaps it
+ *     should adopt a min/max block size range and grow the blocks automatically.
+ *     It already uses dedicated blocks for oversized chunks.
+ *
+ *     XXX It might be possible to improve this by keeping a small freelist for
+ *     only a small number of recent blocks, but it's not clear it's worth the
+ *     additional complexity.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "lib/ilist.h"
+
+
+#define Generation_BLOCKHDRSZ  MAXALIGN(sizeof(GenerationBlock))
+#define Generation_CHUNKHDRSZ  sizeof(GenerationChunk)
+
+/* Portion of Generation_CHUNKHDRSZ examined outside generation.c. */
+#define Generation_CHUNK_PUBLIC        \
+       (offsetof(GenerationChunk, size) + sizeof(Size))
+
+/* Portion of Generation_CHUNKHDRSZ excluding trailing padding. */
+#ifdef MEMORY_CONTEXT_CHECKING
+#define Generation_CHUNK_USED  \
+       (offsetof(GenerationChunk, requested_size) + sizeof(Size))
+#else
+#define Generation_CHUNK_USED  \
+       (offsetof(GenerationChunk, size) + sizeof(Size))
+#endif
+
+typedef struct GenerationBlock GenerationBlock;        /* forward reference */
+typedef struct GenerationChunk GenerationChunk;
+
+typedef void *GenerationPointer;
+
+/*
+ * GenerationContext is a simple memory context that does not reuse freed
+ * chunks, and frees each block once all chunks allocated from it are freed.
+ */
+typedef struct GenerationContext
+{
+       MemoryContextData header;       /* Standard memory-context fields */
+
+       /* Generational context parameters */
+       Size            blockSize;              /* block size */
+
+       GenerationBlock *block;         /* current (most recently allocated) block */
+       dlist_head      blocks;                 /* list of blocks */
+
+}      GenerationContext;
+
+/*
+ * GenerationBlock
+ *             A GenerationBlock is the unit of memory that is obtained by generation.c
+ *             from malloc().  It contains one or more GenerationChunks, which are
+ *             the units requested by palloc() and freed by pfree().  GenerationChunks
+ *             cannot be returned to malloc() individually, instead pfree()
+ *             updates a free counter on a block and when all chunks on a block
+ *             are freed the whole block is returned to malloc().
+ *
+ *             GenerationBlock is the header data for a block --- the usable space
+ *             within the block begins at the next alignment boundary.
+ */
+typedef struct GenerationBlock
+{
+       dlist_node      node;                   /* doubly-linked list */
+       int                     nchunks;                /* number of chunks in the block */
+       int                     nfree;                  /* number of free chunks */
+       char       *freeptr;            /* start of free space in this block */
+       char       *endptr;                     /* end of space in this block */
+}      GenerationBlock;
+
+/*
+ * GenerationChunk
+ *             The prefix of each piece of memory in a GenerationBlock
+ */
+typedef struct GenerationChunk
+{
+       /* block owning this chunk */
+       void       *block;
+
+       /* size is always the size of the usable space in the chunk */
+       Size            size;
+#ifdef MEMORY_CONTEXT_CHECKING
+       /* when debugging memory usage, also store actual requested size */
+       /* this is zero in a free chunk */
+       Size            requested_size;
+#endif   /* MEMORY_CONTEXT_CHECKING */
+
+       GenerationContext *context;             /* owning context */
+       /* there must not be any padding to reach a MAXALIGN boundary here! */
+}      GenerationChunk;
+
+
+/*
+ * GenerationIsValid
+ *             True iff set is a valid generation set.
+ */
+#define GenerationIsValid(set) PointerIsValid(set)
+
+#define GenerationPointerGetChunk(ptr) \
+       ((GenerationChunk *)(((char *)(ptr)) - Generation_CHUNKHDRSZ))
+#define GenerationChunkGetPointer(chk) \
+       ((GenerationPointer *)(((char *)(chk)) + Generation_CHUNKHDRSZ))
+
+/*
+ * These functions implement the MemoryContext API for Generation contexts.
+ */
+static void *GenerationAlloc(MemoryContext context, Size size);
+static void GenerationFree(MemoryContext context, void *pointer);
+static void *GenerationRealloc(MemoryContext context, void *pointer, Size size);
+static void GenerationInit(MemoryContext context);
+static void GenerationReset(MemoryContext context);
+static void GenerationDelete(MemoryContext context);
+static Size GenerationGetChunkSpace(MemoryContext context, void *pointer);
+static bool GenerationIsEmpty(MemoryContext context);
+static void GenerationStats(MemoryContext context, int level, bool print,
+                MemoryContextCounters *totals);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+static void GenerationCheck(MemoryContext context);
+#endif
+
+/*
+ * This is the virtual function table for Generation contexts.
+ */
+static MemoryContextMethods GenerationMethods = {
+       GenerationAlloc,
+       GenerationFree,
+       GenerationRealloc,
+       GenerationInit,
+       GenerationReset,
+       GenerationDelete,
+       GenerationGetChunkSpace,
+       GenerationIsEmpty,
+       GenerationStats
+#ifdef MEMORY_CONTEXT_CHECKING
+       ,GenerationCheck
+#endif
+};
+
+/* ----------
+ * Debug macros
+ * ----------
+ */
+#ifdef HAVE_ALLOCINFO
+#define GenerationFreeInfo(_cxt, _chunk) \
+                       fprintf(stderr, "GenerationFree: %s: %p, %lu\n", \
+                               (_cxt)->name, (_chunk), (_chunk)->size)
+#define GenerationAllocInfo(_cxt, _chunk) \
+                       fprintf(stderr, "GenerationAlloc: %s: %p, %lu\n", \
+                               (_cxt)->name, (_chunk), (_chunk)->size)
+#else
+#define GenerationFreeInfo(_cxt, _chunk)
+#define GenerationAllocInfo(_cxt, _chunk)
+#endif
+
+
+/*
+ * Public routines
+ */
+
+
+/*
+ * GenerationContextCreate
+ *             Create a new Generation context.
+ */
+MemoryContext
+GenerationContextCreate(MemoryContext parent,
+                                const char *name,
+                                Size blockSize)
+{
+       GenerationContext  *set;
+
+       StaticAssertStmt(offsetof(GenerationChunk, context) + sizeof(MemoryContext) ==
+                                        MAXALIGN(sizeof(GenerationChunk)),
+                                        "padding calculation in GenerationChunk is wrong");
+
+       /*
+        * First, validate allocation parameters.  (If we're going to throw an
+        * error, we should do so before the context is created, not after.)  We
+        * somewhat arbitrarily enforce a minimum 1K block size, mostly because
+        * that's what AllocSet does.
+        */
+       if (blockSize != MAXALIGN(blockSize) ||
+               blockSize < 1024 ||
+               !AllocHugeSizeIsValid(blockSize))
+               elog(ERROR, "invalid blockSize for memory context: %zu",
+                        blockSize);
+
+       /* Do the type-independent part of context creation */
+       set = (GenerationContext *) MemoryContextCreate(T_GenerationContext,
+                                                                       sizeof(GenerationContext),
+                                                                       &GenerationMethods,
+                                                                       parent,
+                                                                       name);
+
+       set->blockSize = blockSize;
+       set->block = NULL;
+
+       return (MemoryContext) set;
+}
+
+/*
+ * GenerationInit
+ *             Context-type-specific initialization routine.
+ */
+static void
+GenerationInit(MemoryContext context)
+{
+       GenerationContext  *set = (GenerationContext *) context;
+
+       dlist_init(&set->blocks);
+}
+
+/*
+ * GenerationReset
+ *             Frees all memory which is allocated in the given set.
+ *
+ * The code simply frees all the blocks in the context - we don't keep any
+ * keeper blocks or anything like that.
+ */
+static void
+GenerationReset(MemoryContext context)
+{
+       GenerationContext  *set = (GenerationContext *) context;
+       dlist_mutable_iter miter;
+
+       AssertArg(GenerationIsValid(set));
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       /* Check for corruption and leaks before freeing */
+       GenerationCheck(context);
+#endif
+
+       dlist_foreach_modify(miter, &set->blocks)
+       {
+               GenerationBlock *block = dlist_container(GenerationBlock, node, miter.cur);
+
+               dlist_delete(miter.cur);
+
+               /* Normal case, release the block */
+#ifdef CLOBBER_FREED_MEMORY
+               /* use the block's actual size; oversized blocks differ from blockSize */
+               wipe_mem(block, block->endptr - (char *) block);
+#endif
+
+               free(block);
+       }
+
+       set->block = NULL;
+
+       Assert(dlist_is_empty(&set->blocks));
+}
+
+/*
+ * GenerationDelete
+ *             Frees all memory which is allocated in the given set, in preparation
+ *             for deletion of the set. We simply call GenerationReset() which does all the
+ *             dirty work.
+ */
+static void
+GenerationDelete(MemoryContext context)
+{
+       /* just reset, which releases all the blocks */
+       GenerationReset(context);
+}
+
+/*
+ * GenerationAlloc
+ *             Returns pointer to allocated memory of given size or NULL if
+ *             request could not be completed; memory is added to the set.
+ *
+ * No request may exceed:
+ *             MAXALIGN_DOWN(SIZE_MAX) - Generation_BLOCKHDRSZ - Generation_CHUNKHDRSZ
+ * All callers use a much-lower limit.
+ */
+static void *
+GenerationAlloc(MemoryContext context, Size size)
+{
+       GenerationContext  *set = (GenerationContext *) context;
+       GenerationBlock    *block;
+       GenerationChunk    *chunk;
+
+       Size            chunk_size = MAXALIGN(size);
+
+       /* is it an over-sized chunk? if yes, allocate special block */
+       if (chunk_size > set->blockSize / 8)
+       {
+               Size            blksize = chunk_size + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ;
+
+               block = (GenerationBlock *) malloc(blksize);
+               if (block == NULL)
+                       return NULL;
+
+               /* block with a single (used) chunk */
+               block->nchunks = 1;
+               block->nfree = 0;
+
+               /* the block is completely full */
+               block->freeptr = block->endptr = ((char *) block) + blksize;
+
+               chunk = (GenerationChunk *) (((char *) block) + Generation_BLOCKHDRSZ);
+               chunk->block = block;
+               chunk->context = set;
+               chunk->size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+               /* Valgrind: Will be made NOACCESS below. */
+               chunk->requested_size = size;
+               /* set mark to catch clobber of "unused" space */
+               if (size < chunk_size)
+                       set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+               /* fill the allocated space with junk */
+               randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+               /* add the block to the list of allocated blocks */
+               dlist_push_head(&set->blocks, &block->node);
+
+               GenerationAllocInfo(set, chunk);
+
+               /*
+                * Chunk header public fields remain DEFINED.  The requested
+                * allocation itself can be NOACCESS or UNDEFINED; our caller will
+                * soon make it UNDEFINED.  Make extra space at the end of the chunk,
+                * if any, NOACCESS.
+                */
+               VALGRIND_MAKE_MEM_NOACCESS((char *) chunk + Generation_CHUNK_PUBLIC,
+                                                        chunk_size + Generation_CHUNKHDRSZ - Generation_CHUNK_PUBLIC);
+
+               return GenerationChunkGetPointer(chunk);
+       }
+
+       /*
+        * Not an over-sized chunk. Is there enough space on the current block? If
+        * not, allocate a new "regular" block.
+        */
+       block = set->block;
+
+       if ((block == NULL) ||
+               (block->endptr - block->freeptr) < Generation_CHUNKHDRSZ + chunk_size)
+       {
+               Size            blksize = set->blockSize;
+
+               block = (GenerationBlock *) malloc(blksize);
+
+               if (block == NULL)
+                       return NULL;
+
+               block->nchunks = 0;
+               block->nfree = 0;
+
+               block->freeptr = ((char *) block) + Generation_BLOCKHDRSZ;
+               block->endptr = ((char *) block) + blksize;
+
+               /* Mark unallocated space NOACCESS. */
+               VALGRIND_MAKE_MEM_NOACCESS(block->freeptr,
+                                                                  blksize - Generation_BLOCKHDRSZ);
+
+               /* add it to the doubly-linked list of blocks */
+               dlist_push_head(&set->blocks, &block->node);
+
+               /* and also use it as the current allocation block */
+               set->block = block;
+       }
+
+       /* we're supposed to have a block with enough free space now */
+       Assert(block != NULL);
+       Assert((block->endptr - block->freeptr) >= Generation_CHUNKHDRSZ + chunk_size);
+
+       chunk = (GenerationChunk *) block->freeptr;
+
+       block->nchunks += 1;
+       block->freeptr += (Generation_CHUNKHDRSZ + chunk_size);
+
+       chunk->block = block;
+
+       chunk->context = set;
+       chunk->size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       /* Valgrind: Will be made NOACCESS below. */
+       chunk->requested_size = size;
+       VALGRIND_MAKE_MEM_NOACCESS(&chunk->requested_size,
+                                                          sizeof(chunk->requested_size));
+       /* set mark to catch clobber of "unused" space */
+       if (size < chunk->size)
+               set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+       /* fill the allocated space with junk */
+       randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+       GenerationAllocInfo(set, chunk);
+       return GenerationChunkGetPointer(chunk);
+}
+
+/*
+ * GenerationFree
+ *             Update the free-chunk count on the block, and if all chunks on the
+ *             block are now freed, discard the whole block.
+ */
+static void
+GenerationFree(MemoryContext context, void *pointer)
+{
+       GenerationContext  *set = (GenerationContext *) context;
+       GenerationChunk    *chunk = GenerationPointerGetChunk(pointer);
+       GenerationBlock    *block = chunk->block;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       VALGRIND_MAKE_MEM_DEFINED(&chunk->requested_size,
+                                                         sizeof(chunk->requested_size));
+       /* Test for someone scribbling on unused space in chunk */
+       if (chunk->requested_size < chunk->size)
+               if (!sentinel_ok(pointer, chunk->requested_size))
+                       elog(WARNING, "detected write past chunk end in %s %p",
+                                ((MemoryContext)set)->name, chunk);
+#endif
+
+#ifdef CLOBBER_FREED_MEMORY
+       wipe_mem(pointer, chunk->size);
+#endif
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       /* Reset requested_size to 0 to mark the chunk as freed */
+       chunk->requested_size = 0;
+#endif
+
+       block->nfree += 1;
+
+       Assert(block->nchunks > 0);
+       Assert(block->nfree <= block->nchunks);
+
+       /* If there are still allocated chunks on the block, we're done. */
+       if (block->nfree < block->nchunks)
+               return;
+
+       /*
+        * The block is empty, so let's get rid of it. First remove it from the
+        * list of blocks, then return it to malloc().
+        */
+       dlist_delete(&block->node);
+
+       /* Also make sure the block is not marked as the current block. */
+       if (set->block == block)
+               set->block = NULL;
+
+       free(block);
+}
+
+/*
+ * GenerationRealloc
+ *             When handling repalloc, we simply allocate a new chunk, copy the data
+ *             and discard the old one. The only exception is when the new size fits
+ *             into the old chunk - in that case we just update the chunk header.
+ */
+static void *
+GenerationRealloc(MemoryContext context, void *pointer, Size size)
+{
+       GenerationContext  *set = (GenerationContext *) context;
+       GenerationChunk    *chunk = GenerationPointerGetChunk(pointer);
+       GenerationPointer       newPointer;
+       Size            oldsize = chunk->size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+       VALGRIND_MAKE_MEM_DEFINED(&chunk->requested_size,
+                                                         sizeof(chunk->requested_size));
+       /* Test for someone scribbling on unused space in chunk */
+       if (chunk->requested_size < oldsize)
+               if (!sentinel_ok(pointer, chunk->requested_size))
+                       elog(WARNING, "detected write past chunk end in %s %p",
+                                ((MemoryContext)set)->name, chunk);
+#endif
+
+       /*
+        * Maybe the allocated area already is >= the new size.  (In particular,
+        * we always fall out here if the requested size is a decrease.)
+        *
+        * This memory context does not use power-of-2 chunk sizing and instead
+        * carves the chunks to be as small as possible, so most repalloc() calls
+        * will end up in the palloc/memcpy/pfree branch below.
+        *
+        * XXX Perhaps we should annotate this condition with unlikely()?
+        */
+       if (oldsize >= size)
+       {
+#ifdef MEMORY_CONTEXT_CHECKING
+               Size            oldrequest = chunk->requested_size;
+
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+               /* We can only fill the extra space if we know the prior request */
+               if (size > oldrequest)
+                       randomize_mem((char *) pointer + oldrequest,
+                                                 size - oldrequest);
+#endif
+
+               chunk->requested_size = size;
+               VALGRIND_MAKE_MEM_NOACCESS(&chunk->requested_size,
+                                                                  sizeof(chunk->requested_size));
+
+               /*
+                * If this is an increase, mark any newly-available part UNDEFINED.
+                * Otherwise, mark the obsolete part NOACCESS.
+                */
+               if (size > oldrequest)
+                       VALGRIND_MAKE_MEM_UNDEFINED((char *) pointer + oldrequest,
+                                                                               size - oldrequest);
+               else
+                       VALGRIND_MAKE_MEM_NOACCESS((char *) pointer + size,
+                                                                          oldsize - size);
+
+               /* set mark to catch clobber of "unused" space */
+               if (size < oldsize)
+                       set_sentinel(pointer, size);
+#else                                                  /* !MEMORY_CONTEXT_CHECKING */
+
+               /*
+                * We don't have the information to determine whether we're growing
+                * the old request or shrinking it, so we conservatively mark the
+                * entire new allocation DEFINED.
+                */
+               VALGRIND_MAKE_MEM_NOACCESS(pointer, oldsize);
+               VALGRIND_MAKE_MEM_DEFINED(pointer, size);
+#endif
+
+               return pointer;
+       }
+
+       /* allocate new chunk */
+       newPointer = GenerationAlloc((MemoryContext) set, size);
+
+       /* leave immediately if request was not completed */
+       if (newPointer == NULL)
+               return NULL;
+
+       /*
+        * GenerationAlloc() just made the region NOACCESS.  Change it to UNDEFINED
+        * for the moment; memcpy() will then transfer definedness from the old
+        * allocation to the new.  If we know the old allocation, copy just that
+        * much.  Otherwise, make the entire old chunk defined to avoid errors as
+        * we copy the currently-NOACCESS trailing bytes.
+        */
+       VALGRIND_MAKE_MEM_UNDEFINED(newPointer, size);
+#ifdef MEMORY_CONTEXT_CHECKING
+       oldsize = chunk->requested_size;
+#else
+       VALGRIND_MAKE_MEM_DEFINED(pointer, oldsize);
+#endif
+
+       /* transfer existing data (certain to fit) */
+       memcpy(newPointer, pointer, oldsize);
+
+       /* free old chunk */
+       GenerationFree((MemoryContext) set, pointer);
+
+       return newPointer;
+}
+
+/*
+ * GenerationGetChunkSpace
+ *             Given a currently-allocated chunk, determine the total space
+ *             it occupies (including all memory-allocation overhead).
+ */
+static Size
+GenerationGetChunkSpace(MemoryContext context, void *pointer)
+{
+       GenerationChunk *chunk = GenerationPointerGetChunk(pointer);
+
+       return chunk->size + Generation_CHUNKHDRSZ;
+}
+
+/*
+ * GenerationIsEmpty
+ *             Is a Generation context empty of any allocated space?
+ */
+static bool
+GenerationIsEmpty(MemoryContext context)
+{
+       GenerationContext  *set = (GenerationContext *) context;
+
+       return dlist_is_empty(&set->blocks);
+}
+
+/*
+ * GenerationStats
+ *             Compute stats about memory consumption of a Generation context.
+ *
+ * level: recursion level (0 at top level); used for print indentation.
+ * print: true to print stats to stderr.
+ * totals: if not NULL, add stats about this Generation into *totals.
+ *
+ * XXX freespace only accounts for empty space at the end of the block, not
+ * space of freed chunks (which is unknown).
+ */
+static void
+GenerationStats(MemoryContext context, int level, bool print,
+                MemoryContextCounters *totals)
+{
+       GenerationContext  *set = (GenerationContext *) context;
+
+       Size            nblocks = 0;
+       Size            nchunks = 0;
+       Size            nfreechunks = 0;
+       Size            totalspace = 0;
+       Size            freespace = 0;
+
+       dlist_iter      iter;
+
+       dlist_foreach(iter, &set->blocks)
+       {
+               GenerationBlock *block = dlist_container(GenerationBlock, node, iter.cur);
+
+               nblocks++;
+               nchunks += block->nchunks;
+               nfreechunks += block->nfree;
+               totalspace += set->blockSize;
+               freespace += (block->endptr - block->freeptr);
+       }
+
+       if (print)
+       {
+               int                     i;
+
+               for (i = 0; i < level; i++)
+                       fprintf(stderr, "  ");
+               fprintf(stderr,
+                       "Generation: %s: %zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used\n",
+                               ((MemoryContext)set)->name, totalspace, nblocks, nchunks, freespace,
+                               nfreechunks, totalspace - freespace);
+       }
+
+       if (totals)
+       {
+               totals->nblocks += nblocks;
+               totals->freechunks += nfreechunks;
+               totals->totalspace += totalspace;
+               totals->freespace += freespace;
+       }
+}
+
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+/*
+ * GenerationCheck
+ *             Walk through chunks and check consistency of memory.
+ *
+ * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
+ * find yourself in an infinite loop when trouble occurs, because this
+ * routine will be entered again when elog cleanup tries to release memory!
+ */
+static void
+GenerationCheck(MemoryContext context)
+{
+       GenerationContext  *gen = (GenerationContext *) context;
+       char       *name = context->name;
+       dlist_iter      iter;
+
+       /* walk all blocks in this context */
+       dlist_foreach(iter, &gen->blocks)
+       {
+               int                     nfree,
+                                       nchunks;
+               char       *ptr;
+               GenerationBlock *block = dlist_container(GenerationBlock, node, iter.cur);
+
+               /* We can't free more chunks than allocated. */
+               if (block->nfree > block->nchunks)
+                       elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p exceeds %d allocated",
+                                name, block->nfree, block, block->nchunks);
+
+               /* Now walk through the chunks and count them. */
+               nfree = 0;
+               nchunks = 0;
+               ptr = ((char *) block) + Generation_BLOCKHDRSZ;
+
+               while (ptr < block->freeptr)
+               {
+                       GenerationChunk *chunk = (GenerationChunk *) ptr;
+
+                       /* move to the next chunk */
+                       ptr += (chunk->size + Generation_CHUNKHDRSZ);
+
+                       /* chunks have both block and context pointers, so check both */
+                       if (chunk->block != block)
+                               elog(WARNING, "problem in Generation %s: bogus block link in block %p, chunk %p",
+                                        name, block, chunk);
+
+                       if (chunk->context != gen)
+                               elog(WARNING, "problem in Generation %s: bogus context link in block %p, chunk %p",
+                                        name, block, chunk);
+
+                       nchunks += 1;
+
+                       /* if requested_size==0, the chunk was freed */
+                       if (chunk->requested_size > 0)
+                       {
+                               /* if the chunk was not freed, we can trigger valgrind checks */
+                               VALGRIND_MAKE_MEM_DEFINED(&chunk->requested_size,
+                                                                          sizeof(chunk->requested_size));
+
+                               /* we're in a no-freelist branch */
+                               VALGRIND_MAKE_MEM_NOACCESS(&chunk->requested_size,
+                                                                          sizeof(chunk->requested_size));
+
+                               /* now make sure the chunk size is correct */
+                               if (chunk->size != MAXALIGN(chunk->requested_size))
+                                       elog(WARNING, "problem in Generation %s: bogus chunk size in block %p, chunk %p",
+                                                name, block, chunk);
+
+                       /* there might be a sentinel (thanks to alignment) */
+                               if (chunk->requested_size < chunk->size &&
+                                       !sentinel_ok(chunk, Generation_CHUNKHDRSZ + chunk->requested_size))
+                                       elog(WARNING, "problem in Generation %s: detected write past chunk end in block %p, chunk %p",
+                                                name, block, chunk);
+                       }
+                       else
+                               nfree += 1;
+               }
+
+               /*
+                * Make sure we got the expected number of allocated and free chunks
+                * (as tracked in the block header).
+                */
+               if (nchunks != block->nchunks)
+                       elog(WARNING, "problem in Generation %s: number of allocated chunks %d in block %p does not match header %d",
+                                name, nchunks, block, block->nchunks);
+
+               if (nfree != block->nfree)
+                       elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p does not match header %d",
+                                name, nfree, block, block->nfree);
+       }
+}
+
+#endif   /* MEMORY_CONTEXT_CHECKING */
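
The whole free-space scheme rests on the per-block counters that
GenerationAlloc and GenerationFree maintain. A self-contained toy model of
that accounting (plain C, not PostgreSQL code; the three-chunk block is
invented):

    #include <assert.h>
    #include <stdio.h>

    int
    main(void)
    {
        int         nchunks = 0;    /* chunks carved from the block */
        int         nfree = 0;      /* chunks pfree()d so far */

        nchunks += 3;               /* three GenerationAlloc calls fill one block */

        while (nfree < nchunks)     /* chunks retired in FIFO order */
        {
            nfree++;
            assert(nfree <= nchunks);
        }
        printf("all chunks freed -> free(block)\n");
        return 0;
    }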
index 7a0c6763dfe8e89cb822d8aa63a4dc49097334c8..e22d9fb17816215144e2a923211aad2fd82e8ce3 100644 (file)
@@ -96,6 +96,8 @@ typedef struct MemoryContextData
  */
 #define MemoryContextIsValid(context) \
        ((context) != NULL && \
-        (IsA((context), AllocSetContext) || IsA((context), SlabContext)))
+        (IsA((context), AllocSetContext) || \
+         IsA((context), SlabContext) || \
+         IsA((context), GenerationContext)))
 
 #endif                                                 /* MEMNODES_H */
index ffeeb4919b2cf1757fabde9239f717419053f75c..03dc5307e8a4e3ccb95fb58929cc77bcab263f67 100644 (file)
@@ -274,6 +274,7 @@ typedef enum NodeTag
        T_MemoryContext,
        T_AllocSetContext,
        T_SlabContext,
+       T_GenerationContext,
 
        /*
         * TAGS FOR VALUE NODES (value.h)
index 86effe106bd09a900d9f9ced1b63f2a4623461b2..b18ce5a9df543ea49d0641035074c6fdfcacddd2 100644 (file)
@@ -344,20 +344,7 @@ struct ReorderBuffer
         */
        MemoryContext change_context;
        MemoryContext txn_context;
-
-       /*
-        * Data structure slab cache.
-        *
-        * We allocate/deallocate some structures very frequently, to avoid bigger
-        * overhead we cache some unused ones here.
-        *
-        * The maximum number of cached entries is controlled by const variables
-        * on top of reorderbuffer.c
-        */
-
-       /* cached ReorderBufferTupleBufs */
-       slist_head      cached_tuplebufs;
-       Size            nr_cached_tuplebufs;
+       MemoryContext tup_context;
 
        XLogRecPtr      current_restart_decoding_lsn;
 
index 869c59dc8535bd47641b69499bab556bd780f36c..ff8e5d7d79b273fff6f2b3c11b6c646c9ffe17ae 100644 (file)
@@ -155,6 +155,11 @@ extern MemoryContext SlabContextCreate(MemoryContext parent,
                                  Size blockSize,
                                  Size chunkSize);
 
+/* generation.c */
+extern MemoryContext GenerationContextCreate(MemoryContext parent,
+                                const char *name,
+                                Size blockSize);
+
 /*
  * Recommended default alloc parameters, suitable for "ordinary" contexts
  * that might hold quite a lot of data.
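
GenerationContextCreate() validates its blockSize up front: it must be
MAXALIGN'd, at least 1K, and within the huge-allocation limit. A hedged
sketch of a conforming call (the size is invented):

    Size        blockSize = 8 * 1024;   /* illustrative; MAXALIGN'd and >= 1024 */
    MemoryContext ctx;

    /* would elog(ERROR) if blockSize were unaligned, < 1024, or too large */
    ctx = GenerationContextCreate(CurrentMemoryContext, "demo", blockSize);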