#include "storage/buf_internals.h"
-BufferDesc *BufferDescriptors;
+BufferDescPadded *BufferDescriptors;
char *BufferBlocks;
bool foundBufs,
foundDescs;
- BufferDescriptors = (BufferDesc *)
+ /* Align descriptors to a cacheline boundary. */
+ BufferDescriptors = (BufferDescPadded *) CACHELINEALIGN(
ShmemInitStruct("Buffer Descriptors",
- NBuffers * sizeof(BufferDesc), &foundDescs);
+ NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE,
+ &foundDescs));
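The extra PG_CACHE_LINE_SIZE bytes requested above are what make the CACHELINEALIGN() call safe: ShmemInitStruct() only guarantees MAXALIGN'd storage, so the macro rounds that pointer up to the next cache line boundary, and the over-allocation ensures all NBuffers padded descriptors still fit after the shift. A minimal standalone sketch of the round-up, assuming the default 128-byte PG_CACHE_LINE_SIZE from pg_config_manual.h (the names below are illustrative, not part of the patch):

    #include <stdint.h>

    #define MY_CACHE_LINE_SIZE	128		/* stand-in for PG_CACHE_LINE_SIZE */

    /* Round a pointer up to the next cache line boundary. */
    static inline char *
    my_cacheline_align(char *p)
    {
    	uintptr_t	addr = (uintptr_t) p;

    	addr = (addr + MY_CACHE_LINE_SIZE - 1) & ~(uintptr_t) (MY_CACHE_LINE_SIZE - 1);
    	return (char *) addr;
    }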
BufferBlocks = (char *)
ShmemInitStruct("Buffer Blocks",
}
else
{
- BufferDesc *buf;
int i;
- buf = BufferDescriptors;
-
/*
* Initialize all the buffer headers.
*/
- for (i = 0; i < NBuffers; buf++, i++)
+ for (i = 0; i < NBuffers; i++)
{
+ BufferDesc *buf = GetBufferDescriptor(i);
+
CLEAR_BUFFERTAG(buf->tag);
buf->flags = 0;
buf->usage_count = 0;
}
/* Correct last entry of linked list */
- BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
+ GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
}
/* Init other shared buffer-management stuff */
Size size = 0;
/* size of buffer descriptors */
- size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc)));
+ size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded)));
+ /* to allow aligning buffer descriptors */
+ size = add_size(size, PG_CACHE_LINE_SIZE);
/* size of data pages */
size = add_size(size, mul_size(NBuffers, BLCKSZ));
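For a sense of scale (a back-of-the-envelope figure, not from the patch): on a 64-bit build with the default 8192-byte BLCKSZ and shared_buffers set to 128MB, NBuffers is 16384, so the padded descriptors account for 16384 * 64 bytes = 1MB plus one PG_CACHE_LINE_SIZE of alignment slack, next to 16384 * 8192 bytes = 128MB of data pages. The padding added here is negligible relative to the buffer pool itself.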
* buffer pool, and check to see if the correct data has been loaded
* into the buffer.
*/
- buf = &BufferDescriptors[buf_id];
+ buf = GetBufferDescriptor(buf_id);
valid = PinBuffer(buf, strategy);
/* remaining code should match code at top of routine */
- buf = &BufferDescriptors[buf_id];
+ buf = GetBufferDescriptor(buf_id);
valid = PinBuffer(buf, strategy);
return;
}
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
Assert(BufferIsPinned(buffer));
/* unfortunately we can't check if the lock is held exclusively */
Assert(BufferIsPinned(buffer));
if (BufferIsLocal(buffer))
{
- bufHdr = &LocalBufferDescriptors[-buffer - 1];
+ bufHdr = GetLocalBufferDescriptor(-buffer - 1);
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
bufHdr->tag.forkNum == forkNum)
}
else
{
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
/* we have pin, so it's ok to examine tag without spinlock */
if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
num_to_write = 0;
for (buf_id = 0; buf_id < NBuffers; buf_id++)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
/*
* Header spinlock is enough to examine BM_DIRTY, see comment in
num_written = 0;
while (num_to_scan-- > 0)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
/*
* We don't need to acquire the lock here, because we're only looking
static int
SyncOneBuffer(int buf_id, bool skip_recently_used)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
int result = 0;
ReservePrivateRefCountEntry();
Assert(BufferIsValid(buffer));
if (BufferIsLocal(buffer))
{
- buf = &LocalBufferDescriptors[-buffer - 1];
+ buf = GetLocalBufferDescriptor(-buffer - 1);
loccount = LocalRefCount[-buffer - 1];
backend = MyBackendId;
}
else
{
- buf = &BufferDescriptors[buffer - 1];
+ buf = GetBufferDescriptor(buffer - 1);
loccount = GetPrivateRefCount(buffer);
backend = InvalidBackendId;
}
Assert(BufferIsPinned(buffer));
if (BufferIsLocal(buffer))
- bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
+ bufHdr = GetLocalBufferDescriptor(-buffer - 1);
else
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
/* pinned, so OK to read tag without spinlock */
return bufHdr->tag.blockNum;
Assert(BufferIsPinned(buffer));
if (BufferIsLocal(buffer))
- bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
+ bufHdr = GetLocalBufferDescriptor(-buffer - 1);
else
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
/* pinned, so OK to read tag without spinlock */
*rnode = bufHdr->tag.rnode;
* changing an aligned 2-byte BufFlags value is atomic, so we'll read the
* old value or the new value, but not random garbage.
*/
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
return (bufHdr->flags & BM_PERMANENT) != 0;
}
XLogRecPtr
BufferGetLSNAtomic(Buffer buffer)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[buffer - 1];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
char *page = BufferGetPage(buffer);
XLogRecPtr lsn;
for (i = 0; i < NBuffers; i++)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
/*
* We can make this a tad faster by prechecking the buffer tag before
for (i = 0; i < NBuffers; i++)
{
RelFileNode *rnode = NULL;
- volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
for (i = 0; i < NBuffers; i++)
{
- volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+ volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
PrintBufferDescs(void)
{
int i;
- volatile BufferDesc *buf = BufferDescriptors;
- for (i = 0; i < NBuffers; ++i, ++buf)
+ for (i = 0; i < NBuffers; ++i)
{
+ volatile BufferDesc *buf = GetBufferDescriptor(i);
+
/* theoretically we should lock the bufhdr here */
elog(LOG,
"[%02d] (freeNext=%d, rel=%s, "
PrintPinnedBufs(void)
{
int i;
- volatile BufferDesc *buf = BufferDescriptors;
- for (i = 0; i < NBuffers; ++i, ++buf)
+ for (i = 0; i < NBuffers; ++i)
{
+ volatile BufferDesc *buf = GetBufferDescriptor(i);
+
if (GetPrivateRefCount(i + 1) > 0)
{
/* theoretically we should lock the bufhdr here */
{
for (i = 0; i < NLocBuffer; i++)
{
- bufHdr = &LocalBufferDescriptors[i];
+ bufHdr = GetLocalBufferDescriptor(i);
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
for (i = 0; i < NBuffers; i++)
{
- bufHdr = &BufferDescriptors[i];
+ bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
for (i = 0; i < NBuffers; i++)
{
- bufHdr = &BufferDescriptors[i];
+ bufHdr = GetBufferDescriptor(i);
/*
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
return;
}
- UnpinBuffer(&BufferDescriptors[buffer - 1], true);
+ UnpinBuffer(GetBufferDescriptor(buffer - 1), true);
}
/*
return;
}
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
Assert(GetPrivateRefCount(buffer) > 0);
/* here, either share or exclusive lock is OK */
if (BufferIsLocal(buffer))
return; /* local buffers need no lock */
- buf = &(BufferDescriptors[buffer - 1]);
+ buf = GetBufferDescriptor(buffer - 1);
if (mode == BUFFER_LOCK_UNLOCK)
LWLockRelease(buf->content_lock);
if (BufferIsLocal(buffer))
return true; /* act as though we got it */
- buf = &(BufferDescriptors[buffer - 1]);
+ buf = GetBufferDescriptor(buffer - 1);
return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
}
elog(ERROR, "incorrect local pin count: %d",
GetPrivateRefCount(buffer));
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
for (;;)
{
if (!ConditionalLockBuffer(buffer))
return false;
- bufHdr = &BufferDescriptors[buffer - 1];
+ bufHdr = GetBufferDescriptor(buffer - 1);
LockBufHdr(bufHdr);
Assert(bufHdr->refcount > 0);
if (bufHdr->refcount == 1)
if (hresult)
{
b = hresult->id;
- bufHdr = &LocalBufferDescriptors[b];
+ bufHdr = GetLocalBufferDescriptor(b);
Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag));
#ifdef LBDEBUG
fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
if (++nextFreeLocalBuf >= NLocBuffer)
nextFreeLocalBuf = 0;
- bufHdr = &LocalBufferDescriptors[b];
+ bufHdr = GetLocalBufferDescriptor(b);
if (LocalRefCount[b] == 0)
{
Assert(LocalRefCount[bufid] > 0);
- bufHdr = &LocalBufferDescriptors[bufid];
+ bufHdr = GetLocalBufferDescriptor(bufid);
if (!(bufHdr->flags & BM_DIRTY))
pgBufferUsage.local_blks_dirtied++;
for (i = 0; i < NLocBuffer; i++)
{
- BufferDesc *bufHdr = &LocalBufferDescriptors[i];
+ BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
if ((bufHdr->flags & BM_TAG_VALID) &&
for (i = 0; i < NLocBuffer; i++)
{
- BufferDesc *bufHdr = &LocalBufferDescriptors[i];
+ BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
LocalBufferLookupEnt *hresult;
if ((bufHdr->flags & BM_TAG_VALID) &&
/* initialize fields that need to start off nonzero */
for (i = 0; i < nbufs; i++)
{
- BufferDesc *buf = &LocalBufferDescriptors[i];
+ BufferDesc *buf = GetLocalBufferDescriptor(i);
/*
* negative to indicate local buffer. This is tricky: shared buffers
* We use this same struct for local buffer headers, but the lock fields
* are not used and not all of the flag bits are useful either.
*/
-typedef struct sbufdesc
+typedef struct BufferDesc
{
BufferTag tag; /* ID of page contained in buffer */
BufFlags flags; /* see bit definitions above */
LWLock *content_lock; /* to lock access to buffer contents */
} BufferDesc;
+/*
+ * Concurrent access to buffer headers has proven to be more efficient if
+ * they're cache line aligned. So we force the start of the BufferDescriptors
+ * array to be on a cache line boundary and force the elements to be cache
+ * line sized.
+ *
+ * XXX: As this primarily matters in highly concurrent workloads, which
+ * these days probably all run on 64bit hardware, and the space wastage
+ * would be a bit more noticeable on 32bit systems, we don't force the
+ * stride to be cache line sized on those. If somebody does actual
+ * performance testing, we can reevaluate.
+ *
+ * Note that local buffer descriptors aren't forced to be aligned; as there's
+ * no concurrent access to those, alignment is unlikely to be beneficial.
+ *
+ * We use 64 bytes as the cache line size here, because that's the most
+ * common size. Making it bigger would be a waste of memory. Even on a
+ * platform with 32 or 128 byte cache lines, aligning to 64 byte boundaries
+ * still reduces false sharing compared to not padding at all.
+ */
+#define BUFFERDESC_PAD_TO_SIZE (SIZEOF_VOID_P == 8 ? 64 : 1)
+
+typedef union BufferDescPadded
+{
+ BufferDesc bufferdesc;
+ char pad[BUFFERDESC_PAD_TO_SIZE];
+} BufferDescPadded;
+
+#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
+#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])
+
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
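The union-plus-pad idiom defined above is the core of the change, so here is a minimal, self-contained sketch of the same pattern with illustrative names (not part of the patch). The char array inside the union forces every element's stride up to an assumed 64-byte cache line, so two adjacent elements never share a line and concurrent writers to different elements avoid false sharing:

    #include <stdio.h>

    typedef struct Counter
    {
    	long		value;			/* written concurrently by different workers */
    } Counter;

    typedef union CounterPadded
    {
    	Counter		counter;
    	char		pad[64];		/* assume 64-byte cache lines */
    } CounterPadded;

    static CounterPadded counters[4];

    int
    main(void)
    {
    	/* stride is 64 bytes, so counters[0] and counters[1] sit on different lines */
    	printf("stride = %zu bytes\n", sizeof(CounterPadded));
    	counters[1].counter.value++;
    	return 0;
    }

Note that the pad only fixes the stride between elements; the patch additionally forces the start of the shared BufferDescriptors array onto a cache line boundary with CACHELINEALIGN(), because padding alone doesn't guarantee the base address is aligned.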
/*
/* in buf_init.c */
-extern PGDLLIMPORT BufferDesc *BufferDescriptors;
+extern PGDLLIMPORT BufferDescPadded *BufferDescriptors;
/* in localbuf.c */
extern BufferDesc *LocalBufferDescriptors;