]> granicus.if.org Git - postgresql/commitdiff
Replacement of the buffer replacement strategy with an ARC
authorJan Wieck <JanWieck@Yahoo.com>
Thu, 13 Nov 2003 00:40:02 +0000 (00:40 +0000)
committerJan Wieck <JanWieck@Yahoo.com>
Thu, 13 Nov 2003 00:40:02 +0000 (00:40 +0000)
algorithm adopted for PostgreSQL.

Jan

src/backend/commands/vacuum.c
src/backend/storage/buffer/buf_init.c
src/backend/storage/buffer/buf_table.c
src/backend/storage/buffer/bufmgr.c
src/backend/storage/buffer/freelist.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/miscadmin.h
src/include/storage/buf_internals.h

index c1c5d64ea2fa534ea4ed3f374c5028f482830d63..47564a3002d384f04918932e1c1523ad2969663f 100644 (file)
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.265 2003/11/12 21:15:51 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.266 2003/11/13 00:40:00 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -33,6 +33,7 @@
 #include "commands/vacuum.h"
 #include "executor/executor.h"
 #include "miscadmin.h"
+#include "storage/buf_internals.h"
 #include "storage/freespace.h"
 #include "storage/sinval.h"
 #include "storage/smgr.h"
@@ -310,8 +311,16 @@ vacuum(VacuumStmt *vacstmt)
                        else
                                old_context = MemoryContextSwitchTo(anl_context);
 
+                       /*
+                        * Tell the buffer replacement strategy that vacuum is
+                        * causing the IO
+                        */
+                       StrategyHintVacuum(true);
+
                        analyze_rel(relid, vacstmt);
 
+                       StrategyHintVacuum(false);
+
                        if (vacstmt->vacuum)
                                CommitTransactionCommand();
                        else
@@ -749,6 +758,12 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
        SetQuerySnapshot();                     /* might be needed for functions in
                                                                 * indexes */
 
+       /*
+        * Tell the cache replacement strategy that vacuum is causing
+        * all following IO
+        */
+       StrategyHintVacuum(true);
+
        /*
         * Check for user-requested abort.      Note we want this to be inside a
         * transaction, so xact.c doesn't issue useless WARNING.
@@ -763,6 +778,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
                                                          ObjectIdGetDatum(relid),
                                                          0, 0, 0))
        {
+               StrategyHintVacuum(false);
                CommitTransactionCommand();
                return true;                    /* okay 'cause no data there */
        }
@@ -796,6 +812,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
                                (errmsg("skipping \"%s\" --- only table or database owner can vacuum it",
                                                RelationGetRelationName(onerel))));
                relation_close(onerel, lmode);
+               StrategyHintVacuum(false);
                CommitTransactionCommand();
                return false;
        }
@@ -810,6 +827,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
                                (errmsg("skipping \"%s\" --- cannot vacuum indexes, views, or special system tables",
                                                RelationGetRelationName(onerel))));
                relation_close(onerel, lmode);
+               StrategyHintVacuum(false);
                CommitTransactionCommand();
                return false;
        }
@@ -824,6 +842,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
        if (isOtherTempNamespace(RelationGetNamespace(onerel)))
        {
                relation_close(onerel, lmode);
+               StrategyHintVacuum(false);
                CommitTransactionCommand();
                return true;                    /* assume no long-lived data in temp
                                                                 * tables */
@@ -863,6 +882,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
        /*
         * Complete the transaction and free all temporary memory used.
         */
+       StrategyHintVacuum(false);
        CommitTransactionCommand();
 
        /*
index 38cc332efd37e3f500e43bb785a0d4dc1a880f8c..616338c60c6980c91ee2f10d7d453414baa94d28 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.54 2003/08/04 02:40:03 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.55 2003/11/13 00:40:01 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -48,9 +48,6 @@ long     *CurTraceBuf;
 int                    ShowPinTrace = 0;
 
 int                    Data_Descriptors;
-int                    Free_List_Descriptor;
-int                    Lookup_List_Descriptor;
-int                    Num_Descriptors;
 
 BufferDesc *BufferDescriptors;
 Block     *BufferBlockPointers;
@@ -133,9 +130,6 @@ InitBufferPool(void)
        int                     i;
 
        Data_Descriptors = NBuffers;
-       Free_List_Descriptor = Data_Descriptors;
-       Lookup_List_Descriptor = Data_Descriptors + 1;
-       Num_Descriptors = Data_Descriptors + 1;
 
        /*
         * It's probably not really necessary to grab the lock --- if there's
@@ -156,7 +150,7 @@ InitBufferPool(void)
 
        BufferDescriptors = (BufferDesc *)
                ShmemInitStruct("Buffer Descriptors",
-                                         Num_Descriptors * sizeof(BufferDesc), &foundDescs);
+                                         Data_Descriptors * sizeof(BufferDesc), &foundDescs);
 
        BufferBlocks = (char *)
                ShmemInitStruct("Buffer Blocks",
@@ -176,16 +170,14 @@ InitBufferPool(void)
                block = BufferBlocks;
 
                /*
-                * link the buffers into a circular, doubly-linked list to
-                * initialize free list, and initialize the buffer headers. Still
-                * don't know anything about replacement strategy in this file.
+                * link the buffers into a singly linked list. This will become the
+                * LiFo list of unused buffers returned by StrategyGetBuffer().
                 */
                for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
                {
                        Assert(ShmemIsValid((unsigned long) block));
 
-                       buf->freeNext = i + 1;
-                       buf->freePrev = i - 1;
+                       buf->bufNext = i + 1;
 
                        CLEAR_BUFFERTAG(&(buf->tag));
                        buf->buf_id = i;
@@ -199,14 +191,12 @@ InitBufferPool(void)
                        buf->wait_backend_id = 0;
                }
 
-               /* close the circular queue */
-               BufferDescriptors[0].freePrev = Data_Descriptors - 1;
-               BufferDescriptors[Data_Descriptors - 1].freeNext = 0;
+               /* Correct last entry */
+               BufferDescriptors[Data_Descriptors - 1].bufNext = -1;
        }
 
        /* Init other shared buffer-management stuff */
-       InitBufTable();
-       InitFreeList(!foundDescs);
+       StrategyInitialize(!foundDescs);
 
        LWLockRelease(BufMgrLock);
 }
index 072288e330ee9e82b560017ead3f8fbb35b5e26d..a2318a29f36e0e97c3e8903d85baf89cbfe1d6d2 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.29 2003/08/04 02:40:03 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_table.c,v 1.30 2003/11/13 00:40:01 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,7 +38,7 @@ static HTAB *SharedBufHash;
  * Initialize shmem hash table for mapping buffers
  */
 void
-InitBufTable(void)
+InitBufTable(int size)
 {
        HASHCTL         info;
 
@@ -50,7 +50,7 @@ InitBufTable(void)
        info.hash = tag_hash;
 
        SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
-                                                                 NBuffers, NBuffers,
+                                                                 size, size,
                                                                  &info,
                                                                  HASH_ELEM | HASH_FUNCTION);
 
@@ -58,79 +58,63 @@ InitBufTable(void)
                elog(FATAL, "could not initialize shared buffer hash table");
 }
 
-BufferDesc *
+/*
+ * BufTableLookup
+ */
+int
 BufTableLookup(BufferTag *tagPtr)
 {
        BufferLookupEnt *result;
 
        if (tagPtr->blockNum == P_NEW)
-               return NULL;
+               return -1;
 
        result = (BufferLookupEnt *)
                hash_search(SharedBufHash, (void *) tagPtr, HASH_FIND, NULL);
        if (!result)
-               return NULL;
+               return -1;
 
-       return &(BufferDescriptors[result->id]);
+       return result->id;
 }
 
 /*
  * BufTableDelete
  */
 bool
-BufTableDelete(BufferDesc *buf)
+BufTableInsert(BufferTag *tagPtr, Buffer buf_id)
 {
        BufferLookupEnt *result;
-
-       /*
-        * buffer not initialized or has been removed from table already.
-        * BM_DELETED keeps us from removing buffer twice.
-        */
-       if (buf->flags & BM_DELETED)
-               return TRUE;
-
-       buf->flags |= BM_DELETED;
+       bool            found;
 
        result = (BufferLookupEnt *)
-               hash_search(SharedBufHash, (void *) &(buf->tag), HASH_REMOVE, NULL);
+               hash_search(SharedBufHash, (void *) tagPtr, HASH_ENTER, &found);
 
-       if (!result)                            /* shouldn't happen */
-               elog(ERROR, "shared buffer hash table corrupted");
+       if (!result)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of shared memory")));
 
-       /*
-        * Clear the buffer's tag.  This doesn't matter for the hash table,
-        * since the buffer is already removed from it, but it ensures that
-        * sequential searches through the buffer table won't think the buffer
-        * is still valid for its old page.
-        */
-       buf->tag.rnode.relNode = InvalidOid;
-       buf->tag.rnode.tblNode = InvalidOid;
+       if (found)                                      /* found something else in the table? */
+               elog(ERROR, "shared buffer hash table corrupted");
 
+       result->id = buf_id;
        return TRUE;
 }
 
+/*
+ * BufTableDelete
+ */
 bool
-BufTableInsert(BufferDesc *buf)
+BufTableDelete(BufferTag *tagPtr)
 {
        BufferLookupEnt *result;
-       bool            found;
-
-       /* cannot insert it twice */
-       Assert(buf->flags & BM_DELETED);
-       buf->flags &= ~(BM_DELETED);
 
        result = (BufferLookupEnt *)
-               hash_search(SharedBufHash, (void *) &(buf->tag), HASH_ENTER, &found);
-
-       if (!result)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OUT_OF_MEMORY),
-                                errmsg("out of shared memory")));
+               hash_search(SharedBufHash, (void *) tagPtr, HASH_REMOVE, NULL);
 
-       if (found)                                      /* found something else in the table? */
+       if (!result)                            /* shouldn't happen */
                elog(ERROR, "shared buffer hash table corrupted");
 
-       result->id = buf->buf_id;
        return TRUE;
 }
 
index d95129df4e24eca535d4dc5b02b38983b3d76ceb..f10ff7e5b744c8527802e02eef0bd3ed1e1ad6dc 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.141 2003/09/25 06:58:01 petere Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.142 2003/11/13 00:40:01 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -260,12 +260,8 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
        if (status == SM_FAIL)
        {
                /* IO Failed.  cleanup the data structures and go home */
+               StrategyInvalidateBuffer(bufHdr);
 
-               if (!BufTableDelete(bufHdr))
-               {
-                       LWLockRelease(BufMgrLock);
-                       elog(FATAL, "buffer table broken after I/O error");
-               }
                /* remember that BufferAlloc() pinned the buffer */
                UnpinBuffer(bufHdr);
 
@@ -318,7 +314,7 @@ BufferAlloc(Relation reln,
        INIT_BUFFERTAG(&newTag, reln, blockNum);
 
        /* see if the block is in the buffer pool already */
-       buf = BufTableLookup(&newTag);
+       buf = StrategyBufferLookup(&newTag, false);
        if (buf != NULL)
        {
                /*
@@ -379,7 +375,7 @@ BufferAlloc(Relation reln,
        inProgress = FALSE;
        for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
        {
-               buf = GetFreeBuffer();
+               buf = StrategyGetBuffer();
 
                /* GetFreeBuffer will abort if it can't find a free buffer */
                Assert(buf);
@@ -492,7 +488,7 @@ BufferAlloc(Relation reln,
                         * we haven't gotten around to insert the new tag into the
                         * buffer table. So we need to check here.              -ay 3/95
                         */
-                       buf2 = BufTableLookup(&newTag);
+                       buf2 = StrategyBufferLookup(&newTag, true);
                        if (buf2 != NULL)
                        {
                                /*
@@ -535,29 +531,12 @@ BufferAlloc(Relation reln,
         */
 
        /*
-        * Change the name of the buffer in the lookup table:
-        *
-        * Need to update the lookup table before the read starts. If someone
-        * comes along looking for the buffer while we are reading it in, we
-        * don't want them to allocate a new buffer.  For the same reason, we
-        * didn't want to erase the buf table entry for the buffer we were
-        * writing back until now, either.
+        * Tell the buffer replacement strategy that we are replacing the
+        * buffer content. Then rename the buffer.
         */
-
-       if (!BufTableDelete(buf))
-       {
-               LWLockRelease(BufMgrLock);
-               elog(FATAL, "buffer wasn't in the buffer hash table");
-       }
-
+       StrategyReplaceBuffer(buf, reln, blockNum);
        INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
 
-       if (!BufTableInsert(buf))
-       {
-               LWLockRelease(BufMgrLock);
-               elog(FATAL, "buffer in buffer hash table twice");
-       }
-
        /*
         * Buffer contents are currently invalid.  Have to mark IO IN PROGRESS
         * so no one fiddles with them until the read completes.  If this
@@ -709,13 +688,28 @@ BufferSync(void)
        BufferDesc *bufHdr;
        ErrorContextCallback errcontext;
 
+       int                     num_buffer_dirty;
+       int                *buffer_dirty;
+
        /* Setup error traceback support for ereport() */
        errcontext.callback = buffer_write_error_callback;
        errcontext.arg = NULL;
        errcontext.previous = error_context_stack;
        error_context_stack = &errcontext;
 
-       for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
+       /*
+        * Get a list of all currently dirty buffers and how many there are.
+        * We do not flush buffers that get dirtied after we started. They
+        * have to wait until the next checkpoint.
+        */
+       buffer_dirty = (int *)palloc(NBuffers * sizeof(int));
+       num_buffer_dirty = 0;
+
+       LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+       num_buffer_dirty = StrategyDirtyBufferList(buffer_dirty, NBuffers);
+       LWLockRelease(BufMgrLock);
+
+       for (i = 0; i < num_buffer_dirty; i++)
        {
                Buffer          buffer;
                int                     status;
@@ -723,10 +717,11 @@ BufferSync(void)
                XLogRecPtr      recptr;
                Relation        reln;
 
-               errcontext.arg = bufHdr;
-
                LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
 
+               bufHdr = &BufferDescriptors[buffer_dirty[i]];
+               errcontext.arg = bufHdr;
+
                if (!(bufHdr->flags & BM_VALID))
                {
                        LWLockRelease(BufMgrLock);
@@ -855,6 +850,8 @@ BufferSync(void)
                        RelationDecrementReferenceCount(reln);
        }
 
+       pfree(buffer_dirty);
+
        /* Pop the error context stack */
        error_context_stack = errcontext.previous;
 }
@@ -959,9 +956,9 @@ AtEOXact_Buffers(bool isCommit)
 
                        if (isCommit)
                                elog(WARNING,
-                               "buffer refcount leak: [%03d] (freeNext=%d, freePrev=%d, "
+                               "buffer refcount leak: [%03d] (bufNext=%d, "
                                  "rel=%u/%u, blockNum=%u, flags=0x%x, refcount=%d %ld)",
-                                        i, buf->freeNext, buf->freePrev,
+                                        i, buf->bufNext,
                                         buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
                                         buf->tag.blockNum, buf->flags,
                                         buf->refcount, PrivateRefCount[i]);
@@ -1229,7 +1226,7 @@ recheck:
                        /*
                         * And mark the buffer as no longer occupied by this rel.
                         */
-                       BufTableDelete(bufHdr);
+                       StrategyInvalidateBuffer(bufHdr);
                }
        }
 
@@ -1295,7 +1292,7 @@ recheck:
                        /*
                         * And mark the buffer as no longer occupied by this page.
                         */
-                       BufTableDelete(bufHdr);
+                       StrategyInvalidateBuffer(bufHdr);
                }
        }
 
@@ -1543,7 +1540,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
                                return -2;
                        }
                        if (bufHdr->tag.blockNum >= firstDelBlock)
-                               BufTableDelete(bufHdr);
+                               StrategyInvalidateBuffer(bufHdr);
                }
        }
 
index 227c7e938d207798a2cbe6e81bd75697d15d8b70..9e340b47209e26a80ce1d2b22b1f047de495010c 100644 (file)
@@ -1,15 +1,14 @@
 /*-------------------------------------------------------------------------
  *
  * freelist.c
- *       routines for manipulating the buffer pool's replacement strategy
- *       freelist.
+ *       routines for manipulating the buffer pool's replacement strategy.
  *
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.31 2003/08/04 02:40:03 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v 1.32 2003/11/13 00:40:01 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/proc.h"
+#include "access/xact.h"
 
+#define STRAT_LIST_UNUSED      -1
+#define STRAT_LIST_B1          0
+#define STRAT_LIST_T1          1
+#define STRAT_LIST_T2          2
+#define STRAT_LIST_B2          3
+#define STRAT_NUM_LISTS                4
+
+#ifndef MAX
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#endif
+#ifndef MIN
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+/*
+ * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
+ */
+typedef struct bufstratcdb
+{
+       int                             prev;           /* links in the queue */
+       int                             next;
+       int                             list;           /* current list */
+       BufferTag               buf_tag;        /* buffer key */
+       Buffer                  buf_id;         /* currently assigned data buffer */
+       TransactionId   t1_xid;         /* the xid this entry went onto T1 */
+} BufferStrategyCDB;
+
+/*
+ * The shared ARC control information.
+ */
+typedef struct bufstratcontrol
+{
+
+       int             target_T1_size;                         /* What T1 size are we aiming for */
+       int             listUnusedCDB;                          /* All unused StrategyCDB */
+       int             listHead[STRAT_NUM_LISTS];      /* ARC lists B1, T1, T2 and B2 */
+       int             listTail[STRAT_NUM_LISTS];
+       int             listSize[STRAT_NUM_LISTS];
+       Buffer  listFreeBuffers;                        /* List of unused buffers */
+
+       long    num_lookup;                                     /* Some hit statistics */
+       long    num_hit[STRAT_NUM_LISTS];
+       time_t  stat_report;
+
+       BufferStrategyCDB       cdb[1];                 /* The cache directory */
+} BufferStrategyControl;
+
+static BufferStrategyControl   *StrategyControl = NULL;
+static BufferStrategyCDB               *StrategyCDB = NULL;
+
+static int             strategy_cdb_found;
+static int             strategy_cdb_replace;
+static int             strategy_get_from;
+
+int                            BufferStrategyStatInterval = 0;
+
+static bool                            strategy_hint_vacuum;
+static TransactionId   strategy_vacuum_xid;
+
+
+#define T1_TARGET      StrategyControl->target_T1_size
+#define B1_LENGTH      StrategyControl->listSize[STRAT_LIST_B1]
+#define T1_LENGTH      StrategyControl->listSize[STRAT_LIST_T1]
+#define T2_LENGTH      StrategyControl->listSize[STRAT_LIST_T2]
+#define B2_LENGTH      StrategyControl->listSize[STRAT_LIST_B2]
 
-static BufferDesc *SharedFreeList;
 
 /*
- * State-checking macros
+ * Macro to remove a CDB from whichever list it currently is on
  */
+#define        STRAT_LIST_REMOVE(cdb) \
+{ \
+       AssertMacro((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS);         \
+       if ((cdb)->prev < 0)                                                                                            \
+               StrategyControl->listHead[(cdb)->list] = (cdb)->next;                   \
+       else                                                                                                                            \
+               StrategyCDB[(cdb)->prev].next = (cdb)->next;                                    \
+       if ((cdb)->next < 0)                                                                                            \
+               StrategyControl->listTail[(cdb)->list] = (cdb)->prev;                   \
+       else                                                                                                                            \
+               StrategyCDB[(cdb)->next].prev = (cdb)->prev;                                    \
+       StrategyControl->listSize[(cdb)->list]--;                                                       \
+       (cdb)->list = STRAT_LIST_UNUSED;                                                                        \
+}
 
-#define IsInQueue(bf) \
-( \
-       AssertMacro((bf->freeNext != INVALID_DESCRIPTOR)), \
-       AssertMacro((bf->freePrev != INVALID_DESCRIPTOR)), \
-       AssertMacro((bf->flags & BM_FREE)) \
-)
+/*
+ * Macro to add a CDB to the tail of a list (MRU position)
+ */
+#define STRAT_MRU_INSERT(cdb,l) \
+{ \
+       AssertMacro((cdb)->list == STRAT_LIST_UNUSED);                                          \
+       if (StrategyControl->listTail[(l)] < 0)                                                         \
+       {                                                                                                                                       \
+               (cdb)->prev = (cdb)->next = -1;                                                                 \
+               StrategyControl->listHead[(l)] =                                                                \
+                       StrategyControl->listTail[(l)] =                                                        \
+                       ((cdb) - StrategyCDB);                                                                          \
+       }                                                                                                                                       \
+       else                                                                                                                            \
+       {                                                                                                                                       \
+               (cdb)->next = -1;                                                                                               \
+               (cdb)->prev = StrategyControl->listTail[(l)];                                   \
+               StrategyCDB[StrategyControl->listTail[(l)]].next =                              \
+                       ((cdb) - StrategyCDB);                                                                          \
+               StrategyControl->listTail[(l)] =                                                                \
+                       ((cdb) - StrategyCDB);                                                                          \
+       }                                                                                                                                       \
+       StrategyControl->listSize[(l)]++;                                                                       \
+       (cdb)->list = (l);                                                                                                      \
+}
 
-#define IsNotInQueue(bf) \
-( \
-       AssertMacro((bf->freeNext == INVALID_DESCRIPTOR)), \
-       AssertMacro((bf->freePrev == INVALID_DESCRIPTOR)), \
-       AssertMacro(! (bf->flags & BM_FREE)) \
-)
+/*
+ * Macro to add a CDB to the head of a list (LRU position)
+ */
+#define STRAT_LRU_INSERT(cdb,l) \
+{ \
+       AssertMacro((cdb)->list == STRAT_LIST_UNUSED);                                          \
+       if (StrategyControl->listHead[(l)] < 0)                                                         \
+       {                                                                                                                                       \
+               (cdb)->prev = (cdb)->next = -1;                                                                 \
+               StrategyControl->listHead[(l)] =                                                                \
+                       StrategyControl->listTail[(l)] =                                                        \
+                       ((cdb) - StrategyCDB);                                                                          \
+       }                                                                                                                                       \
+       else                                                                                                                            \
+       {                                                                                                                                       \
+               (cdb)->prev = -1;                                                                                               \
+               (cdb)->next = StrategyControl->listHead[(l)];                                   \
+               StrategyCDB[StrategyControl->listHead[(l)]].prev =                              \
+                       ((cdb) - StrategyCDB);                                                                          \
+               StrategyControl->listHead[(l)] =                                                                \
+                       ((cdb) - StrategyCDB);                                                                          \
+       }                                                                                                                                       \
+       StrategyControl->listSize[(l)]++;                                                                       \
+       (cdb)->list = (l);                                                                                                      \
+}
 
 
 /*
- * AddBufferToFreelist
+ * StrategyBufferLookup
  *
- * In theory, this is the only routine that needs to be changed
- * if the buffer replacement strategy changes. Just change
- * the manner in which buffers are added to the freelist queue.
- * Currently, they are added on an LRU basis.
+ *     Lookup a page request in the cache directory. A buffer is only
+ *     returned for a T1 or T2 cache hit. B1 and B2 hits are only
+ *     remembered here to later affect the behaviour.
  */
-static void
-AddBufferToFreelist(BufferDesc *bf)
+BufferDesc *
+StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
 {
-#ifdef BMTRACE
-       _bm_trace(bf->tag.relId.dbId, bf->tag.relId.relId, bf->tag.blockNum,
-                         BufferDescriptorGetBuffer(bf), BMT_DEALLOC);
-#endif   /* BMTRACE */
-       IsNotInQueue(bf);
-
-       /* change bf so it points to inFrontOfNew and its successor */
-       bf->freePrev = SharedFreeList->freePrev;
-       bf->freeNext = Free_List_Descriptor;
-
-       /* insert new into chain */
-       BufferDescriptors[bf->freeNext].freePrev = bf->buf_id;
-       BufferDescriptors[bf->freePrev].freeNext = bf->buf_id;
+       BufferStrategyCDB  *cdb;
+       time_t                          now;
+
+       if (BufferStrategyStatInterval > 0)
+       {
+               time(&now);
+               if (StrategyControl->stat_report + BufferStrategyStatInterval < now)
+               {
+                       long    all_hit, b1_hit, t1_hit, t2_hit, b2_hit;
+                       ErrorContextCallback    *errcxtold;
+
+                       if (StrategyControl->num_lookup == 0)
+                       {
+                               all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0;
+                       }
+                       else
+                       {
+                               b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 /
+                                                 StrategyControl->num_lookup);
+                               t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 /
+                                                 StrategyControl->num_lookup);
+                               t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 /
+                                                 StrategyControl->num_lookup);
+                               b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 /
+                                                 StrategyControl->num_lookup);
+                               all_hit = b1_hit + t1_hit + t2_hit + b2_hit;
+                       }
+
+                       errcxtold = error_context_stack;
+                       error_context_stack = NULL;
+                       elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d",
+                                       T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH);
+                       elog(DEBUG1, "ARC total   =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%",
+                                       all_hit, b1_hit, t1_hit, t2_hit, b2_hit);
+                       error_context_stack = errcxtold;
+
+                       StrategyControl->num_lookup = 0;
+                       StrategyControl->num_hit[STRAT_LIST_B1] = 0;
+                       StrategyControl->num_hit[STRAT_LIST_T1] = 0;
+                       StrategyControl->num_hit[STRAT_LIST_T2] = 0;
+                       StrategyControl->num_hit[STRAT_LIST_B2] = 0;
+                       StrategyControl->stat_report = now;
+               }
+       }
+
+       /*
+        * Count lookups
+        */
+       StrategyControl->num_lookup++;
+
+       /*
+        * Lookup the block in the shared hash table
+        */
+       strategy_cdb_found = BufTableLookup(tagPtr);
+
+       /*
+        * Handle CDB lookup miss
+        */
+       if (strategy_cdb_found < 0)
+       {
+               if (!recheck)
+               {
+                       /*
+                        * This is an initial lookup and we have a complete
+                        * cache miss (block found nowhere). This means we
+                        * remember according to the current T1 size and the
+                        * target T1 size from where we take a block if we
+                        * need one later.
+                        */
+                       if (T1_LENGTH >= MAX(1, T1_TARGET))
+                               strategy_get_from = STRAT_LIST_T1;
+                       else
+                               strategy_get_from = STRAT_LIST_T2;
+               }
+
+               /* report cache miss */
+               return NULL;
+       }
+
+       /*
+        * We found a CDB
+        */
+       cdb = &StrategyCDB[strategy_cdb_found];
+
+       /*
+        * Count hits
+        */
+       StrategyControl->num_hit[cdb->list]++;
+
+       /*
+        * If this is a T2 hit, we simply move the CDB to the
+        * T2 MRU position and return the found buffer.
+        */
+       if (cdb->list == STRAT_LIST_T2)
+       {
+               STRAT_LIST_REMOVE(cdb);
+               STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+
+               return &BufferDescriptors[cdb->buf_id];
+       }
+
+       /*
+        * If this is a T1 hit, we move the buffer to the T2 MRU
+        * only if another transaction had read it into T1. This is
+        * required because any UPDATE or DELETE in PostgreSQL does
+        * multiple ReadBuffer(), first during the scan, later during
+        * the heap_update() or heap_delete().
+        */
+       if (cdb->list == STRAT_LIST_T1)
+       {
+               if (!TransactionIdIsCurrentTransactionId(cdb->t1_xid))
+               {
+                       STRAT_LIST_REMOVE(cdb);
+                       STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+               }
+
+               return &BufferDescriptors[cdb->buf_id];
+       }
+
+       /*
+        * In the case of a recheck we don't care about B1 or B2 hits here.
+        * The bufmgr does this call only to make sure no one faulted in the
+        * block while we were busy flushing another. Now for this really
+        * to end up as a B1 or B2 cache hit, we must have been flushing for
+        * quite some time as the block not only must have been read, but
+        * also traveled through the queue and been evicted from the T cache
+        * again already.
+        */
+       if (recheck)
+               return NULL;
+
+       /*
+        * Adjust the target size of the T1 cache depending on if this is
+        * a B1 or B2 hit.
+        */
+       switch (cdb->list)
+       {
+               case STRAT_LIST_B1:
+                       /*
+                        * B1 hit means that the T1 cache is probably too
+                        * small. Adjust the T1 target size and continue
+                        * below.
+                        */
+                       T1_TARGET = MIN(T1_TARGET + MAX(B2_LENGTH / B1_LENGTH, 1),
+                                                       Data_Descriptors);
+                       break;
+
+               case STRAT_LIST_B2:
+                       /* 
+                        * B2 hit means that the T2 cache is probably too
+                        * small. Adjust the T1 target size and continue
+                        * below.
+ */
+                       T1_TARGET = MAX(T1_TARGET - MAX(B1_LENGTH / B2_LENGTH, 1), 0);
+                       break;
+
+               default:
+                       elog(ERROR, "Buffer hash table corrupted - CDB on list %d found",
+                                       cdb->list);
+       }
+
+       /*
+        * Decide where to take from if we will be out of
+        * free blocks later in StrategyGetBuffer().
+        */
+       if (T1_LENGTH >= MAX(1, T1_TARGET))
+               strategy_get_from = STRAT_LIST_T1;
+       else
+               strategy_get_from = STRAT_LIST_T2;
+
+       /*
+        * Even if we had seen the block in the past, its data is
+        * not currently in memory ... cache miss to the bufmgr.
+        */
+       return NULL;
 }
 
+
+/*
+ * StrategyGetBuffer
+ *
+ *     Hands the bufmgr the next candidate buffer to use in BufferAlloc().
+ *     The only hard requirement BufferAlloc() has is that this buffer
+ *     must not currently be pinned.
+ *
+ *     A buffer from the free list is used when one exists. Otherwise the
+ *     T1 and T2 lists are walked from their heads, beginning with the
+ *     list named by strategy_get_from, and the first buffer whose
+ *     refcount is 0 is returned. strategy_cdb_replace is set to the CDB
+ *     index of the chosen victim, or to -1 when a free buffer is returned.
+ *     If neither list holds an unpinned buffer an ERROR is raised.
+ */
+BufferDesc *
+StrategyGetBuffer(void)
+{
+       BufferDesc         *victim;
+       int                             scan_order[2];
+       int                             pass;
+       int                             id;
+
+       if (StrategyControl->listFreeBuffers >= 0)
+       {
+               /*
+                * A completely free buffer is available - take it.
+                *
+                * Note: This relies on the side effect that a free buffer
+                * can never be pinned or dirty, so the call to
+                * StrategyReplaceBuffer() will happen without the bufmgr
+                * releasing the bufmgr-lock in the meantime. Hence there is
+                * never a reason to recheck. Otherwise we would leak shared
+                * buffers here!
+                */
+               strategy_cdb_replace = -1;
+               victim = &BufferDescriptors[StrategyControl->listFreeBuffers];
+
+               /* unlink it from the head of the free list */
+               StrategyControl->listFreeBuffers = victim->bufNext;
+               victim->bufNext = -1;
+
+               /* Buffer of freelist cannot be pinned */
+               Assert(victim->refcount == 0);
+
+               return victim;
+       }
+
+       /*
+        * No free buffer, so one must be taken from T1 or T2. Try the
+        * list chosen during the lookup first and fall back to the other
+        * list only if every buffer of the preferred one is pinned.
+        */
+       if (strategy_get_from == STRAT_LIST_T1)
+       {
+               scan_order[0] = STRAT_LIST_T1;
+               scan_order[1] = STRAT_LIST_T2;
+       }
+       else
+       {
+               scan_order[0] = STRAT_LIST_T2;
+               scan_order[1] = STRAT_LIST_T1;
+       }
+
+       for (pass = 0; pass < 2; pass++)
+       {
+               for (id = StrategyControl->listHead[scan_order[pass]];
+                        id >= 0;
+                        id = StrategyCDB[id].next)
+               {
+                       victim = &BufferDescriptors[StrategyCDB[id].buf_id];
+                       if (victim->refcount == 0)
+                       {
+                               /* first unpinned buffer of this list wins */
+                               strategy_cdb_replace = id;
+                               Assert(StrategyCDB[id].list == scan_order[pass]);
+                               return victim;
+                       }
+               }
+       }
+
+       /*
+        * No unpinned buffers at all!!!
+        */
+       elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers");
+
+       /* not reached */
+       return NULL;
+}
+
+
+/*
+ * StrategyReplaceBuffer
+ *
+ *     Called by the buffer manager to inform us that it possibly flushed
+ *     a buffer and is now about to replace the content. Prior to this call,
+ *     the cache algorithm still reports the buffer as in the cache. After
+ *     this call we report the new block, even if IO might still need to
+ *     start.
+ *
+ *     buf is the buffer being reused; rnode and blockNum identify the
+ *     block that is about to occupy it.
+ *
+ *     This function consumes the state left behind by the preceding
+ *     calls: strategy_cdb_found (result of the hash lookup, < 0 on a
+ *     complete miss) and strategy_cdb_replace (CDB of the evicted T
+ *     buffer, < 0 when a free buffer was handed out).
+ */
+void
+StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
+{
+       BufferStrategyCDB          *cdb_found;
+       BufferStrategyCDB          *cdb_replace;
+
+       if (strategy_cdb_found >= 0)
+       {
+               /* This was a ghost buffer cache hit (B1 or B2) */
+               cdb_found = &StrategyCDB[strategy_cdb_found];
+
+               /* Assert that the buffer remembered in cdb_found is the one */
+               /* the buffer manager is currently faulting in */
+               Assert(BUFFERTAG_EQUALS(&(cdb_found->buf_tag), rnode, blockNum));
+               
+               if (strategy_cdb_replace >= 0)
+               {
+                       /* We are satisfying it with an evicted T buffer */
+                       cdb_replace = &StrategyCDB[strategy_cdb_replace];
+
+                       /* Assert that the buffer remembered in cdb_replace is */
+                       /* the one the buffer manager has just evicted */
+                       Assert(cdb_replace->list == STRAT_LIST_T1 || 
+                                       cdb_replace->list == STRAT_LIST_T2);
+                       Assert(cdb_replace->buf_id == buf->buf_id);
+                       Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
+
+                       /* While the vacuum hint is set, do not let the evicted */
+                       /* CDB end up in a B list, so that the vacuum scan does */
+                       /* not affect T1_target adjusting. Instead forget the */
+                       /* block entirely and recycle its CDB. */
+                       if (strategy_hint_vacuum)
+                       {
+                               BufTableDelete(&(cdb_replace->buf_tag));
+                               STRAT_LIST_REMOVE(cdb_replace);
+                               cdb_replace->buf_id = -1;
+                               /* push the CDB onto the head of the unused list */
+                               cdb_replace->next = StrategyControl->listUnusedCDB;
+                               StrategyControl->listUnusedCDB = strategy_cdb_replace;
+                       }
+                       else
+                       {
+                               /* Under normal circumstances move the evicted */
+                               /* T list entry to its corresponding B list */
+                               if (cdb_replace->list == STRAT_LIST_T1)
+                               {
+                                       STRAT_LIST_REMOVE(cdb_replace);
+                                       STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
+                               }
+                               else
+                               {
+                                       STRAT_LIST_REMOVE(cdb_replace);
+                                       STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
+                               }
+                       }
+                       /* And clear its block reference (already done in the */
+                       /* vacuum branch above; repeating it there is harmless) */
+                       cdb_replace->buf_id = -1;
+               }
+               else
+               {
+                       /* or we satisfy it with an unused buffer */
+               }
+
+               /* Now the found B CDB gets the buffer and is moved to T2 */
+               cdb_found->buf_id = buf->buf_id;
+               STRAT_LIST_REMOVE(cdb_found);
+               STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
+       }
+       else
+       {
+               /* This was a complete cache miss, so we need to create */
+               /* a new CDB. The goal is to keep T1len+B1len <= c */
+
+               if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= Data_Descriptors)
+               {
+                       /* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */
+                       cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
+
+                       /* the recycled CDB must forget the block it described */
+                       BufTableDelete(&(cdb_found->buf_tag));
+                       STRAT_LIST_REMOVE(cdb_found);
+               }
+               else
+               {
+                       /* Otherwise, we try to use a free one */
+                       if (StrategyControl->listUnusedCDB >= 0)
+                       {
+                               /* pop the head of the unused CDB list */
+                               cdb_found = &StrategyCDB[StrategyControl->listUnusedCDB];
+                               StrategyControl->listUnusedCDB = cdb_found->next;
+                       }
+                       else
+                       {
+                               /* If there isn't, we take B2-LRU ... except if */
+                               /* T1len+B1len+T2len = c ... oh my */
+                               if (B2_LENGTH > 0)
+                                       cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B2]];
+                               else
+                                       cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
+
+                               BufTableDelete(&(cdb_found->buf_tag));
+                               STRAT_LIST_REMOVE(cdb_found);
+                       }
+               }
+
+               /* Set the CDB's buf_tag and insert the hash key; the value */
+               /* stored in the hash table is the CDB's index in StrategyCDB */
+               INIT_BUFFERTAG(&(cdb_found->buf_tag), rnode, blockNum);
+               BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));
+
+               if (strategy_cdb_replace >= 0)
+               {
+                       /* The buffer was formerly in a T list, move its CDB
+                        * to the corresponding B list */
+                       cdb_replace = &StrategyCDB[strategy_cdb_replace];
+
+                       Assert(cdb_replace->list == STRAT_LIST_T1 || 
+                                       cdb_replace->list == STRAT_LIST_T2);
+                       Assert(cdb_replace->buf_id == buf->buf_id);
+                       Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
+
+                       if (cdb_replace->list == STRAT_LIST_T1)
+                       {
+                               STRAT_LIST_REMOVE(cdb_replace);
+                               STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B1);
+                       }
+                       else
+                       {
+                               STRAT_LIST_REMOVE(cdb_replace);
+                               STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
+                       }
+                       /* And clear its block reference */
+                       cdb_replace->buf_id = -1;
+               }
+               else
+               {
+                       /* or we satisfy it with an unused buffer */
+               }
+
+               /* Assign the buffer id to the new CDB */
+               cdb_found->buf_id = buf->buf_id;
+
+               /*
+                * Specialized VACUUM optimization. If this "complete cache miss"
+                * happened because vacuum needed the page, we want it later on
+                * to be placed at the LRU instead of the MRU position of T1.
+                *
+                * The hint is only honored while the current transaction is the
+                * one that was current when the hint was set; otherwise the
+                * hint is cleared and the normal MRU insert is done.
+                */
+               if (strategy_hint_vacuum)
+               {
+                       if (strategy_vacuum_xid != GetCurrentTransactionId())
+                       {
+                               strategy_hint_vacuum = false;
+                               STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
+                       }
+                       else
+                               STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
+                       
+               }
+               else
+                       STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
+
+               /*
+                * Remember the Xid when this buffer went onto T1 to avoid
+                * a single UPDATE promoting a newcomer straight into T2.
+                */
+               cdb_found->t1_xid = GetCurrentTransactionId();
+       }
+}
+
+
+/*
+ * StrategyInvalidateBuffer
+ *
+ *     Called by the buffer manager when the content of a buffer is no
+ *     longer valid. Any hash entry that exists for the buffer's tag is
+ *     dropped, its CDB is returned to the unused-CDB list, and the
+ *     buffer itself is pushed onto the free buffer list.
+ */
+void
+StrategyInvalidateBuffer(BufferDesc *buf)
+{
+       BufferStrategyCDB  *entry;
+       int                                     idx;
+
+       /*
+        * Is the buffer's tag known anywhere in the directory? If so,
+        * forget it and recycle the CDB that described it.
+        */
+       if ((idx = BufTableLookup(&(buf->tag))) >= 0)
+       {
+               entry = &StrategyCDB[idx];
+
+               BufTableDelete(&(entry->buf_tag));
+               STRAT_LIST_REMOVE(entry);
+               entry->buf_id = -1;
+
+               /* the CDB becomes the new head of the unused list */
+               entry->next = StrategyControl->listUnusedCDB;
+               StrategyControl->listUnusedCDB = idx;
+       }
+
+       /*
+        * The buffer is unreferenced now and should not contain any valid
+        * data, so it becomes the new head of the free buffer list.
+        */
+       buf->bufNext = StrategyControl->listFreeBuffers;
+       StrategyControl->listFreeBuffers = buf->buf_id;
+}
+
+
+/*
+ * StrategyHintVacuum
+ *
+ *     Sets or clears the vacuum hint. vacuum_active is stored in
+ *     strategy_hint_vacuum and the current transaction id is stored in
+ *     strategy_vacuum_xid (on both set and clear).
+ *     StrategyReplaceBuffer() consults these two values when it decides
+ *     where a newly faulted-in block is placed.
+ */
+void
+StrategyHintVacuum(bool vacuum_active)
+{
+       strategy_hint_vacuum = vacuum_active;
+       strategy_vacuum_xid = GetCurrentTransactionId();
+}
+
+
+/*
+ * StrategyDirtyBufferList
+ *
+ *     Collects the ids of dirty buffers into buffer_list, which must have
+ *     room for max_buffers entries, and returns how many were stored.
+ *     The result never exceeds max_buffers.
+ */
+int
+StrategyDirtyBufferList(int *buffer_list, int max_buffers)
+{
+       int                                     num_buffer_dirty = 0;
+       int                                     cdb_id_t1;
+       int                                     cdb_id_t2;
+       int                                     buf_id;
+       BufferDesc                 *buf;
+
+       /*
+        * Traverse the T1 and T2 list LRU to MRU in "parallel"
+        * and add all dirty buffers found in that order to the list.
+        * The ARC strategy keeps all used buffers including pinned ones
+        * in the T1 or T2 list. So we cannot lose any dirty buffers.
+        */
+       cdb_id_t1 = StrategyControl->listHead[STRAT_LIST_T1];
+       cdb_id_t2 = StrategyControl->listHead[STRAT_LIST_T2];
+
+       while ((cdb_id_t1 >= 0 || cdb_id_t2 >= 0) && 
+                       num_buffer_dirty < max_buffers)
+       {
+               if (cdb_id_t1 >= 0)
+               {
+                       buf_id = StrategyCDB[cdb_id_t1].buf_id;
+                       buf = &BufferDescriptors[buf_id];
+
+                       if (buf->flags & BM_VALID)
+                       {
+                               if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
+                               {
+                                       buffer_list[num_buffer_dirty++] = buf_id;
+                               }
+                       }
+
+                       cdb_id_t1 = StrategyCDB[cdb_id_t1].next;
+               }
+
+               /*
+                * The T1 step above may just have stored the last entry that
+                * fits, so the limit has to be checked again here. Without
+                * this test a dirty T2 buffer would be written one element
+                * past the end of buffer_list.
+                */
+               if (cdb_id_t2 >= 0 && num_buffer_dirty < max_buffers)
+               {
+                       buf_id = StrategyCDB[cdb_id_t2].buf_id;
+                       buf = &BufferDescriptors[buf_id];
+
+                       if (buf->flags & BM_VALID)
+                       {
+                               if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
+                               {
+                                       buffer_list[num_buffer_dirty++] = buf_id;
+                               }
+                       }
+
+                       cdb_id_t2 = StrategyCDB[cdb_id_t2].next;
+               }
+       }
+
+       return num_buffer_dirty;
+}
+
+
+/*
+ * StrategyInitialize -- set up the shared state of the buffer cache
+ *             replacement strategy.
+ *
+ * Assume: All of the buffers are already building a linked list.
+ *             Only called by postmaster and only during initialization.
+ *
+ * The CDB lookup hash table and the control block (with room for
+ * Data_Descriptors * 2 CDBs) are created or attached. Only when the
+ * control block did not exist before are its contents initialized.
+ */
+void
+StrategyInitialize(bool init)
+{
+       bool            already_there;
+       int                     n;
+
+       /* The shared hash table used to look up CDBs by buffer tag */
+       InitBufTable(Data_Descriptors * 2);
+
+       /*
+        * Get or create the shared strategy control block. The CDB array
+        * trails the control block; one element is already part of the
+        * struct, hence the "- 1".
+        */
+       StrategyControl = (BufferStrategyControl *)
+                       ShmemInitStruct("Buffer Strategy Status",
+                                       sizeof(BufferStrategyControl) +
+                                       sizeof(BufferStrategyCDB) * (Data_Descriptors * 2 - 1),
+                                       &already_there);
+       StrategyCDB = &(StrategyControl->cdb[0]);
+
+       if (already_there)
+       {
+               /* Somebody set everything up before us - nothing left to do */
+               Assert(!init);
+               return;
+       }
+
+       /*
+        * From here on: only done once, usually in postmaster
+        */
+       Assert(init);
+
+       /*
+        * The whole linked list of free buffers, starting at buffer 0,
+        * belongs to the strategy
+        */
+       StrategyControl->listFreeBuffers = 0;
+
+       /*
+        * The initial target size of the T1 list is half of the
+        * available cache blocks
+        */
+       StrategyControl->target_T1_size = Data_Descriptors / 2;
+
+       /*
+        * B1, T1, T2 and B2 start out empty with zeroed hit counters
+        */
+       for (n = 0; n < STRAT_NUM_LISTS; n++)
+       {
+               StrategyControl->listHead[n] = -1;
+               StrategyControl->listTail[n] = -1;
+               StrategyControl->listSize[n] = 0;
+               StrategyControl->num_hit[n] = 0;
+       }
+       StrategyControl->num_lookup  = 0;
+       StrategyControl->stat_report = 0;
+
+       /*
+        * Chain every CDB into the listUnusedCDB; the last one
+        * terminates the chain
+        */
+       for (n = 0; n < Data_Descriptors * 2; n++)
+       {
+               BufferStrategyCDB  *entry = &StrategyCDB[n];
+
+               entry->next = n + 1;
+               entry->list = STRAT_LIST_UNUSED;
+               CLEAR_BUFFERTAG(&(entry->buf_tag));
+               entry->buf_id = -1;
+       }
+       StrategyCDB[Data_Descriptors * 2 - 1].next = -1;
+       StrategyControl->listUnusedCDB = 0;
+}
+
+
 #undef PinBuffer
 
 /*
@@ -95,18 +853,9 @@ PinBuffer(BufferDesc *buf)
 
        if (buf->refcount == 0)
        {
-               IsInQueue(buf);
-
-               /* remove from freelist queue */
-               BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
-               BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
-               buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
-
                /* mark buffer as no longer free */
                buf->flags &= ~BM_FREE;
        }
-       else
-               IsNotInQueue(buf);
 
        if (PrivateRefCount[b] == 0)
                buf->refcount++;
@@ -144,7 +893,6 @@ UnpinBuffer(BufferDesc *buf)
 {
        int                     b = BufferDescriptorGetBuffer(buf) - 1;
 
-       IsNotInQueue(buf);
        Assert(buf->refcount > 0);
        Assert(PrivateRefCount[b] > 0);
        PrivateRefCount[b]--;
@@ -154,7 +902,6 @@ UnpinBuffer(BufferDesc *buf)
        if (buf->refcount == 0)
        {
                /* buffer is now unpinned */
-               AddBufferToFreelist(buf);
                buf->flags |= BM_FREE;
        }
        else if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
@@ -187,64 +934,6 @@ refcount = %ld, file: %s, line: %d\n",
 }
 #endif
 
-/*
- * GetFreeBuffer() -- get the 'next' buffer from the freelist.
- */
-BufferDesc *
-GetFreeBuffer(void)
-{
-       BufferDesc *buf;
-
-       if (Free_List_Descriptor == SharedFreeList->freeNext)
-       {
-               /* queue is empty. All buffers in the buffer pool are pinned. */
-               ereport(ERROR,
-                               (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
-                                errmsg("out of free buffers")));
-               return NULL;
-       }
-       buf = &(BufferDescriptors[SharedFreeList->freeNext]);
-
-       /* remove from freelist queue */
-       BufferDescriptors[buf->freeNext].freePrev = buf->freePrev;
-       BufferDescriptors[buf->freePrev].freeNext = buf->freeNext;
-       buf->freeNext = buf->freePrev = INVALID_DESCRIPTOR;
-
-       buf->flags &= ~(BM_FREE);
-
-       return buf;
-}
-
-/*
- * InitFreeList -- initialize the dummy buffer descriptor used
- *             as a freelist head.
- *
- * Assume: All of the buffers are already linked in a circular
- *             queue.   Only called by postmaster and only during
- *             initialization.
- */
-void
-InitFreeList(bool init)
-{
-       SharedFreeList = &(BufferDescriptors[Free_List_Descriptor]);
-
-       if (init)
-       {
-               /* we only do this once, normally in the postmaster */
-               SharedFreeList->data = INVALID_OFFSET;
-               SharedFreeList->flags = 0;
-               SharedFreeList->flags &= ~(BM_VALID | BM_DELETED | BM_FREE);
-               SharedFreeList->buf_id = Free_List_Descriptor;
-
-               /* insert it into a random spot in the circular queue */
-               SharedFreeList->freeNext = BufferDescriptors[0].freeNext;
-               SharedFreeList->freePrev = 0;
-               BufferDescriptors[SharedFreeList->freeNext].freePrev =
-                       BufferDescriptors[SharedFreeList->freePrev].freeNext =
-                       Free_List_Descriptor;
-       }
-}
-
 
 /*
  * print out the free list and check for breaks.
index 44e492b6a983eab9109204b7505719f034647bbf..ac235ecc3a425f8368ef644ae207af6349c7f93b 100644 (file)
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.166 2003/11/07 21:27:38 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.167 2003/11/13 00:40:01 wieck Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -73,6 +73,7 @@ extern int    CheckPointTimeout;
 extern int     CommitDelay;
 extern int     CommitSiblings;
 extern char *preload_libraries_string;
+extern int     BufferStrategyStatInterval;
 
 #ifdef HAVE_SYSLOG
 extern char *Syslog_facility;
@@ -1190,6 +1191,15 @@ static struct config_int ConfigureNamesInt[] =
                -1, -1, INT_MAX / 1000, NULL, NULL
        },
 
+       {
+               {"buffer_strategy_status_interval", PGC_POSTMASTER, RESOURCES_MEM,
+                       gettext_noop("Interval to report buffer strategy status in seconds"),
+                       NULL
+               },
+               &BufferStrategyStatInterval,
+               0, 0, 600, NULL, NULL
+       },
+
        /* End-of-list marker */
        {
                {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL
index 1ead89e130844f153f47147840e6c06368bbce0f..15c73e6692e5afae71580f099abf27f22adf9bf0 100644 (file)
@@ -58,6 +58,7 @@
 #shared_buffers = 1000         # min 16, at least max_connections*2, 8KB each
 #sort_mem = 1024               # min 64, size in KB
 #vacuum_mem = 8192             # min 1024, size in KB
+#buffer_strategy_status_interval = 0   # 0-600 seconds
 
 # - Free Space Map -
 
index 1c1c1d3451934ce10719a3387b98e09348adfe60..492f16cc115b3afc723970dbb91ae19d8a964bb9 100644 (file)
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: miscadmin.h,v 1.134 2003/09/24 18:54:01 tgl Exp $
+ * $Id: miscadmin.h,v 1.135 2003/11/13 00:40:01 wieck Exp $
  *
  * NOTES
  *       some of the information in this file should be moved to
@@ -96,6 +96,13 @@ extern void ProcessInterrupts(void);
                CritSectionCount--; \
        } while(0)
 
+/*
+ * PG_DELAY
+ *
+ *     Calls select() with no descriptor sets and a timeout of _msec
+ *     milliseconds; the result of select() is discarded.
+ *
+ *     The body is wrapped in do { ... } while(0) so that the macro
+ *     behaves like a single statement, e.g. as the unbraced body of an
+ *     if that is followed by an else. _msec is evaluated twice.
+ */
+#define PG_DELAY(_msec) \
+do { \
+       struct timeval delay; \
+       delay.tv_sec = (_msec) / 1000; \
+       delay.tv_usec = ((_msec) % 1000) * 1000; \
+       (void) select(0, NULL, NULL, NULL, &delay); \
+} while(0)
 
 /*****************************************************************************
  *       globals.h --                                                                                                                   *
index f2d615151ec1761f627e660a6da63e30c090e7be..deafa0b8cec20f612b0dd064db66466307e1796d 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: buf_internals.h,v 1.61 2003/08/04 02:40:14 momjian Exp $
+ * $Id: buf_internals.h,v 1.62 2003/11/13 00:40:02 wieck Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -72,17 +72,29 @@ typedef struct buftag
        (a)->rnode = (xx_reln)->rd_node \
 )
 
+/*
+ * BUFFERTAG_EQUALS
+ *     True when the buffer tag pointed to by a names block xx_blockNum
+ *     of relation xx_reln, i.e. when rnode.tblNode and rnode.relNode
+ *     match the relation's rd_node and blockNum matches.
+ *     a and xx_reln are evaluated more than once.
+ */
+#define BUFFERTAG_EQUALS(a,xx_reln,xx_blockNum) \
+( \
+       (a)->rnode.tblNode == (xx_reln)->rd_node.tblNode && \
+       (a)->rnode.relNode == (xx_reln)->rd_node.relNode && \
+       (a)->blockNum == (xx_blockNum) \
+)
+/*
+ * BUFFERTAGS_EQUAL
+ *     True when the two buffer tags pointed to by a and b agree in
+ *     rnode.tblNode, rnode.relNode and blockNum.
+ *     Both arguments are evaluated more than once.
+ */
+#define BUFFERTAGS_EQUAL(a,b) \
+( \
+       (a)->rnode.tblNode == (b)->rnode.tblNode && \
+       (a)->rnode.relNode == (b)->rnode.relNode && \
+       (a)->blockNum == (b)->blockNum \
+)
+
 /*
  *     BufferDesc -- shared buffer cache metadata for a single
  *                               shared buffer descriptor.
  */
 typedef struct sbufdesc
 {
-       Buffer          freeNext;               /* links for freelist chain */
-       Buffer          freePrev;
+       Buffer          bufNext;                /* link in freelist chain */
        SHMEM_OFFSET data;                      /* pointer to data in buf pool */
 
-       /* tag and id must be together for table lookup (still true?) */
+       /* tag and id must be together for table lookup */
        BufferTag       tag;                    /* file/block identifier */
        int                     buf_id;                 /* buffer's index number (from 0) */
 
@@ -107,6 +119,7 @@ typedef struct sbufdesc
 
 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
 
+
 /*
  * Each backend has its own BufferLocks[] array holding flag bits
  * showing what locks it has set on each buffer.
@@ -167,14 +180,19 @@ extern long int LocalBufferFlushCount;
 /*freelist.c*/
 extern void PinBuffer(BufferDesc *buf);
 extern void UnpinBuffer(BufferDesc *buf);
-extern BufferDesc *GetFreeBuffer(void);
-extern void InitFreeList(bool init);
+extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck);
+extern BufferDesc *StrategyGetBuffer(void);
+extern void StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum);
+extern void StrategyInvalidateBuffer(BufferDesc *buf);
+extern void StrategyHintVacuum(bool vacuum_active);
+extern int StrategyDirtyBufferList(int *buffer_dirty, int max_buffers);
+extern void StrategyInitialize(bool init);
 
 /* buf_table.c */
-extern void InitBufTable(void);
-extern BufferDesc *BufTableLookup(BufferTag *tagPtr);
-extern bool BufTableDelete(BufferDesc *buf);
-extern bool BufTableInsert(BufferDesc *buf);
+extern void InitBufTable(int size);
+extern int BufTableLookup(BufferTag *tagPtr);
+extern bool BufTableInsert(BufferTag *tagPtr, Buffer buf_id);
+extern bool BufTableDelete(BufferTag *tagPtr);
 
 /* bufmgr.c */
 extern BufferDesc *BufferDescriptors;