*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.161 2004/04/19 23:27:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.162 2004/04/21 18:06:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
- *
- * BufferAlloc() -- lookup a buffer in the buffer table. If
- * it isn't there add it, but do not read data into memory.
- * This is used when we are about to reinitialize the
- * buffer so don't care what the current disk contents are.
- * BufferAlloc() also pins the new buffer in memory.
- *
- * ReadBuffer() -- like BufferAlloc() but reads the data
- * on a buffer cache miss.
+ * ReadBuffer() -- find or create a buffer holding the requested page,
+ * and pin it so that no one can destroy it while this process
+ * is using it.
*
* ReleaseBuffer() -- unpin the buffer
*
*
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
*
- * BufferSync() -- flush all dirty buffers in the buffer pool.
+ * BufferSync() -- flush all (or some) dirty buffers in the buffer pool.
*
* InitBufferPool() -- Init the buffer module.
*
* bypass bufmgr */
long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */
-/*
- * Macro : BUFFER_IS_BROKEN
- * Note that write error doesn't mean the buffer broken
-*/
-#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY))
-
static void PinBuffer(BufferDesc *buf);
static void UnpinBuffer(BufferDesc *buf);
static void WaitIO(BufferDesc *buf);
static void StartBufferIO(BufferDesc *buf, bool forInput);
-static void TerminateBufferIO(BufferDesc *buf);
+static void TerminateBufferIO(BufferDesc *buf, int err_flag);
static void ContinueBufferIO(BufferDesc *buf, bool forInput);
static void buffer_write_error_callback(void *arg);
static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
-static void BufferReplace(BufferDesc *bufHdr);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
static void write_buffer(Buffer buffer, bool unpin);
* relation at the same time!)
*
* Returns: the buffer number for the buffer containing
- * the block read, or NULL on an error. If successful,
- * the returned buffer has been pinned.
+ * the block read. The returned buffer has been pinned.
+ * Does not return on error --- elog's instead.
*
* Assume when this function is called, that reln has been
* opened already.
pgstat_count_buffer_read(&reln->pgstat_info, reln);
/* Substitute proper block number if caller asked for P_NEW */
if (isExtend)
- {
blockNum = reln->rd_nblocks;
- reln->rd_nblocks++;
- }
+
bufHdr = LocalBufferAlloc(reln, blockNum, &found);
if (found)
- {
LocalBufferHitCount++;
- pgstat_count_buffer_hit(&reln->pgstat_info, reln);
- }
}
else
{
{
/* must be sure we have accurate file length! */
blockNum = reln->rd_nblocks = smgrnblocks(reln->rd_smgr);
- reln->rd_nblocks++;
}
/*
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
bufHdr = BufferAlloc(reln, blockNum, &found);
if (found)
- {
BufferHitCount++;
- pgstat_count_buffer_hit(&reln->pgstat_info, reln);
- }
}
/* At this point we do NOT hold the bufmgr lock. */
- if (!bufHdr)
- return InvalidBuffer;
-
- /* if it's already in the buffer pool, we're done */
+ /* if it was already in the buffer pool, we're done */
if (found)
{
- /* That is, we're done if we expected to be able to find it ... */
- if (!isExtend)
- return BufferDescriptorGetBuffer(bufHdr);
+ /* Just need to update stats before we exit */
+ pgstat_count_buffer_hit(&reln->pgstat_info, reln);
- /*
- * If we found a buffer when we were expecting to extend the
- * relation, the implication is that a buffer was already created
- * for the next page position, but then smgrextend failed to write
- * the page. We'd better try the smgrextend again. But since
- * BufferAlloc won't have done StartBufferIO, we must do that
- * first.
- */
- if (!isLocalBuf)
- {
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- StartBufferIO(bufHdr, false);
- LWLockRelease(BufMgrLock);
- }
+ if (VacuumCostActive)
+ VacuumCostBalance += VacuumCostPageHit;
+
+ return BufferDescriptorGetBuffer(bufHdr);
}
/*
- * if we have gotten to this point, the relation must be open in the smgr.
+ * if we have gotten to this point, we have allocated a buffer for the
+ * page but its contents are not yet valid. IO_IN_PROGRESS is set for
+ * it, if it's a shared buffer.
+ *
+ * Note: if smgrextend fails, we will end up with a buffer that is
+ * allocated but not marked BM_VALID. P_NEW will still select the same
+ * block number (because the relation didn't get any longer on disk)
+ * and so future attempts to extend the relation will find the same
+ * buffer (if it's not been recycled) but come right back here to try
+ * smgrextend again.
*/
+ Assert(!(bufHdr->flags & BM_VALID));
+
if (isExtend)
{
/* new buffers are zero-filled */
MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
smgrextend(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data));
+ /* After successful extend, increment relation length */
+ reln->rd_nblocks++;
}
else
{
if (isLocalBuf)
{
- /* No shared buffer state to update... */
- return BufferDescriptorGetBuffer(bufHdr);
+ /* Only need to adjust flags */
+ bufHdr->flags |= BM_VALID;
}
+ else
+ {
+ /* lock buffer manager again to update IO IN PROGRESS */
+ LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- /* lock buffer manager again to update IO IN PROGRESS */
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+ /* IO Succeeded, so mark data valid */
+ bufHdr->flags |= BM_VALID;
- /* IO Succeeded. clear the flags, finish buffer update */
- bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
+ /* If anyone was waiting for IO to complete, wake them up now */
+ TerminateBufferIO(bufHdr, 0);
- /* If anyone was waiting for IO to complete, wake them up now */
- TerminateBufferIO(bufHdr);
+ LWLockRelease(BufMgrLock);
+ }
- LWLockRelease(BufMgrLock);
+ if (VacuumCostActive)
+ VacuumCostBalance += VacuumCostPageMiss;
return BufferDescriptorGetBuffer(bufHdr);
}
/*
- * BufferAlloc -- Get a buffer from the buffer pool but don't
- * read it. If successful, the returned buffer is pinned.
+ * BufferAlloc -- subroutine for ReadBuffer. Handles lookup of a shared
+ * buffer. If no buffer exists already, selects a replacement
+ * victim and evicts the old page, but does NOT read in new page.
*
- * Returns: descriptor for buffer
+ * The returned buffer is pinned and is already marked as holding the
+ * desired page. If it already did have the desired page, *foundPtr is
+ * set TRUE. Otherwise, *foundPtr is set FALSE and the buffer is marked
+ * as IO_IN_PROGRESS; ReadBuffer will now need to do I/O to fill it.
+ *
+ * *foundPtr is actually redundant with the buffer's BM_VALID flag, but
+ * we keep it for simplicity in ReadBuffer.
*
* BufMgrLock must be held at entry. When this routine returns,
* the BufMgrLock is guaranteed NOT to be held.
BlockNumber blockNum,
bool *foundPtr)
{
+ BufferTag newTag; /* identity of requested block */
BufferDesc *buf,
*buf2;
- BufferTag newTag; /* identity of requested block */
int cdb_found_index,
cdb_replace_index;
- bool inProgress; /* buffer undergoing IO */
+ bool inProgress; /* did we already do StartBufferIO? */
/* create a tag so we can lookup the buffer */
- INIT_BUFFERTAG(&newTag, reln, blockNum);
+ INIT_BUFFERTAG(newTag, reln, blockNum);
/* see if the block is in the buffer pool already */
buf = StrategyBufferLookup(&newTag, false, &cdb_found_index);
if (buf != NULL)
{
/*
- * Found it. Now, (a) pin the buffer so no one steals it from the
- * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting
- * the buffer into the buffer pool.
+ * Found it. Now, pin the buffer so no one can steal it from the
+ * buffer pool, and check to see if someone else is still reading
+ * data into the buffer. (Formerly, we'd always block here if
+ * IO_IN_PROGRESS is set, but there's no need to wait when someone
+ * is writing rather than reading.)
*/
+ *foundPtr = TRUE;
PinBuffer(buf);
- inProgress = (buf->flags & BM_IO_IN_PROGRESS);
- *foundPtr = TRUE;
- if (inProgress) /* confirm end of IO */
- {
- WaitIO(buf);
- inProgress = (buf->flags & BM_IO_IN_PROGRESS);
- }
- if (BUFFER_IS_BROKEN(buf))
+ if (!(buf->flags & BM_VALID))
{
- /*
- * I couldn't understand the following old comment. If there's
- * no IO for the buffer and the buffer is BROKEN, it should be
- * read again. So start a new buffer IO here.
- *
- * wierd race condition:
- *
- * We were waiting for someone else to read the buffer. While we
- * were waiting, the reader boof'd in some way, so the
- * contents of the buffer are still invalid. By saying that
- * we didn't find it, we can make the caller reinitialize the
- * buffer. If two processes are waiting for this block, both
- * will read the block. The second one to finish may
- * overwrite any updates made by the first. (Assume higher
- * level synchronization prevents this from happening).
- *
- * This is never going to happen, don't worry about it.
- */
- *foundPtr = FALSE;
- StartBufferIO(buf, true);
+ if (buf->flags & BM_IO_IN_PROGRESS)
+ {
+ /* someone else is reading it, wait for them */
+ WaitIO(buf);
+ }
+ if (!(buf->flags & BM_VALID))
+ {
+ /*
+ * If we get here, previous attempts to read the buffer
+ * must have failed ... but we shall bravely try again.
+ */
+ *foundPtr = FALSE;
+ StartBufferIO(buf, true);
+ }
}
LWLockRelease(BufMgrLock);
- /*
- * Do the cost accounting for vacuum
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageHit;
-
return buf;
}
/*
* Didn't find it in the buffer pool. We'll have to initialize a new
* buffer. First, grab one from the free list. If it's dirty, flush
- * it to disk. Remember to unlock BufMgrLock while doing the IOs.
+ * it to disk. Remember to unlock BufMgrLock while doing the IO.
*/
inProgress = FALSE;
- for (buf = NULL; buf == NULL;)
+ do
{
buf = StrategyGetBuffer(&cdb_replace_index);
buf->refcount = 1;
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
- if (buf->flags & BM_DIRTY || buf->cntxDirty)
+ if ((buf->flags & BM_VALID) &&
+ (buf->flags & BM_DIRTY || buf->cntxDirty))
{
/*
- * skip write error buffers
+ * Set BM_IO_IN_PROGRESS to show the buffer is being written.
+ * It cannot already be set because the buffer would be pinned
+ * if someone were writing it.
+ *
+ * Note: it's okay to grab the io_in_progress lock while holding
+ * BufMgrLock. All code paths that acquire this lock pin the
+ * buffer first; since no one had it pinned (it just came off the
+ * free list), no one else can have the lock.
*/
- if ((buf->flags & BM_IO_ERROR) != 0)
- {
- UnpinBuffer(buf);
- buf = NULL;
- continue;
- }
+ StartBufferIO(buf, false);
- /*
- * Set BM_IO_IN_PROGRESS to keep anyone from doing anything
- * with the contents of the buffer while we write it out. We
- * don't really care if they try to read it, but if they can
- * complete a BufferAlloc on it they can then scribble into
- * it, and we'd really like to avoid that while we are
- * flushing the buffer. Setting this flag should block them
- * in WaitIO until we're done.
- */
inProgress = TRUE;
- /*
- * All code paths that acquire this lock pin the buffer first;
- * since no one had it pinned (it just came off the free
- * list), no one else can have this lock.
- */
- StartBufferIO(buf, false);
-
/*
* Write the buffer out, being careful to release BufMgrLock
- * before starting the I/O.
- */
- BufferReplace(buf);
-
- /*
- * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
- * be set by anyone. - vadim 01/17/97
- */
- if (buf->flags & BM_JUST_DIRTIED)
- {
- elog(PANIC, "content of block %u of %u/%u changed while flushing",
- buf->tag.blockNum,
- buf->tag.rnode.tblNode, buf->tag.rnode.relNode);
- }
-
- buf->flags &= ~BM_DIRTY;
- buf->cntxDirty = false;
-
- /*
- * Somebody could have pinned the buffer while we were doing
- * the I/O and had given up the BufMgrLock (though they would
- * be waiting for us to clear the BM_IO_IN_PROGRESS flag).
- * That's why this is a loop -- if so, we need to clear the
- * I/O flags, remove our pin and start all over again.
- *
- * People may be making buffers free at any time, so there's no
- * reason to think that we have an immediate disaster on our
- * hands.
+ * while doing the I/O.
*/
- if (buf && buf->refcount > 1)
- {
- inProgress = FALSE;
- buf->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(buf);
- UnpinBuffer(buf);
- buf = NULL;
- }
+ FlushBuffer(buf, NULL);
/*
* Somebody could have allocated another buffer for the same
- * block we are about to read in. (While we flush out the
+ * block we are about to read in. While we flush out the
* dirty buffer, we don't hold the lock and someone could have
* allocated another buffer for the same block. The problem is
- * we haven't gotten around to insert the new tag into the
- * buffer table. So we need to check here. -ay 3/95
+ * we haven't yet inserted the new tag into the buffer table.
+ * So we need to check here. -ay 3/95
+ *
+ * Another reason we have to do this is to update cdb_found_index,
+ * since the CDB could have disappeared from B1/B2 list while
+ * we were writing.
*/
buf2 = StrategyBufferLookup(&newTag, true, &cdb_found_index);
if (buf2 != NULL)
{
/*
- * Found it. Someone has already done what we're about to
+ * Found it. Someone has already done what we were about to
* do. We'll just handle this as if it were found in the
- * buffer pool in the first place.
+ * buffer pool in the first place. First, give up the
+ * buffer we were planning to use.
*/
- if (buf != NULL)
- {
- buf->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(buf);
- /* give up old buffer since we don't need it any more */
- UnpinBuffer(buf);
- }
+ TerminateBufferIO(buf, 0);
+ UnpinBuffer(buf);
- PinBuffer(buf2);
- inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
+ buf = buf2;
+
+ /* remaining code should match code at top of routine */
*foundPtr = TRUE;
- if (inProgress)
- {
- WaitIO(buf2);
- inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
- }
- if (BUFFER_IS_BROKEN(buf2))
+ PinBuffer(buf);
+
+ if (!(buf->flags & BM_VALID))
{
- *foundPtr = FALSE;
- StartBufferIO(buf2, true);
+ if (buf->flags & BM_IO_IN_PROGRESS)
+ {
+ /* someone else is reading it, wait for them */
+ WaitIO(buf);
+ }
+ if (!(buf->flags & BM_VALID))
+ {
+ /*
+ * If we get here, previous attempts to read the buffer
+ * must have failed ... but we shall bravely try again.
+ */
+ *foundPtr = FALSE;
+ StartBufferIO(buf, true);
+ }
}
LWLockRelease(BufMgrLock);
- /*
- * Do the cost accounting for vacuum. (XXX perhaps better
- * to consider this a miss? We didn't have to do the read,
- * but we did have to write ...)
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageHit;
+ return buf;
+ }
- return buf2;
+ /*
+ * Somebody could have pinned the buffer while we were doing
+ * the I/O and had given up the BufMgrLock. If so, we can't
+ * recycle this buffer --- we need to clear the I/O flags,
+ * remove our pin and choose a new victim buffer. Similarly,
+ * we have to start over if somebody re-dirtied the buffer.
+ */
+ if (buf->refcount > 1 || buf->flags & BM_DIRTY || buf->cntxDirty)
+ {
+ TerminateBufferIO(buf, 0);
+ UnpinBuffer(buf);
+ inProgress = FALSE;
+ buf = NULL;
}
}
- }
+ } while (buf == NULL);
/*
* At this point we should have the sole pin on a non-dirty buffer and
/*
* Tell the buffer replacement strategy that we are replacing the
- * buffer content. Then rename the buffer.
+ * buffer content. Then rename the buffer. Clearing BM_VALID here
+	 * is necessary; clearing the dirty bits is just paranoia.
*/
StrategyReplaceBuffer(buf, &newTag, cdb_found_index, cdb_replace_index);
buf->tag = newTag;
+ buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
+ buf->cntxDirty = false;
/*
* Buffer contents are currently invalid. Have to mark IO IN PROGRESS
- * so no one fiddles with them until the read completes. If this
- * routine has been called simply to allocate a buffer, no io will be
- * attempted, so the flag isnt set.
+ * so no one fiddles with them until the read completes. We may have
+ * already marked it, in which case we just flip from write to read
+ * status.
*/
if (!inProgress)
StartBufferIO(buf, true);
LWLockRelease(BufMgrLock);
- /*
- * Do the cost accounting for vacuum
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageMiss;
-
return buf;
}
bufHdr = &BufferDescriptors[buffer - 1];
+ Assert(PrivateRefCount[buffer - 1] > 0);
+
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(bufHdr->refcount > 0);
/*
- * If the buffer is not dirty yet, do vacuum cost accounting.
+ * If the buffer was not dirty already, do vacuum cost accounting.
*/
if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
VacuumCostBalance += VacuumCostPageDirty;
BufferTag *buftags;
int num_buffer_dirty;
int i;
- ErrorContextCallback errcontext;
/*
* Get a list of all currently dirty buffers and how many there are.
if (maxpages > 0 && num_buffer_dirty > maxpages)
num_buffer_dirty = maxpages;
- /* Setup error traceback support for ereport() */
- errcontext.callback = buffer_write_error_callback;
- errcontext.arg = NULL;
- errcontext.previous = error_context_stack;
- error_context_stack = &errcontext;
-
/*
* Loop over buffers to be written. Note the BufMgrLock is held at
- * loop top, but is released and reacquired intraloop, so we aren't
- * holding it long.
+ * loop top, but is released and reacquired within FlushBuffer,
+ * so we aren't holding it long.
*/
for (i = 0; i < num_buffer_dirty; i++)
{
BufferDesc *bufHdr = dirty_buffers[i];
- Buffer buffer;
- XLogRecPtr recptr;
- SMgrRelation reln;
-
- errcontext.arg = bufHdr;
/*
* Check it is still the same page and still needs writing.
*/
if (!(bufHdr->flags & BM_VALID))
continue;
- if (!BUFFERTAGS_EQUAL(&bufHdr->tag, &buftags[i]))
+ if (!BUFFERTAGS_EQUAL(bufHdr->tag, buftags[i]))
continue;
- if (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty))
+ if (!(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
continue;
/*
/* Still need writing? */
if (!(bufHdr->flags & BM_VALID))
continue;
- if (!BUFFERTAGS_EQUAL(&bufHdr->tag, &buftags[i]))
+ if (!BUFFERTAGS_EQUAL(bufHdr->tag, buftags[i]))
continue;
- if (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty))
+ if (!(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
continue;
}
* avoid conflicts with FlushRelationBuffers.
*/
PinBuffer(bufHdr);
- StartBufferIO(bufHdr, false); /* output IO start */
-
- /* Release BufMgrLock while doing xlog work */
- LWLockRelease(BufMgrLock);
-
- buffer = BufferDescriptorGetBuffer(bufHdr);
-
- /*
- * Protect buffer content against concurrent update
- */
- LockBuffer(buffer, BUFFER_LOCK_SHARE);
-
- /*
- * Force XLOG flush for buffer' LSN
- */
- recptr = BufferGetLSN(bufHdr);
- XLogFlush(recptr);
-
- /*
- * Now it's safe to write buffer to disk. Note that no one else
- * should have been able to write it while we were busy with
- * locking and log flushing because we set the IO flag.
- *
- * Before we issue the actual write command, clear the just-dirtied
- * flag. This lets us recognize concurrent changes (note that only
- * hint-bit changes are possible since we hold the buffer shlock).
- */
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- Assert(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty);
- bufHdr->flags &= ~BM_JUST_DIRTIED;
- LWLockRelease(BufMgrLock);
-
- /* Find smgr relation for buffer */
- reln = smgropen(bufHdr->tag.rnode);
-
- /* And write... */
- smgrwrite(reln,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
-
- /*
- * Note that it's safe to change cntxDirty here because of we
- * protect it from upper writers by share lock and from other
- * bufmgr routines by BM_IO_IN_PROGRESS
- */
- bufHdr->cntxDirty = false;
+ StartBufferIO(bufHdr, false);
- /*
- * Release the per-buffer readlock, reacquire BufMgrLock.
- */
- LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ FlushBuffer(bufHdr, NULL);
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-
- bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
- TerminateBufferIO(bufHdr); /* Sync IO finished */
- BufferFlushCount++;
-
- /*
- * If this buffer was marked by someone as DIRTY while we were
- * flushing it out we must not clear DIRTY flag - vadim 01/17/97
- */
- if (!(bufHdr->flags & BM_JUST_DIRTIED))
- bufHdr->flags &= ~BM_DIRTY;
+ TerminateBufferIO(bufHdr, 0);
UnpinBuffer(bufHdr);
}
LWLockRelease(BufMgrLock);
- /* Pop the error context stack */
- error_context_stack = errcontext.previous;
-
pfree(dirty_buffers);
pfree(buftags);
}
/*
- * BufferReplace
+ * FlushBuffer
+ * Physically write out a shared buffer.
*
- * Write out the buffer corresponding to 'bufHdr'.
+ * NOTE: this actually just passes the buffer contents to the kernel; the
+ * real write to disk won't happen until the kernel feels like it. This
+ * is okay from our point of view since we can redo the changes from WAL.
+ * However, we will need to force the changes to disk via sync/fsync
+ * before we can checkpoint WAL.
*
- * BufMgrLock must be held at entry, and the buffer must be pinned.
+ * BufMgrLock must be held at entry, and the buffer must be pinned. The
+ * caller is also responsible for doing StartBufferIO/TerminateBufferIO.
+ *
+ * If the caller has an smgr reference for the buffer's relation, pass it
+ * as the second parameter. If not, pass NULL. (Do not open relation
+ * while holding BufMgrLock!)
*/
static void
-BufferReplace(BufferDesc *bufHdr)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln)
{
- SMgrRelation reln;
+ Buffer buffer;
XLogRecPtr recptr;
ErrorContextCallback errcontext;
+ /* Transpose cntxDirty into flags while holding BufMgrLock */
+ buf->cntxDirty = false;
+ buf->flags |= BM_DIRTY;
+
/* To check if block content changed while flushing. - vadim 01/17/97 */
- bufHdr->flags &= ~BM_JUST_DIRTIED;
+ buf->flags &= ~BM_JUST_DIRTIED;
+ /* Release BufMgrLock while doing xlog work */
LWLockRelease(BufMgrLock);
/* Setup error traceback support for ereport() */
errcontext.callback = buffer_write_error_callback;
- errcontext.arg = bufHdr;
+ errcontext.arg = buf;
errcontext.previous = error_context_stack;
error_context_stack = &errcontext;
+ /* Find smgr relation for buffer while holding minimal locks */
+ if (reln == NULL)
+ reln = smgropen(buf->tag.rnode);
+
+ buffer = BufferDescriptorGetBuffer(buf);
+
/*
- * No need to lock buffer context - no one should be able to end
- * ReadBuffer
+ * Protect buffer content against concurrent update. (Note that
+ * hint-bit updates can still occur while the write is in progress,
+ * but we assume that that will not invalidate the data written.)
*/
- recptr = BufferGetLSN(bufHdr);
- XLogFlush(recptr);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
- /* Find smgr relation for buffer */
- reln = smgropen(bufHdr->tag.rnode);
+ /*
+	 * Force XLOG flush for buffer's LSN.  This implements the basic WAL
+ * rule that log updates must hit disk before any of the data-file
+ * changes they describe do.
+ */
+ recptr = BufferGetLSN(buf);
+ XLogFlush(recptr);
- /* And write... */
+ /*
+ * Now it's safe to write buffer to disk. Note that no one else
+ * should have been able to write it while we were busy with
+ * locking and log flushing because caller has set the IO flag.
+ *
+ * It would be better to clear BM_JUST_DIRTIED right here, but we'd
+ * have to reacquire the BufMgrLock and it doesn't seem worth it.
+ */
smgrwrite(reln,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
+ buf->tag.blockNum,
+ (char *) MAKE_PTR(buf->data));
/* Pop the error context stack */
error_context_stack = errcontext.previous;
+ /*
+ * Release the per-buffer readlock, reacquire BufMgrLock.
+ */
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
BufferFlushCount++;
+
+ /*
+ * If this buffer was marked by someone as DIRTY while we were
+ * flushing it out we must not clear DIRTY flag - vadim 01/17/97
+ */
+ if (!(buf->flags & BM_JUST_DIRTIED))
+ buf->flags &= ~BM_DIRTY;
}
/*
* In all cases, the caller should be holding AccessExclusiveLock on
* the target relation to ensure that no other backend is busy reading
* more blocks of the relation (or might do so before we commit).
+ * This should also ensure that no one is busy dirtying these blocks.
*
* Formerly, we considered it an error condition if we found dirty
* buffers here. However, since BufferSync no longer forces out all
* to still be present in the cache due to failure of an earlier
* transaction. So, must flush dirty buffers without complaint.
*
- * Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED
+ * Returns: 0 - Ok, -1 - FAILED TO CLEAR DIRTY BIT, -2 - PINNED
*
* XXX currently it sequentially searches the buffer pool, should be
* changed to more clever ways of searching.
{
int i;
BufferDesc *bufHdr;
- XLogRecPtr recptr;
- ErrorContextCallback errcontext;
-
- /* Setup error traceback support for ereport() */
- errcontext.callback = buffer_write_error_callback;
- errcontext.arg = NULL;
- errcontext.previous = error_context_stack;
- error_context_stack = &errcontext;
if (rel->rd_istemp)
{
for (i = 0; i < NLocBuffer; i++)
{
bufHdr = &LocalBufferDescriptors[i];
- errcontext.arg = bufHdr;
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
- if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
+ if ((bufHdr->flags & BM_VALID) &&
+ (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
{
- /* Open it at the smgr level if not already done */
+ ErrorContextCallback errcontext;
+
+ /* Setup error traceback support for ereport() */
+ errcontext.callback = buffer_write_error_callback;
+ errcontext.arg = bufHdr;
+ errcontext.previous = error_context_stack;
+ error_context_stack = &errcontext;
+
+ /* Open rel at the smgr level if not already done */
if (rel->rd_smgr == NULL)
rel->rd_smgr = smgropen(rel->rd_node);
smgrwrite(rel->rd_smgr,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
+
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
+
+ /* Pop the error context stack */
+ error_context_stack = errcontext.previous;
}
if (LocalRefCount[i] > 0)
{
- error_context_stack = errcontext.previous;
elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is referenced (%ld)",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum, LocalRefCount[i]);
}
}
- /* Pop the error context stack */
- error_context_stack = errcontext.previous;
-
return 0;
}
for (i = 0; i < NBuffers; i++)
{
bufHdr = &BufferDescriptors[i];
- errcontext.arg = bufHdr;
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
- if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
+ if ((bufHdr->flags & BM_VALID) &&
+ (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
{
PinBuffer(bufHdr);
+ /* Someone else might be flushing buffer */
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr);
- LWLockRelease(BufMgrLock);
-
- /*
- * Force XLOG flush for buffer' LSN
- */
- recptr = BufferGetLSN(bufHdr);
- XLogFlush(recptr);
-
- /*
- * Now it's safe to write buffer to disk
- */
-
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- if (bufHdr->flags & BM_IO_IN_PROGRESS)
- WaitIO(bufHdr);
-
+ /* Still dirty? */
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
- bufHdr->flags &= ~BM_JUST_DIRTIED;
- StartBufferIO(bufHdr, false); /* output IO start */
-
- LWLockRelease(BufMgrLock);
-
- /* Open it at the smgr level if not already done */
- if (rel->rd_smgr == NULL)
- rel->rd_smgr = smgropen(rel->rd_node);
-
- smgrwrite(rel->rd_smgr,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
-
- BufferFlushCount++;
+ StartBufferIO(bufHdr, false);
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- bufHdr->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(bufHdr);
- Assert(!(bufHdr->flags & BM_JUST_DIRTIED));
- bufHdr->flags &= ~BM_DIRTY;
+ FlushBuffer(bufHdr, rel->rd_smgr);
- /*
- * Note that it's safe to change cntxDirty here
- * because of we protect it from upper writers by
- * AccessExclusiveLock and from other bufmgr routines
- * by BM_IO_IN_PROGRESS
- */
- bufHdr->cntxDirty = false;
+ TerminateBufferIO(bufHdr, 0);
}
UnpinBuffer(bufHdr);
+ if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
+ {
+ LWLockRelease(BufMgrLock);
+ elog(WARNING, "FlushRelationBuffers(\"%s\", %u): block %u was re-dirtied",
+ RelationGetRelationName(rel), firstDelBlock,
+ bufHdr->tag.blockNum);
+ return -1;
+ }
}
if (bufHdr->refcount != 0)
{
LWLockRelease(BufMgrLock);
- error_context_stack = errcontext.previous;
elog(WARNING, "FlushRelationBuffers(\"%s\", %u): block %u is referenced (private %ld, global %d)",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum,
LWLockRelease(BufMgrLock);
- /* Pop the error context stack */
- error_context_stack = errcontext.previous;
-
return 0;
}
* ReleaseBuffer -- remove the pin on a buffer without
* marking it dirty.
*/
-int
+void
ReleaseBuffer(Buffer buffer)
{
BufferDesc *bufHdr;
{
Assert(LocalRefCount[-buffer - 1] > 0);
LocalRefCount[-buffer - 1]--;
- return STATUS_OK;
+ return;
}
if (BAD_BUFFER_ID(buffer))
- return STATUS_ERROR;
+ elog(ERROR, "bad buffer id: %d", buffer);
bufHdr = &BufferDescriptors[buffer - 1];
Assert(PrivateRefCount[buffer - 1] > 0);
+
if (PrivateRefCount[buffer - 1] > 1)
PrivateRefCount[buffer - 1]--;
else
UnpinBuffer(bufHdr);
LWLockRelease(BufMgrLock);
}
-
- return STATUS_OK;
}
#ifdef NOT_USED
* (Assumptions)
* My process is executing IO for the buffer
* BufMgrLock is held
+ * BM_IO_IN_PROGRESS mask is set for the buffer
* The buffer is Pinned
*
+ * err_flag must be 0 for successful completion and BM_IO_ERROR for failure.
+ *
* Because BufMgrLock is held, we are already in an interrupt holdoff here,
* and do not need another.
*/
static void
-TerminateBufferIO(BufferDesc *buf)
+TerminateBufferIO(BufferDesc *buf, int err_flag)
{
Assert(buf == InProgressBuf);
+ Assert(buf->flags & BM_IO_IN_PROGRESS);
+ buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
+ buf->flags |= err_flag;
+
LWLockRelease(buf->io_in_progress_lock);
+
InProgressBuf = NULL;
}
/*
* Clean up any active buffer I/O after an error.
- * BufMgrLock isn't held when this function is called.
+ * BufMgrLock isn't held when this function is called,
+ * but we haven't yet released buffer pins, so the buffer is still pinned.
*
* If I/O was in progress, we always set BM_IO_ERROR.
*/
Assert(buf->flags & BM_IO_IN_PROGRESS);
if (IsForInput)
{
- Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
- /* Don't think that buffer is valid */
- StrategyInvalidateBuffer(buf);
+ Assert(!(buf->flags & BM_DIRTY || buf->cntxDirty));
+ /* We'd better not think buffer is valid yet */
+ Assert(!(buf->flags & BM_VALID));
}
else
{
}
buf->flags |= BM_DIRTY;
}
- buf->flags |= BM_IO_ERROR;
- buf->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(buf);
+ TerminateBufferIO(buf, BM_IO_ERROR);
LWLockRelease(BufMgrLock);
}
}