1 /*-------------------------------------------------------------------------
4 * buffer manager interface routines
6 * Copyright (c) 1994, Regents of the University of California
10 * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.31 1998/01/07 21:04:49 momjian Exp $
12 *-------------------------------------------------------------------------
16 * BufferAlloc() -- lookup a buffer in the buffer table. If
17 * it isn't there add it, but do not read it into memory.
18 * This is used when we are about to reinitialize the
19 * buffer so don't care what the current disk contents are.
20 * BufferAlloc() pins the new buffer in memory.
22 * ReadBuffer() -- same as BufferAlloc() but reads the data
23 * on a buffer cache miss.
25 * ReleaseBuffer() -- unpin the buffer
27 * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
28 * but don't unpin. The disk IO is delayed until buffer
29 * replacement if WriteMode is BUFFER_LATE_WRITE.
31 * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
33 * FlushBuffer() -- as above but never delayed write.
35 * BufferSync() -- flush all dirty buffers in the buffer pool.
37 * InitBufferPool() -- Init the buffer module.
40 * freelist.c -- chooses victim for buffer replacement
41 * buf_table.c -- manages the buffer lookup table
43 #include <sys/types.h>
52 /* declarations split between these three files */
53 #include "storage/buf.h"
54 #include "storage/buf_internals.h"
55 #include "storage/bufmgr.h"
57 #include "storage/fd.h"
58 #include "storage/ipc.h"
59 #include "storage/s_lock.h"
60 #include "storage/shmem.h"
61 #include "storage/spin.h"
62 #include "storage/smgr.h"
63 #include "storage/lmgr.h"
64 #include "miscadmin.h"
65 #include "utils/builtins.h"
66 #include "utils/hsearch.h"
67 #include "utils/palloc.h"
68 #include "utils/memutils.h"
69 #include "utils/relcache.h"
70 #include "executor/execdebug.h" /* for NDirectFileRead */
71 #include "catalog/catalog.h"
73 extern SPINLOCK BufMgrLock;
74 extern long int ReadBufferCount;
75 extern long int ReadLocalBufferCount;
76 extern long int BufferHitCount;
77 extern long int LocalBufferHitCount;
78 extern long int BufferFlushCount;
79 extern long int LocalBufferFlushCount;
81 static int WriteMode = BUFFER_LATE_WRITE; /* Delayed write is
84 static void WaitIO(BufferDesc *buf, SPINLOCK spinlock);
86 #ifndef HAS_TEST_AND_SET
87 static void SignalIO(BufferDesc *buf);
88 extern long *NWaitIOBackendP; /* defined in buf_init.c */
90 #endif /* HAS_TEST_AND_SET */
93 ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum,
96 BufferAlloc(Relation reln, BlockNumber blockNum,
97 bool *foundPtr, bool bufferLockHeld);
98 static int FlushBuffer(Buffer buffer, bool release);
99 static void BufferSync(void);
100 static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld);
102 /* not static but used by vacuum only ... */
103 int BlowawayRelationBuffers(Relation rdesc, BlockNumber block);
105 /* ---------------------------------------------------
106 * RelationGetBufferWithBuffer
107 * see if the given buffer is what we want
108 * if yes, we don't need to bother the buffer manager
109 * ---------------------------------------------------
/*
 * RelationGetBufferWithBuffer
 *	If 'buffer' is already valid and holds the requested block of
 *	'relation', avoid a full buffer-manager round trip; otherwise fall
 *	through to a plain ReadBuffer().
 *	NOTE(review): this listing has interior lines elided (declarations,
 *	braces); confirm against the full source before editing.
 */
112 RelationGetBufferWithBuffer(Relation relation,
113 BlockNumber blockNumber,
119 if (BufferIsValid(buffer))
121 if (!BufferIsLocal(buffer))
/* shared buffer: compare the buffer tag while holding BufMgrLock */
123 bufHdr = &BufferDescriptors[buffer - 1];
124 lrelId = RelationGetLRelId(relation);
125 SpinAcquire(BufMgrLock);
126 if (bufHdr->tag.blockNum == blockNumber &&
127 bufHdr->tag.relId.relId == lrelId.relId &&
128 bufHdr->tag.relId.dbId == lrelId.dbId)
130 SpinRelease(BufMgrLock);
/* tag mismatch: BufMgrLock is still held, so use the lock-held variant */
133 return (ReadBufferWithBufferLock(relation, blockNumber, true));
/* local (temp-relation) buffer: no spinlock needed for the tag check */
137 bufHdr = &LocalBufferDescriptors[-buffer - 1];
138 if (bufHdr->tag.relId.relId == relation->rd_id &&
139 bufHdr->tag.blockNum == blockNumber)
/* no usable buffer in hand: do a normal read */
145 return (ReadBuffer(relation, blockNumber));
149 * ReadBuffer -- returns a buffer containing the requested
150 * block of the requested relation. If the blknum
151 * requested is P_NEW, extend the relation file and
152 * allocate a new block.
154 * Returns: the buffer number for the buffer containing
155 * the block read or NULL on an error.
157 * Assume when this function is called, that reln has been
161 extern int ShowPinTrace;
164 #undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG
/*
 * ReadBuffer -- public entry point for reading a block into the buffer
 *	pool.  Caller does NOT hold BufMgrLock; all work is delegated to
 *	ReadBufferWithBufferLock() with bufferLockHeld = false.
 */
172 ReadBuffer(Relation reln, BlockNumber blockNum)
174 return ReadBufferWithBufferLock(reln, blockNum, false);
180 * XXX caller must have already acquired BufMgrLock
/*
 * is_userbuffer -- true iff the buffer holds a page of a non-system
 *	relation (decided by its recorded relation name).  Only meaningful
 *	for shared buffers: it indexes BufferDescriptors[buffer - 1].
 *	Caller must already hold BufMgrLock (see note above).
 */
184 is_userbuffer(Buffer buffer)
186 BufferDesc *buf = &BufferDescriptors[buffer - 1];
188 if (IsSystemRelationName(buf->sb_relname))
/*
 * ReadBuffer_Debug -- ReadBuffer plus pin tracing: when ShowPinTrace is
 *	on, logs a "PIN(RD)" line to stderr for shared buffers of user
 *	relations, recording the call site (file/line) for leak hunting.
 */
197 ReadBuffer_Debug(char *file,
200 BlockNumber blockNum)
204 buffer = ReadBufferWithBufferLock(reln, blockNum, false);
/* trace only shared (non-local) buffers of user relations */
205 if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
207 BufferDesc *buf = &BufferDescriptors[buffer - 1];
209 fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \
210 refcount = %ld, file: %s, line: %d\n",
211 buffer, buf->sb_relname, buf->tag.blockNum,
212 PrivateRefCount[buffer - 1], file, line);
220 * ReadBufferWithBufferLock -- does the work of
221 * ReadBuffer() but with the possibility that
222 * the buffer lock has already been held. this
223 * is yet another effort to reduce the number of
224 * semops in the system.
/*
 * ReadBufferWithBufferLock -- core of ReadBuffer(); may be entered with
 *	BufMgrLock already held (bufferLockHeld) to save a semop.
 *	Handles three cases: local (temp-relation) buffers, cache hits via
 *	BufferAlloc(), and cache misses that require smgr I/O.  blockNum ==
 *	P_NEW extends the relation with a zero-filled block.
 *	Returns InvalidBuffer on failure.
 *	NOTE(review): interior lines are elided in this listing; verify
 *	control flow against the full source before editing.
 */
227 ReadBufferWithBufferLock(Relation reln,
228 BlockNumber blockNum,
232 int extend; /* extending the file by one block */
237 extend = (blockNum == P_NEW);
238 isLocalBuf = reln->rd_islocal;
/* local buffers bypass the shared pool and BufMgrLock entirely */
242 ReadLocalBufferCount++;
243 bufHdr = LocalBufferAlloc(reln, blockNum, &found);
245 LocalBufferHitCount++;
/*
 * lookup the buffer. IO_IN_PROGRESS is set if the requested
 * block is not currently in memory.
 */
255 bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld);
262 return (InvalidBuffer);
265 /* if its already in the buffer pool, we're done */
/*
 * This happens when a bogus buffer was returned previously and is
 * floating around in the buffer pool. A routine calling this
 * would want this extended.
 */
276 /* new buffers are zero-filled */
277 MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
278 smgrextend(DEFAULT_SMGR, reln,
279 (char *) MAKE_PTR(bufHdr->data));
281 return (BufferDescriptorGetBuffer(bufHdr));
/*
 * if we have gotten to this point, the reln pointer must be ok and
 * the relation file must be open.
 */
291 /* new buffers are zero-filled */
292 MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
293 status = smgrextend(DEFAULT_SMGR, reln,
294 (char *) MAKE_PTR(bufHdr->data));
/* cache miss on an existing block: read it from the storage manager */
298 status = smgrread(DEFAULT_SMGR, reln, blockNum,
299 (char *) MAKE_PTR(bufHdr->data));
303 return (BufferDescriptorGetBuffer(bufHdr));
305 /* lock buffer manager again to update IO IN PROGRESS */
306 SpinAcquire(BufMgrLock);
308 if (status == SM_FAIL)
310 /* IO Failed. cleanup the data structures and go home */
312 if (!BufTableDelete(bufHdr))
314 SpinRelease(BufMgrLock);
315 elog(FATAL, "BufRead: buffer table broken after IO error\n");
317 /* remember that BufferAlloc() pinned the buffer */
/*
 * Have to reset the flag so that anyone waiting for the buffer
 * can tell that the contents are invalid.
 */
324 bufHdr->flags |= BM_IO_ERROR;
325 bufHdr->flags &= ~BM_IO_IN_PROGRESS;
329 /* IO Succeeded. clear the flags, finish buffer update */
331 bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
334 /* If anyone was waiting for IO to complete, wake them up now */
335 #ifdef HAS_TEST_AND_SET
336 S_UNLOCK(&(bufHdr->io_in_progress_lock));
/* no test-and-set: wake waiters via SignalIO (refcount > 1 means waiters) */
338 if (bufHdr->refcount > 1)
342 SpinRelease(BufMgrLock);
344 if (status == SM_FAIL)
345 return (InvalidBuffer);
347 return (BufferDescriptorGetBuffer(bufHdr));
351 * BufferAlloc -- Get a buffer from the buffer pool but dont
354 * Returns: descriptor for buffer
356 * When this routine returns, the BufMgrLock is guaranteed NOT be held.
/*
 * BufferAlloc -- find or create a buffer for (reln, blockNum).
 *	On a hit, pins the buffer (waiting out any in-progress I/O) and sets
 *	*foundPtr.  On a miss, takes a victim from the free list, writes it
 *	out if dirty (BufferReplace), re-tags it in the lookup table, and
 *	marks it BM_IO_IN_PROGRESS for the caller's read.  Enters with
 *	BufMgrLock optionally held (bufferLockHeld); always returns with
 *	BufMgrLock released.
 *	NOTE(review): interior lines are elided in this listing; verify the
 *	loop/branch structure against the full source before editing.
 */
359 BufferAlloc(Relation reln,
360 BlockNumber blockNum,
366 BufferTag newTag; /* identity of requested block */
367 bool inProgress; /* buffer undergoing IO */
368 bool newblock = FALSE;
370 /* create a new tag so we can lookup the buffer */
371 /* assume that the relation is already open */
372 if (blockNum == P_NEW)
375 blockNum = smgrnblocks(DEFAULT_SMGR, reln);
378 INIT_BUFFERTAG(&newTag, reln, blockNum);
381 SpinAcquire(BufMgrLock);
383 /* see if the block is in the buffer pool already */
384 buf = BufTableLookup(&newTag);
/*
 * Found it. Now, (a) pin the buffer so no one steals it from the
 * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting
 * the buffer into the buffer pool.
 */
395 inProgress = (buf->flags & BM_IO_IN_PROGRESS);
400 WaitIO(buf, BufMgrLock);
401 if (buf->flags & BM_IO_ERROR)
/*
 * weird race condition:
 *
 * We were waiting for someone else to read the buffer. While
 * we were waiting, the reader boof'd in some way, so the
 * contents of the buffer are still invalid. By saying
 * that we didn't find it, we can make the caller
 * reinitialize the buffer. If two processes are waiting
 * for this block, both will read the block. The second
 * one to finish may overwrite any updates made by the
 * first. (Assume higher level synchronization prevents
 * this from happening).
 *
 * This is never going to happen, don't worry about it.
 */
423 _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND);
426 SpinRelease(BufMgrLock);
/*
 * Didn't find it in the buffer pool. We'll have to initialize a new
 * buffer. First, grab one from the free list. If it's dirty, flush
 * it to disk. Remember to unlock BufMgr spinlock while doing the IOs.
 */
439 for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)
442 /* GetFreeBuffer will abort if it can't find a free buffer */
443 buf = GetFreeBuffer();
/*
 * But it can return buf == NULL if we are in aborting transaction
 * now and so elog(ERROR,...) in GetFreeBuffer will not abort
 */
/*
 * There should be exactly one pin on the buffer after it is
 * allocated -- ours. If it had a pin it wouldn't have been on
 * the free list. No one else could have pinned it between
 * GetFreeBuffer and here because we have the BufMgrLock.
 */
459 Assert(buf->refcount == 0);
461 PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
463 if (buf->flags & BM_DIRTY)
/*
 * Set BM_IO_IN_PROGRESS to keep anyone from doing anything
 * with the contents of the buffer while we write it out. We
 * don't really care if they try to read it, but if they can
 * complete a BufferAlloc on it they can then scribble into
 * it, and we'd really like to avoid that while we are
 * flushing the buffer. Setting this flag should block them
 * in WaitIO until we're done.
 */
477 buf->flags |= BM_IO_IN_PROGRESS;
478 #ifdef HAS_TEST_AND_SET
/*
 * All code paths that acquire this lock pin the buffer first;
 * since no one had it pinned (it just came off the free
 * list), no one else can have this lock.
 */
485 Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)));
486 S_LOCK(&(buf->io_in_progress_lock));
487 #endif /* HAS_TEST_AND_SET */
/*
 * Write the buffer out, being careful to release BufMgrLock
 * before starting the I/O.
 *
 * This #ifndef is here because a few extra semops REALLY kill
 * you on machines that don't have spinlocks. If you don't
 * operate with much concurrency, well...
 */
497 smok = BufferReplace(buf, true);
498 #ifndef OPTIMIZE_SINGLE
499 SpinAcquire(BufMgrLock);
500 #endif /* OPTIMIZE_SINGLE */
/* flush failed: mark I/O error, drop our pin, retry with another victim */
504 elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s",
505 buf->tag.blockNum, buf->sb_dbname, buf->sb_relname);
507 buf->flags |= BM_IO_ERROR;
508 buf->flags &= ~BM_IO_IN_PROGRESS;
509 #ifdef HAS_TEST_AND_SET
510 S_UNLOCK(&(buf->io_in_progress_lock));
511 #else /* !HAS_TEST_AND_SET */
512 if (buf->refcount > 1)
514 #endif /* !HAS_TEST_AND_SET */
515 PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
517 if (buf->refcount == 0)
519 AddBufferToFreelist(buf);
520 buf->flags |= BM_FREE;
522 buf = (BufferDesc *) NULL;
/*
 * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
 * be setted by anyone. - vadim 01/17/97
 */
531 if (buf->flags & BM_JUST_DIRTIED)
533 elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing",
534 buf->tag.blockNum, buf->sb_relname);
538 buf->flags &= ~BM_DIRTY;
/*
 * Somebody could have pinned the buffer while we were doing
 * the I/O and had given up the BufMgrLock (though they would
 * be waiting for us to clear the BM_IO_IN_PROGRESS flag).
 * That's why this is a loop -- if so, we need to clear the
 * I/O flags, remove our pin and start all over again.
 *
 * People may be making buffers free at any time, so there's no
 * reason to think that we have an immediate disaster on our
 */
553 if (buf && buf->refcount > 1)
556 buf->flags &= ~BM_IO_IN_PROGRESS;
557 #ifdef HAS_TEST_AND_SET
558 S_UNLOCK(&(buf->io_in_progress_lock));
559 #else /* !HAS_TEST_AND_SET */
560 if (buf->refcount > 1)
562 #endif /* !HAS_TEST_AND_SET */
563 PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
565 buf = (BufferDesc *) NULL;
/*
 * Somebody could have allocated another buffer for the same
 * block we are about to read in. (While we flush out the
 * dirty buffer, we don't hold the lock and someone could have
 * allocated another buffer for the same block. The problem is
 * we haven't gotten around to insert the new tag into the
 * buffer table. So we need to check here. -ay 3/95
 */
576 buf2 = BufTableLookup(&newTag);
/*
 * Found it. Someone has already done what we're about to
 * do. We'll just handle this as if it were found in the
 * buffer pool in the first place.
 */
587 #ifdef HAS_TEST_AND_SET
588 S_UNLOCK(&(buf->io_in_progress_lock));
589 #else /* !HAS_TEST_AND_SET */
590 if (buf->refcount > 1)
592 #endif /* !HAS_TEST_AND_SET */
594 /* give up the buffer since we don't need it any more */
596 PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0;
597 AddBufferToFreelist(buf);
598 buf->flags |= BM_FREE;
599 buf->flags &= ~BM_IO_IN_PROGRESS;
/* switch to the competing buffer, waiting out its I/O if necessary */
603 inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
608 WaitIO(buf2, BufMgrLock);
609 if (buf2->flags & BM_IO_ERROR)
615 SpinRelease(BufMgrLock);
/*
 * At this point we should have the sole pin on a non-dirty buffer and
 * we may or may not already have the BM_IO_IN_PROGRESS flag set.
 */
/*
 * Change the name of the buffer in the lookup table:
 *
 * Need to update the lookup table before the read starts. If someone
 * comes along looking for the buffer while we are reading it in, we
 * don't want them to allocate a new buffer. For the same reason, we
 * didn't want to erase the buf table entry for the buffer we were
 * writing back until now, either.
 */
637 if (!BufTableDelete(buf))
639 SpinRelease(BufMgrLock);
640 elog(FATAL, "buffer wasn't in the buffer table\n");
644 /* record the database name and relation name for this buffer */
645 strcpy(buf->sb_relname, reln->rd_rel->relname.data);
646 strcpy(buf->sb_dbname, GetDatabaseName());
648 INIT_BUFFERTAG(&(buf->tag), reln, blockNum);
649 if (!BufTableInsert(buf))
651 SpinRelease(BufMgrLock);
652 elog(FATAL, "Buffer in lookup table twice \n");
/*
 * Buffer contents are currently invalid. Have to mark IO IN PROGRESS
 * so no one fiddles with them until the read completes. If this
 * routine has been called simply to allocate a buffer, no io will be
 * attempted, so the flag isnt set.
 */
663 buf->flags |= BM_IO_IN_PROGRESS;
664 #ifdef HAS_TEST_AND_SET
665 Assert(S_LOCK_FREE(&(buf->io_in_progress_lock)));
666 S_LOCK(&(buf->io_in_progress_lock));
667 #endif /* HAS_TEST_AND_SET */
671 _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND);
674 SpinRelease(BufMgrLock);
682 * Pushes buffer contents to disk if WriteMode is BUFFER_FLUSH_WRITE.
683 * Otherwise, marks contents as dirty.
685 * Assume that buffer is pinned. Assume that reln is
689 * Pin count is decremented.
/*
 * WriteBuffer -- mark the buffer dirty and unpin it.
 *	In BUFFER_FLUSH_WRITE mode this degenerates to FlushBuffer() (write
 *	through to disk); otherwise the physical write is deferred to buffer
 *	replacement or BufferSync().  Local buffers are delegated to
 *	WriteLocalBuffer().  Caller must hold a pin (asserted below).
 */
695 WriteBuffer(Buffer buffer)
699 if (WriteMode == BUFFER_FLUSH_WRITE)
701 return (FlushBuffer(buffer, TRUE));
706 if (BufferIsLocal(buffer))
707 return WriteLocalBuffer(buffer, TRUE);
709 if (BAD_BUFFER_ID(buffer))
712 bufHdr = &BufferDescriptors[buffer - 1];
714 SpinAcquire(BufMgrLock);
715 Assert(bufHdr->refcount > 0);
/* BM_JUST_DIRTIED lets concurrent flushers detect re-dirtying */
716 bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
718 SpinRelease(BufMgrLock);
/* the dirty bit now carries the commit info; no separate save needed */
719 CommitInfoNeedsSave[buffer - 1] = 0;
/*
 * WriteBuffer_Debug -- WriteBuffer plus pin tracing: when ShowPinTrace
 *	is on, logs an "UNPIN(WR)" line to stderr for shared buffers of user
 *	relations, recording the call site (file/line).
 *
 *	BUGFIX: the trace condition previously tested BufferIsLocal(buffer),
 *	but the body (and is_userbuffer) index the shared BufferDescriptors
 *	array with buffer - 1, which is out of bounds for local (negative)
 *	buffer ids.  Use !BufferIsLocal, matching ReadBuffer_Debug.
 */
726 WriteBuffer_Debug(char *file, int line, Buffer buffer)
729 if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
733 buf = &BufferDescriptors[buffer - 1];
734 fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \
735 refcount = %ld, file: %s, line: %d\n",
736 buffer, buf->sb_relname, buf->tag.blockNum,
737 PrivateRefCount[buffer - 1], file, line);
744 * DirtyBufferCopy() -- For a given dbid/relid/blockno, if the buffer is
745 * in the cache and is dirty, mark it clean and copy
746 * it to the requested location. This is a logical
747 * write, and has been installed to support the cache
748 * management code for write-once storage managers.
750 * DirtyBufferCopy() -- Copy a given dirty buffer to the requested
753 * We treat this as a write. If the requested buffer is in the pool
754 * and is dirty, we copy it to the location requested and mark it
755 * clean. This routine supports the Sony jukebox storage manager,
756 * which agrees to take responsibility for the data once we mark
759 * NOTE: used by sony jukebox code in postgres 4.2 - ay 2/95
/*
 * DirtyBufferCopy -- if (dbid, relid, blkno) is cached, valid and dirty,
 *	copy the page image to 'dest' and clear BM_DIRTY (a "logical write"
 *	for write-once storage managers; see the note above about the Sony
 *	jukebox code).  The copy is done while holding BufMgrLock.
 */
763 DirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest)
768 btag.relId.relId = relid;
769 btag.relId.dbId = dbid;
770 btag.blockNum = blkno;
772 SpinAcquire(BufMgrLock);
773 buf = BufTableLookup(&btag);
/* nothing to do unless the buffer exists, is valid, and is dirty */
775 if (buf == (BufferDesc *) NULL
776 || !(buf->flags & BM_DIRTY)
777 || !(buf->flags & BM_VALID))
779 SpinRelease(BufMgrLock);
/*
 * hate to do this holding the lock, but release and reacquire is
 */
787 memmove(dest, (char *) MAKE_PTR(buf->data), BLCKSZ);
789 buf->flags &= ~BM_DIRTY;
791 SpinRelease(BufMgrLock);
797 * FlushBuffer -- like WriteBuffer, but force the page to disk.
799 * 'buffer' is known to be dirty/pinned, so there should not be a
800 * problem reading the BufferDesc members without the BufMgrLock
801 * (nobody should be able to change tags, flags, etc. out from under
/*
 * FlushBuffer -- like WriteBuffer, but force the page to disk now via
 *	smgrflush() instead of deferring the write.  Optionally unpins
 *	('release').  Per the note above, the buffer is pinned and dirty,
 *	so its header fields are stable without holding BufMgrLock except
 *	where flags are updated.
 *	Returns STATUS_ERROR on a bad buffer id or smgr failure.
 *
 *	BUGFIX: corrected the elog message typo "FlusfBuffer" ->
 *	"FlushBuffer" (wrong function name in the NOTICE text).
 */
805 FlushBuffer(Buffer buffer, bool release)
812 if (BufferIsLocal(buffer))
813 return FlushLocalBuffer(buffer, release);
815 if (BAD_BUFFER_ID(buffer))
816 return (STATUS_ERROR);
818 bufHdr = &BufferDescriptors[buffer - 1];
819 bufdb = bufHdr->tag.relId.dbId;
/* only our own database's buffers (or shared catalogs) can be flushed here */
821 Assert(bufdb == MyDatabaseId || bufdb == (Oid) NULL);
822 bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId);
823 Assert(bufrel != (Relation) NULL);
825 /* To check if block content changed while flushing. - vadim 01/17/97 */
826 SpinAcquire(BufMgrLock);
827 bufHdr->flags &= ~BM_JUST_DIRTIED;
828 SpinRelease(BufMgrLock);
830 status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum,
831 (char *) MAKE_PTR(bufHdr->data));
833 RelationDecrementReferenceCount(bufrel);
835 if (status == SM_FAIL)
837 elog(ERROR, "FlushBuffer: cannot flush block %u of the relation %s",
838 bufHdr->tag.blockNum, bufHdr->sb_relname);
839 return (STATUS_ERROR);
843 SpinAcquire(BufMgrLock);
/*
 * If this buffer was marked by someone as DIRTY while we were
 * flushing it out we must not clear DIRTY flag - vadim 01/17/97
 */
849 if (bufHdr->flags & BM_JUST_DIRTIED)
851 elog(NOTICE, "FlushBuffer: content of block %u (%s) changed while flushing",
852 bufHdr->tag.blockNum, bufHdr->sb_relname);
856 bufHdr->flags &= ~BM_DIRTY;
860 SpinRelease(BufMgrLock);
861 CommitInfoNeedsSave[buffer - 1] = 0;
867 * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
868 * when the operation is complete.
870 * We know that the buffer is for a relation in our private cache,
871 * because this routine is called only to write out buffers that
872 * were changed by the executing backend.
/*
 * WriteNoReleaseBuffer -- like WriteBuffer but keeps the pin (see the
 *	note above).  In BUFFER_FLUSH_WRITE mode delegates to
 *	FlushBuffer(..., FALSE); local buffers go to WriteLocalBuffer.
 *	Returns STATUS_ERROR on a bad buffer id.
 */
875 WriteNoReleaseBuffer(Buffer buffer)
879 if (WriteMode == BUFFER_FLUSH_WRITE)
881 return (FlushBuffer(buffer, FALSE));
886 if (BufferIsLocal(buffer))
887 return WriteLocalBuffer(buffer, FALSE);
889 if (BAD_BUFFER_ID(buffer))
890 return (STATUS_ERROR);
892 bufHdr = &BufferDescriptors[buffer - 1];
894 SpinAcquire(BufMgrLock);
/* BM_JUST_DIRTIED lets concurrent flushers detect re-dirtying */
895 bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
896 SpinRelease(BufMgrLock);
897 CommitInfoNeedsSave[buffer - 1] = 0;
903 #undef ReleaseAndReadBuffer
905 * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
906 * so that only one semop needs to be called.
/*
 * ReleaseAndReadBuffer -- unpin 'buffer' and read (relation, blockNum)
 *	in one call, saving a semop (see the comment above).  When the old
 *	buffer's shared refcount can be dropped under BufMgrLock, the read
 *	reuses the already-held lock via ReadBufferWithBufferLock(..., true);
 *	otherwise it falls through to a plain ReadBuffer().
 *	NOTE(review): lines are elided in this listing; confirm branch
 *	structure against the full source before editing.
 */
910 ReleaseAndReadBuffer(Buffer buffer,
912 BlockNumber blockNum)
/* local buffer: just drop the local refcount, no locking */
917 if (BufferIsLocal(buffer))
919 Assert(LocalRefCount[-buffer - 1] > 0);
920 LocalRefCount[-buffer - 1]--;
924 if (BufferIsValid(buffer))
926 bufHdr = &BufferDescriptors[buffer - 1];
927 Assert(PrivateRefCount[buffer - 1] > 0);
928 PrivateRefCount[buffer - 1]--;
/* last private reference gone: update the shared state under the lock */
929 if (PrivateRefCount[buffer - 1] == 0 &&
930 LastRefCount[buffer - 1] == 0)
/*
 * only release buffer if it is not pinned in previous
 */
937 SpinAcquire(BufMgrLock);
939 if (bufHdr->refcount == 0)
941 AddBufferToFreelist(bufHdr);
942 bufHdr->flags |= BM_FREE;
/* commit-info changes must not be lost when the pin goes away */
944 if (CommitInfoNeedsSave[buffer - 1])
946 bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
947 CommitInfoNeedsSave[buffer - 1] = 0;
949 retbuf = ReadBufferWithBufferLock(relation, blockNum, true);
955 return (ReadBuffer(relation, blockNum));
959 * BufferSync -- Flush all dirty buffers in the pool.
961 * This is called at transaction commit time. It does the wrong thing,
962 * right now. We should flush only our own changes to stable storage,
963 * and we should obey the lock protocol on the buffer manager metadata
964 * as we do it. Also, we need to be sure that no other transaction is
965 * modifying the page as we flush it. This is only a problem for objects
966 * that use a non-two-phase locking protocol, like btree indices. For
967 * those objects, we would like to set a write lock for the duration of
968 * our IO. Another possibility is to code updates to btree pages
969 * carefully, so that writing them out out of order cannot cause
970 * any unrecoverable errors.
972 * I don't want to think hard about this right now, so I will try
973 * to come back to it later.
/*
 * NOTE(review): body of BufferSync() — the declaration and local
 * variables are elided from this listing (see the header comment
 * above).  Scans every shared buffer; for each valid+dirty page of our
 * database (or a shared catalog, dbId == 0) it pins the buffer, waits
 * out concurrent I/O, writes it via smgrwrite() — or smgrblindwrt()
 * when the relcache has no descriptor — then clears BM_DIRTY unless
 * the page was re-dirtied meanwhile (BM_JUST_DIRTIED protocol).
 */
985 SpinAcquire(BufMgrLock);
986 for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
988 if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
990 bufdb = bufHdr->tag.relId.dbId;
991 bufrel = bufHdr->tag.relId.relId;
992 if (bufdb == MyDatabaseId || bufdb == (Oid) 0)
994 reln = RelationIdCacheGetRelation(bufrel);
/*
 * We have to pin buffer to keep anyone from stealing it
 * from the buffer pool while we are flushing it or
 * waiting in WaitIO. It's bad for GetFreeBuffer in
 * BufferAlloc, but there is no other way to prevent
 * writing into disk block data from some other buffer,
 * getting smgr status of some other block and clearing
 * BM_DIRTY of ... - VAdim 09/16/96
 */
1006 if (bufHdr->flags & BM_IO_IN_PROGRESS)
1008 WaitIO(bufHdr, BufMgrLock);
1009 UnpinBuffer(bufHdr);
1010 if (bufHdr->flags & BM_IO_ERROR)
1012 elog(ERROR, "BufferSync: write error %u for %s",
1013 bufHdr->tag.blockNum, bufHdr->sb_relname);
1015 if (reln != (Relation) NULL)
1016 RelationDecrementReferenceCount(reln);
/*
 * To check if block content changed while flushing (see
 * below). - vadim 01/17/97
 */
1024 bufHdr->flags &= ~BM_JUST_DIRTIED;
/*
 * If we didn't have the reldesc in our local cache, flush
 * this page out using the 'blind write' storage manager
 * routine. If we did find it, use the standard
 */
1033 #ifndef OPTIMIZE_SINGLE
1034 SpinRelease(BufMgrLock);
1035 #endif /* OPTIMIZE_SINGLE */
1036 if (reln == (Relation) NULL)
1038 status = smgrblindwrt(DEFAULT_SMGR, bufHdr->sb_dbname,
1039 bufHdr->sb_relname, bufdb, bufrel,
1040 bufHdr->tag.blockNum,
1041 (char *) MAKE_PTR(bufHdr->data));
1045 status = smgrwrite(DEFAULT_SMGR, reln,
1046 bufHdr->tag.blockNum,
1047 (char *) MAKE_PTR(bufHdr->data));
1049 #ifndef OPTIMIZE_SINGLE
1050 SpinAcquire(BufMgrLock);
1051 #endif /* OPTIMIZE_SINGLE */
1053 UnpinBuffer(bufHdr);
1054 if (status == SM_FAIL)
1056 bufHdr->flags |= BM_IO_ERROR;
1057 elog(ERROR, "BufferSync: cannot write %u for %s",
1058 bufHdr->tag.blockNum, bufHdr->sb_relname);
/*
 * If this buffer was marked by someone as DIRTY while we
 * were flushing it out we must not clear DIRTY flag -
 */
1067 if (!(bufHdr->flags & BM_JUST_DIRTIED))
1068 bufHdr->flags &= ~BM_DIRTY;
1069 if (reln != (Relation) NULL)
1070 RelationDecrementReferenceCount(reln);
1074 SpinRelease(BufMgrLock);
1081 * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf'
1082 * is cleared. Because IO_IN_PROGRESS conflicts are
1083 * expected to be rare, there is only one BufferIO
1084 * lock in the entire system. All processes block
1085 * on this semaphore when they try to use a buffer
1086 * that someone else is faulting in. Whenever a
1087 * process finishes an IO and someone is waiting for
1088 * the buffer, BufferIO is signaled (SignalIO). All
1089 * waiting processes then wake up and check to see
1090 * if their buffer is now ready. This implementation
1091 * is simple, but efficient enough if WaitIO is
1092 * rarely called by multiple processes simultaneously.
1094 * ProcSleep atomically releases the spinlock and goes to
1097 * Note: there is an easy fix if the queue becomes long.
1098 * save the id of the buffer we are waiting for in
1099 * the queue structure. That way signal can figure
1100 * out which proc to wake up.
/*
 * WaitIO (test-and-set build) -- block until BM_IO_IN_PROGRESS clears.
 *	Drops the given spinlock, parks on the buffer's io_in_progress_lock
 *	(released by the I/O owner when done), then reacquires the spinlock.
 *	The immediate unlock just passes the baton to the next waiter.
 */
1102 #ifdef HAS_TEST_AND_SET
1104 WaitIO(BufferDesc *buf, SPINLOCK spinlock)
1106 SpinRelease(spinlock);
1107 S_LOCK(&(buf->io_in_progress_lock));
1108 S_UNLOCK(&(buf->io_in_progress_lock));
1109 SpinAcquire(spinlock);
1112 #else /* HAS_TEST_AND_SET */
/* single system-wide semaphore shared by all I/O waiters (see note above) */
1113 IpcSemaphoreId WaitIOSemId;
/*
 * WaitIO (semaphore build) -- register ourselves as a waiter, sleep on
 *	the shared BufferIO semaphore, then recheck BM_IO_IN_PROGRESS after
 *	waking (all waiters are woken together by SignalIO).
 *	NOTE(review): the enclosing retry loop is elided in this listing.
 */
1116 WaitIO(BufferDesc *buf, SPINLOCK spinlock)
1123 /* wait until someone releases IO lock */
1124 (*NWaitIOBackendP)++;
1125 SpinRelease(spinlock);
1126 IpcSemaphoreLock(WaitIOSemId, 0, 1);
1127 SpinAcquire(spinlock);
1128 inProgress = (buf->flags & BM_IO_IN_PROGRESS);
/*
 * SignalIO -- wake every backend currently waiting in WaitIO by
 *	releasing the semaphore once per registered waiter, then reset the
 *	waiter count.
 */
1138 SignalIO(BufferDesc *buf)
1140 /* somebody better be waiting. */
1141 Assert(buf->refcount > 1);
1142 IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP);
1143 *NWaitIOBackendP = 0;
1146 #endif /* HAS_TEST_AND_SET */
1148 long NDirectFileRead; /* some I/O's are direct file access.
1150 long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */
/*
 * PrintBufferUsage -- dump shared/local/direct I/O statistics to
 *	'statfp'.  Hit rates are computed as hits * 100 / reads; the zero
 *	checks below avoid division by zero when no reads happened.
 */
1153 PrintBufferUsage(FILE *statfp)
1158 if (ReadBufferCount == 0)
1161 hitrate = (float) BufferHitCount *100.0 / ReadBufferCount;
1163 if (ReadLocalBufferCount == 0)
1166 localhitrate = (float) LocalBufferHitCount *100.0 / ReadLocalBufferCount;
1168 fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n",
1169 ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate);
1170 fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n",
1171 ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate);
1172 fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n",
1173 NDirectFileRead, NDirectFileWrite);
/*
 * NOTE(review): function header elided in this listing — presumably the
 * body of ResetBufferUsage(): zero all I/O statistics counters.  A
 * reset of BufferHitCount is not visible here; confirm it exists in the
 * elided lines of the full source.
 */
1180 ReadBufferCount = 0;
1181 BufferFlushCount = 0;
1182 LocalBufferHitCount = 0;
1183 ReadLocalBufferCount = 0;
1184 LocalBufferFlushCount = 0;
1185 NDirectFileRead = 0;
1186 NDirectFileWrite = 0;
1189 /* ----------------------------------------------
1192 * this routine is supposed to be called when a transaction aborts.
1193 * it will release all the buffer pins held by the transaction.
1195 * ----------------------------------------------
/*
 * NOTE(review): function declaration elided here.  Body: clear pending
 * commit-info saves and drop every private pin this backend holds on
 * shared buffers, then reset the local buffer pool too.
 */
1202 for (i = 1; i <= NBuffers; i++)
1204 CommitInfoNeedsSave[i - 1] = 0;
1205 if (BufferIsValid(i))
1207 while (PrivateRefCount[i - 1] > 0)
1212 LastRefCount[i - 1] = 0;
1215 ResetLocalBufferPool();
1218 /* -----------------------------------------------
1219 * BufferPoolCheckLeak
1221 * check if there is buffer leak
1223 * -----------------------------------------------
/*
 * Scan all shared buffers; any buffer this backend still holds a pin on
 * at this point (end of transaction) is reported as a leak.
 */
1226 BufferPoolCheckLeak()
1231 for (i = 1; i <= NBuffers; i++)
1233 if (BufferIsValid(i))
1236 "buffer leak [%d] detected in BufferPoolCheckLeak()", i - 1);
1248 /* ------------------------------------------------
1251 * flush all dirty blocks in buffer pool to disk
1253 * ------------------------------------------------
/*
 * When main memory is not stable, flush dirty buffers (via BufferSync,
 * in the elided body) so commits are durable; a stable-main-memory
 * configuration can skip the flush entirely.
 */
1256 FlushBufferPool(int StableMainMemoryFlag)
1258 if (!StableMainMemoryFlag)
/*
 * BufferIsValid -- true iff this backend holds a reference on 'bufnum'.
 */
1267 * True iff the refcnt of the local buffer is > 0
1269 * BufferIsValid(InvalidBuffer) is False.
1270 * BufferIsValid(UnknownBuffer) is False.
1273 BufferIsValid(Buffer bufnum)
/* local buffers are negative ids indexing LocalRefCount */
1275 if (BufferIsLocal(bufnum))
1276 return (bufnum >= -NLocBuffer && LocalRefCount[-bufnum - 1] > 0);
1278 if (BAD_BUFFER_ID(bufnum))
1281 return ((bool) (PrivateRefCount[bufnum - 1] > 0));
1285 * BufferGetBlockNumber --
1286 * Returns the block number associated with a buffer.
1289 * Assumes that the buffer is valid.
1292 BufferGetBlockNumber(Buffer buffer)
1294 Assert(BufferIsValid(buffer));
1296 /* XXX should be a critical section */
/* local buffers use negative ids; both arrays store the tag's blockNum */
1297 if (BufferIsLocal(buffer))
1298 return (LocalBufferDescriptors[-buffer - 1].tag.blockNum);
1300 return (BufferDescriptors[buffer - 1].tag.blockNum);
1304 * BufferGetRelation --
1305 * Returns the relation descriptor associated with a buffer.
1308 * Assumes buffer is valid.
1311 BufferGetRelation(Buffer buffer)
1316 Assert(BufferIsValid(buffer));
1317 Assert(!BufferIsLocal(buffer)); /* not supported for local buffers */
1319 /* XXX should be a critical section */
1320 relid = LRelIdGetRelationId(BufferDescriptors[buffer - 1].tag.relId);
1321 relation = RelationIdGetRelation(relid);
/* undo RelationIdGetRelation's refcount bump; caller gets a borrowed ref */
1323 RelationDecrementReferenceCount(relation);
/* keep the descriptor alive if the count would otherwise hit zero */
1325 if (RelationHasReferenceCountZero(relation))
1329 * elog(NOTICE, "BufferGetRelation: 0->1");
1332 RelationIncrementReferenceCount(relation);
1341 * Flush the buffer corresponding to 'bufHdr'
/*
 * BufferReplace -- write the victim buffer's page to disk before the
 *	buffer is recycled.  Uses smgrflush() when the relation descriptor
 *	is in the relcache, otherwise a "blind write" by name/oid.  Enters
 *	with BufMgrLock optionally held (bufferLockHeld); releases it before
 *	doing the I/O.  Returns failure status when smgr reports SM_FAIL.
 */
1345 BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld)
1352 if (!bufferLockHeld)
1353 SpinAcquire(BufMgrLock);
/*
 * first try to find the reldesc in the cache, if no luck, don't
 * bother to build the reldesc from scratch, just do a blind write.
 */
1360 bufdb = bufHdr->tag.relId.dbId;
1361 bufrel = bufHdr->tag.relId.relId;
/* only our database (or shared catalogs, dbId == 0) can be in the relcache */
1363 if (bufdb == MyDatabaseId || bufdb == (Oid) NULL)
1364 reln = RelationIdCacheGetRelation(bufrel);
1366 reln = (Relation) NULL;
1368 /* To check if block content changed while flushing. - vadim 01/17/97 */
1369 bufHdr->flags &= ~BM_JUST_DIRTIED;
1371 SpinRelease(BufMgrLock);
1373 if (reln != (Relation) NULL)
1375 status = smgrflush(DEFAULT_SMGR, reln, bufHdr->tag.blockNum,
1376 (char *) MAKE_PTR(bufHdr->data));
1381 /* blind write always flushes */
1382 status = smgrblindwrt(DEFAULT_SMGR, bufHdr->sb_dbname,
1383 bufHdr->sb_relname, bufdb, bufrel,
1384 bufHdr->tag.blockNum,
1385 (char *) MAKE_PTR(bufHdr->data));
1388 if (reln != (Relation) NULL)
1389 RelationDecrementReferenceCount(reln);
1391 if (status == SM_FAIL)
1400 * RelationGetNumberOfBlocks --
1401 * Returns the buffer descriptor associated with a page in a relation.
1404 * XXX may fail for huge relations.
1405 * XXX should be elsewhere.
1406 * XXX maybe should be hidden
/*
 * For local (temp) relations the block count is tracked in the relcache
 * entry; otherwise ask the storage manager.
 */
1409 RelationGetNumberOfBlocks(Relation relation)
1412 ((relation->rd_islocal) ? relation->rd_nblocks :
1413 smgrnblocks(DEFAULT_SMGR, relation));
1418 * Returns a reference to a disk page image associated with a buffer.
1421 * Assumes buffer is valid.
/*
 * BufferGetBlock -- translate a buffer id into a pointer to its page
 *	image in (shared or local) memory; local ids are negative.
 */
1424 BufferGetBlock(Buffer buffer)
1426 Assert(BufferIsValid(buffer));
1428 if (BufferIsLocal(buffer))
1429 return ((Block) MAKE_PTR(LocalBufferDescriptors[-buffer - 1].data));
1431 return ((Block) MAKE_PTR(BufferDescriptors[buffer - 1].data));
1434 /* ---------------------------------------------------------------------
1435 * ReleaseRelationBuffers
1437 * this function unmarks all the dirty pages of a relation
1438 * in the buffer pool so that at the end of transaction
1439 * these pages will not be flushed.
1440 * XXX currently it sequentially searches the buffer pool, should be
1441 * changed to more clever ways of searching.
1442 * --------------------------------------------------------------------
1445 ReleaseRelationBuffers(Relation rdesc)
/* Backend-local (temp) relations live in the private local buffer pool:
 * no other backend can see them, so no dbId check and no locking needed. */
1451 if (rdesc->rd_islocal)
1453 for (i = 0; i < NLocBuffer; i++)
1455 buf = &LocalBufferDescriptors[i];
1456 if ((buf->flags & BM_DIRTY) &&
1457 (buf->tag.relId.relId == rdesc->rd_id))
1459 buf->flags &= ~BM_DIRTY;
/* Shared buffer pool: BufMgrLock must be held while inspecting or
 * changing a descriptor's flags. */
1465 for (i = 1; i <= NBuffers; i++)
1467 buf = &BufferDescriptors[i - 1];
1470 SpinAcquire(BufMgrLock);
/* Only clear pages of this relation within our own database. */
1473 if ((buf->flags & BM_DIRTY) &&
1474 (buf->tag.relId.dbId == MyDatabaseId) &&
1475 (buf->tag.relId.relId == rdesc->rd_id))
1477 buf->flags &= ~BM_DIRTY;
/* NOTE(review): lines are missing from this extraction between the
 * dirty-clear above and the BM_FREE test below -- confirm the full
 * handling (e.g. continue/BufTableDelete) against the complete source. */
1478 if (!(buf->flags & BM_FREE))
1480 SpinRelease(BufMgrLock);
1487 SpinRelease(BufMgrLock);
1490 /* ---------------------------------------------------------------------
1493 * This function marks all the buffers in the buffer cache for a
1494 * particular database as clean. This is used when we destroy a
1495 * database, to avoid trying to flush data to disk when the directory
1496 * tree no longer exists.
1498 * This is an exceedingly non-public interface.
1499 * --------------------------------------------------------------------
1502 DropBuffers(Oid dbid)
/* Single pass over the shared pool, holding BufMgrLock throughout. */
1507 SpinAcquire(BufMgrLock);
1508 for (i = 1; i <= NBuffers; i++)
1510 buf = &BufferDescriptors[i - 1];
/* Clear the dirty bit on every dirty page belonging to database dbid,
 * so no write-back is attempted for the dropped database. */
1511 if ((buf->tag.relId.dbId == dbid) && (buf->flags & BM_DIRTY))
1513 buf->flags &= ~BM_DIRTY;
1516 SpinRelease(BufMgrLock);
1519 /* -----------------------------------------------------------------
1522 * this function prints all the buffer descriptors, for debugging
1524 * -----------------------------------------------------------------
/* PrintBufferDescs -- the function header line is not visible in this
 * extraction; this is the body of the buffer-descriptor dump routine. */
1530 BufferDesc *buf = BufferDescriptors;
/* Under the postmaster the pool is shared, so lock and use elog();
 * a standalone backend just printf()s without locking (below). */
1532 if (IsUnderPostmaster)
1534 SpinAcquire(BufMgrLock);
1535 for (i = 0; i < NBuffers; ++i, ++buf)
1537 elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \
1538 blockNum=%d, flags=0x%x, refcount=%d %d)",
1539 i, buf->freeNext, buf->freePrev,
1540 buf->sb_relname, buf->tag.blockNum, buf->flags,
1541 buf->refcount, PrivateRefCount[i]);
1543 SpinRelease(BufMgrLock);
1547 /* interactive backend */
1548 for (i = 0; i < NBuffers; ++i, ++buf)
1550 printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n",
1551 i, buf->sb_relname, buf->tag.blockNum,
1552 buf->flags, buf->refcount, PrivateRefCount[i]);
/* PrintPinnedBufs -- dump only the buffers this backend currently has
 * pinned (PrivateRefCount > 0).  The function header line is not
 * visible in this extraction. */
1561 BufferDesc *buf = BufferDescriptors;
1563 SpinAcquire(BufMgrLock);
1564 for (i = 0; i < NBuffers; ++i, ++buf)
1566 if (PrivateRefCount[i] > 0)
1567 elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \
1568 blockNum=%d, flags=0x%x, refcount=%d %d)\n",
1569 i, buf->freeNext, buf->freePrev, buf->sb_relname,
1570 buf->tag.blockNum, buf->flags,
1571 buf->refcount, PrivateRefCount[i]);
1573 SpinRelease(BufMgrLock);
1577 * BufferPoolBlowaway
1579 * this routine is solely for the purpose of experiments -- sometimes
1580 * you may want to blowaway whatever is left from the past in buffer
1581 * pool and start measuring some performance with a clean empty buffer
1586 BufferPoolBlowaway()
/* Evict every valid buffer: drop any remaining pins, then remove the
 * buffer from the lookup table. */
1591 for (i = 1; i <= NBuffers; i++)
1593 if (BufferIsValid(i))
/* NOTE(review): the statement releasing the buffer inside this loop is
 * missing from the extraction (presumably ReleaseBuffer(i)) -- confirm
 * against the full source, otherwise this loop would not terminate. */
1595 while (BufferIsValid(i))
1598 BufTableDelete(&BufferDescriptors[i - 1]);
1604 /* ---------------------------------------------------------------------
1605 * BlowawayRelationBuffers
1607 * This function blowaway all the pages with blocknumber >= passed
1608 * of a relation in the buffer pool. Used by vacuum before truncation...
1610 * Returns: 0 - Ok, -1 - DIRTY, -2 - PINNED
1612 * XXX currently it sequentially searches the buffer pool, should be
1613 * changed to more clever ways of searching.
1614 * --------------------------------------------------------------------
1617 BlowawayRelationBuffers(Relation rdesc, BlockNumber block)
/* Local (temp) relation: scan the backend-private pool; a dirty or
 * still-referenced page past the truncation point is an error. */
1622 if (rdesc->rd_islocal)
1624 for (i = 0; i < NLocBuffer; i++)
1626 buf = &LocalBufferDescriptors[i];
1627 if (buf->tag.relId.relId == rdesc->rd_id &&
1628 buf->tag.blockNum >= block)
1630 if (buf->flags & BM_DIRTY)
1632 elog (NOTICE, "BlowawayRelationBuffers(%s (local), %u): block %u is dirty",
1633 rdesc->rd_rel->relname.data, block, buf->tag.blockNum);
1636 if (LocalRefCount[i] > 0)
1638 elog (NOTICE, "BlowawayRelationBuffers(%s (local), %u): block %u is referenced (%d)",
1639 rdesc->rd_rel->relname.data, block,
1640 buf->tag.blockNum, LocalRefCount[i]);
/* Invalidate the local buffer by clearing its relation tag. */
1643 buf->tag.relId.relId = InvalidOid;
/* Shared pool: hold BufMgrLock for the whole scan; it is released on
 * each early-error return path below. */
1649 SpinAcquire(BufMgrLock);
1650 for (i = 0; i < NBuffers; i++)
1652 buf = &BufferDescriptors[i];
1653 if (buf->tag.relId.dbId == MyDatabaseId &&
1654 buf->tag.relId.relId == rdesc->rd_id &&
1655 buf->tag.blockNum >= block)
1657 if (buf->flags & BM_DIRTY)
1659 elog (NOTICE, "BlowawayRelationBuffers(%s, %u): block %u is dirty (private %d, last %d, global %d)",
1660 buf->sb_relname, block, buf->tag.blockNum,
1661 PrivateRefCount[i], LastRefCount[i], buf->refcount);
1662 SpinRelease(BufMgrLock);
/* NOTE(review): the "return -1" after this release is not visible in
 * the extraction -- confirm against the full source. */
1665 if (!(buf->flags & BM_FREE))
1667 elog (NOTICE, "BlowawayRelationBuffers(%s, %u): block %u is referenced (private %d, last %d, global %d)",
1668 buf->sb_relname, block, buf->tag.blockNum,
1669 PrivateRefCount[i], LastRefCount[i], buf->refcount);
1670 SpinRelease(BufMgrLock);
/* Page is clean and unpinned: drop it from the buffer lookup table. */
1673 BufTableDelete(buf);
1676 SpinRelease(BufMgrLock);
1680 #undef IncrBufferRefCount
1681 #undef ReleaseBuffer
1684 IncrBufferRefCount(Buffer buffer)
1686 if (BufferIsLocal(buffer))
1688 Assert(LocalRefCount[-buffer - 1] >= 0);
1689 LocalRefCount[-buffer - 1]++;
1693 Assert(!BAD_BUFFER_ID(buffer));
1694 Assert(PrivateRefCount[buffer - 1] >= 0);
1695 PrivateRefCount[buffer - 1]++;
1700 * ReleaseBuffer -- remove the pin on a buffer without
/* (comment continuation lost in extraction; it describes unpinning
 * without marking the page dirty.  Returns STATUS_OK / STATUS_ERROR.) */
1705 ReleaseBuffer(Buffer buffer)
/* Local buffers carry only a backend-private reference count. */
1709 if (BufferIsLocal(buffer))
1711 Assert(LocalRefCount[-buffer - 1] > 0);
1712 LocalRefCount[-buffer - 1]--;
/* NOTE(review): the return for the local-buffer path is not visible in
 * this extraction -- confirm against the full source. */
1716 if (BAD_BUFFER_ID(buffer))
1717 return (STATUS_ERROR);
1719 bufHdr = &BufferDescriptors[buffer - 1];
/* Drop our private pin; shared state is touched only when both the
 * current and the saved (outer ExecMain) counts reach zero. */
1721 Assert(PrivateRefCount[buffer - 1] > 0);
1722 PrivateRefCount[buffer - 1]--;
1723 if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0)
1727 * only release buffer if it is not pinned in previous ExecMain
/* Shared refcount, freelist and flags are protected by BufMgrLock.
 * NOTE(review): the bufHdr->refcount decrement itself is among the
 * lines missing from this extraction. */
1730 SpinAcquire(BufMgrLock);
1732 if (bufHdr->refcount == 0)
1734 AddBufferToFreelist(bufHdr);
1735 bufHdr->flags |= BM_FREE;
/* Deferred commit-hint changes: mark the page dirty at unpin time. */
1737 if (CommitInfoNeedsSave[buffer - 1])
1739 bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
1740 CommitInfoNeedsSave[buffer - 1] = 0;
1742 SpinRelease(BufMgrLock);
/* IncrBufferRefCount_Debug -- pin the buffer, then optionally emit a
 * PIN trace line (shared, user-relation buffers only) tagged with the
 * caller's file and line. */
1750 IncrBufferRefCount_Debug(char *file, int line, Buffer buffer)
1752 IncrBufferRefCount(buffer);
1753 if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
1755 BufferDesc *buf = &BufferDescriptors[buffer - 1];
1757 fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \
1758 refcount = %ld, file: %s, line: %d\n",
1759 buffer, buf->sb_relname, buf->tag.blockNum,
1760 PrivateRefCount[buffer - 1], file, line);
/* ReleaseBuffer_Debug -- unpin the buffer, then optionally emit an
 * UNPIN trace line (shared, user-relation buffers only) tagged with
 * the caller's file and line. */
1768 ReleaseBuffer_Debug(char *file, int line, Buffer buffer)
1770 ReleaseBuffer(buffer);
1771 if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))
1773 BufferDesc *buf = &BufferDescriptors[buffer - 1];
1775 fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \
1776 refcount = %ld, file: %s, line: %d\n",
1777 buffer, buf->sb_relname, buf->tag.blockNum,
1778 PrivateRefCount[buffer - 1], file, line);
/* ReleaseAndReadBuffer_Debug -- trace-wrapping variant of
 * ReleaseAndReadBuffer: UNPIN trace for the old buffer, PIN trace for
 * the newly read one.  (The middle parameter lines of the signature
 * are missing from this extraction.) */
1786 ReleaseAndReadBuffer_Debug(char *file,
1790 BlockNumber blockNum)
/* Remember validity of the old buffer before it is released. */
1795 bufferValid = BufferIsValid(buffer);
1796 b = ReleaseAndReadBuffer(buffer, relation, blockNum);
/* NOTE(review): the sibling _Debug helpers guard with
 * !BufferIsLocal(buffer) before indexing the shared BufferDescriptors[]
 * array, but both guards below test BufferIsLocal(buffer) without the
 * '!' -- for a local (negative) buffer id the indexing would be out of
 * bounds.  Looks like a missing '!'; verify against the full source. */
1797 if (ShowPinTrace && bufferValid && BufferIsLocal(buffer)
1798 && is_userbuffer(buffer))
1800 BufferDesc *buf = &BufferDescriptors[buffer - 1];
1802 fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
1803 refcount = %ld, file: %s, line: %d\n",
1804 buffer, buf->sb_relname, buf->tag.blockNum,
1805 PrivateRefCount[buffer - 1], file, line);
1807 if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer))
1809 BufferDesc *buf = &BufferDescriptors[b - 1];
1811 fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \
1812 refcount = %ld, file: %s, line: %d\n",
1813 b, buf->sb_relname, buf->tag.blockNum,
1814 PrivateRefCount[b - 1], file, line);
1824 * trace allocations and deallocations in a circular buffer in
1825 * shared memory. check the buffer before doing the allocation,
1826 * and die if there's anything fishy.
/* _bm_trace (BMTRACE builds) -- record one alloc/dealloc event for
 * buffer bufNo in the shared circular trace buffer, after scanning
 * backwards for an inconsistent earlier event on the same buffer. */
1829 _bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType)
/* Cached once per process; the getpid() initialization line is missing
 * from this extraction. */
1831 static int mypid = 0;
/* start = next free slot; scan backwards (wrapping at 0) from the most
 * recent entry.  Several loop-control lines are missing here. */
1839 start = *CurTraceBuf;
1844 cur = BMT_LIMIT - 1;
1848 tb = &TraceBuf[cur];
1849 if (tb->bmt_op != BMT_NOTUSED)
1851 if (tb->bmt_buf == bufNo)
/* A prior dealloc of this buffer, or a prior alloc of the same page,
 * ends the scan; anything else on this buffer is an inconsistency. */
1853 if ((tb->bmt_op == BMT_DEALLOC)
1854 || (tb->bmt_dbid == dbId && tb->bmt_relid == relId
1855 && tb->bmt_blkno == blkNo))
1858 /* die holding the buffer lock */
1859 _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur);
1867 cur = BMT_LIMIT - 1;
/* Consistent: record the new event in the free slot and advance the
 * shared cursor (modulo the ring size). */
1873 tb = &TraceBuf[start];
1874 tb->bmt_pid = mypid;
1875 tb->bmt_buf = bufNo;
1876 tb->bmt_dbid = dbId;
1877 tb->bmt_relid = relId;
1878 tb->bmt_blkno = blkNo;
1879 tb->bmt_op = allocType;
1881 *CurTraceBuf = (start + 1) % BMT_LIMIT;
/* _bm_die (BMTRACE builds) -- dump the trace ring and the offending
 * operation to /tmp/death_notice, then kill this backend with SIGILL
 * so a core dump is produced.  Called with the buffer lock held. */
1884 _bm_die(Oid dbId, Oid relId, int blkNo, int bufNo,
1885 int allocType, long start, long cur)
1891 tb = &TraceBuf[cur];
1893 if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL)
1894 elog(FATAL, "buffer alloc trace error and can't open log file");
1896 fprintf(fp, "buffer alloc trace detected the following error:\n\n");
1897 fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n",
1898 bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"),
1899 (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation"));
/* Walk the whole ring from 'start', printing each used entry and
 * marking the conflicting one.  Several loop/switch lines (including
 * some case labels) are missing from this extraction. */
1901 fprintf(fp, "the trace buffer contains:\n");
1907 if (tb->bmt_op != BMT_NOTUSED)
1909 fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%d,%d> ",
1910 i, (i == cur ? " ---> " : "\t"),
1911 tb->bmt_pid, tb->bmt_buf,
1912 tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno);
1917 fprintf(fp, "allocate (found)\n");
1920 case BMT_ALLOCNOTFND:
1921 fprintf(fp, "allocate (not found)\n");
1925 fprintf(fp, "deallocate\n");
1929 fprintf(fp, "unknown op type %d\n", tb->bmt_op);
1934 i = (i + 1) % BMT_LIMIT;
/* Finally describe the operation that triggered the failure. */
1939 fprintf(fp, "\noperation causing error:\n");
1940 fprintf(fp, "\tpid %d buf %d for <%d,%d,%d> ",
1941 getpid(), bufNo, dbId, relId, blkNo);
1946 fprintf(fp, "allocate (found)\n");
1949 case BMT_ALLOCNOTFND:
1950 fprintf(fp, "allocate (not found)\n");
1954 fprintf(fp, "deallocate\n");
1958 fprintf(fp, "unknown op type %d\n", allocType);
/* SIGILL rather than abort(): forces a core dump identifiable as a
 * buffer-trace death. */
1964 kill(getpid(), SIGILL);
1967 #endif /* BMTRACE */
1970 BufferRefCountReset(int *refcountsave)
1974 for (i = 0; i < NBuffers; i++)
1976 refcountsave[i] = PrivateRefCount[i];
1977 LastRefCount[i] += PrivateRefCount[i];
1978 PrivateRefCount[i] = 0;
1983 BufferRefCountRestore(int *refcountsave)
1987 for (i = 0; i < NBuffers; i++)
1989 PrivateRefCount[i] = refcountsave[i];
1990 LastRefCount[i] -= refcountsave[i];
1991 refcountsave[i] = 0;
/* SetBufferWriteMode -- install a new buffer write mode (see
 * BUFFER_LATE_WRITE in the file header comments).
 * NOTE(review): the entire function body is missing from this
 * extraction; presumably it saves the old WriteMode, assigns the new
 * one, and returns the old value -- confirm against the full source. */
1996 SetBufferWriteMode(int mode)
2006 SetBufferCommitInfoNeedsSave(Buffer buffer)
2008 if (!BufferIsLocal(buffer))
2009 CommitInfoNeedsSave[buffer - 1]++;