From: Vadim B. Mikheev Date: Thu, 30 Nov 2000 08:46:26 +0000 (+0000) Subject: No more #ifdef XLOG. X-Git-Tag: REL7_1_BETA~38 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=81c8c244b26011a071c89b43a38bba7039226019;p=postgresql No more #ifdef XLOG. --- diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index ce1b067d52..7b30aa72a9 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -6,7 +6,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.66 2000/11/21 21:15:53 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.67 2000/11/30 08:46:20 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -23,9 +23,7 @@ #include "miscadmin.h" #include "utils/syscache.h" -#ifdef XLOG #include "access/xlogutils.h" -#endif /* non-export function prototypes */ static InsertIndexResult gistdoinsert(Relation r, IndexTuple itup, @@ -1348,7 +1346,6 @@ int_range_out(INTRANGE *r) #endif /* defined GISTDEBUG */ -#ifdef XLOG void gist_redo(XLogRecPtr lsn, XLogRecord *record) { @@ -1365,4 +1362,3 @@ void gist_desc(char *buf, uint8 xl_info, char* rec) { } -#endif diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c index 599696ba83..428eb32f0a 100644 --- a/src/backend/access/hash/hash.c +++ b/src/backend/access/hash/hash.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.45 2000/11/21 21:15:54 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.46 2000/11/30 08:46:20 vadim Exp $ * * NOTES * This file contains only the public interface routines. @@ -27,9 +27,7 @@ bool BuildingHash = false; -#ifdef XLOG #include "access/xlogutils.h" -#endif /* @@ -482,7 +480,6 @@ hashdelete(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } -#ifdef XLOG void hash_redo(XLogRecPtr lsn, XLogRecord *record) { @@ -499,4 +496,3 @@ void hash_desc(char *buf, uint8 xl_info, char* rec) { } -#endif diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f636c7fdaf..7b60a897aa 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.96 2000/11/21 21:15:54 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.97 2000/11/30 08:46:20 vadim Exp $ * * * INTERFACE ROUTINES @@ -86,7 +86,6 @@ #include "utils/inval.h" #include "utils/relcache.h" -#ifdef XLOG #include "access/xlogutils.h" XLogRecPtr log_heap_move(Relation reln, ItemPointerData from, HeapTuple newtup); @@ -99,8 +98,6 @@ static XLogRecPtr log_heap_update(Relation reln, ItemPointerData from, static void HeapPageCleanup(Buffer buffer); -#endif - /* ---------------------------------------------------------------- * heap support routines @@ -1370,7 +1367,6 @@ heap_insert(Relation relation, HeapTuple tup) /* NO ELOG(ERROR) from here till changes are logged */ RelationPutHeapTuple(relation, buffer, tup); -#ifdef XLOG /* XLOG stuff */ { xl_heap_insert xlrec; @@ -1392,7 +1388,6 @@ heap_insert(Relation relation, HeapTuple tup) PageSetLSN(BufferGetPage(buffer), recptr); PageSetSUI(BufferGetPage(buffer), ThisStartUpID); } -#endif LockBuffer(buffer, BUFFER_LOCK_UNLOCK); WriteBuffer(buffer); @@ -1485,7 +1480,6 @@ l1: return result; } -#ifdef XLOG /* XLOG stuff */ { xl_heap_delete xlrec; @@ -1500,7 +1494,6 @@ l1: PageSetLSN(dp, recptr); PageSetSUI(dp, ThisStartUpID); 
} -#endif /* store transaction information of xact deleting the tuple */ TransactionIdStore(GetCurrentTransactionId(), &(tp.t_data->t_xmax)); @@ -1638,7 +1631,6 @@ l2: newbuf = buffer; else { -#ifdef XLOG /* * We have to unlock old tuple buffer before extending table * file but have to keep lock on the old tuple. To avoid second @@ -1650,7 +1642,7 @@ l2: _locked_tuple_.node = relation->rd_node; _locked_tuple_.tid = *otid; XactPushRollback(_heap_unlock_tuple, (void*) &_locked_tuple_); -#endif + TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax)); oldtup.t_data->t_cmax = GetCurrentCommandId(); oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED | @@ -1677,15 +1669,12 @@ l2: else { oldtup.t_data->t_infomask &= ~HEAP_XMAX_UNLOGGED; -#ifdef XLOG XactPopRollback(); -#endif } /* record address of new tuple in t_ctid of old one */ oldtup.t_data->t_ctid = newtup->t_self; -#ifdef XLOG /* XLOG stuff */ { XLogRecPtr recptr = log_heap_update(relation, @@ -1699,7 +1688,6 @@ l2: PageSetLSN(BufferGetPage(buffer), recptr); PageSetSUI(BufferGetPage(buffer), ThisStartUpID); } -#endif if (newbuf != buffer) { @@ -1791,13 +1779,11 @@ l3: return result; } -#ifdef XLOG /* * XLOG stuff: no logging is required as long as we have no * savepoints. For savepoints private log could be used... */ ((PageHeader) BufferGetPage(*buffer))->pd_sui = ThisStartUpID; -#endif /* store transaction information of xact marking the tuple */ TransactionIdStore(GetCurrentTransactionId(), &(tuple->t_data->t_xmax)); @@ -1984,8 +1970,6 @@ heap_restrpos(HeapScanDesc scan) } } -#ifdef XLOG - static XLogRecPtr log_heap_update(Relation reln, ItemPointerData from, HeapTuple newtup, bool move) @@ -2634,5 +2618,3 @@ heap_desc(char *buf, uint8 xl_info, char* rec) else strcat(buf, "UNKNOWN"); } - -#endif /* XLOG */ diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 6d287fb79b..f00b1e7918 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.68 2000/11/16 05:50:58 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.69 2000/11/30 08:46:21 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -61,9 +61,7 @@ static void _bt_pgaddtup(Relation rel, Page page, static bool _bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey); -#ifdef XLOG static Relation _xlheapRel; /* temporary hack */ -#endif /* * _bt_doinsert() -- Handle insertion of a single btitem in the tree. 
@@ -123,9 +121,7 @@ top: } } -#ifdef XLOG _xlheapRel = heapRel; /* temporary hack */ -#endif /* do the insertion */ res = _bt_insertonpg(rel, buf, stack, natts, itup_scankey, btitem, 0); @@ -522,7 +518,6 @@ _bt_insertonpg(Relation rel, } else { -#ifdef XLOG /* XLOG stuff */ { char xlbuf[sizeof(xl_btree_insert) + @@ -562,7 +557,7 @@ _bt_insertonpg(Relation rel, PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } -#endif + _bt_pgaddtup(rel, page, itemsz, btitem, newitemoff, "page"); itup_off = newitemoff; itup_blkno = BufferGetBlockNumber(buf); @@ -612,10 +607,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, rightoff; OffsetNumber maxoff; OffsetNumber i; - -#ifdef XLOG BTItem lhikey; -#endif rbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); origpage = BufferGetPage(buf); @@ -685,9 +677,7 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, itemsz = ItemIdGetLength(itemid); item = (BTItem) PageGetItem(origpage, itemid); } -#ifdef XLOG lhikey = item; -#endif if (PageAddItem(leftpage, (Item) item, itemsz, leftoff, LP_USED) == InvalidOffsetNumber) elog(STOP, "btree: failed to add hikey to the left sibling"); @@ -775,7 +765,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, spage = BufferGetPage(sbuf); } -#ifdef XLOG /* * Right sibling is locked, new siblings are prepared, but original * page is not updated yet. Log changes before continuing. @@ -860,7 +849,6 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, PageSetSUI(spage, ThisStartUpID); } } -#endif /* * By here, the original data page has been split into two new halves, @@ -1165,19 +1153,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) BTItem item; Size itemsz; BTItem new_item; - -#ifdef XLOG Buffer metabuf; -#endif /* get a new root page */ rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootpage = BufferGetPage(rootbuf); rootblknum = BufferGetBlockNumber(rootbuf); - -#ifdef XLOG metabuf = _bt_getbuf(rel, BTREE_METAPAGE,BT_WRITE); -#endif /* NO ELOG(ERROR) from here till newroot op is logged */ @@ -1237,7 +1219,6 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) elog(STOP, "btree: failed to add rightkey to new root page"); pfree(new_item); -#ifdef XLOG /* XLOG stuff */ { xl_btree_newroot xlrec; @@ -1267,16 +1248,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) _bt_wrtbuf(rel, metabuf); } -#endif /* write and let go of the new root buffer */ _bt_wrtbuf(rel, rootbuf); -#ifndef XLOG - /* update metadata page with new root block number */ - _bt_metaproot(rel, rootblknum, 0); -#endif - /* update and release new sibling, and finally the old root */ _bt_wrtbuf(rel, rbuf); _bt_wrtbuf(rel, lbuf); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index dd7882a9da..fe9036d111 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.41 2000/11/30 01:39:06 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.42 2000/11/30 08:46:21 vadim Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. 
The opaque @@ -170,7 +170,6 @@ _bt_getroot(Relation rel, int access) rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); rootopaque->btpo_flags |= (BTP_LEAF | BTP_ROOT); -#ifdef XLOG /* XLOG stuff */ { xl_btree_newroot xlrec; @@ -187,7 +186,6 @@ _bt_getroot(Relation rel, int access) PageSetLSN(metapg, recptr); PageSetSUI(metapg, ThisStartUpID); } -#endif metad->btm_root = rootblkno; metad->btm_level = 1; @@ -403,7 +401,6 @@ _bt_pagedel(Relation rel, ItemPointer tid) buf = _bt_getbuf(rel, blkno, BT_WRITE); page = BufferGetPage(buf); -#ifdef XLOG /* XLOG stuff */ { xl_btree_delete xlrec; @@ -417,7 +414,6 @@ _bt_pagedel(Relation rel, ItemPointer tid) PageSetLSN(page, recptr); PageSetSUI(page, ThisStartUpID); } -#endif PageIndexTupleDelete(page, offno); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 37d898d97e..80d8d996d1 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.71 2000/11/21 21:15:55 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.72 2000/11/30 08:46:21 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -32,11 +32,8 @@ bool BuildingBtree = false; /* see comment in btbuild() */ bool FastBuild = true; /* use sort/build instead of insertion * build */ -#ifdef XLOG #include "access/xlogutils.h" -#endif - static void _bt_restscan(IndexScanDesc scan); /* @@ -733,8 +730,6 @@ _bt_restscan(IndexScanDesc scan) } } -#ifdef XLOG - static bool _bt_cleanup_page(Page page, RelFileNode hnode) { @@ -1529,5 +1524,3 @@ btree_desc(char *buf, uint8 xl_info, char* rec) else strcat(buf, "UNKNOWN"); } - -#endif diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c index 7104a53b70..e8abb0765c 100644 --- a/src/backend/access/rtree/rtree.c +++ b/src/backend/access/rtree/rtree.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.56 2000/11/21 21:15:55 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.57 2000/11/30 08:46:21 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -22,9 +22,7 @@ #include "executor/executor.h" #include "miscadmin.h" -#ifdef XLOG #include "access/xlogutils.h" -#endif typedef struct SPLITVEC { @@ -1069,7 +1067,6 @@ _rtdump(Relation r) #endif /* defined RTDEBUG */ -#ifdef XLOG void rtree_redo(XLogRecPtr lsn, XLogRecord *record) { @@ -1086,4 +1083,3 @@ void rtree_desc(char *buf, uint8 xl_info, char* rec) { } -#endif diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index ca8b77de57..b25db74da8 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -27,4 +27,3 @@ RmgrData RmgrTable[] = { {"Gist", gist_redo, gist_undo, gist_desc}, {"Sequence", seq_redo, seq_undo, seq_desc} }; - diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 72d5c23628..644d089b92 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.37 2000/11/21 21:15:57 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/transam.c,v 1.38 2000/11/30 08:46:22 vadim Exp $ * * 
NOTES * This file contains the high level access-method interface to the @@ -424,23 +424,12 @@ InitializeTransactionLog(void) SpinAcquire(OidGenLockId); if (!TransactionIdDidCommit(AmiTransactionId)) { - - /* ---------------- - * SOMEDAY initialize the information stored in - * the headers of the log/variable relations. - * ---------------- - */ TransactionLogUpdate(AmiTransactionId, XID_COMMIT); TransactionIdStore(AmiTransactionId, &cachedTestXid); cachedTestXidStatus = XID_COMMIT; -#ifdef XLOG Assert(!IsUnderPostmaster && ShmemVariableCache->nextXid <= FirstTransactionId); ShmemVariableCache->nextXid = FirstTransactionId; -#else - VariableRelationPutNextXid(FirstTransactionId); -#endif - } else if (RecoveryCheckingEnabled()) { diff --git a/src/backend/access/transam/transsup.c b/src/backend/access/transam/transsup.c index 74e8c39eae..3b2a345ff3 100644 --- a/src/backend/access/transam/transsup.c +++ b/src/backend/access/transam/transsup.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.26 2000/10/28 16:20:53 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/transsup.c,v 1.27 2000/11/30 08:46:22 vadim Exp $ * * NOTES * This file contains support functions for the high @@ -186,9 +186,7 @@ TransBlockGetXidStatus(Block tblock, bits8 bit2; BitIndex offset; -#ifdef XLOG tblock = (Block) ((char*) tblock + sizeof(XLogRecPtr)); -#endif /* ---------------- * calculate the index into the transaction data where @@ -231,9 +229,7 @@ TransBlockSetXidStatus(Block tblock, Index index; BitIndex offset; -#ifdef XLOG tblock = (Block) ((char*) tblock + sizeof(XLogRecPtr)); -#endif /* ---------------- * calculate the index into the transaction data where diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 75a568f8fd..3c81cb207a 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -1,290 +1,51 @@ /*------------------------------------------------------------------------- * * varsup.c - * postgres variable relation support routines - * - * Portions Copyright (c) 1996-2000, PostgreSQL, Inc - * Portions Copyright (c) 1994, Regents of the University of California + * postgres OID & XID variables support routines * + * Copyright (c) 2000, PostgreSQL, Inc * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.33 2000/11/20 16:47:30 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.34 2000/11/30 08:46:22 vadim Exp $ * *------------------------------------------------------------------------- */ -#include "postgres.h" -#ifdef XLOG -#include "xlog_varsup.c" -#else +#include "postgres.h" -#include "access/heapam.h" -#include "catalog/catname.h" +#include "access/transam.h" #include "storage/proc.h" -static void GetNewObjectIdBlock(Oid *oid_return, int oid_block_size); -static void VariableRelationGetNextOid(Oid *oid_return); -static void VariableRelationGetNextXid(TransactionId *xidP); -static void VariableRelationPutNextOid(Oid oid); +SPINLOCK OidGenLockId; -/* --------------------- - * spin lock for oid generation - * --------------------- - */ -int OidGenLockId; +extern SPINLOCK XidGenLockId; +extern void XLogPutNextOid(Oid nextOid); -/* --------------------- - * pointer to "variable cache" in shared memory (set up by shmem.c) - * --------------------- - */ +/* pointer to "variable cache" in shared memory (set up by shmem.c) */ VariableCache ShmemVariableCache = NULL; - -/* 
---------------------------------------------------------------- - * variable relation query/update routines - * ---------------------------------------------------------------- - */ - -/* -------------------------------- - * VariableRelationGetNextXid - * -------------------------------- - */ -static void -VariableRelationGetNextXid(TransactionId *xidP) -{ - Buffer buf; - VariableRelationContents var; - - /* ---------------- - * We assume that a spinlock has been acquired to guarantee - * exclusive access to the variable relation. - * ---------------- - */ - - /* ---------------- - * do nothing before things are initialized - * ---------------- - */ - if (!RelationIsValid(VariableRelation)) - return; - - /* ---------------- - * read the variable page, get the the nextXid field and - * release the buffer - * ---------------- - */ - buf = ReadBuffer(VariableRelation, 0); - - if (!BufferIsValid(buf)) - { - SpinRelease(OidGenLockId); - elog(ERROR, "VariableRelationGetNextXid: ReadBuffer failed"); - } - - var = (VariableRelationContents) BufferGetBlock(buf); - - TransactionIdStore(var->nextXidData, xidP); - - ReleaseBuffer(buf); -} - -/* -------------------------------- - * VariableRelationPutNextXid - * -------------------------------- - */ -void -VariableRelationPutNextXid(TransactionId xid) -{ - Buffer buf; - VariableRelationContents var; - - /* ---------------- - * We assume that a spinlock has been acquired to guarantee - * exclusive access to the variable relation. - * ---------------- - */ - - /* ---------------- - * do nothing before things are initialized - * ---------------- - */ - if (!RelationIsValid(VariableRelation)) - return; - - /* ---------------- - * read the variable page, update the nextXid field and - * write the page back out to disk (with immediate write). - * ---------------- - */ - buf = ReadBuffer(VariableRelation, 0); - - if (!BufferIsValid(buf)) - { - SpinRelease(OidGenLockId); - elog(ERROR, "VariableRelationPutNextXid: ReadBuffer failed"); - } - - var = (VariableRelationContents) BufferGetBlock(buf); - - TransactionIdStore(xid, &(var->nextXidData)); - - FlushBuffer(buf, true, true); -} - -/* -------------------------------- - * VariableRelationGetNextOid - * -------------------------------- - */ -static void -VariableRelationGetNextOid(Oid *oid_return) -{ - Buffer buf; - VariableRelationContents var; - - /* ---------------- - * We assume that a spinlock has been acquired to guarantee - * exclusive access to the variable relation. - * ---------------- - */ - - /* ---------------- - * if the variable relation is not initialized, then we - * assume we are running at bootstrap time and so we return - * an invalid object id (this path should never be taken, probably). 
- * ---------------- - */ - if (!RelationIsValid(VariableRelation)) - { - (*oid_return) = InvalidOid; - return; - } - - /* ---------------- - * read the variable page, get the the nextOid field and - * release the buffer - * ---------------- - */ - buf = ReadBuffer(VariableRelation, 0); - - if (!BufferIsValid(buf)) - { - SpinRelease(OidGenLockId); - elog(ERROR, "VariableRelationGetNextOid: ReadBuffer failed"); - } - - var = (VariableRelationContents) BufferGetBlock(buf); - - (*oid_return) = var->nextOid; - - ReleaseBuffer(buf); -} - -/* -------------------------------- - * VariableRelationPutNextOid - * -------------------------------- - */ -static void -VariableRelationPutNextOid(Oid oid) -{ - Buffer buf; - VariableRelationContents var; - - /* ---------------- - * We assume that a spinlock has been acquired to guarantee - * exclusive access to the variable relation. - * ---------------- - */ - - /* ---------------- - * do nothing before things are initialized - * ---------------- - */ - if (!RelationIsValid(VariableRelation)) - return; - - /* ---------------- - * read the variable page, update the nextXid field and - * write the page back out to disk. - * ---------------- - */ - buf = ReadBuffer(VariableRelation, 0); - - if (!BufferIsValid(buf)) - { - SpinRelease(OidGenLockId); - elog(ERROR, "VariableRelationPutNextOid: ReadBuffer failed"); - } - - var = (VariableRelationContents) BufferGetBlock(buf); - - var->nextOid = oid; - - WriteBuffer(buf); -} - -/* ---------------------------------------------------------------- - * transaction id generation support - * ---------------------------------------------------------------- - */ - -/* ---------------- - * GetNewTransactionId - * - * Transaction IDs are allocated via a cache in shared memory. - * Each time we need more IDs, we advance the "next XID" value - * in pg_variable by VAR_XID_PREFETCH and set the cache to - * show that many XIDs as available. Then, allocating those XIDs - * requires just a spinlock and not a buffer read/write cycle. - * - * Since the cache is shared across all backends, cached but unused - * XIDs are not lost when a backend exits, only when the postmaster - * quits or forces shared memory reinit. So we can afford to have - * a pretty big value of VAR_XID_PREFETCH. - * - * This code does not worry about initializing the transaction counter - * (see transam.c's InitializeTransactionLog() for that). We also - * ignore the possibility that the counter could someday wrap around. - * ---------------- - */ - -#define VAR_XID_PREFETCH 1024 - void GetNewTransactionId(TransactionId *xid) { - - /* ---------------- - * during bootstrap initialization, we return the special - * bootstrap transaction id. - * ---------------- + /* + * During bootstrap initialization, we return the special + * bootstrap transaction id. */ if (AMI_OVERRIDE) { - TransactionIdStore(AmiTransactionId, xid); + *xid = AmiTransactionId; return; } - SpinAcquire(OidGenLockId); /* not good for concurrency... 
*/ - - if (ShmemVariableCache->xid_count == 0) - { - TransactionId nextid; - - VariableRelationGetNextXid(&nextid); - TransactionIdStore(nextid, &(ShmemVariableCache->nextXid)); - ShmemVariableCache->xid_count = VAR_XID_PREFETCH; - TransactionIdAdd(&nextid, VAR_XID_PREFETCH); - VariableRelationPutNextXid(nextid); - } - - TransactionIdStore(ShmemVariableCache->nextXid, xid); - TransactionIdAdd(&(ShmemVariableCache->nextXid), 1); - (ShmemVariableCache->xid_count)--; + SpinAcquire(XidGenLockId); + *xid = ShmemVariableCache->nextXid; + (ShmemVariableCache->nextXid)++; if (MyProc != (PROC *) NULL) MyProc->xid = *xid; - SpinRelease(OidGenLockId); + SpinRelease(XidGenLockId); + } /* @@ -294,30 +55,20 @@ void ReadNewTransactionId(TransactionId *xid) { - /* ---------------- - * during bootstrap initialization, we return the special - * bootstrap transaction id. - * ---------------- + /* + * During bootstrap initialization, we return the special + * bootstrap transaction id. */ if (AMI_OVERRIDE) { - TransactionIdStore(AmiTransactionId, xid); + *xid = AmiTransactionId; return; } - SpinAcquire(OidGenLockId); /* not good for concurrency... */ + SpinAcquire(XidGenLockId); + *xid = ShmemVariableCache->nextXid; + SpinRelease(XidGenLockId); - /* - * Note that we don't check is ShmemVariableCache->xid_count equal to - * 0 or not. This will work as long as we don't call - * ReadNewTransactionId() before GetNewTransactionId(). - */ - if (ShmemVariableCache->nextXid == 0) - elog(ERROR, "ReadNewTransactionId: ShmemVariableCache->nextXid is not initialized"); - - TransactionIdStore(ShmemVariableCache->nextXid, xid); - - SpinRelease(OidGenLockId); } /* ---------------------------------------------------------------- @@ -325,199 +76,67 @@ ReadNewTransactionId(TransactionId *xid) * ---------------------------------------------------------------- */ -/* ---------------- - * GetNewObjectIdBlock - * - * This support function is used to allocate a block of object ids - * of the given size. - * ---------------- - */ -static void -GetNewObjectIdBlock(Oid *oid_return, /* place to return the first new - * object id */ - int oid_block_size) /* number of oids desired */ -{ - Oid firstfreeoid; - Oid nextoid; - - /* ---------------- - * Obtain exclusive access to the variable relation page - * ---------------- - */ - SpinAcquire(OidGenLockId); - - /* ---------------- - * get the "next" oid from the variable relation - * ---------------- - */ - VariableRelationGetNextOid(&firstfreeoid); - - /* ---------------- - * Allocate the range of OIDs to be returned to the caller. - * - * There are two things going on here. - * - * One: in a virgin database pg_variable will initially contain zeroes, - * so we will read out firstfreeoid = InvalidOid. We want to start - * allocating OIDs at BootstrapObjectIdData instead (OIDs below that - * are reserved for static assignment in the initial catalog data). - * - * Two: if a database is run long enough, the OID counter will wrap - * around. We must not generate an invalid OID when that happens, - * and it seems wise not to generate anything in the reserved range. - * Therefore we advance to BootstrapObjectIdData in this case too. - * - * The comparison here assumes that Oid is an unsigned type. - */ - nextoid = firstfreeoid + oid_block_size; - - if (! 
OidIsValid(firstfreeoid) || nextoid < firstfreeoid) - { - /* Initialization or wraparound time, force it up to safe range */ - firstfreeoid = BootstrapObjectIdData; - nextoid = firstfreeoid + oid_block_size; - } - - (*oid_return) = firstfreeoid; - - /* ---------------- - * Update the variable relation to show the block range as used. - * ---------------- - */ - VariableRelationPutNextOid(nextoid); - - /* ---------------- - * Relinquish our lock on the variable relation page - * ---------------- - */ - SpinRelease(OidGenLockId); -} - -/* ---------------- - * GetNewObjectId - * - * This function allocates and parses out object ids. Like - * GetNewTransactionId(), it "prefetches" 32 object ids by - * incrementing the nextOid stored in the var relation by 32 and then - * returning these id's one at a time until they are exhausted. - * This means we reduce the number of accesses to the variable - * relation by 32 for each backend. - * - * Note: 32 has no special significance. We don't want the - * number to be too large because when the backend - * terminates, we lose the oids we cached. - * - * Question: couldn't we use a shared-memory cache just like XIDs? - * That would allow a larger interval between pg_variable updates - * without cache losses. Note, however, that we can assign an OID - * without even a spinlock from the backend-local OID cache. - * Maybe two levels of caching would be good. - * ---------------- - */ - -#define VAR_OID_PREFETCH 32 - -static int prefetched_oid_count = 0; -static Oid next_prefetched_oid; +#define VAR_OID_PREFETCH 8192 +static Oid lastSeenOid = InvalidOid; void -GetNewObjectId(Oid *oid_return) /* place to return the new object id */ +GetNewObjectId(Oid *oid_return) { - /* ---------------- - * if we run out of prefetched oids, then we get some - * more before handing them out to the caller. - * ---------------- - */ + SpinAcquire(OidGenLockId); - if (prefetched_oid_count == 0) + /* If we run out of logged for use oids then we log more */ + if (ShmemVariableCache->oidCount == 0) { - int oid_block_size = VAR_OID_PREFETCH; - - /* ---------------- - * Make sure pg_variable is open. - * ---------------- - */ - if (!RelationIsValid(VariableRelation)) - VariableRelation = heap_openr(VariableRelationName, NoLock); - - /* ---------------- - * get a new block of prefetched object ids. - * ---------------- - */ - GetNewObjectIdBlock(&next_prefetched_oid, oid_block_size); - - /* ---------------- - * now reset the prefetched_oid_count. - * ---------------- - */ - prefetched_oid_count = oid_block_size; + XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH); + ShmemVariableCache->oidCount = VAR_OID_PREFETCH; } - /* ---------------- - * return the next prefetched oid in the pointer passed by - * the user and decrement the prefetch count. 
- * ---------------- - */ if (PointerIsValid(oid_return)) - (*oid_return) = next_prefetched_oid; + lastSeenOid = (*oid_return) = ShmemVariableCache->nextOid; - next_prefetched_oid++; - prefetched_oid_count--; + (ShmemVariableCache->nextOid)++; + (ShmemVariableCache->oidCount)--; + + SpinRelease(OidGenLockId); } void CheckMaxObjectId(Oid assigned_oid) { - Oid temp_oid; - if (prefetched_oid_count == 0) /* make sure next/max is set, or - * reload */ - GetNewObjectId(&temp_oid); - - /* ---------------- - * If we are below prefetched limits, do nothing - * ---------------- - */ - - if (assigned_oid < next_prefetched_oid) + if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid) return; - /* ---------------- - * If we are here, we are coming from a 'copy from' with oid's - * - * If we are in the prefetched oid range, just bump it up - * ---------------- - */ + SpinAcquire(OidGenLockId); + if (assigned_oid < ShmemVariableCache->nextOid) + { + lastSeenOid = ShmemVariableCache->nextOid - 1; + SpinRelease(OidGenLockId); + return; + } - if (assigned_oid <= next_prefetched_oid + prefetched_oid_count - 1) + /* If we are in the logged oid range, just bump nextOid up */ + if (assigned_oid <= ShmemVariableCache->nextOid + + ShmemVariableCache->oidCount - 1) { - prefetched_oid_count -= assigned_oid - next_prefetched_oid + 1; - next_prefetched_oid = assigned_oid + 1; + ShmemVariableCache->oidCount -= + assigned_oid - ShmemVariableCache->nextOid + 1; + ShmemVariableCache->nextOid = assigned_oid + 1; + SpinRelease(OidGenLockId); return; } - /* ---------------- - * We have exceeded the prefetch oid range - * - * We should lock the database and kill all other backends - * but we are loading oid's that we can not guarantee are unique - * anyway, so we must rely on the user - * - * We now: - * set the variable relation with the new max oid - * force the backend to reload its oid cache - * - * By reloading the oid cache, we don't have to update the variable - * relation every time when sequential OIDs are being loaded by COPY. - * ---------------- + /* + * We have exceeded the logged oid range. + * We should lock the database and kill all other backends + * but we are loading oid's that we can not guarantee are unique + * anyway, so we must rely on the user. 
*/ - SpinAcquire(OidGenLockId); - VariableRelationPutNextOid(assigned_oid); + XLogPutNextOid(assigned_oid + VAR_OID_PREFETCH); + ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1; + ShmemVariableCache->nextOid = assigned_oid + 1; + SpinRelease(OidGenLockId); - prefetched_oid_count = 0; /* force reload */ - GetNewObjectId(&temp_oid); /* cause target OID to be allocated */ } - -#endif /* !XLOG */ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index df679d277d..b9ba82b63b 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.85 2000/11/30 01:47:31 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.86 2000/11/30 08:46:22 vadim Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -219,7 +219,6 @@ TransactionState CurrentTransactionState = &CurrentTransactionStateData; int DefaultXactIsoLevel = XACT_READ_COMMITTED; int XactIsoLevel; -#ifdef XLOG #include "access/xlogutils.h" int CommitDelay = 5; /* 1/200000 sec */ @@ -227,8 +226,6 @@ int CommitDelay = 5; /* 1/200000 sec */ static void (*_RollbackFunc)(void*) = NULL; static void *_RollbackData = NULL; -#endif - /* ---------------- * info returned when the system is disabled * @@ -662,19 +659,10 @@ RecordTransactionCommit() TransactionId xid; int leak; - /* ---------------- - * get the current transaction id - * ---------------- - */ xid = GetCurrentTransactionId(); - /* - * flush the buffer manager pages. Note: if we have stable main - * memory, dirty shared buffers are not flushed plai 8/7/90 - */ leak = BufferPoolCheckLeak(); -#ifdef XLOG if (MyLastRecPtr.xrecoff != 0) { xl_xact_commit xlrec; @@ -685,7 +673,7 @@ RecordTransactionCommit() xlrec.xtime = time(NULL); /* - * MUST SAVE ARRAY OF RELFILENODE-s TO DROP + * SHOULD SAVE ARRAY OF RELFILENODE-s TO DROP */ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, (char*) &xlrec, SizeOfXactCommit, NULL, 0); @@ -704,30 +692,6 @@ RecordTransactionCommit() MyProc->logRec.xrecoff = 0; } -#else - /* - * If no one shared buffer was changed by this transaction then we - * don't flush shared buffers and don't record commit status. - */ - if (SharedBufferChanged) - { - FlushBufferPool(); - if (leak) - ResetBufferPool(true); - - /* - * have the transaction access methods record the status of this - * transaction id in the pg_log relation. - */ - TransactionIdCommit(xid); - - /* - * Now write the log info to the disk too. - */ - leak = BufferPoolCheckLeak(); - FlushBufferPool(); - } -#endif if (leak) ResetBufferPool(true); @@ -815,23 +779,8 @@ AtCommit_Memory(void) static void RecordTransactionAbort(void) { - TransactionId xid; + TransactionId xid = GetCurrentTransactionId(); - /* ---------------- - * get the current transaction id - * ---------------- - */ - xid = GetCurrentTransactionId(); - - /* - * Have the transaction access methods record the status of this - * transaction id in the pg_log relation. We skip it if no one shared - * buffer was changed by this transaction. - */ - if (SharedBufferChanged && !TransactionIdDidCommit(xid)) - TransactionIdAbort(xid); - -#ifdef XLOG if (MyLastRecPtr.xrecoff != 0) { xl_xact_abort xlrec; @@ -841,9 +790,9 @@ RecordTransactionAbort(void) recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, (char*) &xlrec, SizeOfXactAbort, NULL, 0); + TransactionIdAbort(xid); MyProc->logRec.xrecoff = 0; } -#endif /* * Tell bufmgr and smgr to release resources. 
@@ -1748,8 +1697,6 @@ IsTransactionBlock(void) return false; } -#ifdef XLOG - void xact_redo(XLogRecPtr lsn, XLogRecord *record) { @@ -1760,7 +1707,7 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record) xl_xact_commit *xlrec = (xl_xact_commit*) XLogRecGetData(record); TransactionIdCommit(record->xl_xid); - /* MUST REMOVE FILES OF ALL DROPPED RELATIONS */ + /* SHOULD REMOVE FILES OF ALL DROPPED RELATIONS */ } else if (info == XLOG_XACT_ABORT) { @@ -1825,5 +1772,3 @@ XactPopRollback(void) { _RollbackFunc = NULL; } - -#endif diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b96159b414..70be51695d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.37 2000/11/30 01:47:31 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.38 2000/11/30 08:46:22 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -1443,12 +1443,10 @@ void BootStrapXLOG() { CheckPoint checkPoint; -#ifdef XLOG char buffer[BLCKSZ]; bool usexistent = false; XLogPageHeader page = (XLogPageHeader) buffer; XLogRecord *record; -#endif checkPoint.redo.xlogid = 0; checkPoint.redo.xrecoff = SizeOfXLogPHD; @@ -1462,8 +1460,6 @@ BootStrapXLOG() ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; -#ifdef XLOG - memset(buffer, 0, BLCKSZ); page->xlp_magic = XLOG_PAGE_MAGIC; page->xlp_info = 0; @@ -1488,8 +1484,6 @@ BootStrapXLOG() close(logFile); logFile = -1; -#endif - memset(ControlFile, 0, sizeof(ControlFileData)); ControlFile->logId = 0; ControlFile->logSeg = 1; @@ -1513,14 +1507,12 @@ str_time(time_t tnow) return buf; } - /* * This func must be called ONCE on system startup */ void StartupXLOG() { -#ifdef XLOG XLogCtlInsert *Insert; CheckPoint checkPoint; XLogRecPtr RecPtr, @@ -1529,8 +1521,6 @@ StartupXLOG() char buffer[MAXLOGRECSZ + SizeOfXLogRecord]; bool sie_saved = false; -#endif - elog(LOG, "starting up"); XLogCtl->xlblocks = (XLogRecPtr *) (((char *) XLogCtl) + sizeof(XLogCtlData)); @@ -1580,8 +1570,6 @@ StartupXLOG() elog(LOG, "database system was interrupted at %s", str_time(ControlFile->time)); -#ifdef XLOG - LastRec = RecPtr = ControlFile->checkPoint; if (!XRecOffIsValid(RecPtr.xrecoff)) elog(STOP, "Invalid checkPoint in control file"); @@ -1602,12 +1590,7 @@ StartupXLOG() checkPoint.nextXid, checkPoint.nextOid); if (checkPoint.nextXid < FirstTransactionId || checkPoint.nextOid < BootstrapObjectIdData) - -#ifdef XLOG_2 elog(STOP, "Invalid NextTransactionId/NextOid"); -#else - elog(LOG, "Invalid NextTransactionId/NextOid"); -#endif ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; @@ -1751,8 +1734,6 @@ StartupXLOG() } InRecovery = false; -#endif /* XLOG */ - ControlFile->state = DB_IN_PRODUCTION; ControlFile->time = time(NULL); UpdateControlFile(); @@ -1783,9 +1764,7 @@ ShutdownXLOG() { elog(LOG, "shutting down"); -#ifdef XLOG CreateDummyCaches(); -#endif CreateCheckPoint(true); elog(LOG, "database system is shut down"); @@ -1796,7 +1775,6 @@ extern XLogRecPtr GetUndoRecPtr(void); void CreateCheckPoint(bool shutdown) { -#ifdef XLOG CheckPoint checkPoint; XLogRecPtr recptr; XLogCtlInsert *Insert = &XLogCtl->Insert; @@ -1880,12 +1858,9 @@ CreateCheckPoint(bool shutdown) XLogFlush(recptr); -#endif /* XLOG */ - 
SpinAcquire(ControlFileLockId); if (shutdown) ControlFile->state = DB_SHUTDOWNED; -#ifdef XLOG else /* create new log file */ { if (recptr.xrecoff % XLogSegSize >= @@ -1914,16 +1889,10 @@ CreateCheckPoint(bool shutdown) _logSeg = ControlFile->logSeg - 1; strcpy(archdir, ControlFile->archdir); -#else - ControlFile->checkPoint.xlogid = 0; - ControlFile->checkPoint.xrecoff = SizeOfXLogPHD; -#endif - ControlFile->time = time(NULL); UpdateControlFile(); SpinRelease(ControlFileLockId); -#ifdef XLOG /* * Delete offline log files. Get oldest online * log file from undo rec if it's valid. @@ -1948,7 +1917,6 @@ CreateCheckPoint(bool shutdown) S_UNLOCK(&(XLogCtl->chkp_lck)); MyLastRecPtr.xrecoff = 0; /* to avoid commit record */ -#endif return; } diff --git a/src/backend/access/transam/xlog_varsup.c b/src/backend/access/transam/xlog_varsup.c deleted file mode 100644 index fd64085e78..0000000000 --- a/src/backend/access/transam/xlog_varsup.c +++ /dev/null @@ -1,142 +0,0 @@ -/*------------------------------------------------------------------------- - * - * varsup.c - * postgres OID & XID variables support routines - * - * Copyright (c) 2000, PostgreSQL, Inc - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/Attic/xlog_varsup.c,v 1.1 2000/11/03 11:39:35 vadim Exp $ - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include "access/transam.h" -#include "storage/proc.h" - -SPINLOCK OidGenLockId; - -extern SPINLOCK XidGenLockId; -extern void XLogPutNextOid(Oid nextOid); - -/* pointer to "variable cache" in shared memory (set up by shmem.c) */ -VariableCache ShmemVariableCache = NULL; - -void -GetNewTransactionId(TransactionId *xid) -{ - /* - * During bootstrap initialization, we return the special - * bootstrap transaction id. - */ - if (AMI_OVERRIDE) - { - *xid = AmiTransactionId; - return; - } - - SpinAcquire(XidGenLockId); - *xid = ShmemVariableCache->nextXid; - (ShmemVariableCache->nextXid)++; - - if (MyProc != (PROC *) NULL) - MyProc->xid = *xid; - - SpinRelease(XidGenLockId); - -} - -/* - * Like GetNewTransactionId reads nextXid but don't fetch it. - */ -void -ReadNewTransactionId(TransactionId *xid) -{ - - /* - * During bootstrap initialization, we return the special - * bootstrap transaction id. 
- */ - if (AMI_OVERRIDE) - { - *xid = AmiTransactionId; - return; - } - - SpinAcquire(XidGenLockId); - *xid = ShmemVariableCache->nextXid; - SpinRelease(XidGenLockId); - -} - -/* ---------------------------------------------------------------- - * object id generation support - * ---------------------------------------------------------------- - */ - -#define VAR_OID_PREFETCH 8192 -static Oid lastSeenOid = InvalidOid; - -void -GetNewObjectId(Oid *oid_return) -{ - SpinAcquire(OidGenLockId); - - /* If we run out of logged for use oids then we log more */ - if (ShmemVariableCache->oidCount == 0) - { - XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH); - ShmemVariableCache->oidCount = VAR_OID_PREFETCH; - } - - if (PointerIsValid(oid_return)) - lastSeenOid = (*oid_return) = ShmemVariableCache->nextOid; - - (ShmemVariableCache->nextOid)++; - (ShmemVariableCache->oidCount)--; - - SpinRelease(OidGenLockId); -} - -void -CheckMaxObjectId(Oid assigned_oid) -{ - - if (lastSeenOid != InvalidOid && assigned_oid < lastSeenOid) - return; - - SpinAcquire(OidGenLockId); - if (assigned_oid < ShmemVariableCache->nextOid) - { - lastSeenOid = ShmemVariableCache->nextOid - 1; - SpinRelease(OidGenLockId); - return; - } - - /* If we are in the logged oid range, just bump nextOid up */ - if (assigned_oid <= ShmemVariableCache->nextOid + - ShmemVariableCache->oidCount - 1) - { - ShmemVariableCache->oidCount -= - assigned_oid - ShmemVariableCache->nextOid + 1; - ShmemVariableCache->nextOid = assigned_oid + 1; - SpinRelease(OidGenLockId); - return; - } - - /* - * We have exceeded the logged oid range. - * We should lock the database and kill all other backends - * but we are loading oid's that we can not guarantee are unique - * anyway, so we must rely on the user. - */ - - XLogPutNextOid(assigned_oid + VAR_OID_PREFETCH); - ShmemVariableCache->oidCount = VAR_OID_PREFETCH - 1; - ShmemVariableCache->nextOid = assigned_oid + 1; - - SpinRelease(OidGenLockId); - -} diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index ab085875b5..acd19da263 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -10,9 +10,6 @@ */ #include "postgres.h" - -#ifdef XLOG - #include "access/xlog.h" #include "access/transam.h" #include "access/xact.h" @@ -397,5 +394,3 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) return(&(res->reldata)); } - -#endif diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 95f302d93e..6e8e27a748 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.69 2000/11/18 03:36:48 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/dbcommands.c,v 1.70 2000/11/30 08:46:22 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -153,13 +153,11 @@ createdb(const char *dbname, const char *dbpath, elog(ERROR, "database path may not contain single quotes"); /* ... otherwise we'd be open to shell exploits below */ -#ifdef XLOG /* Force dirty buffers out to disk, to ensure source database is * up-to-date for the copy. (We really only need to flush buffers * for the source database...) 
*/ BufferSync(); -#endif /* * Close virtual file descriptors so the kernel has more available for @@ -255,13 +253,11 @@ createdb(const char *dbname, const char *dbpath, /* Close pg_database, but keep lock till commit */ heap_close(pg_database_rel, NoLock); -#ifdef XLOG /* Force dirty buffers out to disk, so that newly-connecting backends * will see the new database in pg_database right away. (They'll see * an uncommitted tuple, but they don't care; see GetRawDatabaseInfo.) */ BufferSync(); -#endif } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 3aeae1409b..5f2e193d05 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.173 2000/11/16 22:30:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.174 2000/11/30 08:46:22 vadim Exp $ * *------------------------------------------------------------------------- @@ -47,11 +47,9 @@ #include #endif -#ifdef XLOG #include "access/xlog.h" -XLogRecPtr log_heap_move(Relation reln, - ItemPointerData from, HeapTuple newtup); -#endif +extern XLogRecPtr log_heap_move(Relation reln, + ItemPointerData from, HeapTuple newtup); static MemoryContext vac_context = NULL; @@ -1492,7 +1490,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid); ItemPointerSet(&(newtup.t_self), destvacpage->blkno, newoff); -#ifdef XLOG { XLogRecPtr recptr = log_heap_move(onerel, tuple.t_self, &newtup); @@ -1505,7 +1502,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, PageSetLSN(ToPage, recptr); PageSetSUI(ToPage, ThisStartUpID); } -#endif if (((int) destvacpage->blkno) > last_move_dest_block) last_move_dest_block = destvacpage->blkno; @@ -1655,7 +1651,6 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)" ~(HEAP_XMIN_COMMITTED | HEAP_XMIN_INVALID | HEAP_MOVED_IN); tuple.t_data->t_infomask |= HEAP_MOVED_OFF; -#ifdef XLOG { XLogRecPtr recptr = log_heap_move(onerel, tuple.t_self, &newtup); @@ -1665,7 +1660,6 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)" PageSetLSN(ToPage, recptr); PageSetSUI(ToPage, ThisStartUpID); } -#endif cur_page->offsets_used++; num_moved++; @@ -1786,19 +1780,12 @@ failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)" if (num_moved > 0) { -#ifdef XLOG - RecordTransactionCommit(); -#else /* * We have to commit our tuple' movings before we'll truncate * relation, but we shouldn't lose our locks. And so - quick hack: - * flush buffers and record status of current transaction as - * committed, and continue. - vadim 11/13/96 + * record status of current transaction as committed, and continue. 
*/ - FlushBufferPool(); - TransactionIdCommit(myXID); - FlushBufferPool(); -#endif + RecordTransactionCommit(); } /* diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index a25d4d9a55..9400da3805 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------- * - * bufmgr.c + * xlog_bufmgr.c * buffer manager interface routines * * Portions Copyright (c) 1996-2000, PostgreSQL, Inc @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.96 2000/11/30 01:39:07 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.97 2000/11/30 08:46:23 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -31,9 +31,6 @@ * * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() * - * FlushBuffer() -- Write buffer immediately. Can unpin, or not, - * depending on parameter. - * * BufferSync() -- flush all dirty buffers in the buffer pool. * * InitBufferPool() -- Init the buffer module. @@ -42,13 +39,8 @@ * freelist.c -- chooses victim for buffer replacement * buf_table.c -- manages the buffer lookup table */ - #include "postgres.h" -#ifdef XLOG -#include "xlog_bufmgr.c" -#else - #include #include #include @@ -61,10 +53,11 @@ #include "storage/s_lock.h" #include "storage/smgr.h" #include "utils/relcache.h" - -#ifdef XLOG #include "catalog/pg_database.h" -#endif + +#define BufferGetLSN(bufHdr) \ + (*((XLogRecPtr*)MAKE_PTR((bufHdr)->data))) + extern SPINLOCK BufMgrLock; extern long int ReadBufferCount; @@ -99,9 +92,6 @@ static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, bool bufferLockHeld); static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr, bool bufferLockHeld); -static void SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr); -static void ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr); -static void BufferSync(void); static int BufferReplace(BufferDesc *bufHdr); void PrintBufferDescs(void); @@ -169,48 +159,6 @@ ReadBuffer(Relation reln, BlockNumber blockNum) return ReadBufferWithBufferLock(reln, blockNum, false); } -/* - * is_userbuffer - * - * XXX caller must have already acquired BufMgrLock - */ -#ifdef NOT_USED -static bool -is_userbuffer(Buffer buffer) -{ - BufferDesc *buf = &BufferDescriptors[buffer - 1]; - - if (IsSystemRelationName(buf->blind.relname)) - return false; - return true; -} - -#endif - -#ifdef NOT_USED -Buffer -ReadBuffer_Debug(char *file, - int line, - Relation reln, - BlockNumber blockNum) -{ - Buffer buffer; - - buffer = ReadBufferWithBufferLock(reln, blockNum, false); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) - { - BufferDesc *buf = &BufferDescriptors[buffer - 1]; - - fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - buffer, buf->blind.relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } - return buffer; -} - -#endif - /* * ReadBufferWithBufferLock -- does the work of * ReadBuffer() but with the possibility that @@ -447,7 +395,7 @@ BufferAlloc(Relation reln, buf->refcount = 1; PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1; - if (buf->flags & BM_DIRTY) + if (buf->flags & BM_DIRTY || buf->cntxDirty) { bool smok; @@ -505,18 +453,18 @@ BufferAlloc(Relation reln, } else { - /* * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't * be 
setted by anyone. - vadim 01/17/97 */ if (buf->flags & BM_JUST_DIRTIED) { - elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing", + elog(STOP, "BufferAlloc: content of block %u (%s) changed while flushing", buf->tag.blockNum, buf->blind.relname); } else buf->flags &= ~BM_DIRTY; + buf->cntxDirty = false; } /* @@ -676,131 +624,15 @@ WriteBuffer(Buffer buffer) SpinAcquire(BufMgrLock); Assert(bufHdr->refcount > 0); + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - SetBufferDirtiedByMe(buffer, bufHdr); + UnpinBuffer(bufHdr); SpinRelease(BufMgrLock); return TRUE; } -#ifdef NOT_USED -void -WriteBuffer_Debug(char *file, int line, Buffer buffer) -{ - WriteBuffer(buffer); - if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) - { - BufferDesc *buf; - - buf = &BufferDescriptors[buffer - 1]; - fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - buffer, buf->blind.relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } -} - -#endif - -/* - * FlushBuffer -- like WriteBuffer, but write the page immediately, - * rather than just marking it dirty. On success return, the buffer will - * no longer be dirty. - * - * 'buffer' is known to be dirty/pinned, so there should not be a - * problem reading the BufferDesc members without the BufMgrLock - * (nobody should be able to change tags out from under us). - * - * If 'sync' is true, a synchronous write is wanted (wait for buffer to hit - * the disk). Otherwise it's sufficient to issue the kernel write call. - * - * Unpin buffer if 'release' is true. - */ -int -FlushBuffer(Buffer buffer, bool sync, bool release) -{ - BufferDesc *bufHdr; - Relation bufrel; - int status; - - if (BufferIsLocal(buffer)) - return FlushLocalBuffer(buffer, sync, release) ? STATUS_OK : STATUS_ERROR; - - if (BAD_BUFFER_ID(buffer)) - return STATUS_ERROR; - - Assert(PrivateRefCount[buffer - 1] > 0); /* else caller didn't pin */ - - bufHdr = &BufferDescriptors[buffer - 1]; - - bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode); - - Assert(bufrel != (Relation) NULL); - - SharedBufferChanged = true; - - /* To check if block content changed while flushing. - vadim 01/17/97 */ - SpinAcquire(BufMgrLock); - WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */ - bufHdr->flags &= ~BM_JUST_DIRTIED; - StartBufferIO(bufHdr, false); /* output IO start */ - - SpinRelease(BufMgrLock); - - /* - * Grab a read lock on the buffer to ensure that no - * other backend changes its contents while we write it; - * see comments in BufferSync(). 
- */ - LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_SHARE); - - if (sync) - status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - else - status = smgrwrite(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - - LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_UNLOCK); - - /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ - RelationDecrementReferenceCount(bufrel); - - if (status == SM_FAIL) - { - elog(ERROR, "FlushBuffer: cannot flush block %u of the relation %s", - bufHdr->tag.blockNum, bufHdr->blind.relname); - return STATUS_ERROR; - } - BufferFlushCount++; - - SpinAcquire(BufMgrLock); - bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ - TerminateBufferIO(bufHdr); /* output IO finished */ - - /* - * If this buffer was marked by someone as DIRTY while we were - * flushing it out we must not clear shared DIRTY flag - vadim - * 01/17/97 - * - * ... but we can clear BufferDirtiedByMe anyway - tgl 3/31/00 - */ - if (bufHdr->flags & BM_JUST_DIRTIED) - { - elog(NOTICE, "FlushBuffer: content of block %u (%s) changed while flushing", - bufHdr->tag.blockNum, bufHdr->blind.relname); - } - else - bufHdr->flags &= ~BM_DIRTY; - ClearBufferDirtiedByMe(buffer, bufHdr); - if (release) - UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); - - return STATUS_OK; -} - /* * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer * when the operation is complete. @@ -822,8 +654,9 @@ WriteNoReleaseBuffer(Buffer buffer) SpinAcquire(BufMgrLock); Assert(bufHdr->refcount > 0); + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - SetBufferDirtiedByMe(buffer, bufHdr); + SpinRelease(BufMgrLock); return STATUS_OK; @@ -876,307 +709,138 @@ ReleaseAndReadBuffer(Buffer buffer, } /* - * SetBufferDirtiedByMe -- mark a shared buffer as being dirtied by this xact - * - * This flag essentially remembers that we need to write and fsync this buffer - * before we can commit the transaction. The write might end up getting done - * by another backend, but we must do the fsync ourselves (else we could - * commit before the data actually reaches disk). We do not issue fsync - * instantly upon write; the storage manager keeps track of which files need - * to be fsync'd before commit can occur. A key aspect of this data structure - * is that we will be able to notify the storage manager that an fsync is - * needed even after another backend has done the physical write and replaced - * the buffer contents with something else! + * BufferSync -- Write all dirty buffers in the pool. * - * NB: we must be holding the bufmgr lock at entry, and the buffer must be - * pinned so that no other backend can take it away from us. + * This is called at checkpoint time and write out all dirty buffers. */ -static void -SetBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr) -{ - BufferTag *tagLastDirtied = &BufferTagLastDirtied[buffer - 1]; - Relation reln; - int status; - - /* - * If the flag is already set, check to see whether the buffertag is - * the same. If not, some other backend already wrote the buffer data - * that we dirtied. We must tell the storage manager to make an fsync - * pending on that file before we can overwrite the old tag value. 
- */ - if (BufferDirtiedByMe[buffer - 1]) - { - if (RelFileNodeEquals(bufHdr->tag.rnode, tagLastDirtied->rnode) && - bufHdr->tag.blockNum == tagLastDirtied->blockNum) - return; /* Same tag already dirtied, so no work */ - -#ifndef OPTIMIZE_SINGLE - SpinRelease(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - - reln = RelationNodeCacheGetRelation(tagLastDirtied->rnode); - - if (reln == (Relation) NULL) - { - status = smgrblindmarkdirty(DEFAULT_SMGR, - tagLastDirtied->rnode, - tagLastDirtied->blockNum); - } - else - { - Assert(RelFileNodeEquals(tagLastDirtied->rnode, reln->rd_node)); - status = smgrmarkdirty(DEFAULT_SMGR, reln, - tagLastDirtied->blockNum); - - /* - * drop relcache refcnt incremented by - * RelationNodeCacheGetRelation - */ - RelationDecrementReferenceCount(reln); - } - if (status == SM_FAIL) - { - elog(ERROR, "SetBufferDirtiedByMe: cannot mark %u for %s", - tagLastDirtied->blockNum, - BufferBlindLastDirtied[buffer - 1].relname); - } - -#ifndef OPTIMIZE_SINGLE - SpinAcquire(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - - } - - *tagLastDirtied = bufHdr->tag; - BufferBlindLastDirtied[buffer - 1] = bufHdr->blind; - BufferDirtiedByMe[buffer - 1] = true; -} - -/* - * ClearBufferDirtiedByMe -- mark a shared buffer as no longer needing fsync - * - * If we write out a buffer ourselves, then the storage manager will set its - * needs-fsync flag for that file automatically, and so we can clear our own - * flag that says it needs to be done later. - * - * NB: we must be holding the bufmgr lock at entry. - */ -static void -ClearBufferDirtiedByMe(Buffer buffer, BufferDesc *bufHdr) -{ - BufferTag *tagLastDirtied = &BufferTagLastDirtied[buffer - 1]; - - /* - * Do *not* clear the flag if it refers to some other buffertag than - * the data we just wrote. This is unlikely, but possible if some - * other backend replaced the buffer contents since we set our flag. - */ - if (RelFileNodeEquals(bufHdr->tag.rnode, tagLastDirtied->rnode) && - bufHdr->tag.blockNum == tagLastDirtied->blockNum) - BufferDirtiedByMe[buffer - 1] = false; -} - -/* - * BufferSync -- Flush all dirty buffers in the pool. - * - * This is called at transaction commit time. We find all buffers - * that have been dirtied by the current xact and flush them to disk. - * We do *not* flush dirty buffers that have been dirtied by other xacts. - * (This is a substantial change from pre-7.0 behavior.) - */ -static void +void BufferSync() { int i; BufferDesc *bufHdr; + Buffer buffer; int status; - Relation reln; - bool didwrite; + RelFileNode rnode; + XLogRecPtr recptr; + Relation reln = NULL; for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) { - /* Ignore buffers that were not dirtied by me */ - if (!BufferDirtiedByMe[i]) - continue; SpinAcquire(BufMgrLock); - /* - * We only need to write if the buffer is still dirty and still - * contains the same disk page that it contained when we dirtied - * it. Otherwise, someone else has already written our changes for - * us, and we need only fsync. - * - * (NOTE: it's still possible to do an unnecessary write, if other - * xacts have written and then re-dirtied the page since our last - * change to it. But that should be pretty uncommon, and there's - * no easy way to detect it anyway.) 
- */ - reln = NULL; - didwrite = false; - if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) + if (!(bufHdr->flags & BM_VALID)) { - if (RelFileNodeEquals(bufHdr->tag.rnode, BufferTagLastDirtied[i].rnode) && - bufHdr->tag.blockNum == BufferTagLastDirtied[i].blockNum) - { - /* - * Try to find relation for buf. This could fail, if the - * rel has been flushed from the relcache since we dirtied - * the page. That should be uncommon, so paying the extra - * cost of a blind write when it happens seems OK. - */ - if (!InRecovery) - reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode); - - /* - * We have to pin buffer to keep anyone from stealing it - * from the buffer pool while we are flushing it or - * waiting in WaitIO. It's bad for GetFreeBuffer in - * BufferAlloc, but there is no other way to prevent - * writing into disk block data from some other buffer, - * getting smgr status of some other block and clearing - * BM_DIRTY of ... - VAdim 09/16/96 - */ - PinBuffer(bufHdr); - if (bufHdr->flags & BM_IO_IN_PROGRESS) - { - WaitIO(bufHdr, BufMgrLock); - UnpinBuffer(bufHdr); - if (bufHdr->flags & BM_IO_ERROR) - { - elog(ERROR, "BufferSync: write error %u for %s", - bufHdr->tag.blockNum, bufHdr->blind.relname); - } - } - else - { - - /* - * To check if block content changed while flushing - * (see below). - vadim 01/17/97 - */ - WaitIO(bufHdr, BufMgrLock); /* confirm end of IO */ - bufHdr->flags &= ~BM_JUST_DIRTIED; - StartBufferIO(bufHdr, false); /* output IO start */ - - SpinRelease(BufMgrLock); - - /* - * Grab a read lock on the buffer to ensure that no - * other backend changes its contents while we write it; - * otherwise we could write a non-self-consistent page - * image to disk, which'd be bad news if the other - * transaction aborts before writing its changes. - * - * Note that we still need the BM_JUST_DIRTIED mechanism - * in case someone dirties the buffer just before we - * grab this lock or just after we release it. - */ - LockBuffer(BufferDescriptorGetBuffer(bufHdr), - BUFFER_LOCK_SHARE); + SpinRelease(BufMgrLock); + continue; + } - /* - * If we didn't have the reldesc in our local cache, - * write this page out using the 'blind write' storage - * manager routine. If we did find it, use the - * standard interface. - */ - if (reln == (Relation) NULL) - { - status = smgrblindwrt(DEFAULT_SMGR, - bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data), - true); /* must fsync */ - } - else - { - status = smgrwrite(DEFAULT_SMGR, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } + /* + * Pin buffer and ensure that no one reads it from disk + */ + PinBuffer(bufHdr); + /* Synchronize with BufferAlloc */ + if (bufHdr->flags & BM_IO_IN_PROGRESS) + WaitIO(bufHdr, BufMgrLock); - /* - * Release the per-buffer readlock, reacquire BufMgrLock. 
- */ - LockBuffer(BufferDescriptorGetBuffer(bufHdr), - BUFFER_LOCK_UNLOCK); + buffer = BufferDescriptorGetBuffer(bufHdr); + rnode = bufHdr->tag.rnode; - SpinAcquire(BufMgrLock); + SpinRelease(BufMgrLock); - UnpinBuffer(bufHdr); - if (status == SM_FAIL) - { - bufHdr->flags |= BM_IO_ERROR; - elog(ERROR, "BufferSync: cannot write %u for %s", - bufHdr->tag.blockNum, bufHdr->blind.relname); - } - bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ - TerminateBufferIO(bufHdr); /* Sync IO finished */ - BufferFlushCount++; - didwrite = true; + /* + * Try to find relation for buffer + */ + reln = RelationNodeCacheGetRelation(rnode); - /* - * If this buffer was marked by someone as DIRTY while - * we were flushing it out we must not clear DIRTY - * flag - vadim 01/17/97 - * - * but it is OK to clear BufferDirtiedByMe - tgl 3/31/00 - */ - if (!(bufHdr->flags & BM_JUST_DIRTIED)) - bufHdr->flags &= ~BM_DIRTY; - } + /* + * Protect buffer content against concurrent update + */ + LockBuffer(buffer, BUFFER_LOCK_SHARE); - /* drop refcnt obtained by RelationNodeCacheGetRelation */ - if (reln != (Relation) NULL) - RelationDecrementReferenceCount(reln); - } - } + /* + * Force XLOG flush for buffer' LSN + */ + recptr = BufferGetLSN(bufHdr); + XLogFlush(recptr); /* - * If we did not write the buffer (because someone else did), we - * must still fsync the file containing it, to ensure that the - * write is down to disk before we commit. + * Now it's safe to write buffer to disk + * (if needed at all -:)) */ - if (!didwrite) + + SpinAcquire(BufMgrLock); + if (bufHdr->flags & BM_IO_IN_PROGRESS) + WaitIO(bufHdr, BufMgrLock); + + if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { -#ifndef OPTIMIZE_SINGLE + bufHdr->flags &= ~BM_JUST_DIRTIED; + StartBufferIO(bufHdr, false); /* output IO start */ + SpinRelease(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ - reln = RelationNodeCacheGetRelation(BufferTagLastDirtied[i].rnode); if (reln == (Relation) NULL) { - status = smgrblindmarkdirty(DEFAULT_SMGR, - BufferTagLastDirtied[i].rnode, - BufferTagLastDirtied[i].blockNum); + status = smgrblindwrt(DEFAULT_SMGR, + bufHdr->tag.rnode, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data), + true); /* must fsync */ } else { - status = smgrmarkdirty(DEFAULT_SMGR, reln, - BufferTagLastDirtied[i].blockNum); + status = smgrwrite(DEFAULT_SMGR, reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + } - /* - * drop relcache refcnt incremented by - * RelationNodeCacheGetRelation - */ - RelationDecrementReferenceCount(reln); + if (status == SM_FAIL) /* disk failure ?! */ + elog(STOP, "BufferSync: cannot write %u for %s", + bufHdr->tag.blockNum, bufHdr->blind.relname); + + /* + * Note that it's safe to change cntxDirty here because of + * we protect it from upper writers by share lock and from + * other bufmgr routines by BM_IO_IN_PROGRESS + */ + bufHdr->cntxDirty = false; + + /* + * Release the per-buffer readlock, reacquire BufMgrLock. 
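+			 * The page was already written out above while we held the
+			 * share lock, and XLogFlush() was called on the page's LSN
+			 * before that write, so the WAL-before-data rule is
+			 * satisfied.  In essence, the ordering enforced here is:
+			 *
+			 *		recptr = BufferGetLSN(bufHdr);
+			 *		XLogFlush(recptr);				-- WAL first
+			 *		smgrwrite() or smgrblindwrt();	-- then the data page
+			 *
+			 * Only after that do we drop the per-buffer lock and count
+			 * the flush.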
+ */ + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + BufferFlushCount++; - } -#ifndef OPTIMIZE_SINGLE SpinAcquire(BufMgrLock); -#endif /* OPTIMIZE_SINGLE */ + + bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ + TerminateBufferIO(bufHdr); /* Sync IO finished */ + + /* + * If this buffer was marked by someone as DIRTY while + * we were flushing it out we must not clear DIRTY + * flag - vadim 01/17/97 + */ + if (!(bufHdr->flags & BM_JUST_DIRTIED)) + bufHdr->flags &= ~BM_DIRTY; } + else + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - BufferDirtiedByMe[i] = false; + UnpinBuffer(bufHdr); SpinRelease(BufMgrLock); + + /* drop refcnt obtained by RelationNodeCacheGetRelation */ + if (reln != (Relation) NULL) + { + RelationDecrementReferenceCount(reln); + reln = NULL; + } } -#ifndef XLOG - LocalBufferSync(); -#endif -} +} /* * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared. @@ -1278,9 +942,6 @@ ResetBufferPool(bool isCommit) SpinRelease(BufMgrLock); } PrivateRefCount[i] = 0; - - if (!isCommit) - BufferDirtiedByMe[i] = false; } ResetLocalBufferPool(); @@ -1321,16 +982,29 @@ relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)", } /* ------------------------------------------------ - * FlushBufferPool - * - * flush all dirty blocks in buffer pool to disk + * FlushBufferPool * + * Flush all dirty blocks in buffer pool to disk + * at the checkpoint time * ------------------------------------------------ */ void FlushBufferPool(void) { BufferSync(); + smgrsync(); +} + +/* + * At the commit time we have to flush local buffer pool only + */ +void +BufmgrCommit(void) +{ + LocalBufferSync(); + /* + * All files created in current transaction will be fsync-ed + */ smgrcommit(); } @@ -1358,35 +1032,28 @@ BufferGetBlockNumber(Buffer buffer) * * Write out the buffer corresponding to 'bufHdr' * - * This routine used to flush the data to disk (ie, force immediate fsync) - * but that's no longer necessary because BufferSync is smarter than before. - * * BufMgrLock must be held at entry, and the buffer must be pinned. */ static int BufferReplace(BufferDesc *bufHdr) { Relation reln; + XLogRecPtr recptr; int status; - /* - * first try to find the reldesc in the cache, if no luck, don't - * bother to build the reldesc from scratch, just do a blind write. - */ - - reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode); - /* To check if block content changed while flushing. - vadim 01/17/97 */ bufHdr->flags &= ~BM_JUST_DIRTIED; SpinRelease(BufMgrLock); /* - * Grab a read lock on the buffer to ensure that no - * other backend changes its contents while we write it; - * see comments in BufferSync(). + * No need to lock buffer context - no one should be able to + * end ReadBuffer */ - LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_SHARE); + recptr = BufferGetLSN(bufHdr); + XLogFlush(recptr); + + reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode); if (reln != (Relation) NULL) { @@ -1401,25 +1068,15 @@ BufferReplace(BufferDesc *bufHdr) false); /* no fsync */ } - LockBuffer(BufferDescriptorGetBuffer(bufHdr), BUFFER_LOCK_UNLOCK); - - SpinAcquire(BufMgrLock); - /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); + SpinAcquire(BufMgrLock); + if (status == SM_FAIL) return FALSE; - /* - * If we had marked this buffer as needing to be fsync'd, we can - * forget about that, because it's now the storage manager's - * responsibility (but only if we called smgrwrite, not smgrblindwrt). 
- */ - if (reln != (Relation) NULL) - ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr); - BufferFlushCount++; return TRUE; @@ -1438,7 +1095,8 @@ BlockNumber RelationGetNumberOfBlocks(Relation relation) { return ((relation->rd_myxactonly) ? relation->rd_nblocks : - smgrnblocks(DEFAULT_SMGR, relation)); + ((relation->rd_rel->relkind == RELKIND_VIEW) ? 0 : + smgrnblocks(DEFAULT_SMGR, relation))); } /* --------------------------------------------------------------------- @@ -1471,6 +1129,7 @@ DropRelationBuffers(Relation rel) if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) { bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; LocalRefCount[i] = 0; bufHdr->tag.rnode.relNode = InvalidOid; } @@ -1503,6 +1162,7 @@ recheck: } /* Now we can do what we came for */ bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; /* * Release any refcount we may have. @@ -1526,20 +1186,6 @@ recheck: */ BufTableDelete(bufHdr); } - - /* - * Also check to see if BufferDirtiedByMe info for this buffer - * refers to the target relation, and clear it if so. This is - * independent of whether the current contents of the buffer - * belong to the target relation! - * - * NOTE: we have no way to clear BufferDirtiedByMe info in other - * backends, but hopefully there are none with that bit set for - * this rel, since we hold exclusive lock on this rel. - */ - if (RelFileNodeEquals(rel->rd_node, - BufferTagLastDirtied[i - 1].rnode)) - BufferDirtiedByMe[i - 1] = false; } SpinRelease(BufMgrLock); @@ -1570,6 +1216,7 @@ DropRelFileNodeBuffers(RelFileNode rnode) if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) { bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; LocalRefCount[i] = 0; bufHdr->tag.rnode.relNode = InvalidOid; } @@ -1600,6 +1247,7 @@ recheck: } /* Now we can do what we came for */ bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; /* * Release any refcount we may have. @@ -1623,20 +1271,6 @@ recheck: */ BufTableDelete(bufHdr); } - - /* - * Also check to see if BufferDirtiedByMe info for this buffer - * refers to the target relation, and clear it if so. This is - * independent of whether the current contents of the buffer - * belong to the target relation! - * - * NOTE: we have no way to clear BufferDirtiedByMe info in other - * backends, but hopefully there are none with that bit set for - * this rel, since we hold exclusive lock on this rel. - */ - if (RelFileNodeEquals(rnode, - BufferTagLastDirtied[i - 1].rnode)) - BufferDirtiedByMe[i - 1] = false; } SpinRelease(BufMgrLock); @@ -1689,6 +1323,7 @@ recheck: } /* Now we can do what we came for */ bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; /* * The thing should be free, if caller has checked that no @@ -1700,17 +1335,6 @@ recheck: */ BufTableDelete(bufHdr); } - /* - * Also check to see if BufferDirtiedByMe info for this buffer - * refers to the target database, and clear it if so. This is - * independent of whether the current contents of the buffer - * belong to the target database! - * - * (Actually, this is probably unnecessary, since I shouldn't have - * ever dirtied pages of the target database, but...) 
- */ - if (BufferTagLastDirtied[i - 1].rnode.tblNode == dbid) - BufferDirtiedByMe[i - 1] = false; } SpinRelease(BufMgrLock); } @@ -1847,6 +1471,8 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) { int i; BufferDesc *bufHdr; + XLogRecPtr recptr; + int status; if (rel->rd_myxactonly) { @@ -1855,22 +1481,27 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) bufHdr = &LocalBufferDescriptors[i]; if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) { - if (bufHdr->flags & BM_DIRTY) + if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { - if (FlushBuffer(-i - 1, false, false) != STATUS_OK) + status = smgrwrite(DEFAULT_SMGR, rel, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + if (status == SM_FAIL) { elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it", RelationGetRelationName(rel), firstDelBlock, bufHdr->tag.blockNum); - return -1; + return(-1); } + bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); + bufHdr->cntxDirty = false; } if (LocalRefCount[i] > 0) { elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)", RelationGetRelationName(rel), firstDelBlock, bufHdr->tag.blockNum, LocalRefCount[i]); - return -2; + return(-2); } if (bufHdr->tag.blockNum >= firstDelBlock) { @@ -1887,22 +1518,57 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) bufHdr = &BufferDescriptors[i]; if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) { - if (bufHdr->flags & BM_DIRTY) + if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { PinBuffer(bufHdr); + if (bufHdr->flags & BM_IO_IN_PROGRESS) + WaitIO(bufHdr, BufMgrLock); SpinRelease(BufMgrLock); - if (FlushBuffer(i + 1, false, false) != STATUS_OK) + + /* + * Force XLOG flush for buffer' LSN + */ + recptr = BufferGetLSN(bufHdr); + XLogFlush(recptr); + + /* + * Now it's safe to write buffer to disk + */ + + SpinAcquire(BufMgrLock); + if (bufHdr->flags & BM_IO_IN_PROGRESS) + WaitIO(bufHdr, BufMgrLock); + + if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { - SpinAcquire(BufMgrLock); - UnpinBuffer(bufHdr); + bufHdr->flags &= ~BM_JUST_DIRTIED; + StartBufferIO(bufHdr, false); /* output IO start */ + SpinRelease(BufMgrLock); - elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is dirty (private %ld, global %d), could not flush it", - RelationGetRelationName(rel), firstDelBlock, - bufHdr->tag.blockNum, - PrivateRefCount[i], bufHdr->refcount); - return -1; + + status = smgrwrite(DEFAULT_SMGR, rel, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); + + if (status == SM_FAIL) /* disk failure ?! 
*/ + elog(STOP, "FlushRelationBuffers: cannot write %u for %s", + bufHdr->tag.blockNum, bufHdr->blind.relname); + + BufferFlushCount++; + + SpinAcquire(BufMgrLock); + bufHdr->flags &= ~BM_IO_IN_PROGRESS; + TerminateBufferIO(bufHdr); + Assert(!(bufHdr->flags & BM_JUST_DIRTIED)); + bufHdr->flags &= ~BM_DIRTY; + /* + * Note that it's safe to change cntxDirty here because + * of we protect it from upper writers by + * AccessExclusiveLock and from other bufmgr routines + * by BM_IO_IN_PROGRESS + */ + bufHdr->cntxDirty = false; } - SpinAcquire(BufMgrLock); UnpinBuffer(bufHdr); } if (!(bufHdr->flags & BM_FREE)) @@ -2341,6 +2007,9 @@ LockBuffer(Buffer buffer, int mode) } buf->w_lock = true; *buflock |= BL_W_LOCK; + + buf->cntxDirty = true; + if (*buflock & BL_RI_LOCK) { @@ -2458,11 +2127,11 @@ AbortBufferIO(void) Assert(buf->flags & BM_IO_IN_PROGRESS); SpinAcquire(BufMgrLock); if (IsForInput) - Assert(!(buf->flags & BM_DIRTY)); + Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty)); else { - Assert((buf->flags & BM_DIRTY) != 0); - if ((buf->flags & BM_IO_ERROR) != 0) + Assert(buf->flags & BM_DIRTY || buf->cntxDirty); + if (buf->flags & BM_IO_ERROR) { elog(NOTICE, "write error may be permanent: cannot write block %u for %s/%s", buf->tag.blockNum, buf->blind.dbname, buf->blind.relname); @@ -2528,5 +2197,3 @@ MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer)) SpinRelease(BufMgrLock); return; } - -#endif /* ! XLOG */ diff --git a/src/backend/storage/buffer/xlog_bufmgr.c b/src/backend/storage/buffer/xlog_bufmgr.c deleted file mode 100644 index fb02413f97..0000000000 --- a/src/backend/storage/buffer/xlog_bufmgr.c +++ /dev/null @@ -1,2202 +0,0 @@ -/*------------------------------------------------------------------------- - * - * xlog_bufmgr.c - * buffer manager interface routines - * - * Portions Copyright (c) 1996-2000, PostgreSQL, Inc - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_bufmgr.c,v 1.6 2000/11/30 01:39:07 tgl Exp $ - * - *------------------------------------------------------------------------- - */ -/* - * - * BufferAlloc() -- lookup a buffer in the buffer table. If - * it isn't there add it, but do not read data into memory. - * This is used when we are about to reinitialize the - * buffer so don't care what the current disk contents are. - * BufferAlloc() also pins the new buffer in memory. - * - * ReadBuffer() -- like BufferAlloc() but reads the data - * on a buffer cache miss. - * - * ReleaseBuffer() -- unpin the buffer - * - * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty" - * but don't unpin. The disk IO is delayed until buffer - * replacement. - * - * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() - * - * BufferSync() -- flush all dirty buffers in the buffer pool. - * - * InitBufferPool() -- Init the buffer module. 
- * - * See other files: - * freelist.c -- chooses victim for buffer replacement - * buf_table.c -- manages the buffer lookup table - */ -#include "postgres.h" - -#include -#include -#include -#include - -#include "executor/execdebug.h" -#include "miscadmin.h" -#include "storage/buf_internals.h" -#include "storage/bufmgr.h" -#include "storage/s_lock.h" -#include "storage/smgr.h" -#include "utils/relcache.h" - -#ifdef XLOG -#include "catalog/pg_database.h" -#endif - -#define BufferGetLSN(bufHdr) \ - (*((XLogRecPtr*)MAKE_PTR((bufHdr)->data))) - - -extern SPINLOCK BufMgrLock; -extern long int ReadBufferCount; -extern long int ReadLocalBufferCount; -extern long int BufferHitCount; -extern long int LocalBufferHitCount; -extern long int BufferFlushCount; -extern long int LocalBufferFlushCount; - -/* - * It's used to avoid disk writes for read-only transactions - * (i.e. when no one shared buffer was changed by transaction). - * We set it to true in WriteBuffer/WriteNoReleaseBuffer when - * marking shared buffer as dirty. We set it to false in xact.c - * after transaction is committed/aborted. - */ -bool SharedBufferChanged = false; - -static void WaitIO(BufferDesc *buf, SPINLOCK spinlock); -static void StartBufferIO(BufferDesc *buf, bool forInput); -static void TerminateBufferIO(BufferDesc *buf); -static void ContinueBufferIO(BufferDesc *buf, bool forInput); -extern void AbortBufferIO(void); - -/* - * Macro : BUFFER_IS_BROKEN - * Note that write error doesn't mean the buffer broken -*/ -#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY)) - -static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, - bool bufferLockHeld); -static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, - bool *foundPtr, bool bufferLockHeld); -static int BufferReplace(BufferDesc *bufHdr); -void PrintBufferDescs(void); - -/* --------------------------------------------------- - * RelationGetBufferWithBuffer - * see if the given buffer is what we want - * if yes, we don't need to bother the buffer manager - * --------------------------------------------------- - */ -Buffer -RelationGetBufferWithBuffer(Relation relation, - BlockNumber blockNumber, - Buffer buffer) -{ - BufferDesc *bufHdr; - - if (BufferIsValid(buffer)) - { - if (!BufferIsLocal(buffer)) - { - bufHdr = &BufferDescriptors[buffer - 1]; - SpinAcquire(BufMgrLock); - if (bufHdr->tag.blockNum == blockNumber && - RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node)) - { - SpinRelease(BufMgrLock); - return buffer; - } - return ReadBufferWithBufferLock(relation, blockNumber, true); - } - else - { - bufHdr = &LocalBufferDescriptors[-buffer - 1]; - if (bufHdr->tag.blockNum == blockNumber && - RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node)) - return buffer; - } - } - return ReadBuffer(relation, blockNumber); -} - -/* - * ReadBuffer -- returns a buffer containing the requested - * block of the requested relation. If the blknum - * requested is P_NEW, extend the relation file and - * allocate a new block. - * - * Returns: the buffer number for the buffer containing - * the block read or NULL on an error. - * - * Assume when this function is called, that reln has been - * opened already. 
- */ - -#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG - * defined */ - -/* - * ReadBuffer - * - */ -Buffer -ReadBuffer(Relation reln, BlockNumber blockNum) -{ - return ReadBufferWithBufferLock(reln, blockNum, false); -} - -/* - * ReadBufferWithBufferLock -- does the work of - * ReadBuffer() but with the possibility that - * the buffer lock has already been held. this - * is yet another effort to reduce the number of - * semops in the system. - */ -static Buffer -ReadBufferWithBufferLock(Relation reln, - BlockNumber blockNum, - bool bufferLockHeld) -{ - BufferDesc *bufHdr; - int extend; /* extending the file by one block */ - int status; - bool found; - bool isLocalBuf; - - extend = (blockNum == P_NEW); - isLocalBuf = reln->rd_myxactonly; - - if (isLocalBuf) - { - ReadLocalBufferCount++; - bufHdr = LocalBufferAlloc(reln, blockNum, &found); - if (found) - LocalBufferHitCount++; - } - else - { - ReadBufferCount++; - - /* - * lookup the buffer. IO_IN_PROGRESS is set if the requested - * block is not currently in memory. - */ - bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld); - if (found) - BufferHitCount++; - } - - if (!bufHdr) - return InvalidBuffer; - - /* if it's already in the buffer pool, we're done */ - if (found) - { - - /* - * This happens when a bogus buffer was returned previously and is - * floating around in the buffer pool. A routine calling this - * would want this extended. - */ - if (extend) - { - /* new buffers are zero-filled */ - MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - smgrextend(DEFAULT_SMGR, reln, - (char *) MAKE_PTR(bufHdr->data)); - } - return BufferDescriptorGetBuffer(bufHdr); - - } - - /* - * if we have gotten to this point, the reln pointer must be ok and - * the relation file must be open. - */ - if (extend) - { - /* new buffers are zero-filled */ - MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - status = smgrextend(DEFAULT_SMGR, reln, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - status = smgrread(DEFAULT_SMGR, reln, blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - if (isLocalBuf) - return BufferDescriptorGetBuffer(bufHdr); - - /* lock buffer manager again to update IO IN PROGRESS */ - SpinAcquire(BufMgrLock); - - if (status == SM_FAIL) - { - /* IO Failed. cleanup the data structures and go home */ - - if (!BufTableDelete(bufHdr)) - { - SpinRelease(BufMgrLock); - elog(FATAL, "BufRead: buffer table broken after IO error\n"); - } - /* remember that BufferAlloc() pinned the buffer */ - UnpinBuffer(bufHdr); - - /* - * Have to reset the flag so that anyone waiting for the buffer - * can tell that the contents are invalid. - */ - bufHdr->flags |= BM_IO_ERROR; - bufHdr->flags &= ~BM_IO_IN_PROGRESS; - } - else - { - /* IO Succeeded. clear the flags, finish buffer update */ - - bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); - } - - /* If anyone was waiting for IO to complete, wake them up now */ - TerminateBufferIO(bufHdr); - - SpinRelease(BufMgrLock); - - if (status == SM_FAIL) - return InvalidBuffer; - - return BufferDescriptorGetBuffer(bufHdr); -} - -/* - * BufferAlloc -- Get a buffer from the buffer pool but dont - * read it. - * - * Returns: descriptor for buffer - * - * When this routine returns, the BufMgrLock is guaranteed NOT be held. 
- */ -static BufferDesc * -BufferAlloc(Relation reln, - BlockNumber blockNum, - bool *foundPtr, - bool bufferLockHeld) -{ - BufferDesc *buf, - *buf2; - BufferTag newTag; /* identity of requested block */ - bool inProgress; /* buffer undergoing IO */ - bool newblock = FALSE; - - /* create a new tag so we can lookup the buffer */ - /* assume that the relation is already open */ - if (blockNum == P_NEW) - { - newblock = TRUE; - blockNum = smgrnblocks(DEFAULT_SMGR, reln); - } - - INIT_BUFFERTAG(&newTag, reln, blockNum); - - if (!bufferLockHeld) - SpinAcquire(BufMgrLock); - - /* see if the block is in the buffer pool already */ - buf = BufTableLookup(&newTag); - if (buf != NULL) - { - - /* - * Found it. Now, (a) pin the buffer so no one steals it from the - * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting - * the buffer into the buffer pool. - */ - - PinBuffer(buf); - inProgress = (buf->flags & BM_IO_IN_PROGRESS); - - *foundPtr = TRUE; - if (inProgress) /* confirm end of IO */ - { - WaitIO(buf, BufMgrLock); - inProgress = (buf->flags & BM_IO_IN_PROGRESS); - } - if (BUFFER_IS_BROKEN(buf)) - { - - /* - * I couldn't understand the following old comment. If there's - * no IO for the buffer and the buffer is BROKEN,it should be - * read again. So start a new buffer IO here. - * - * wierd race condition: - * - * We were waiting for someone else to read the buffer. While we - * were waiting, the reader boof'd in some way, so the - * contents of the buffer are still invalid. By saying that - * we didn't find it, we can make the caller reinitialize the - * buffer. If two processes are waiting for this block, both - * will read the block. The second one to finish may - * overwrite any updates made by the first. (Assume higher - * level synchronization prevents this from happening). - * - * This is never going to happen, don't worry about it. - */ - *foundPtr = FALSE; - } -#ifdef BMTRACE - _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND); -#endif /* BMTRACE */ - - if (!(*foundPtr)) - StartBufferIO(buf, true); - SpinRelease(BufMgrLock); - - return buf; - } - - *foundPtr = FALSE; - - /* - * Didn't find it in the buffer pool. We'll have to initialize a new - * buffer. First, grab one from the free list. If it's dirty, flush - * it to disk. Remember to unlock BufMgr spinlock while doing the IOs. - */ - inProgress = FALSE; - for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) - { - buf = GetFreeBuffer(); - - /* GetFreeBuffer will abort if it can't find a free buffer */ - Assert(buf); - - /* - * There should be exactly one pin on the buffer after it is - * allocated -- ours. If it had a pin it wouldn't have been on - * the free list. No one else could have pinned it between - * GetFreeBuffer and here because we have the BufMgrLock. - */ - Assert(buf->refcount == 0); - buf->refcount = 1; - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1; - - if (buf->flags & BM_DIRTY || buf->cntxDirty) - { - bool smok; - - /* - * skip write error buffers - */ - if ((buf->flags & BM_IO_ERROR) != 0) - { - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - buf->refcount--; - buf = (BufferDesc *) NULL; - continue; - } - /* - * Set BM_IO_IN_PROGRESS to keep anyone from doing anything - * with the contents of the buffer while we write it out. 
We - * don't really care if they try to read it, but if they can - * complete a BufferAlloc on it they can then scribble into - * it, and we'd really like to avoid that while we are - * flushing the buffer. Setting this flag should block them - * in WaitIO until we're done. - */ - inProgress = TRUE; - - /* - * All code paths that acquire this lock pin the buffer first; - * since no one had it pinned (it just came off the free - * list), no one else can have this lock. - */ - StartBufferIO(buf, false); - - /* - * Write the buffer out, being careful to release BufMgrLock - * before starting the I/O. - */ - smok = BufferReplace(buf); - - if (smok == FALSE) - { - elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s", - buf->tag.blockNum, buf->blind.dbname, buf->blind.relname); - inProgress = FALSE; - buf->flags |= BM_IO_ERROR; - buf->flags &= ~BM_IO_IN_PROGRESS; - TerminateBufferIO(buf); - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - Assert(buf->refcount > 0); - buf->refcount--; - if (buf->refcount == 0) - { - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - } - buf = (BufferDesc *) NULL; - } - else - { - /* - * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't - * be setted by anyone. - vadim 01/17/97 - */ - if (buf->flags & BM_JUST_DIRTIED) - { - elog(STOP, "BufferAlloc: content of block %u (%s) changed while flushing", - buf->tag.blockNum, buf->blind.relname); - } - else - buf->flags &= ~BM_DIRTY; - buf->cntxDirty = false; - } - - /* - * Somebody could have pinned the buffer while we were doing - * the I/O and had given up the BufMgrLock (though they would - * be waiting for us to clear the BM_IO_IN_PROGRESS flag). - * That's why this is a loop -- if so, we need to clear the - * I/O flags, remove our pin and start all over again. - * - * People may be making buffers free at any time, so there's no - * reason to think that we have an immediate disaster on our - * hands. - */ - if (buf && buf->refcount > 1) - { - inProgress = FALSE; - buf->flags &= ~BM_IO_IN_PROGRESS; - TerminateBufferIO(buf); - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - buf->refcount--; - buf = (BufferDesc *) NULL; - } - - /* - * Somebody could have allocated another buffer for the same - * block we are about to read in. (While we flush out the - * dirty buffer, we don't hold the lock and someone could have - * allocated another buffer for the same block. The problem is - * we haven't gotten around to insert the new tag into the - * buffer table. So we need to check here. -ay 3/95 - */ - buf2 = BufTableLookup(&newTag); - if (buf2 != NULL) - { - - /* - * Found it. Someone has already done what we're about to - * do. We'll just handle this as if it were found in the - * buffer pool in the first place. 
- */ - if (buf != NULL) - { - buf->flags &= ~BM_IO_IN_PROGRESS; - TerminateBufferIO(buf); - /* give up the buffer since we don't need it any more */ - PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; - Assert(buf->refcount > 0); - buf->refcount--; - if (buf->refcount == 0) - { - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - } - } - - PinBuffer(buf2); - inProgress = (buf2->flags & BM_IO_IN_PROGRESS); - - *foundPtr = TRUE; - if (inProgress) - { - WaitIO(buf2, BufMgrLock); - inProgress = (buf2->flags & BM_IO_IN_PROGRESS); - } - if (BUFFER_IS_BROKEN(buf2)) - *foundPtr = FALSE; - - if (!(*foundPtr)) - StartBufferIO(buf2, true); - SpinRelease(BufMgrLock); - - return buf2; - } - } - } - - /* - * At this point we should have the sole pin on a non-dirty buffer and - * we may or may not already have the BM_IO_IN_PROGRESS flag set. - */ - - /* - * Change the name of the buffer in the lookup table: - * - * Need to update the lookup table before the read starts. If someone - * comes along looking for the buffer while we are reading it in, we - * don't want them to allocate a new buffer. For the same reason, we - * didn't want to erase the buf table entry for the buffer we were - * writing back until now, either. - */ - - if (!BufTableDelete(buf)) - { - SpinRelease(BufMgrLock); - elog(FATAL, "buffer wasn't in the buffer table\n"); - } - - /* record the database name and relation name for this buffer */ - strcpy(buf->blind.dbname, (DatabaseName) ? DatabaseName : "Recovery"); - strcpy(buf->blind.relname, RelationGetPhysicalRelationName(reln)); - - INIT_BUFFERTAG(&(buf->tag), reln, blockNum); - if (!BufTableInsert(buf)) - { - SpinRelease(BufMgrLock); - elog(FATAL, "Buffer in lookup table twice \n"); - } - - /* - * Buffer contents are currently invalid. Have to mark IO IN PROGRESS - * so no one fiddles with them until the read completes. If this - * routine has been called simply to allocate a buffer, no io will be - * attempted, so the flag isnt set. - */ - if (!inProgress) - StartBufferIO(buf, true); - else - ContinueBufferIO(buf, true); - -#ifdef BMTRACE - _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND); -#endif /* BMTRACE */ - - SpinRelease(BufMgrLock); - - return buf; -} - -/* - * WriteBuffer - * - * Marks buffer contents as dirty (actual write happens later). - * - * Assume that buffer is pinned. Assume that reln is - * valid. - * - * Side Effects: - * Pin count is decremented. - */ - -#undef WriteBuffer - -int -WriteBuffer(Buffer buffer) -{ - BufferDesc *bufHdr; - - if (BufferIsLocal(buffer)) - return WriteLocalBuffer(buffer, TRUE); - - if (BAD_BUFFER_ID(buffer)) - return FALSE; - - bufHdr = &BufferDescriptors[buffer - 1]; - - SharedBufferChanged = true; - - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - - UnpinBuffer(bufHdr); - SpinRelease(BufMgrLock); - - return TRUE; -} - -/* - * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer - * when the operation is complete. 
- */ -int -WriteNoReleaseBuffer(Buffer buffer) -{ - BufferDesc *bufHdr; - - if (BufferIsLocal(buffer)) - return WriteLocalBuffer(buffer, FALSE); - - if (BAD_BUFFER_ID(buffer)) - return STATUS_ERROR; - - bufHdr = &BufferDescriptors[buffer - 1]; - - SharedBufferChanged = true; - - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - - SpinRelease(BufMgrLock); - - return STATUS_OK; -} - - -#undef ReleaseAndReadBuffer -/* - * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer() - * so that only one semop needs to be called. - * - */ -Buffer -ReleaseAndReadBuffer(Buffer buffer, - Relation relation, - BlockNumber blockNum) -{ - BufferDesc *bufHdr; - Buffer retbuf; - - if (BufferIsLocal(buffer)) - { - Assert(LocalRefCount[-buffer - 1] > 0); - LocalRefCount[-buffer - 1]--; - } - else - { - if (BufferIsValid(buffer)) - { - bufHdr = &BufferDescriptors[buffer - 1]; - Assert(PrivateRefCount[buffer - 1] > 0); - PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0) - { - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - bufHdr->refcount--; - if (bufHdr->refcount == 0) - { - AddBufferToFreelist(bufHdr); - bufHdr->flags |= BM_FREE; - } - retbuf = ReadBufferWithBufferLock(relation, blockNum, true); - return retbuf; - } - } - } - - return ReadBuffer(relation, blockNum); -} - -/* - * BufferSync -- Write all dirty buffers in the pool. - * - * This is called at checkpoint time and write out all dirty buffers. - */ -void -BufferSync() -{ - int i; - BufferDesc *bufHdr; - Buffer buffer; - int status; - RelFileNode rnode; - XLogRecPtr recptr; - Relation reln = NULL; - - for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) - { - - SpinAcquire(BufMgrLock); - - if (!(bufHdr->flags & BM_VALID)) - { - SpinRelease(BufMgrLock); - continue; - } - - /* - * Pin buffer and ensure that no one reads it from disk - */ - PinBuffer(bufHdr); - /* Synchronize with BufferAlloc */ - if (bufHdr->flags & BM_IO_IN_PROGRESS) - WaitIO(bufHdr, BufMgrLock); - - buffer = BufferDescriptorGetBuffer(bufHdr); - rnode = bufHdr->tag.rnode; - - SpinRelease(BufMgrLock); - - /* - * Try to find relation for buffer - */ - reln = RelationNodeCacheGetRelation(rnode); - - /* - * Protect buffer content against concurrent update - */ - LockBuffer(buffer, BUFFER_LOCK_SHARE); - - /* - * Force XLOG flush for buffer' LSN - */ - recptr = BufferGetLSN(bufHdr); - XLogFlush(recptr); - - /* - * Now it's safe to write buffer to disk - * (if needed at all -:)) - */ - - SpinAcquire(BufMgrLock); - if (bufHdr->flags & BM_IO_IN_PROGRESS) - WaitIO(bufHdr, BufMgrLock); - - if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) - { - bufHdr->flags &= ~BM_JUST_DIRTIED; - StartBufferIO(bufHdr, false); /* output IO start */ - - SpinRelease(BufMgrLock); - - if (reln == (Relation) NULL) - { - status = smgrblindwrt(DEFAULT_SMGR, - bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data), - true); /* must fsync */ - } - else - { - status = smgrwrite(DEFAULT_SMGR, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - if (status == SM_FAIL) /* disk failure ?! */ - elog(STOP, "BufferSync: cannot write %u for %s", - bufHdr->tag.blockNum, bufHdr->blind.relname); - - /* - * Note that it's safe to change cntxDirty here because of - * we protect it from upper writers by share lock and from - * other bufmgr routines by BM_IO_IN_PROGRESS - */ - bufHdr->cntxDirty = false; - - /* - * Release the per-buffer readlock, reacquire BufMgrLock. 
- */ - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - BufferFlushCount++; - - SpinAcquire(BufMgrLock); - - bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */ - TerminateBufferIO(bufHdr); /* Sync IO finished */ - - /* - * If this buffer was marked by someone as DIRTY while - * we were flushing it out we must not clear DIRTY - * flag - vadim 01/17/97 - */ - if (!(bufHdr->flags & BM_JUST_DIRTIED)) - bufHdr->flags &= ~BM_DIRTY; - } - else - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - - UnpinBuffer(bufHdr); - - SpinRelease(BufMgrLock); - - /* drop refcnt obtained by RelationNodeCacheGetRelation */ - if (reln != (Relation) NULL) - { - RelationDecrementReferenceCount(reln); - reln = NULL; - } - } - -} - -/* - * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf' is cleared. - * - * Should be entered with buffer manager spinlock held; releases it before - * waiting and re-acquires it afterwards. - */ -static void -WaitIO(BufferDesc *buf, SPINLOCK spinlock) -{ - - /* - * Changed to wait until there's no IO - Inoue 01/13/2000 - */ - while ((buf->flags & BM_IO_IN_PROGRESS) != 0) - { - SpinRelease(spinlock); - S_LOCK(&(buf->io_in_progress_lock)); - S_UNLOCK(&(buf->io_in_progress_lock)); - SpinAcquire(spinlock); - } -} - - -long NDirectFileRead; /* some I/O's are direct file access. - * bypass bufmgr */ -long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */ - -void -PrintBufferUsage(FILE *statfp) -{ - float hitrate; - float localhitrate; - - if (ReadBufferCount == 0) - hitrate = 0.0; - else - hitrate = (float) BufferHitCount *100.0 / ReadBufferCount; - - if (ReadLocalBufferCount == 0) - localhitrate = 0.0; - else - localhitrate = (float) LocalBufferHitCount *100.0 / ReadLocalBufferCount; - - fprintf(statfp, "!\tShared blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", - ReadBufferCount - BufferHitCount, BufferFlushCount, hitrate); - fprintf(statfp, "!\tLocal blocks: %10ld read, %10ld written, buffer hit rate = %.2f%%\n", - ReadLocalBufferCount - LocalBufferHitCount, LocalBufferFlushCount, localhitrate); - fprintf(statfp, "!\tDirect blocks: %10ld read, %10ld written\n", - NDirectFileRead, NDirectFileWrite); -} - -void -ResetBufferUsage() -{ - BufferHitCount = 0; - ReadBufferCount = 0; - BufferFlushCount = 0; - LocalBufferHitCount = 0; - ReadLocalBufferCount = 0; - LocalBufferFlushCount = 0; - NDirectFileRead = 0; - NDirectFileWrite = 0; -} - -/* ---------------------------------------------- - * ResetBufferPool - * - * This routine is supposed to be called when a transaction aborts. - * it will release all the buffer pins held by the transaction. - * Currently, we also call it during commit if BufferPoolCheckLeak - * detected a problem --- in that case, isCommit is TRUE, and we - * only clean up buffer pin counts. - * - * During abort, we also forget any pending fsync requests. Dirtied buffers - * will still get written, eventually, but there will be no fsync for them. 
- * - * ---------------------------------------------- - */ -void -ResetBufferPool(bool isCommit) -{ - int i; - - for (i = 0; i < NBuffers; i++) - { - if (PrivateRefCount[i] != 0) - { - BufferDesc *buf = &BufferDescriptors[i]; - - SpinAcquire(BufMgrLock); - Assert(buf->refcount > 0); - buf->refcount--; - if (buf->refcount == 0) - { - AddBufferToFreelist(buf); - buf->flags |= BM_FREE; - } - SpinRelease(BufMgrLock); - } - PrivateRefCount[i] = 0; - } - - ResetLocalBufferPool(); - - if (!isCommit) - smgrabort(); -} - -/* ----------------------------------------------- - * BufferPoolCheckLeak - * - * check if there is buffer leak - * - * ----------------------------------------------- - */ -int -BufferPoolCheckLeak() -{ - int i; - int result = 0; - - for (i = 1; i <= NBuffers; i++) - { - if (PrivateRefCount[i - 1] != 0) - { - BufferDesc *buf = &(BufferDescriptors[i - 1]); - - elog(NOTICE, - "Buffer Leak: [%03d] (freeNext=%ld, freePrev=%ld, \ -relname=%s, blockNum=%d, flags=0x%x, refcount=%d %ld)", - i - 1, buf->freeNext, buf->freePrev, - buf->blind.relname, buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i - 1]); - result = 1; - } - } - return result; -} - -/* ------------------------------------------------ - * FlushBufferPool - * - * Flush all dirty blocks in buffer pool to disk - * at the checkpoint time - * ------------------------------------------------ - */ -void -FlushBufferPool(void) -{ - BufferSync(); - smgrsync(); -} - -/* - * At the commit time we have to flush local buffer pool only - */ -void -BufmgrCommit(void) -{ - LocalBufferSync(); - /* - * All files created in current transaction will be fsync-ed - */ - smgrcommit(); -} - -/* - * BufferGetBlockNumber - * Returns the block number associated with a buffer. - * - * Note: - * Assumes that the buffer is valid. - */ -BlockNumber -BufferGetBlockNumber(Buffer buffer) -{ - Assert(BufferIsValid(buffer)); - - /* XXX should be a critical section */ - if (BufferIsLocal(buffer)) - return LocalBufferDescriptors[-buffer - 1].tag.blockNum; - else - return BufferDescriptors[buffer - 1].tag.blockNum; -} - -/* - * BufferReplace - * - * Write out the buffer corresponding to 'bufHdr' - * - * BufMgrLock must be held at entry, and the buffer must be pinned. - */ -static int -BufferReplace(BufferDesc *bufHdr) -{ - Relation reln; - XLogRecPtr recptr; - int status; - - /* To check if block content changed while flushing. - vadim 01/17/97 */ - bufHdr->flags &= ~BM_JUST_DIRTIED; - - SpinRelease(BufMgrLock); - - /* - * No need to lock buffer context - no one should be able to - * end ReadBuffer - */ - recptr = BufferGetLSN(bufHdr); - XLogFlush(recptr); - - reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode); - - if (reln != (Relation) NULL) - { - status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data), - false); /* no fsync */ - } - - /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ - if (reln != (Relation) NULL) - RelationDecrementReferenceCount(reln); - - SpinAcquire(BufMgrLock); - - if (status == SM_FAIL) - return FALSE; - - BufferFlushCount++; - - return TRUE; -} - -/* - * RelationGetNumberOfBlocks - * Returns the buffer descriptor associated with a page in a relation. - * - * Note: - * XXX may fail for huge relations. - * XXX should be elsewhere. 
- * XXX maybe should be hidden - */ -BlockNumber -RelationGetNumberOfBlocks(Relation relation) -{ - return ((relation->rd_myxactonly) ? relation->rd_nblocks : - ((relation->rd_rel->relkind == RELKIND_VIEW) ? 0 : - smgrnblocks(DEFAULT_SMGR, relation))); -} - -/* --------------------------------------------------------------------- - * DropRelationBuffers - * - * This function removes all the buffered pages for a relation - * from the buffer pool. Dirty pages are simply dropped, without - * bothering to write them out first. This is NOT rollback-able, - * and so should be used only with extreme caution! - * - * We assume that the caller holds an exclusive lock on the relation, - * which should assure that no new buffers will be acquired for the rel - * meanwhile. - * - * XXX currently it sequentially searches the buffer pool, should be - * changed to more clever ways of searching. - * -------------------------------------------------------------------- - */ -void -DropRelationBuffers(Relation rel) -{ - int i; - BufferDesc *bufHdr; - - if (rel->rd_myxactonly) - { - for (i = 0; i < NLocBuffer; i++) - { - bufHdr = &LocalBufferDescriptors[i]; - if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) - { - bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->cntxDirty = false; - LocalRefCount[i] = 0; - bufHdr->tag.rnode.relNode = InvalidOid; - } - } - return; - } - - SpinAcquire(BufMgrLock); - for (i = 1; i <= NBuffers; i++) - { - bufHdr = &BufferDescriptors[i - 1]; -recheck: - if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) - { - - /* - * If there is I/O in progress, better wait till it's done; - * don't want to delete the relation out from under someone - * who's just trying to flush the buffer! - */ - if (bufHdr->flags & BM_IO_IN_PROGRESS) - { - WaitIO(bufHdr, BufMgrLock); - - /* - * By now, the buffer very possibly belongs to some other - * rel, so check again before proceeding. - */ - goto recheck; - } - /* Now we can do what we came for */ - bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->cntxDirty = false; - - /* - * Release any refcount we may have. - * - * This is very probably dead code, and if it isn't then it's - * probably wrong. I added the Assert to find out --- tgl - * 11/99. - */ - if (!(bufHdr->flags & BM_FREE)) - { - /* Assert checks that buffer will actually get freed! */ - Assert(PrivateRefCount[i - 1] == 1 && - bufHdr->refcount == 1); - /* ReleaseBuffer expects we do not hold the lock at entry */ - SpinRelease(BufMgrLock); - ReleaseBuffer(i); - SpinAcquire(BufMgrLock); - } - /* - * And mark the buffer as no longer occupied by this rel. - */ - BufTableDelete(bufHdr); - } - } - - SpinRelease(BufMgrLock); -} - -/* --------------------------------------------------------------------- - * DropRelFileNodeBuffers - * - * This is the same as DropRelationBuffers, except that the target - * relation is specified by RelFileNode. - * - * This is NOT rollback-able. One legitimate use is to clear the - * buffer cache of buffers for a relation that is being deleted - * during transaction abort. - * -------------------------------------------------------------------- - */ -void -DropRelFileNodeBuffers(RelFileNode rnode) -{ - int i; - BufferDesc *bufHdr; - - /* We have to search both local and shared buffers... 
*/ - - for (i = 0; i < NLocBuffer; i++) - { - bufHdr = &LocalBufferDescriptors[i]; - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) - { - bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->cntxDirty = false; - LocalRefCount[i] = 0; - bufHdr->tag.rnode.relNode = InvalidOid; - } - } - - SpinAcquire(BufMgrLock); - for (i = 1; i <= NBuffers; i++) - { - bufHdr = &BufferDescriptors[i - 1]; -recheck: - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) - { - - /* - * If there is I/O in progress, better wait till it's done; - * don't want to delete the relation out from under someone - * who's just trying to flush the buffer! - */ - if (bufHdr->flags & BM_IO_IN_PROGRESS) - { - WaitIO(bufHdr, BufMgrLock); - - /* - * By now, the buffer very possibly belongs to some other - * rel, so check again before proceeding. - */ - goto recheck; - } - /* Now we can do what we came for */ - bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->cntxDirty = false; - - /* - * Release any refcount we may have. - * - * This is very probably dead code, and if it isn't then it's - * probably wrong. I added the Assert to find out --- tgl - * 11/99. - */ - if (!(bufHdr->flags & BM_FREE)) - { - /* Assert checks that buffer will actually get freed! */ - Assert(PrivateRefCount[i - 1] == 1 && - bufHdr->refcount == 1); - /* ReleaseBuffer expects we do not hold the lock at entry */ - SpinRelease(BufMgrLock); - ReleaseBuffer(i); - SpinAcquire(BufMgrLock); - } - /* - * And mark the buffer as no longer occupied by this rel. - */ - BufTableDelete(bufHdr); - } - } - - SpinRelease(BufMgrLock); -} - -/* --------------------------------------------------------------------- - * DropBuffers - * - * This function removes all the buffers in the buffer cache for a - * particular database. Dirty pages are simply dropped, without - * bothering to write them out first. This is used when we destroy a - * database, to avoid trying to flush data to disk when the directory - * tree no longer exists. Implementation is pretty similar to - * DropRelationBuffers() which is for destroying just one relation. - * -------------------------------------------------------------------- - */ -void -DropBuffers(Oid dbid) -{ - int i; - BufferDesc *bufHdr; - - SpinAcquire(BufMgrLock); - for (i = 1; i <= NBuffers; i++) - { - bufHdr = &BufferDescriptors[i - 1]; -recheck: - /* - * We know that currently database OID is tblNode but - * this probably will be changed in future and this - * func will be used to drop tablespace buffers. - */ - if (bufHdr->tag.rnode.tblNode == dbid) - { - - /* - * If there is I/O in progress, better wait till it's done; - * don't want to delete the database out from under someone - * who's just trying to flush the buffer! - */ - if (bufHdr->flags & BM_IO_IN_PROGRESS) - { - WaitIO(bufHdr, BufMgrLock); - - /* - * By now, the buffer very possibly belongs to some other - * DB, so check again before proceeding. - */ - goto recheck; - } - /* Now we can do what we came for */ - bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->cntxDirty = false; - - /* - * The thing should be free, if caller has checked that no - * backends are running in that database. - */ - Assert(bufHdr->flags & BM_FREE); - /* - * And mark the buffer as no longer occupied by this page. - */ - BufTableDelete(bufHdr); - } - } - SpinRelease(BufMgrLock); -} - -/* ----------------------------------------------------------------- - * PrintBufferDescs - * - * this function prints all the buffer descriptors, for debugging - * use only. 
- * ----------------------------------------------------------------- - */ -void -PrintBufferDescs() -{ - int i; - BufferDesc *buf = BufferDescriptors; - - if (IsUnderPostmaster) - { - SpinAcquire(BufMgrLock); - for (i = 0; i < NBuffers; ++i, ++buf) - { - elog(DEBUG, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \ -blockNum=%d, flags=0x%x, refcount=%d %ld)", - i, buf->freeNext, buf->freePrev, - buf->blind.relname, buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); - } - SpinRelease(BufMgrLock); - } - else - { - /* interactive backend */ - for (i = 0; i < NBuffers; ++i, ++buf) - { - printf("[%-2d] (%s, %d) flags=0x%x, refcnt=%d %ld)\n", - i, buf->blind.relname, buf->tag.blockNum, - buf->flags, buf->refcount, PrivateRefCount[i]); - } - } -} - -void -PrintPinnedBufs() -{ - int i; - BufferDesc *buf = BufferDescriptors; - - SpinAcquire(BufMgrLock); - for (i = 0; i < NBuffers; ++i, ++buf) - { - if (PrivateRefCount[i] > 0) - elog(NOTICE, "[%02d] (freeNext=%ld, freePrev=%ld, relname=%s, \ -blockNum=%d, flags=0x%x, refcount=%d %ld)\n", - i, buf->freeNext, buf->freePrev, buf->blind.relname, - buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); - } - SpinRelease(BufMgrLock); -} - -/* - * BufferPoolBlowaway - * - * this routine is solely for the purpose of experiments -- sometimes - * you may want to blowaway whatever is left from the past in buffer - * pool and start measuring some performance with a clean empty buffer - * pool. - */ -#ifdef NOT_USED -void -BufferPoolBlowaway() -{ - int i; - - BufferSync(); - for (i = 1; i <= NBuffers; i++) - { - if (BufferIsValid(i)) - { - while (BufferIsValid(i)) - ReleaseBuffer(i); - } - BufTableDelete(&BufferDescriptors[i - 1]); - } -} - -#endif - -/* --------------------------------------------------------------------- - * FlushRelationBuffers - * - * This function writes all dirty pages of a relation out to disk. - * Furthermore, pages that have blocknumber >= firstDelBlock are - * actually removed from the buffer pool. An error code is returned - * if we fail to dump a dirty buffer or if we find one of - * the target pages is pinned into the cache. - * - * This is called by DROP TABLE to clear buffers for the relation - * from the buffer pool. Note that we must write dirty buffers, - * rather than just dropping the changes, because our transaction - * might abort later on; we want to roll back safely in that case. - * - * This is also called by VACUUM before truncating the relation to the - * given number of blocks. It might seem unnecessary for VACUUM to - * write dirty pages before firstDelBlock, since VACUUM should already - * have committed its changes. However, it is possible for there still - * to be dirty pages: if some page had unwritten on-row tuple status - * updates from a prior transaction, and VACUUM had no additional - * changes to make to that page, then VACUUM won't have written it. - * This is harmless in most cases but will break pg_upgrade, which - * relies on VACUUM to ensure that *all* tuples have correct on-row - * status. So, we check and flush all dirty pages of the rel - * regardless of block number. - * - * In all cases, the caller should be holding AccessExclusiveLock on - * the target relation to ensure that no other backend is busy reading - * more blocks of the relation (or might do so before we commit). - * - * Formerly, we considered it an error condition if we found dirty - * buffers here. 
However, since BufferSync no longer forces out all - * dirty buffers at every xact commit, it's possible for dirty buffers - * to still be present in the cache due to failure of an earlier - * transaction. So, must flush dirty buffers without complaint. - * - * Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED - * - * XXX currently it sequentially searches the buffer pool, should be - * changed to more clever ways of searching. - * -------------------------------------------------------------------- - */ -int -FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) -{ - int i; - BufferDesc *bufHdr; - XLogRecPtr recptr; - int status; - - if (rel->rd_myxactonly) - { - for (i = 0; i < NLocBuffer; i++) - { - bufHdr = &LocalBufferDescriptors[i]; - if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) - { - if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) - { - status = smgrwrite(DEFAULT_SMGR, rel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - if (status == SM_FAIL) - { - elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is dirty, could not flush it", - RelationGetRelationName(rel), firstDelBlock, - bufHdr->tag.blockNum); - return(-1); - } - bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->cntxDirty = false; - } - if (LocalRefCount[i] > 0) - { - elog(NOTICE, "FlushRelationBuffers(%s (local), %u): block %u is referenced (%ld)", - RelationGetRelationName(rel), firstDelBlock, - bufHdr->tag.blockNum, LocalRefCount[i]); - return(-2); - } - if (bufHdr->tag.blockNum >= firstDelBlock) - { - bufHdr->tag.rnode.relNode = InvalidOid; - } - } - } - return 0; - } - - SpinAcquire(BufMgrLock); - for (i = 0; i < NBuffers; i++) - { - bufHdr = &BufferDescriptors[i]; - if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) - { - if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) - { - PinBuffer(bufHdr); - if (bufHdr->flags & BM_IO_IN_PROGRESS) - WaitIO(bufHdr, BufMgrLock); - SpinRelease(BufMgrLock); - - /* - * Force XLOG flush for buffer' LSN - */ - recptr = BufferGetLSN(bufHdr); - XLogFlush(recptr); - - /* - * Now it's safe to write buffer to disk - */ - - SpinAcquire(BufMgrLock); - if (bufHdr->flags & BM_IO_IN_PROGRESS) - WaitIO(bufHdr, BufMgrLock); - - if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) - { - bufHdr->flags &= ~BM_JUST_DIRTIED; - StartBufferIO(bufHdr, false); /* output IO start */ - - SpinRelease(BufMgrLock); - - status = smgrwrite(DEFAULT_SMGR, rel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - - if (status == SM_FAIL) /* disk failure ?! 
*/ - elog(STOP, "FlushRelationBuffers: cannot write %u for %s", - bufHdr->tag.blockNum, bufHdr->blind.relname); - - BufferFlushCount++; - - SpinAcquire(BufMgrLock); - bufHdr->flags &= ~BM_IO_IN_PROGRESS; - TerminateBufferIO(bufHdr); - Assert(!(bufHdr->flags & BM_JUST_DIRTIED)); - bufHdr->flags &= ~BM_DIRTY; - /* - * Note that it's safe to change cntxDirty here because - * of we protect it from upper writers by - * AccessExclusiveLock and from other bufmgr routines - * by BM_IO_IN_PROGRESS - */ - bufHdr->cntxDirty = false; - } - UnpinBuffer(bufHdr); - } - if (!(bufHdr->flags & BM_FREE)) - { - SpinRelease(BufMgrLock); - elog(NOTICE, "FlushRelationBuffers(%s, %u): block %u is referenced (private %ld, global %d)", - RelationGetRelationName(rel), firstDelBlock, - bufHdr->tag.blockNum, - PrivateRefCount[i], bufHdr->refcount); - return -2; - } - if (bufHdr->tag.blockNum >= firstDelBlock) - { - BufTableDelete(bufHdr); - } - } - } - SpinRelease(BufMgrLock); - return 0; -} - -#undef ReleaseBuffer - -/* - * ReleaseBuffer -- remove the pin on a buffer without - * marking it dirty. - * - */ -int -ReleaseBuffer(Buffer buffer) -{ - BufferDesc *bufHdr; - - if (BufferIsLocal(buffer)) - { - Assert(LocalRefCount[-buffer - 1] > 0); - LocalRefCount[-buffer - 1]--; - return STATUS_OK; - } - - if (BAD_BUFFER_ID(buffer)) - return STATUS_ERROR; - - bufHdr = &BufferDescriptors[buffer - 1]; - - Assert(PrivateRefCount[buffer - 1] > 0); - PrivateRefCount[buffer - 1]--; - if (PrivateRefCount[buffer - 1] == 0) - { - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - bufHdr->refcount--; - if (bufHdr->refcount == 0) - { - AddBufferToFreelist(bufHdr); - bufHdr->flags |= BM_FREE; - } - SpinRelease(BufMgrLock); - } - - return STATUS_OK; -} - -#ifdef NOT_USED -void -IncrBufferRefCount_Debug(char *file, int line, Buffer buffer) -{ - IncrBufferRefCount(buffer); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) - { - BufferDesc *buf = &BufferDescriptors[buffer - 1]; - - fprintf(stderr, "PIN(Incr) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - buffer, buf->blind.relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } -} - -#endif - -#ifdef NOT_USED -void -ReleaseBuffer_Debug(char *file, int line, Buffer buffer) -{ - ReleaseBuffer(buffer); - if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) - { - BufferDesc *buf = &BufferDescriptors[buffer - 1]; - - fprintf(stderr, "UNPIN(Rel) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - buffer, buf->blind.relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } -} - -#endif - -#ifdef NOT_USED -int -ReleaseAndReadBuffer_Debug(char *file, - int line, - Buffer buffer, - Relation relation, - BlockNumber blockNum) -{ - bool bufferValid; - Buffer b; - - bufferValid = BufferIsValid(buffer); - b = ReleaseAndReadBuffer(buffer, relation, blockNum); - if (ShowPinTrace && bufferValid && BufferIsLocal(buffer) - && is_userbuffer(buffer)) - { - BufferDesc *buf = &BufferDescriptors[buffer - 1]; - - fprintf(stderr, "UNPIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - buffer, buf->blind.relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } - if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) - { - BufferDesc *buf = &BufferDescriptors[b - 1]; - - fprintf(stderr, "PIN(Rel&Rd) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - b, buf->blind.relname, buf->tag.blockNum, 
- PrivateRefCount[b - 1], file, line); - } - return b; -} - -#endif - -#ifdef BMTRACE - -/* - * trace allocations and deallocations in a circular buffer in - * shared memory. check the buffer before doing the allocation, - * and die if there's anything fishy. - */ - -_bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType) -{ - long start, - cur; - bmtrace *tb; - - start = *CurTraceBuf; - - if (start > 0) - cur = start - 1; - else - cur = BMT_LIMIT - 1; - - for (;;) - { - tb = &TraceBuf[cur]; - if (tb->bmt_op != BMT_NOTUSED) - { - if (tb->bmt_buf == bufNo) - { - if ((tb->bmt_op == BMT_DEALLOC) - || (tb->bmt_dbid == dbId && tb->bmt_relid == relId - && tb->bmt_blkno == blkNo)) - goto okay; - - /* die holding the buffer lock */ - _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur); - } - } - - if (cur == start) - goto okay; - - if (cur == 0) - cur = BMT_LIMIT - 1; - else - cur--; - } - -okay: - tb = &TraceBuf[start]; - tb->bmt_pid = MyProcPid; - tb->bmt_buf = bufNo; - tb->bmt_dbid = dbId; - tb->bmt_relid = relId; - tb->bmt_blkno = blkNo; - tb->bmt_op = allocType; - - *CurTraceBuf = (start + 1) % BMT_LIMIT; -} - -_bm_die(Oid dbId, Oid relId, int blkNo, int bufNo, - int allocType, long start, long cur) -{ - FILE *fp; - bmtrace *tb; - int i; - - tb = &TraceBuf[cur]; - - if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL) - elog(FATAL, "buffer alloc trace error and can't open log file"); - - fprintf(fp, "buffer alloc trace detected the following error:\n\n"); - fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n", - bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"), - (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation")); - - fprintf(fp, "the trace buffer contains:\n"); - - i = start; - for (;;) - { - tb = &TraceBuf[i]; - if (tb->bmt_op != BMT_NOTUSED) - { - fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%u,%d> ", - i, (i == cur ? " ---> " : "\t"), - tb->bmt_pid, tb->bmt_buf, - tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno); - - switch (tb->bmt_op) - { - case BMT_ALLOCFND: - fprintf(fp, "allocate (found)\n"); - break; - - case BMT_ALLOCNOTFND: - fprintf(fp, "allocate (not found)\n"); - break; - - case BMT_DEALLOC: - fprintf(fp, "deallocate\n"); - break; - - default: - fprintf(fp, "unknown op type %d\n", tb->bmt_op); - break; - } - } - - i = (i + 1) % BMT_LIMIT; - if (i == start) - break; - } - - fprintf(fp, "\noperation causing error:\n"); - fprintf(fp, "\tpid %d buf %d for <%d,%u,%d> ", - getpid(), bufNo, dbId, relId, blkNo); - - switch (allocType) - { - case BMT_ALLOCFND: - fprintf(fp, "allocate (found)\n"); - break; - - case BMT_ALLOCNOTFND: - fprintf(fp, "allocate (not found)\n"); - break; - - case BMT_DEALLOC: - fprintf(fp, "deallocate\n"); - break; - - default: - fprintf(fp, "unknown op type %d\n", allocType); - break; - } - - FreeFile(fp); - - kill(getpid(), SIGILL); -} - -#endif /* BMTRACE */ - -/* - * SetBufferCommitInfoNeedsSave - * - * Mark a buffer dirty when we have updated tuple commit-status bits in it. - * - * This is similar to WriteNoReleaseBuffer, except that we do not set - * SharedBufferChanged or BufferDirtiedByMe, because we have not made a - * critical change that has to be flushed to disk before xact commit --- the - * status-bit update could be redone by someone else just as easily. The - * buffer will be marked dirty, but it will not be written to disk until - * there is another reason to write it. 
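As an aside on the #ifdef BMTRACE block above: it logs every buffer allocation and deallocation into a fixed-size circular trace buffer and scans backwards from the newest entry to catch a buffer being used inconsistently. A minimal sketch of that ring-buffer technique follows; the names (TraceSlot, trace_record, trace_last_op, TRACE_LIMIT) are illustrative only, and the real code keeps the buffer in shared memory and calls _bm_die() when the backwards scan finds a mismatch.

#include <stdio.h>

#define TRACE_LIMIT 32

typedef struct
{
    int used;       /* slot holds a valid record? */
    int buf_no;     /* buffer the operation touched */
    int op;         /* what was done to it */
} TraceSlot;

static TraceSlot trace_log[TRACE_LIMIT];
static int trace_cur = 0;       /* next slot to overwrite */

static void
trace_record(int buf_no, int op)
{
    trace_log[trace_cur].used = 1;
    trace_log[trace_cur].buf_no = buf_no;
    trace_log[trace_cur].op = op;
    trace_cur = (trace_cur + 1) % TRACE_LIMIT;  /* wrap, overwriting oldest */
}

/* Walk backwards from the newest record for the last operation on buf_no,
 * the same scan order the sanity check above uses. */
static int
trace_last_op(int buf_no)
{
    int i;

    for (i = 0; i < TRACE_LIMIT; i++)
    {
        int cur = (trace_cur - 1 - i + TRACE_LIMIT) % TRACE_LIMIT;

        if (trace_log[cur].used && trace_log[cur].buf_no == buf_no)
            return trace_log[cur].op;
    }
    return -1;                  /* no record of this buffer */
}

int
main(void)
{
    trace_record(7, 1);
    trace_record(7, 2);
    printf("last op on buffer 7: %d\n", trace_last_op(7));
    return 0;
}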
- * - * This routine might get called many times on the same page, if we are making - * the first scan after commit of an xact that added/deleted many tuples. - * So, be as quick as we can if the buffer is already dirty. - */ -void -SetBufferCommitInfoNeedsSave(Buffer buffer) -{ - BufferDesc *bufHdr; - - if (BufferIsLocal(buffer)) - return; - - if (BAD_BUFFER_ID(buffer)) - return; - - bufHdr = &BufferDescriptors[buffer - 1]; - - if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) != - (BM_DIRTY | BM_JUST_DIRTIED)) - { - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - SpinRelease(BufMgrLock); - } -} - -void -UnlockBuffers() -{ - BufferDesc *buf; - int i; - - for (i = 0; i < NBuffers; i++) - { - if (BufferLocks[i] == 0) - continue; - - Assert(BufferIsValid(i + 1)); - buf = &(BufferDescriptors[i]); - - S_LOCK(&(buf->cntx_lock)); - - if (BufferLocks[i] & BL_R_LOCK) - { - Assert(buf->r_locks > 0); - (buf->r_locks)--; - } - if (BufferLocks[i] & BL_RI_LOCK) - { - - /* - * Someone else could remove our RI lock when acquiring W - * lock. This is possible if we came here from elog(ERROR) - * from IpcSemaphore{Lock|Unlock}(WaitCLSemId). And so we - * don't do Assert(buf->ri_lock) here. - */ - buf->ri_lock = false; - } - if (BufferLocks[i] & BL_W_LOCK) - { - Assert(buf->w_lock); - buf->w_lock = false; - } - - S_UNLOCK(&(buf->cntx_lock)); - - BufferLocks[i] = 0; - } -} - -void -LockBuffer(Buffer buffer, int mode) -{ - BufferDesc *buf; - bits8 *buflock; - - Assert(BufferIsValid(buffer)); - if (BufferIsLocal(buffer)) - return; - - buf = &(BufferDescriptors[buffer - 1]); - buflock = &(BufferLocks[buffer - 1]); - - S_LOCK(&(buf->cntx_lock)); - - if (mode == BUFFER_LOCK_UNLOCK) - { - if (*buflock & BL_R_LOCK) - { - Assert(buf->r_locks > 0); - Assert(!(buf->w_lock)); - Assert(!(*buflock & (BL_W_LOCK | BL_RI_LOCK))); - (buf->r_locks)--; - *buflock &= ~BL_R_LOCK; - } - else if (*buflock & BL_W_LOCK) - { - Assert(buf->w_lock); - Assert(buf->r_locks == 0); - Assert(!(*buflock & (BL_R_LOCK | BL_RI_LOCK))); - buf->w_lock = false; - *buflock &= ~BL_W_LOCK; - } - else - elog(ERROR, "UNLockBuffer: buffer %lu is not locked", buffer); - } - else if (mode == BUFFER_LOCK_SHARE) - { - unsigned i = 0; - - Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK))); - while (buf->ri_lock || buf->w_lock) - { - S_UNLOCK(&(buf->cntx_lock)); - s_lock_sleep(i++); - S_LOCK(&(buf->cntx_lock)); - } - (buf->r_locks)++; - *buflock |= BL_R_LOCK; - } - else if (mode == BUFFER_LOCK_EXCLUSIVE) - { - unsigned i = 0; - - Assert(!(*buflock & (BL_R_LOCK | BL_W_LOCK | BL_RI_LOCK))); - while (buf->r_locks > 0 || buf->w_lock) - { - if (buf->r_locks > 3 || (*buflock & BL_RI_LOCK)) - { - - /* - * Our RI lock might be removed by concurrent W lock - * acquiring (see what we do with RI locks below when our - * own W acquiring succeeded) and so we set RI lock again - * if we already did this. - */ - *buflock |= BL_RI_LOCK; - buf->ri_lock = true; - } - S_UNLOCK(&(buf->cntx_lock)); - s_lock_sleep(i++); - S_LOCK(&(buf->cntx_lock)); - } - buf->w_lock = true; - *buflock |= BL_W_LOCK; - - buf->cntxDirty = true; - - if (*buflock & BL_RI_LOCK) - { - - /* - * It's possible to remove RI locks acquired by another W - * lockers here, but they'll take care about it. 
- */ - buf->ri_lock = false; - *buflock &= ~BL_RI_LOCK; - } - } - else - elog(ERROR, "LockBuffer: unknown lock mode %d", mode); - - S_UNLOCK(&(buf->cntx_lock)); -} - -/* - * Functions for IO error handling - * - * Note : We assume that nested buffer IO never occur. - * i.e at most one io_in_progress spinlock is held - * per proc. -*/ -static BufferDesc *InProgressBuf = (BufferDesc *) NULL; -static bool IsForInput; - -/* - * Function:StartBufferIO - * (Assumptions) - * My process is executing no IO - * BufMgrLock is held - * BM_IO_IN_PROGRESS mask is not set for the buffer - * The buffer is Pinned - * -*/ -static void -StartBufferIO(BufferDesc *buf, bool forInput) -{ - Assert(!InProgressBuf); - Assert(!(buf->flags & BM_IO_IN_PROGRESS)); - buf->flags |= BM_IO_IN_PROGRESS; - - /* - * There used to be - * - * Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); - * - * here, but that's wrong because of the way WaitIO works: someone else - * waiting for the I/O to complete will succeed in grabbing the lock - * for a few instructions, and if we context-swap back to here the - * Assert could fail. Tiny window for failure, but I've seen it - * happen -- tgl - */ - S_LOCK(&(buf->io_in_progress_lock)); - - InProgressBuf = buf; - IsForInput = forInput; -} - -/* - * Function:TerminateBufferIO - * (Assumptions) - * My process is executing IO for the buffer - * BufMgrLock is held - * The buffer is Pinned - * -*/ -static void -TerminateBufferIO(BufferDesc *buf) -{ - Assert(buf == InProgressBuf); - S_UNLOCK(&(buf->io_in_progress_lock)); - InProgressBuf = (BufferDesc *) 0; -} - -/* - * Function:ContinueBufferIO - * (Assumptions) - * My process is executing IO for the buffer - * BufMgrLock is held - * The buffer is Pinned - * -*/ -static void -ContinueBufferIO(BufferDesc *buf, bool forInput) -{ - Assert(buf == InProgressBuf); - Assert(buf->flags & BM_IO_IN_PROGRESS); - IsForInput = forInput; -} - -#ifdef NOT_USED -void -InitBufferIO(void) -{ - InProgressBuf = (BufferDesc *) 0; -} -#endif - -/* - * This function is called from ProcReleaseSpins(). - * BufMgrLock isn't held when this function is called. - * BM_IO_ERROR is always set. If BM_IO_ERROR was already - * set in case of output,this routine would kill all - * backends and reset postmaster. - */ -void -AbortBufferIO(void) -{ - BufferDesc *buf = InProgressBuf; - - if (buf) - { - Assert(buf->flags & BM_IO_IN_PROGRESS); - SpinAcquire(BufMgrLock); - if (IsForInput) - Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty)); - else - { - Assert(buf->flags & BM_DIRTY || buf->cntxDirty); - if (buf->flags & BM_IO_ERROR) - { - elog(NOTICE, "write error may be permanent: cannot write block %u for %s/%s", - buf->tag.blockNum, buf->blind.dbname, buf->blind.relname); - } - buf->flags |= BM_DIRTY; - } - buf->flags |= BM_IO_ERROR; - buf->flags &= ~BM_IO_IN_PROGRESS; - TerminateBufferIO(buf); - SpinRelease(BufMgrLock); - } -} - -/* - * Cleanup buffer or mark it for cleanup. Buffer may be cleaned - * up if it's pinned only once. - * - * NOTE: buffer must be excl locked. 
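StartBufferIO/TerminateBufferIO/AbortBufferIO above form a small per-backend protocol: at most one buffer has I/O in progress per process, its descriptor is remembered, and error cleanup marks it broken instead of leaving a half-read page looking valid. A simplified sketch of that protocol; the DemoBufferDesc type and flag values are hypothetical, and the BufMgrLock and io_in_progress spinlock held by the real routines are omitted.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define FLAG_IO_IN_PROGRESS 0x01
#define FLAG_IO_ERROR       0x02

typedef struct
{
    int id;
    int flags;
} DemoBufferDesc;

static DemoBufferDesc *in_progress_buf = NULL;  /* at most one per process */
static bool is_for_input = false;

static void
start_io(DemoBufferDesc *buf, bool for_input)
{
    assert(in_progress_buf == NULL);    /* nested buffer I/O never occurs */
    buf->flags |= FLAG_IO_IN_PROGRESS;
    in_progress_buf = buf;
    is_for_input = for_input;
}

static void
terminate_io(DemoBufferDesc *buf)
{
    assert(buf == in_progress_buf);
    buf->flags &= ~FLAG_IO_IN_PROGRESS;
    in_progress_buf = NULL;
}

/* Error-cleanup hook: if an I/O was interrupted, flag the buffer so its
 * contents are not trusted, then clear the in-progress state. */
static void
abort_io(void)
{
    if (in_progress_buf != NULL)
    {
        printf("aborting %s of buffer %d\n",
               is_for_input ? "read" : "write", in_progress_buf->id);
        in_progress_buf->flags |= FLAG_IO_ERROR;
        terminate_io(in_progress_buf);
    }
}

int
main(void)
{
    DemoBufferDesc buf = {1, 0};

    start_io(&buf, true);
    abort_io();                 /* simulate an error in the middle of a read */
    printf("buffer flags after abort: 0x%x\n", buf.flags);
    return 0;
}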
- */ -void -MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer)) -{ - BufferDesc *bufHdr = &BufferDescriptors[buffer - 1]; - - Assert(PrivateRefCount[buffer - 1] > 0); - - if (PrivateRefCount[buffer - 1] > 1) - { - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - PrivateRefCount[buffer - 1]--; - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->CleanupFunc = CleanupFunc; - SpinRelease(BufMgrLock); - return; - } - - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - if (bufHdr->refcount == 1) - { - SpinRelease(BufMgrLock); - CleanupFunc(buffer); - CleanupFunc = NULL; - } - else - SpinRelease(BufMgrLock); - - LockBuffer(buffer, BUFFER_LOCK_UNLOCK); - PrivateRefCount[buffer - 1]--; - - SpinAcquire(BufMgrLock); - Assert(bufHdr->refcount > 0); - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - bufHdr->CleanupFunc = CleanupFunc; - bufHdr->refcount--; - if (bufHdr->refcount == 0) - { - AddBufferToFreelist(bufHdr); - bufHdr->flags |= BM_FREE; - } - SpinRelease(BufMgrLock); - return; -} diff --git a/src/backend/storage/buffer/xlog_localbuf.c b/src/backend/storage/buffer/xlog_localbuf.c deleted file mode 100644 index dda7456e72..0000000000 --- a/src/backend/storage/buffer/xlog_localbuf.c +++ /dev/null @@ -1,284 +0,0 @@ -/*------------------------------------------------------------------------- - * - * xlog_localbuf.c - * local buffer manager. Fast buffer manager for temporary tables - * or special cases when the operation is not visible to other backends. - * - * When a relation is being created, the descriptor will have rd_islocal - * set to indicate that the local buffer manager should be used. During - * the same transaction the relation is being created, any inserts or - * selects from the newly created relation will use the local buffer - * pool. rd_islocal is reset at the end of a transaction (commit/abort). - * This is useful for queries like SELECT INTO TABLE and create index. - * - * Portions Copyright (c) 1996-2000, PostgreSQL, Inc - * Portions Copyright (c) 1994-5, Regents of the University of California - * - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/buffer/Attic/xlog_localbuf.c,v 1.2 2000/11/30 01:39:07 tgl Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include -#include -#include -#include - -#include "executor/execdebug.h" -#include "storage/buf_internals.h" -#include "storage/bufmgr.h" -#include "storage/smgr.h" -#include "utils/relcache.h" - -extern long int LocalBufferFlushCount; - -int NLocBuffer = 64; -BufferDesc *LocalBufferDescriptors = NULL; -Block *LocalBufferBlockPointers = NULL; -long *LocalRefCount = NULL; - -static int nextFreeLocalBuf = 0; - -/*#define LBDEBUG*/ - -/* - * LocalBufferAlloc - - * allocate a local buffer. We do round robin allocation for now. 
- */ -BufferDesc * -LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) -{ - int i; - BufferDesc *bufHdr = (BufferDesc *) NULL; - - if (blockNum == P_NEW) - { - blockNum = reln->rd_nblocks; - reln->rd_nblocks++; - } - - /* a low tech search for now -- not optimized for scans */ - for (i = 0; i < NLocBuffer; i++) - { - if (LocalBufferDescriptors[i].tag.rnode.relNode == - reln->rd_node.relNode && - LocalBufferDescriptors[i].tag.blockNum == blockNum) - { - -#ifdef LBDEBUG - fprintf(stderr, "LB ALLOC (%u,%d) %d\n", - RelationGetRelid(reln), blockNum, -i - 1); -#endif - LocalRefCount[i]++; - *foundPtr = TRUE; - return &LocalBufferDescriptors[i]; - } - } - -#ifdef LBDEBUG - fprintf(stderr, "LB ALLOC (%u,%d) %d\n", - RelationGetRelid(reln), blockNum, -nextFreeLocalBuf - 1); -#endif - - /* need to get a new buffer (round robin for now) */ - for (i = 0; i < NLocBuffer; i++) - { - int b = (nextFreeLocalBuf + i) % NLocBuffer; - - if (LocalRefCount[b] == 0) - { - bufHdr = &LocalBufferDescriptors[b]; - LocalRefCount[b]++; - nextFreeLocalBuf = (b + 1) % NLocBuffer; - break; - } - } - if (bufHdr == NULL) - elog(ERROR, "no empty local buffer."); - - /* - * this buffer is not referenced but it might still be dirty (the last - * transaction to touch it doesn't need its contents but has not - * flushed it). if that's the case, write it out before reusing it! - */ - if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) - { - Relation bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode); - - Assert(bufrel != NULL); - - /* flush this page */ - smgrwrite(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - LocalBufferFlushCount++; - - /* - * drop relcache refcount incremented by - * RelationIdCacheGetRelation - */ - RelationDecrementReferenceCount(bufrel); - } - - /* - * it's all ours now. - * - * We need not in tblNode currently but will in future I think, - * when we'll give up rel->rd_fd to fmgr cache. - */ - bufHdr->tag.rnode = reln->rd_node; - bufHdr->tag.blockNum = blockNum; - bufHdr->flags &= ~BM_DIRTY; - bufHdr->cntxDirty = false; - - /* - * lazy memory allocation: allocate space on first use of a buffer. - */ - if (bufHdr->data == (SHMEM_OFFSET) 0) - { - char *data = (char *) malloc(BLCKSZ); - - if (data == NULL) - elog(FATAL, "Out of memory in LocalBufferAlloc"); - /* - * This is a bit of a hack: bufHdr->data needs to be a shmem offset - * for consistency with the shared-buffer case, so make it one - * even though it's not really a valid shmem offset. - */ - bufHdr->data = MAKE_OFFSET(data); - /* - * Set pointer for use by BufferGetBlock() macro. - */ - LocalBufferBlockPointers[-(bufHdr->buf_id + 2)] = (Block) data; - } - - *foundPtr = FALSE; - return bufHdr; -} - -/* - * WriteLocalBuffer - - * writes out a local buffer - */ -int -WriteLocalBuffer(Buffer buffer, bool release) -{ - int bufid; - - Assert(BufferIsLocal(buffer)); - -#ifdef LBDEBUG - fprintf(stderr, "LB WRITE %d\n", buffer); -#endif - - bufid = -(buffer + 1); - LocalBufferDescriptors[bufid].flags |= BM_DIRTY; - - if (release) - { - Assert(LocalRefCount[bufid] > 0); - LocalRefCount[bufid]--; - } - - return true; -} - -/* - * InitLocalBuffer - - * init the local buffer cache. Since most queries (esp. multi-user ones) - * don't involve local buffers, we delay allocating actual memory for the - * buffer until we need it. - */ -void -InitLocalBuffer(void) -{ - int i; - - /* - * these aren't going away. I'm not gonna use palloc. 
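LocalBufferAlloc() above picks a victim slot round-robin starting at nextFreeLocalBuf, writes the page out if the previous user left it dirty, and allocates the data block lazily on first use. A compact stand-alone sketch of that allocation strategy; the names are illustrative and flush() merely stands in for the smgrwrite() call.

#include <stdio.h>
#include <stdlib.h>

#define N_LOCAL_BUFS 8
#define BLOCK_SIZE   8192

typedef struct
{
    int   ref_count;    /* pins held by this backend */
    int   dirty;
    char *data;         /* allocated on first use */
} LocalSlot;

static LocalSlot slots[N_LOCAL_BUFS];
static int next_free = 0;

static void
flush(LocalSlot *slot)
{
    /* stand-in for writing the page back before reuse */
    slot->dirty = 0;
}

static LocalSlot *
local_alloc(void)
{
    int i;

    for (i = 0; i < N_LOCAL_BUFS; i++)
    {
        int b = (next_free + i) % N_LOCAL_BUFS;

        if (slots[b].ref_count == 0)
        {
            LocalSlot *slot = &slots[b];

            slot->ref_count++;
            next_free = (b + 1) % N_LOCAL_BUFS;

            if (slot->dirty)
                flush(slot);            /* last user left it dirty */

            if (slot->data == NULL)     /* lazy allocation on first use */
            {
                slot->data = malloc(BLOCK_SIZE);
                if (slot->data == NULL)
                {
                    fprintf(stderr, "out of memory\n");
                    exit(1);
                }
            }
            return slot;
        }
    }
    return NULL;                        /* every slot pinned */
}

int
main(void)
{
    LocalSlot *s = local_alloc();

    printf("allocated slot with data at %p\n", (void *) s->data);
    return 0;
}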
- */ - LocalBufferDescriptors = - (BufferDesc *) calloc(NLocBuffer, sizeof(BufferDesc)); - LocalBufferBlockPointers = (Block *) calloc(NLocBuffer, sizeof(Block)); - LocalRefCount = (long *) calloc(NLocBuffer, sizeof(long)); - nextFreeLocalBuf = 0; - - for (i = 0; i < NLocBuffer; i++) - { - BufferDesc *buf = &LocalBufferDescriptors[i]; - - /* - * negative to indicate local buffer. This is tricky: shared - * buffers start with 0. We have to start with -2. (Note that the - * routine BufferDescriptorGetBuffer adds 1 to buf_id so our first - * buffer id is -1.) - */ - buf->buf_id = -i - 2; - } -} - -/* - * LocalBufferSync - * - * Flush all dirty buffers in the local buffer cache at commit time. - * Since the buffer cache is only used for keeping relations visible - * during a transaction, we will not need these buffers again. - * - * Note that we have to *flush* local buffers because of them are not - * visible to checkpoint makers. But we can skip XLOG flush check. - */ -void -LocalBufferSync(void) -{ - int i; - - for (i = 0; i < NLocBuffer; i++) - { - BufferDesc *buf = &LocalBufferDescriptors[i]; - Relation bufrel; - - if (buf->flags & BM_DIRTY || buf->cntxDirty) - { -#ifdef LBDEBUG - fprintf(stderr, "LB SYNC %d\n", -i - 1); -#endif - bufrel = RelationNodeCacheGetRelation(buf->tag.rnode); - - Assert(bufrel != NULL); - - smgrwrite(DEFAULT_SMGR, bufrel, buf->tag.blockNum, - (char *) MAKE_PTR(buf->data)); - smgrmarkdirty(DEFAULT_SMGR, bufrel, buf->tag.blockNum); - LocalBufferFlushCount++; - - /* drop relcache refcount from RelationIdCacheGetRelation */ - RelationDecrementReferenceCount(bufrel); - - buf->flags &= ~BM_DIRTY; - buf->cntxDirty = false; - } - } - - MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; -} - -void -ResetLocalBufferPool(void) -{ - int i; - - for (i = 0; i < NLocBuffer; i++) - { - BufferDesc *buf = &LocalBufferDescriptors[i]; - - buf->tag.rnode.relNode = InvalidOid; - buf->flags &= ~BM_DIRTY; - buf->cntxDirty = false; - } - - MemSet(LocalRefCount, 0, sizeof(long) * NLocBuffer); - nextFreeLocalBuf = 0; -} diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 3143217ab0..f46c04c4b6 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.67 2000/11/23 01:08:57 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.68 2000/11/30 08:46:23 vadim Exp $ * * NOTES: * @@ -192,20 +192,6 @@ static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode); static char *filepath(char *filename); static long pg_nofile(void); -#ifndef XLOG -/* - * pg_fsync --- same as fsync except does nothing if -F switch was given - */ -int -pg_fsync(int fd) -{ - if (enableFsync) - return fsync(fd); - else - return 0; -} -#endif - /* * BasicOpenFile --- same as open(2) except can free other FDs if needed * @@ -665,7 +651,6 @@ fileNameOpenFile(FileName fileName, vfdP->fileFlags = fileFlags & ~(O_TRUNC | O_EXCL); vfdP->fileMode = fileMode; vfdP->seekPos = 0; -#ifdef XLOG /* * Have to fsync file on commit. Alternative way - log * file creation and fsync log before actual file creation. 
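The fd.c hunks above drop the non-WAL pg_fsync() fallback and the per-write FD_DIRTY marking, keeping only the rule that a virtual file descriptor created with O_CREAT starts out FD_DIRTY so the new file can be fsync'd at commit. A small stand-alone illustration of that idea, using a hypothetical Vfd struct rather than the real Vfd cache:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define FD_DIRTY 0x01

typedef struct
{
    int fd;
    int state;
} Vfd;

static Vfd
open_vfd(const char *path, int flags, int mode)
{
    Vfd v;

    v.fd = open(path, flags, mode);
    /* Have to fsync a newly created file on commit, so remember that here. */
    v.state = (flags & O_CREAT) ? FD_DIRTY : 0;
    return v;
}

static void
commit_vfd(Vfd *v)
{
    if (v->state & FD_DIRTY)
    {
        if (fsync(v->fd) != 0)
            perror("fsync");
        v->state &= ~FD_DIRTY;
    }
}

int
main(void)
{
    Vfd v = open_vfd("/tmp/vfd_demo", O_RDWR | O_CREAT, 0600);

    if (v.fd < 0)
    {
        perror("open");
        return 1;
    }
    commit_vfd(&v);             /* one fsync at commit, not one per write */
    close(v.fd);
    unlink("/tmp/vfd_demo");
    return 0;
}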
@@ -673,7 +658,6 @@ fileNameOpenFile(FileName fileName, if (fileFlags & O_CREAT) vfdP->fdstate = FD_DIRTY; else -#endif vfdP->fdstate = 0x0; return file; @@ -832,13 +816,7 @@ FileWrite(File file, char *buffer, int amount) FileAccess(file); returnCode = write(VfdCache[file].fd, buffer, amount); if (returnCode > 0) - { VfdCache[file].seekPos += returnCode; -#ifndef XLOG - /* mark the file as needing fsync */ - VfdCache[file].fdstate |= FD_DIRTY; -#endif - } else VfdCache[file].seekPos = FileUnknownPos; diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 8215fd5acc..41fac94e07 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.79 2000/11/10 03:53:45 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.80 2000/11/30 08:46:24 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -569,14 +569,6 @@ mdblindwrt(RelFileNode rnode, elog(DEBUG, "mdblindwrt: write() failed: %m"); status = SM_FAIL; } -#ifndef XLOG - else if (dofsync && - pg_fsync(fd) < 0) - { - elog(DEBUG, "mdblindwrt: fsync() failed: %m"); - status = SM_FAIL; - } -#endif if (close(fd) < 0) { @@ -840,7 +832,6 @@ mdabort() return SM_SUCCESS; } -#ifdef XLOG /* * mdsync() -- Sync storage. * @@ -854,7 +845,6 @@ mdsync() sync(); return SM_SUCCESS; } -#endif /* * _fdvec_alloc () -- grab a free (or new) md file descriptor vector. diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 31929d1d34..48b56b17b2 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.45 2000/11/21 21:16:01 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.46 2000/11/30 08:46:24 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -47,9 +47,7 @@ typedef struct f_smgr int (*smgr_truncate) (Relation reln, int nblocks); int (*smgr_commit) (void); /* may be NULL */ int (*smgr_abort) (void); /* may be NULL */ -#ifdef XLOG int (*smgr_sync) (void); -#endif } f_smgr; /* @@ -62,11 +60,7 @@ static f_smgr smgrsw[] = { /* magnetic disk */ {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, mdread, mdwrite, mdflush, mdblindwrt, mdmarkdirty, mdblindmarkdirty, -#ifdef XLOG mdnblocks, mdtruncate, mdcommit, mdabort, mdsync -#else - mdnblocks, mdtruncate, mdcommit, mdabort -#endif }, #ifdef STABLE_MEMORY_STORAGE @@ -545,7 +539,6 @@ smgrabort() return SM_SUCCESS; } -#ifdef XLOG int smgrsync() { @@ -564,7 +557,6 @@ smgrsync() return SM_SUCCESS; } -#endif #ifdef NOT_USED bool @@ -578,8 +570,6 @@ smgriswo(int16 smgrno) #endif -#ifdef XLOG - void smgr_redo(XLogRecPtr lsn, XLogRecord *record) { @@ -594,4 +584,3 @@ void smgr_desc(char *buf, uint8 xl_info, char* rec) { } -#endif diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 3ed5a8e9fd..2f4697a39b 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.116 2000/11/10 00:33:10 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/cache/relcache.c,v 1.117 2000/11/30 08:46:24 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -2027,8 +2027,7 @@ 
RelationCacheInitializePhase2(void) } } -#ifdef XLOG /* used by XLogInitCache */ - +/* used by XLogInitCache */ void CreateDummyCaches(void); void DestroyDummyCaches(void); @@ -2082,8 +2081,6 @@ DestroyDummyCaches(void) MemoryContextSwitchTo(oldcxt); } -#endif /* XLOG */ - static void AttrDefaultFetch(Relation relation) { diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 0454ffc848..fd477aab28 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.73 2000/11/28 23:27:57 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/init/postinit.c,v 1.74 2000/11/30 08:46:25 vadim Exp $ * * *------------------------------------------------------------------------- @@ -177,11 +177,6 @@ InitPostgres(const char *dbname, const char *username) { bool bootstrap = IsBootstrapProcessingMode(); -#ifndef XLOG - if (!TransactionFlushEnabled()) - on_shmem_exit(FlushBufferPool, 0); -#endif - SetDatabaseName(dbname); /* ---------------- * initialize the database id used for system caches and lock tables @@ -190,11 +185,7 @@ InitPostgres(const char *dbname, const char *username) if (bootstrap) { MyDatabaseId = TemplateDbOid; -#ifdef OLD_FILE_NAMING - SetDatabasePath(ExpandDatabasePath(dbname)); -#else SetDatabasePath(GetDatabasePath(MyDatabaseId)); -#endif LockDisable(true); } else @@ -228,13 +219,7 @@ InitPostgres(const char *dbname, const char *username) "Database \"%s\" does not exist in the system catalog.", dbname); -#ifdef OLD_FILE_NAMING - fullpath = ExpandDatabasePath(datpath); - if (!fullpath) - elog(FATAL, "Database path could not be resolved."); -#else fullpath = GetDatabasePath(MyDatabaseId); -#endif /* Verify the database path */ diff --git a/src/include/access/htup.h b/src/include/access/htup.h index 6484abf36d..eebc1570f5 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: htup.h,v 1.40 2000/11/30 01:47:32 vadim Exp $ + * $Id: htup.h,v 1.41 2000/11/30 08:46:25 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -63,11 +63,6 @@ typedef struct HeapTupleHeaderData typedef HeapTupleHeaderData *HeapTupleHeader; - -#ifdef XLOG - -/* XLOG stuff */ - /* * XLOG allows to store some information in high 4 bits of log * record xl_info field @@ -127,11 +122,6 @@ typedef struct xl_heap_update #define SizeOfHeapUpdate (offsetof(xl_heap_update, mask) + sizeof(uint8)) -/* end of XLOG stuff */ - -#endif /* XLOG */ - - /* * MaxTupleSize is the maximum allowed size of a tuple, including header and * MAXALIGN alignment padding. Basically it's BLCKSZ minus the other stuff @@ -147,7 +137,6 @@ typedef struct xl_heap_update #define MaxTupleSize \ (BLCKSZ - MAXALIGN(sizeof(PageHeaderData) + MaxSpecialSpace)) - /* * MaxAttrSize is a somewhat arbitrary upper limit on the declared size of * data fields of char(n) and similar types. 
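The htup.h hunk above keeps the convention that the one-byte xl_info field of a WAL record reserves its high four bits for the resource manager's opcode and leaves the low four bits to the xlog machinery itself. A short illustration of that bit layout; the opcode values below are invented, not the real XLOG_HEAP_* constants:

#include <stdint.h>
#include <stdio.h>

#define RM_OPCODE_MASK 0xF0     /* high 4 bits: per-rmgr operation */
#define XLR_LOW_MASK   0x0F     /* low 4 bits: reserved for xlog itself */

#define DEMO_OP_INSERT 0x00
#define DEMO_OP_DELETE 0x10
#define DEMO_OP_UPDATE 0x20

static const char *
describe(uint8_t xl_info)
{
    switch (xl_info & RM_OPCODE_MASK)
    {
        case DEMO_OP_INSERT:
            return "insert";
        case DEMO_OP_DELETE:
            return "delete";
        case DEMO_OP_UPDATE:
            return "update";
        default:
            return "unknown";
    }
}

int
main(void)
{
    uint8_t info = DEMO_OP_UPDATE | 0x01;   /* opcode plus an xlog-owned bit */

    printf("opcode = %s, xlog bits = 0x%x\n",
           describe(info), info & XLR_LOW_MASK);
    return 0;
}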
It need not have anything diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 830fc65731..fa3326d4c3 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: nbtree.h,v 1.47 2000/11/21 21:16:05 petere Exp $ + * $Id: nbtree.h,v 1.48 2000/11/30 08:46:25 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -42,10 +42,7 @@ typedef struct BTPageOpaqueData #define BTP_ROOT (1 << 1) /* It's the root page (has no parent) */ #define BTP_FREE (1 << 2) /* not currently used... */ #define BTP_META (1 << 3) /* Set in the meta-page only */ - -#ifdef XLOG #define BTP_REORDER (1 << 4) /* items must be re-ordered */ -#endif } BTPageOpaqueData; typedef BTPageOpaqueData *BTPageOpaque; @@ -209,11 +206,6 @@ typedef BTStackData *BTStack; #define P_FIRSTKEY ((OffsetNumber) 2) #define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY) - -#ifdef XLOG - -/* XLOG stuff */ - /* * XLOG allows to store some information in high 4 bits of log * record xl_info field @@ -257,7 +249,6 @@ typedef struct xl_btree_insert #define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData) - /* * This is what we need to know about insert with split - * 22 + {4 + 8 | left hi-key} + [btitem] + right sibling btitems. Note that @@ -292,10 +283,6 @@ typedef struct xl_btree_newroot #define SizeOfBtreeNewroot (offsetof(xl_btree_newroot, rootblk) + sizeof(BlockIdData)) -/* end of XLOG stuff */ - -#endif /* XLOG */ - /* * Operator strategy numbers -- ordering of these is <, <=, =, >=, > */ diff --git a/src/include/access/transam.h b/src/include/access/transam.h index f1238d5c26..0e6212eed7 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: transam.h,v 1.27 2000/11/21 21:16:05 petere Exp $ + * $Id: transam.h,v 1.28 2000/11/30 08:46:25 vadim Exp $ * * NOTES * Transaction System Version 101 now support proper oid @@ -67,11 +67,7 @@ typedef unsigned char XidStatus;/* (2 bits) */ * transaction page definitions * ---------------- */ -#ifdef XLOG #define TP_DataSize (BLCKSZ - sizeof(XLogRecPtr)) -#else -#define TP_DataSize BLCKSZ -#endif #define TP_NumXidStatusPerBlock (TP_DataSize * 4) /* ---------------- @@ -88,10 +84,8 @@ typedef unsigned char XidStatus;/* (2 bits) */ */ typedef struct LogRelationContentsData { -#ifdef XLOG XLogRecPtr LSN; /* temp hack: LSN is member of any block */ /* so should be described in bufmgr */ -#endif int TransSystemVersion; } LogRelationContentsData; @@ -115,9 +109,7 @@ typedef LogRelationContentsData *LogRelationContents; */ typedef struct VariableRelationContentsData { -#ifdef XLOG XLogRecPtr LSN; -#endif int TransSystemVersion; TransactionId nextXidData; TransactionId lastXidData; /* unused */ @@ -127,21 +119,14 @@ typedef struct VariableRelationContentsData typedef VariableRelationContentsData *VariableRelationContents; /* - * VariableCache is placed in shmem and used by backends to - * get next available XID & OID without access to - * variable relation. Actually, I would like to have two - * different on-disk storages for next XID and OID... - * But hoping that someday we will use per database OID - * generator I leaved this as is. 
- vadim 07/21/98 + * VariableCache is placed in shmem and used by + * backends to get next available XID & OID. */ typedef struct VariableCacheData { -#ifndef XLOG - uint32 xid_count; -#endif - TransactionId nextXid; - Oid nextOid; - uint32 oidCount; + TransactionId nextXid; + Oid nextOid; + uint32 oidCount; } VariableCacheData; typedef VariableCacheData *VariableCache; diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 5aab06e115..6736942e05 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: xact.h,v 1.30 2000/11/21 21:16:05 petere Exp $ + * $Id: xact.h,v 1.31 2000/11/30 08:46:25 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -78,9 +78,6 @@ typedef TransactionStateData *TransactionState; #define StoreInvalidTransactionId(dest) \ (*((TransactionId*) (dest)) = NullTransactionId) - -#ifdef XLOG - /* * XLOG allows to store some information in high 4 bits of log * record xl_info field @@ -106,8 +103,6 @@ typedef struct xl_xact_abort #define SizeOfXactAbort ((offsetof(xl_xact_abort, xtime) + sizeof(time_t))) -#endif - /* ---------------- * extern definitions * ---------------- diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h index ab5caccf32..016381f0d2 100644 --- a/src/include/access/xlogutils.h +++ b/src/include/access/xlogutils.h @@ -1,5 +1,5 @@ - #ifndef XLOG_UTILS_H +#define XLOG_UTILS_H #include "access/rmgr.h" #include "utils/rel.h" diff --git a/src/include/config.h.in b/src/include/config.h.in index 1f6939dd96..13745c07f3 100644 --- a/src/include/config.h.in +++ b/src/include/config.h.in @@ -8,7 +8,7 @@ * or in config.h afterwards. Of course, if you edit config.h, then your * changes will be overwritten the next time you run configure. * - * $Id: config.h.in,v 1.150 2000/11/29 20:59:54 tgl Exp $ + * $Id: config.h.in,v 1.151 2000/11/30 08:46:25 vadim Exp $ */ #ifndef CONFIG_H @@ -234,9 +234,6 @@ # define HAVE_UNIX_SOCKETS 1 #endif -/* Enable WAL. Don't disable this, it was only used during development. 
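The xlogutils.h hunk above adds the #define XLOG_UTILS_H that the header's include guard was missing, so including the file twice is now harmless. For reference, the usual guard pattern looks like this (guard name and declaration are placeholders):

/* Without the #define, the guard symbol is never set and a second
 * inclusion re-reads the whole header. */
#ifndef DEMO_HEADER_H
#define DEMO_HEADER_H

extern int demo_function(int arg);

#endif   /* DEMO_HEADER_H */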
*/ -#define XLOG 1 - /* *------------------------------------------------------------------------ * These hand-configurable symbols are for enabling debugging code, diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 41fce0780f..e7ac9f548c 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: buf_internals.h,v 1.45 2000/11/30 01:39:08 tgl Exp $ + * $Id: buf_internals.h,v 1.46 2000/11/30 08:46:26 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -18,7 +18,6 @@ #include "storage/lmgr.h" #include "storage/s_lock.h" - /* Buf Mgr constants */ /* in bufmgr.c */ extern int Data_Descriptors; @@ -28,7 +27,6 @@ extern int Num_Descriptors; extern int ShowPinTrace; - /* * Flags for buffer descriptors */ @@ -105,9 +103,7 @@ typedef struct sbufdesc bool ri_lock; /* read-intent lock */ bool w_lock; /* context exclusively locked */ -#ifdef XLOG bool cntxDirty; /* new way to mark block as dirty */ -#endif BufferBlindId blind; /* was used to support blind write */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 2452ac6496..27bcc090ed 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: bufmgr.h,v 1.45 2000/11/30 01:39:08 tgl Exp $ + * $Id: bufmgr.h,v 1.46 2000/11/30 08:46:26 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -192,10 +192,8 @@ extern void AbortBufferIO(void); extern bool BufferIsUpdatable(Buffer buffer); extern void MarkBufferForCleanup(Buffer buffer, void (*CleanupFunc)(Buffer)); -#ifdef XLOG extern void BufmgrCommit(void); extern void BufferSync(void); -#endif extern void InitLocalBuffer(void); diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index f8415dbd22..58e6b0a6df 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: bufpage.h,v 1.36 2000/11/20 21:12:26 vadim Exp $ + * $Id: bufpage.h,v 1.37 2000/11/30 08:46:26 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -118,13 +118,13 @@ typedef OpaqueData *Opaque; */ typedef struct PageHeaderData { -#ifdef XLOG /* XXX LSN is member of *any* block, not */ + /* XXX LSN is member of *any* block, not */ /* only page-organized - 'll change later */ XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog */ /* record for last change of this page */ StartUpID pd_sui; /* SUI of last changes (currently it's */ /* used by heap AM only) */ -#endif + LocationIndex pd_lower; /* offset to start of free space */ LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ @@ -298,8 +298,6 @@ typedef enum (sizeof(PageHeaderData) - sizeof(ItemIdData)))) \ / ((int) sizeof(ItemIdData))) -#ifdef XLOG - #define PageGetLSN(page) \ (((PageHeader) (page))->pd_lsn) #define PageSetLSN(page, lsn) \ @@ -310,8 +308,6 @@ typedef enum #define PageSetSUI(page, sui) \ (((PageHeader) (page))->pd_sui = (StartUpID) (sui)) -#endif - /* 
---------------------------------------------------------------- * extern declarations * ---------------------------------------------------------------- diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 45ef1aedcd..c4b1a85c35 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: fd.h,v 1.23 2000/11/10 03:53:45 vadim Exp $ + * $Id: fd.h,v 1.24 2000/11/30 08:46:26 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -71,10 +71,6 @@ extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode); extern void closeAllVfds(void); extern void AtEOXact_Files(void); -#ifdef XLOG #define pg_fsync(fd) fsync(fd) -#else -extern int pg_fsync(int fd); -#endif #endif /* FD_H */ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 7205d5fb7b..4e4531dd65 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: smgr.h,v 1.25 2000/11/21 21:16:05 petere Exp $ + * $Id: smgr.h,v 1.26 2000/11/30 08:46:26 vadim Exp $ * *------------------------------------------------------------------------- */ @@ -47,14 +47,11 @@ extern int smgrtruncate(int16 which, Relation reln, int nblocks); extern int smgrDoPendingDeletes(bool isCommit); extern int smgrcommit(void); extern int smgrabort(void); - -#ifdef XLOG extern int smgrsync(void); extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record); extern void smgr_undo(XLogRecPtr lsn, XLogRecord *record); extern void smgr_desc(char *buf, uint8 xl_info, char* rec); -#endif /* internals: move me elsewhere -- ay 7/94 */ @@ -77,10 +74,7 @@ extern int mdnblocks(Relation reln); extern int mdtruncate(Relation reln, int nblocks); extern int mdcommit(void); extern int mdabort(void); - -#ifdef XLOG extern int mdsync(void); -#endif /* mm.c */ extern SPINLOCK MMCacheLock;
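With the #ifdefs gone, mdsync() is an unconditional member of the magnetic-disk entry in the storage-manager switch table declared above. A minimal sketch of that dispatch-table pattern — an array of function-pointer structs, some callbacks optional — using hypothetical names in place of f_smgr and smgrsw[]:

#include <stdio.h>

typedef struct
{
    const char *name;
    int (*smgr_init)(void);
    int (*smgr_sync)(void);     /* no longer conditional on XLOG */
} DemoSmgr;

static int
demo_md_init(void)
{
    return 0;
}

static int
demo_md_sync(void)
{
    /* the real mdsync() just calls sync() and reports success */
    printf("sync all storage\n");
    return 0;
}

static DemoSmgr demo_smgrsw[] = {
    {"magnetic disk", demo_md_init, demo_md_sync},
};

int
main(void)
{
    int which;
    int nmgrs = (int) (sizeof(demo_smgrsw) / sizeof(demo_smgrsw[0]));

    for (which = 0; which < nmgrs; which++)
    {
        demo_smgrsw[which].smgr_init();
        if (demo_smgrsw[which].smgr_sync != NULL)   /* optional callbacks may be NULL */
            demo_smgrsw[which].smgr_sync();
    }
    return 0;
}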