From: Tom Lane Date: Tue, 10 Feb 2004 01:55:27 +0000 (+0000) Subject: Restructure smgr API as per recent proposal. smgr no longer depends on X-Git-Tag: REL8_0_0BETA1~1219 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=87bd95638552b8fc1f5f787ce5b862bb6fc2eb80;p=postgresql Restructure smgr API as per recent proposal. smgr no longer depends on the relcache, and so the notion of 'blind write' is gone. This should improve efficiency in bgwriter and background checkpoint processes. Internal restructuring in md.c to remove the not-very-useful array of MdfdVec objects --- might as well just use pointers. Also remove the long-dead 'persistent main memory' storage manager (mm.c), since it seems quite unlikely to ever get resurrected. --- diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 87a251915a..fa77318ea3 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -12,7 +12,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.111 2004/02/06 19:36:17 wieck Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.112 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -821,7 +821,9 @@ btvacuumcleanup(PG_FUNCTION_ARGS) /* * Do the physical truncation. 
*/ - new_pages = smgrtruncate(DEFAULT_SMGR, rel, new_pages); + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + new_pages = smgrtruncate(rel->rd_smgr, new_pages); rel->rd_nblocks = new_pages; /* update relcache * immediately */ rel->rd_targblock = InvalidBlockNumber; diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index ba3054f14b..c92f90f6ca 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.10 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.11 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include #include "access/slru.h" +#include "storage/fd.h" #include "storage/lwlock.h" #include "miscadmin.h" diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index d5f357bc5f..06e152d1bb 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.161 2004/01/26 22:51:55 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.162 2004/02/10 01:55:24 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -159,6 +159,7 @@ #include "executor/spi.h" #include "libpq/be-fsstubs.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/proc.h" #include "storage/sinval.h" #include "storage/smgr.h" diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index fe1ecd453c..9056f0b454 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL 
Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.133 2004/01/26 22:35:31 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.134 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,7 @@ #include "catalog/catversion.h" #include "catalog/pg_control.h" #include "storage/bufpage.h" +#include "storage/fd.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" #include "storage/proc.h" @@ -3126,7 +3127,6 @@ ShutdownXLOG(int code, Datum arg) MyXactMadeTempRelUpdate = false; CritSectionCount++; - CreateDummyCaches(); CreateCheckPoint(true, true); ShutdownCLOG(); CritSectionCount--; diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index d200b7e17a..0271742ce0 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.28 2003/12/14 00:34:47 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.29 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -261,14 +261,12 @@ _xl_remove_hash_entry(XLogRelDesc *rdesc) if (hentry == NULL) elog(PANIC, "_xl_remove_hash_entry: file was not found in cache"); - if (rdesc->reldata.rd_fd >= 0) - smgrclose(DEFAULT_SMGR, &(rdesc->reldata)); + if (rdesc->reldata.rd_smgr != NULL) + smgrclose(rdesc->reldata.rd_smgr); memset(rdesc, 0, sizeof(XLogRelDesc)); memset(tpgc, 0, sizeof(FormData_pg_class)); rdesc->reldata.rd_rel = tpgc; - - return; } static XLogRelDesc * @@ -296,7 +294,6 @@ _xl_new_reldesc(void) void XLogInitRelationCache(void) { - CreateDummyCaches(); 
_xl_init_rel_cache(); } @@ -306,8 +303,6 @@ XLogCloseRelationCache(void) HASH_SEQ_STATUS status; XLogRelCacheEntry *hentry; - DestroyDummyCaches(); - if (!_xlrelarr) return; @@ -347,11 +342,18 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) sprintf(RelationGetRelationName(&(res->reldata)), "%u", rnode.relNode); - /* unexisting DB id */ - res->reldata.rd_lockInfo.lockRelId.dbId = RecoveryDb; - res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode; res->reldata.rd_node = rnode; + /* + * We set up the lockRelId in case anything tries to lock the dummy + * relation. Note that this is fairly bogus since relNode may be + * different from the relation's OID. It shouldn't really matter + * though, since we are presumably running by ourselves and can't + * have any lock conflicts ... + */ + res->reldata.rd_lockInfo.lockRelId.dbId = rnode.tblNode; + res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode; + hentry = (XLogRelCacheEntry *) hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &found); @@ -364,9 +366,17 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) hentry->rdesc = res; res->reldata.rd_targblock = InvalidBlockNumber; - res->reldata.rd_fd = -1; - res->reldata.rd_fd = smgropen(DEFAULT_SMGR, &(res->reldata), - true /* allow failure */ ); + res->reldata.rd_smgr = smgropen(res->reldata.rd_node); + /* + * Create the target file if it doesn't already exist. This lets + * us cope if the replay sequence contains writes to a relation + * that is later deleted. (The original coding of this routine + * would instead return NULL, causing the writes to be suppressed. + * But that seems like it risks losing valuable data if the filesystem + * loses an inode during a crash. Better to write the data until we + * are actually told to delete the file.) 
+ */ + smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true); } res->moreRecently = &(_xlrelarr[0]); @@ -374,8 +384,5 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode) _xlrelarr[0].lessRecently = res; res->lessRecently->moreRecently = res; - if (res->reldata.rd_fd < 0) /* file doesn't exist */ - return (NULL); - return (&(res->reldata)); } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 8d29134d39..2f67061c48 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.175 2004/01/07 18:56:25 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.176 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -465,14 +465,12 @@ BootstrapMain(int argc, char *argv[]) break; case BS_XLOG_CHECKPOINT: - CreateDummyCaches(); CreateCheckPoint(false, false); SetSavedRedoRecPtr(); /* pass redo ptr back to * postmaster */ proc_exit(0); /* done */ case BS_XLOG_BGWRITER: - CreateDummyCaches(); BufferBackgroundWriter(); proc_exit(0); /* done */ diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 14c3745e5e..905aa5b0b2 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.257 2003/12/28 21:57:36 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.258 2004/02/10 01:55:24 tgl Exp $ * * * INTERFACE ROUTINES @@ -298,9 +298,9 @@ heap_create(const char *relname, void heap_storage_create(Relation rel) { - Assert(rel->rd_fd < 0); - rel->rd_fd = smgrcreate(DEFAULT_SMGR, rel); - Assert(rel->rd_fd >= 0); + Assert(rel->rd_smgr == NULL); + rel->rd_smgr = smgropen(rel->rd_node); + smgrcreate(rel->rd_smgr, rel->rd_istemp, false); 
} /* ---------------------------------------------------------------- @@ -1210,7 +1210,12 @@ heap_drop_with_catalog(Oid rid) */ if (rel->rd_rel->relkind != RELKIND_VIEW && rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE) - smgrunlink(DEFAULT_SMGR, rel); + { + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); + rel->rd_smgr = NULL; + } /* * Close relcache entry, but *keep* AccessExclusiveLock on the @@ -1706,7 +1711,7 @@ SetRelationNumChecks(Relation rel, int numchecks) else { /* Skip the disk update, but force relcache inval anyway */ - CacheInvalidateRelcache(RelationGetRelid(rel)); + CacheInvalidateRelcache(rel); } heap_freetuple(reltup); @@ -1943,7 +1948,9 @@ RelationTruncateIndexes(Oid heapId) DropRelationBuffers(currentIndex); /* Now truncate the actual data and set blocks to zero */ - smgrtruncate(DEFAULT_SMGR, currentIndex, 0); + if (currentIndex->rd_smgr == NULL) + currentIndex->rd_smgr = smgropen(currentIndex->rd_node); + smgrtruncate(currentIndex->rd_smgr, 0); currentIndex->rd_nblocks = 0; currentIndex->rd_targblock = InvalidBlockNumber; @@ -1990,7 +1997,9 @@ heap_truncate(Oid rid) DropRelationBuffers(rel); /* Now truncate the actual data and set blocks to zero */ - smgrtruncate(DEFAULT_SMGR, rel, 0); + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + smgrtruncate(rel->rd_smgr, 0); rel->rd_nblocks = 0; rel->rd_targblock = InvalidBlockNumber; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 4180526301..9c92f21740 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.226 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.227 2004/02/10 01:55:24 tgl Exp $ * * * INTERFACE ROUTINES @@ -808,7 +808,11 @@ index_drop(Oid indexId) if (i < 0) elog(ERROR, "FlushRelationBuffers returned %d", i); - 
smgrunlink(DEFAULT_SMGR, userIndexRelation); + if (userIndexRelation->rd_smgr == NULL) + userIndexRelation->rd_smgr = smgropen(userIndexRelation->rd_node); + smgrscheduleunlink(userIndexRelation->rd_smgr, + userIndexRelation->rd_istemp); + userIndexRelation->rd_smgr = NULL; /* * We are presently too lazy to attempt to compute the new correct @@ -818,7 +822,7 @@ index_drop(Oid indexId) * owning relation to ensure other backends update their relcache * lists of indexes. */ - CacheInvalidateRelcache(heapId); + CacheInvalidateRelcache(userHeapRelation); /* * Close rels, but keep locks @@ -1057,7 +1061,7 @@ setRelhasindex(Oid relid, bool hasindex, bool isprimary, Oid reltoastidxid) else { /* no need to change tuple, but force relcache rebuild anyway */ - CacheInvalidateRelcache(relid); + CacheInvalidateRelcacheByTuple(tuple); } if (!pg_class_scan) @@ -1077,10 +1081,11 @@ void setNewRelfilenode(Relation relation) { Oid newrelfilenode; + RelFileNode newrnode; + SMgrRelation srel; Relation pg_class; HeapTuple tuple; Form_pg_class rd_rel; - RelationData workrel; /* Can't change relfilenode for nailed tables (indexes ok though) */ Assert(!relation->rd_isnailed || @@ -1107,14 +1112,18 @@ setNewRelfilenode(Relation relation) /* create another storage file. Is it a little ugly ? 
*/ /* NOTE: any conflict in relfilenode value will be caught here */ - memcpy((char *) &workrel, relation, sizeof(RelationData)); - workrel.rd_fd = -1; - workrel.rd_node.relNode = newrelfilenode; - heap_storage_create(&workrel); - smgrclose(DEFAULT_SMGR, &workrel); + newrnode = relation->rd_node; + newrnode.relNode = newrelfilenode; + + srel = smgropen(newrnode); + smgrcreate(srel, relation->rd_istemp, false); + smgrclose(srel); /* schedule unlinking old relfilenode */ - smgrunlink(DEFAULT_SMGR, relation); + if (relation->rd_smgr == NULL) + relation->rd_smgr = smgropen(relation->rd_node); + smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp); + relation->rd_smgr = NULL; /* update the pg_class row */ rd_rel->relfilenode = newrelfilenode; @@ -1672,7 +1681,9 @@ reindex_index(Oid indexId) DropRelationBuffers(iRel); /* Now truncate the actual data and set blocks to zero */ - smgrtruncate(DEFAULT_SMGR, iRel, 0); + if (iRel->rd_smgr == NULL) + iRel->rd_smgr = smgropen(iRel->rd_node); + smgrtruncate(iRel->rd_smgr, 0); iRel->rd_nblocks = 0; iRel->rd_targblock = InvalidBlockNumber; } diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 66850d32d5..7af8200e06 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.217 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.218 2004/02/10 01:55:24 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,6 +40,7 @@ #include "parser/parse_coerce.h" #include "parser/parse_relation.h" #include "rewrite/rewriteHandler.h" +#include "storage/fd.h" #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "utils/acl.h" diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 2b8fdb9a2d..85f49537ef 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -9,7 
+9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.130 2004/01/07 18:56:25 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.131 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -30,6 +30,7 @@ #include "commands/comment.h" #include "commands/dbcommands.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/freespace.h" #include "storage/sinval.h" #include "utils/acl.h" diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 809f425bc6..6fadd0d4e1 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.97 2004/01/28 21:02:39 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.98 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1010,7 +1010,7 @@ setRelhassubclassInRelation(Oid relationId, bool relhassubclass) else { /* no need to change tuple, but force relcache rebuild anyway */ - CacheInvalidateRelcache(relationId); + CacheInvalidateRelcacheByTuple(tuple); } heap_freetuple(tuple); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 34cfc4d10e..bddf3f5ad6 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.163 2003/11/29 19:51:47 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.164 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -691,7 +691,7 @@ renametrig(Oid relid, * relcache entries. (Ideally this should happen * automatically...) 
*/ - CacheInvalidateRelcache(relid); + CacheInvalidateRelcache(targetrel); } else { diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c index d2d1a3c7a9..9352aeb0ec 100644 --- a/src/backend/commands/user.c +++ b/src/backend/commands/user.c @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.136 2004/02/02 17:21:07 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.137 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -27,6 +27,7 @@ #include "commands/user.h" #include "libpq/crypt.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/pmsignal.h" #include "utils/acl.h" #include "utils/array.h" diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index dae8c3f37c..29a2df1ef1 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.271 2004/01/07 18:56:25 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.272 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2513,7 +2513,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, /* truncate relation, if needed */ if (blkno < nblocks) { - blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno); + if (onerel->rd_smgr == NULL) + onerel->rd_smgr = smgropen(onerel->rd_node); + blkno = smgrtruncate(onerel->rd_smgr, blkno); onerel->rd_nblocks = blkno; /* update relcache immediately */ onerel->rd_targblock = InvalidBlockNumber; vacrelstats->rel_pages = blkno; /* set new number of blocks */ @@ -2582,7 +2584,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages) (errmsg("\"%s\": truncated %u to %u pages", 
RelationGetRelationName(onerel), vacrelstats->rel_pages, relblocks))); - relblocks = smgrtruncate(DEFAULT_SMGR, onerel, relblocks); + if (onerel->rd_smgr == NULL) + onerel->rd_smgr = smgropen(onerel->rd_node); + relblocks = smgrtruncate(onerel->rd_smgr, relblocks); onerel->rd_nblocks = relblocks; /* update relcache immediately */ onerel->rd_targblock = InvalidBlockNumber; vacrelstats->rel_pages = relblocks; /* set new number of diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index c271152877..17f91efef7 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -31,7 +31,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.35 2004/02/06 19:36:17 wieck Exp $ + * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.36 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -148,9 +148,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt) vac_open_indexes(onerel, &nindexes, &Irel); hasindex = (nindexes > 0); - /* Turn on vacuum cost accounting */ - if (VacuumCostNaptime > 0) - VacuumCostActive = true; + /* Turn vacuum cost accounting on or off */ + VacuumCostActive = (VacuumCostNaptime > 0); VacuumCostBalance = 0; /* Do the vacuuming */ @@ -784,7 +783,9 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) /* * Do the physical truncation. 
*/ - new_rel_pages = smgrtruncate(DEFAULT_SMGR, onerel, new_rel_pages); + if (onerel->rd_smgr == NULL) + onerel->rd_smgr = smgropen(onerel->rd_node); + new_rel_pages = smgrtruncate(onerel->rd_smgr, new_rel_pages); onerel->rd_nblocks = new_rel_pages; /* update relcache immediately */ onerel->rd_targblock = InvalidBlockNumber; vacrelstats->rel_pages = new_rel_pages; /* save new number of diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c index aa8ba2f884..ed19e76db2 100644 --- a/src/backend/libpq/be-fsstubs.c +++ b/src/backend/libpq/be-fsstubs.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.69 2003/11/29 19:51:49 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.70 2004/02/10 01:55:25 tgl Exp $ * * NOTES * This should be moved to a more appropriate place. It is here @@ -41,6 +41,7 @@ #include "libpq/be-fsstubs.h" #include "libpq/libpq-fs.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/large_object.h" #include "utils/memutils.h" diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index f1cbe96fd2..995afe5509 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.92 2004/01/14 23:01:55 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.93 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -480,7 +480,12 @@ DefineQueryRewrite(RuleStmt *stmt) * XXX what about getting rid of its TOAST table? For now, we don't. 
*/ if (RelisBecomingView) - smgrunlink(DEFAULT_SMGR, event_relation); + { + if (event_relation->rd_smgr == NULL) + event_relation->rd_smgr = smgropen(event_relation->rd_node); + smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp); + event_relation->rd_smgr = NULL; + } /* Close rel, but keep lock till commit... */ heap_close(event_relation, NoLock); diff --git a/src/backend/rewrite/rewriteSupport.c b/src/backend/rewrite/rewriteSupport.c index 54fdcfcdde..6e01de4b5c 100644 --- a/src/backend/rewrite/rewriteSupport.c +++ b/src/backend/rewrite/rewriteSupport.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.57 2003/11/29 19:51:55 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.58 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -84,7 +84,7 @@ SetRelationRuleStatus(Oid relationId, bool relHasRules, else { /* no need to change tuple, but force relcache rebuild anyway */ - CacheInvalidateRelcache(relationId); + CacheInvalidateRelcacheByTuple(tuple); } heap_freetuple(tuple); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index b927b5ea5e..203e03ab05 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.156 2004/02/06 19:36:18 wieck Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.157 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -85,7 +85,7 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum, bool bufferLockHeld); static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr); -static bool BufferReplace(BufferDesc *bufHdr); +static void BufferReplace(BufferDesc *bufHdr); #ifdef NOT_USED void 
PrintBufferDescs(void); @@ -127,7 +127,6 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, bool bufferLockHeld) { BufferDesc *bufHdr; - int status; bool found; bool isExtend; bool isLocalBuf; @@ -135,6 +134,10 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, isExtend = (blockNum == P_NEW); isLocalBuf = reln->rd_istemp; + /* Open it at the smgr level if not already done */ + if (reln->rd_smgr == NULL) + reln->rd_smgr = smgropen(reln->rd_node); + if (isLocalBuf) { ReadLocalBufferCount++; @@ -160,7 +163,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, if (isExtend) { /* must be sure we have accurate file length! */ - blockNum = reln->rd_nblocks = smgrnblocks(DEFAULT_SMGR, reln); + blockNum = reln->rd_nblocks = smgrnblocks(reln->rd_smgr); reln->rd_nblocks++; } @@ -207,23 +210,19 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, } /* - * if we have gotten to this point, the reln pointer must be ok and - * the relation file must be open. + * if we have gotten to this point, the relation must be open in the smgr. */ if (isExtend) { /* new buffers are zero-filled */ MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); - status = smgrextend(DEFAULT_SMGR, reln, blockNum, - (char *) MAKE_PTR(bufHdr->data)); + smgrextend(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data)); } else { - status = smgrread(DEFAULT_SMGR, reln, blockNum, - (char *) MAKE_PTR(bufHdr->data)); + smgrread(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data)); /* check for garbage data */ - if (status == SM_SUCCESS && - !PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data))) + if (!PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data))) { /* * During WAL recovery, the first access to any data page should @@ -250,47 +249,20 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum, if (isLocalBuf) { /* No shared buffer state to update... 
*/ - if (status == SM_FAIL) - { - bufHdr->flags |= BM_IO_ERROR; - return InvalidBuffer; - } return BufferDescriptorGetBuffer(bufHdr); } /* lock buffer manager again to update IO IN PROGRESS */ LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); - if (status == SM_FAIL) - { - /* IO Failed. cleanup the data structures and go home */ - StrategyInvalidateBuffer(bufHdr); - - /* remember that BufferAlloc() pinned the buffer */ - UnpinBuffer(bufHdr); - - /* - * Have to reset the flag so that anyone waiting for the buffer - * can tell that the contents are invalid. - */ - bufHdr->flags |= BM_IO_ERROR; - bufHdr->flags &= ~BM_IO_IN_PROGRESS; - } - else - { - /* IO Succeeded. clear the flags, finish buffer update */ - - bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); - } + /* IO Succeeded. clear the flags, finish buffer update */ + bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); /* If anyone was waiting for IO to complete, wake them up now */ TerminateBufferIO(bufHdr); LWLockRelease(BufMgrLock); - if (status == SM_FAIL) - return InvalidBuffer; - return BufferDescriptorGetBuffer(bufHdr); } @@ -391,8 +363,6 @@ BufferAlloc(Relation reln, if (buf->flags & BM_DIRTY || buf->cntxDirty) { - bool replace_ok; - /* * skip write error buffers */ @@ -425,39 +395,21 @@ BufferAlloc(Relation reln, * Write the buffer out, being careful to release BufMgrLock * before starting the I/O. */ - replace_ok = BufferReplace(buf); + BufferReplace(buf); - if (replace_ok == false) + /* + * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't + * be set by anyone. 
- vadim 01/17/97 + */ + if (buf->flags & BM_JUST_DIRTIED) { - ereport(WARNING, - (errcode(ERRCODE_IO_ERROR), - errmsg("could not write block %u of %u/%u", - buf->tag.blockNum, - buf->tag.rnode.tblNode, - buf->tag.rnode.relNode))); - inProgress = FALSE; - buf->flags |= BM_IO_ERROR; - buf->flags &= ~BM_IO_IN_PROGRESS; - TerminateBufferIO(buf); - UnpinBuffer(buf); - buf = NULL; + elog(PANIC, "content of block %u of %u/%u changed while flushing", + buf->tag.blockNum, + buf->tag.rnode.tblNode, buf->tag.rnode.relNode); } - else - { - /* - * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't - * be set by anyone. - vadim 01/17/97 - */ - if (buf->flags & BM_JUST_DIRTIED) - { - elog(PANIC, "content of block %u of %u/%u changed while flushing", - buf->tag.blockNum, - buf->tag.rnode.tblNode, buf->tag.rnode.relNode); - } - buf->flags &= ~BM_DIRTY; - buf->cntxDirty = false; - } + buf->flags &= ~BM_DIRTY; + buf->cntxDirty = false; /* * Somebody could have pinned the buffer while we were doing @@ -721,10 +673,8 @@ BufferSync(int percent, int maxpages) for (i = 0; i < num_buffer_dirty; i++) { Buffer buffer; - int status; - RelFileNode rnode; XLogRecPtr recptr; - Relation reln; + SMgrRelation reln; LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); @@ -775,15 +725,9 @@ BufferSync(int percent, int maxpages) StartBufferIO(bufHdr, false); /* output IO start */ buffer = BufferDescriptorGetBuffer(bufHdr); - rnode = bufHdr->tag.rnode; LWLockRelease(BufMgrLock); - /* - * Try to find relation for buffer - */ - reln = RelationNodeCacheGetRelation(rnode); - /* * Protect buffer content against concurrent update */ @@ -805,27 +749,13 @@ BufferSync(int percent, int maxpages) bufHdr->flags &= ~BM_JUST_DIRTIED; LWLockRelease(BufMgrLock); - if (reln == NULL) - { - status = smgrblindwrt(DEFAULT_SMGR, - bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - status = smgrwrite(DEFAULT_SMGR, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } + /* 
Find smgr relation for buffer */ + reln = smgropen(bufHdr->tag.rnode); - if (status == SM_FAIL) /* disk failure ?! */ - ereport(PANIC, - (errcode(ERRCODE_IO_ERROR), - errmsg("could not write block %u of %u/%u", - bufHdr->tag.blockNum, - bufHdr->tag.rnode.tblNode, - bufHdr->tag.rnode.relNode))); + /* And write... */ + smgrwrite(reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); /* * Note that it's safe to change cntxDirty here because of we @@ -853,10 +783,6 @@ BufferSync(int percent, int maxpages) bufHdr->flags &= ~BM_DIRTY; UnpinBuffer(bufHdr); LWLockRelease(BufMgrLock); - - /* drop refcnt obtained by RelationNodeCacheGetRelation */ - if (reln != NULL) - RelationDecrementReferenceCount(reln); } pfree(buffer_dirty); @@ -1026,12 +952,22 @@ BufferBackgroundWriter(void) n = BufferSync(BgWriterPercent, BgWriterMaxpages); /* - * Whatever signal is sent to us, let's just die galantly. If + * Whatever signal is sent to us, let's just die gallantly. If * it wasn't meant that way, the postmaster will reincarnate us. */ if (InterruptPending) return; + /* + * Whenever we have nothing to do, close all smgr files. This + * is so we won't hang onto smgr references to deleted files + * indefinitely. XXX this is a bogus, temporary solution. 'Twould + * be much better to do this once per checkpoint, but the bgwriter + * doesn't yet know anything about checkpoints. + */ + if (n == 0) + smgrcloseall(); + /* * Nap for the configured time or sleep for 10 seconds if * there was nothing to do at all. @@ -1073,17 +1009,15 @@ BufferGetBlockNumber(Buffer buffer) /* * BufferReplace * - * Write out the buffer corresponding to 'bufHdr'. Returns 'true' if - * the buffer was successfully written out, 'false' otherwise. + * Write out the buffer corresponding to 'bufHdr'. * * BufMgrLock must be held at entry, and the buffer must be pinned. 
*/ -static bool +static void BufferReplace(BufferDesc *bufHdr) { - Relation reln; + SMgrRelation reln; XLogRecPtr recptr; - int status; ErrorContextCallback errcontext; /* To check if block content changed while flushing. - vadim 01/17/97 */ @@ -1104,36 +1038,20 @@ BufferReplace(BufferDesc *bufHdr) recptr = BufferGetLSN(bufHdr); XLogFlush(recptr); - reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode); + /* Find smgr relation for buffer */ + reln = smgropen(bufHdr->tag.rnode); - if (reln != NULL) - { - status = smgrwrite(DEFAULT_SMGR, reln, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - - /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */ - if (reln != NULL) - RelationDecrementReferenceCount(reln); + /* And write... */ + smgrwrite(reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); /* Pop the error context stack */ error_context_stack = errcontext.previous; LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); - if (status == SM_FAIL) - return false; - BufferFlushCount++; - - return true; } /* @@ -1151,12 +1069,17 @@ RelationGetNumberOfBlocks(Relation relation) * * Don't call smgr on a view or a composite type, either. 
*/ - if (relation->rd_rel->relkind == RELKIND_VIEW) - relation->rd_nblocks = 0; - else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + if (relation->rd_rel->relkind == RELKIND_VIEW || + relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) relation->rd_nblocks = 0; else if (!relation->rd_isnew && !relation->rd_istemp) - relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation); + { + /* Open it at the smgr level if not already done */ + if (relation->rd_smgr == NULL) + relation->rd_smgr = smgropen(relation->rd_node); + + relation->rd_nblocks = smgrnblocks(relation->rd_smgr); + } return relation->rd_nblocks; } @@ -1172,12 +1095,17 @@ RelationGetNumberOfBlocks(Relation relation) void RelationUpdateNumberOfBlocks(Relation relation) { - if (relation->rd_rel->relkind == RELKIND_VIEW) - relation->rd_nblocks = 0; - else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) + if (relation->rd_rel->relkind == RELKIND_VIEW || + relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) relation->rd_nblocks = 0; else - relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation); + { + /* Open it at the smgr level if not already done */ + if (relation->rd_smgr == NULL) + relation->rd_smgr = smgropen(relation->rd_node); + + relation->rd_nblocks = smgrnblocks(relation->rd_smgr); + } } /* --------------------------------------------------------------------- @@ -1465,7 +1393,6 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) int i; BufferDesc *bufHdr; XLogRecPtr recptr; - int status; ErrorContextCallback errcontext; /* Setup error traceback support for ereport() */ @@ -1484,17 +1411,13 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) { if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { - status = smgrwrite(DEFAULT_SMGR, rel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - if (status == SM_FAIL) - { - error_context_stack = errcontext.previous; - elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is dirty, could 
not flush it", - RelationGetRelationName(rel), firstDelBlock, - bufHdr->tag.blockNum); - return (-1); - } + /* Open it at the smgr level if not already done */ + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); + + smgrwrite(rel->rd_smgr, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); bufHdr->cntxDirty = false; } @@ -1553,17 +1476,13 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock) LWLockRelease(BufMgrLock); - status = smgrwrite(DEFAULT_SMGR, rel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); + /* Open it at the smgr level if not already done */ + if (rel->rd_smgr == NULL) + rel->rd_smgr = smgropen(rel->rd_node); - if (status == SM_FAIL) /* disk failure ?! */ - ereport(PANIC, - (errcode(ERRCODE_IO_ERROR), - errmsg("could not write block %u of %u/%u", - bufHdr->tag.blockNum, - bufHdr->tag.rnode.tblNode, - bufHdr->tag.rnode.relNode))); + smgrwrite(rel->rd_smgr, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); BufferFlushCount++; @@ -2046,7 +1965,11 @@ AbortBufferIO(void) LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); Assert(buf->flags & BM_IO_IN_PROGRESS); if (IsForInput) + { Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty)); + /* Don't think that buffer is valid */ + StrategyInvalidateBuffer(buf); + } else { Assert(buf->flags & BM_DIRTY || buf->cntxDirty); diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 01c8303928..bcbedc9c65 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.51 2004/01/07 18:56:27 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.52 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -90,24 +90,15 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, 
bool *foundPtr) */ if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { - Relation bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode); + SMgrRelation reln; - /* flush this page */ - if (bufrel == NULL) - { - smgrblindwrt(DEFAULT_SMGR, - bufHdr->tag.rnode, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - } - else - { - smgrwrite(DEFAULT_SMGR, bufrel, - bufHdr->tag.blockNum, - (char *) MAKE_PTR(bufHdr->data)); - /* drop refcount incremented by RelationNodeCacheGetRelation */ - RelationDecrementReferenceCount(bufrel); - } + /* Find smgr relation for buffer */ + reln = smgropen(bufHdr->tag.rnode); + + /* And write... */ + smgrwrite(reln, + bufHdr->tag.blockNum, + (char *) MAKE_PTR(bufHdr->data)); LocalBufferFlushCount++; } @@ -143,9 +134,6 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr) /* * it's all ours now. - * - * We need not in tblNode currently but will in future I think, when - * we'll give up rel->rd_fd to fmgr cache. */ bufHdr->tag.rnode = reln->rd_node; bufHdr->tag.blockNum = blockNum; diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 7de7d85e74..f95b1b3441 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.63 2004/01/26 22:59:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.64 2004/02/10 01:55:25 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -65,9 +65,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, size += FreeSpaceShmemSize(); #ifdef EXEC_BACKEND size += ShmemBackendArraySize(); -#endif -#ifdef STABLE_MEMORY_STORAGE - size += MMShmemSize(); #endif size += 100000; /* might as well round it off to a multiple of a typical page size */ diff --git a/src/backend/storage/smgr/Makefile b/src/backend/storage/smgr/Makefile index 7c2a0f62b2..71695f9a74 100644 --- 
a/src/backend/storage/smgr/Makefile +++ b/src/backend/storage/smgr/Makefile @@ -4,7 +4,7 @@ # Makefile for storage/smgr # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.14 2003/11/29 19:51:57 pgsql Exp $ +# $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.15 2004/02/10 01:55:26 tgl Exp $ # #------------------------------------------------------------------------- @@ -12,7 +12,7 @@ subdir = src/backend/storage/smgr top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = md.o mm.o smgr.o smgrtype.o +OBJS = md.o smgr.o smgrtype.o all: SUBSYS.o diff --git a/src/backend/storage/smgr/README b/src/backend/storage/smgr/README index 606431f926..124d5bcdff 100644 --- a/src/backend/storage/smgr/README +++ b/src/backend/storage/smgr/README @@ -1,40 +1,31 @@ -# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.2 2003/11/29 19:51:57 pgsql Exp $ +# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.3 2004/02/10 01:55:26 tgl Exp $ -This directory contains the code that supports the Postgres storage manager -switch and all of the installed storage managers. In released systems, -the only supported storage manager is the magnetic disk manager. At UC -Berkeley, the Sony WORM optical disk jukebox and persistent main memory are -also supported. +In the original Berkeley Postgres system, there were several storage managers, +of which only the "magnetic disk" manager remains. (At Berkeley there were +also managers for the Sony WORM optical disk jukebox and persistent main +memory, but these were never supported in any externally released Postgres, +nor in any version of PostgreSQL.) However, we retain the notion of a storage +manager switch in case anyone wants to reintroduce other kinds of storage +managers. -As of Postgres Release 3.0, every relation in the system is tagged with the -storage manager on which it resides. 
The storage manager switch code turns -what used to by filesystem operations into operations on the correct store, -for any given relation. +In Berkeley Postgres each relation was tagged with the ID of the storage +manager to use for it. This is gone. It would be more reasonable to +associate storage managers with tablespaces (a feature not present as this +text is being written, but one likely to emerge soon). The files in this directory, and their contents, are smgrtype.c Storage manager type -- maps string names to storage manager IDs and provides simple comparison operators. This is the regproc support for type 'smgr' in the system catalogs. + (This is vestigial since no columns of type smgr exist + in the catalogs anymore.) smgr.c The storage manager switch dispatch code. The routines in this file call the appropriate storage manager to do hardware - accesses requested by the backend. + accesses requested by the backend. smgr.c also manages the + file handle cache (SMgrRelation table). md.c The magnetic disk storage manager. - mm.c The persistent main memory storage manager (#undef'ed in - tmp/c.h for all distributed systems). - - sj.c The sony jukebox storage manager and cache management code - (#undef'ed in tmp/c.h for all distributed systems). The - routines in this file allocate extents, maintain block - maps, and guarantee the persistence and coherency of a cache - of jukebox blocks on magnetic disk. - - pgjb.c The postgres jukebox interface routines. The routines here - handle exclusion on the physical device and translate requests - from the storage manager code (sj.c) into jbaccess calls. - - jbaccess.c Access code for the physical Sony jukebox device. This code - was swiped from Andy McFadden's jblib.a code at UC Berkeley. +Note that md.c in turn relies on src/backend/storage/file/fd.c. 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 0405c2849a..58629218a3 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.101 2004/01/07 18:56:27 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.102 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,108 +21,81 @@ #include "catalog/catalog.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/smgr.h" -#include "utils/inval.h" #include "utils/memutils.h" + /* * The magnetic disk storage manager keeps track of open file * descriptors in its own descriptor pool. This is done to make it * easier to support relations that are larger than the operating - * system's file size limit (often 2GBytes). In order to do that, we + * system's file size limit (often 2GBytes). In order to do that, * we break relations up into chunks of < 2GBytes and store one chunk * in each of several files that represent the relation. See the * BLCKSZ and RELSEG_SIZE configuration constants in - * include/pg_config.h. + * include/pg_config.h. All chunks except the last MUST have size exactly + * equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate(). * - * The file descriptor stored in the relation cache (see RelationGetFile()) - * is actually an index into the Md_fdvec array. -1 indicates not open. + * The file descriptor pointer (md_fd field) stored in the SMgrRelation + * cache is, therefore, just the head of a list of MdfdVec objects. + * But note the md_fd pointer can be NULL, indicating relation not open. * - * When a relation is broken into multiple chunks, only the first chunk - * has its own entry in the Md_fdvec array; the remaining chunks have - * palloc'd MdfdVec objects that are chained onto the first chunk via the - * mdfd_chain links. 
All chunks except the last MUST have size exactly - * equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate(). + * All MdfdVec objects are palloc'd in the MdCxt memory context. */ typedef struct _MdfdVec { - int mdfd_vfd; /* fd number in vfd pool */ - int mdfd_flags; /* fd status flags */ + File mdfd_vfd; /* fd number in fd.c's pool */ -/* these are the assigned bits in mdfd_flags: */ -#define MDFD_FREE (1 << 0) /* unused entry */ - - int mdfd_nextFree; /* link to next freelist member, if free */ #ifndef LET_OS_MANAGE_FILESIZE struct _MdfdVec *mdfd_chain; /* for large relations */ #endif } MdfdVec; -static int Nfds = 100; /* initial/current size of Md_fdvec array */ -static MdfdVec *Md_fdvec = NULL; -static int Md_Free = -1; /* head of freelist of unused fdvec - * entries */ -static int CurFd = 0; /* first never-used fdvec index */ static MemoryContext MdCxt; /* context for all md.c allocations */ -/* routines declared here */ -static void mdclose_fd(int fd); -static int _mdfd_getrelnfd(Relation reln); -static MdfdVec *_mdfd_openseg(Relation reln, BlockNumber segno, int oflags); -static MdfdVec *_mdfd_getseg(Relation reln, BlockNumber blkno); - -static int _mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno); -static int _fdvec_alloc(void); -static void _fdvec_free(int); +/* routines declared here */ +static MdfdVec *mdopen(SMgrRelation reln); +static MdfdVec *_fdvec_alloc(void); +#ifndef LET_OS_MANAGE_FILESIZE +static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, + int oflags); +#endif +static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno); static BlockNumber _mdnblocks(File file, Size blcksz); + /* * mdinit() -- Initialize private state for magnetic disk storage manager. - * - * We keep a private table of all file descriptors. This routine - * allocates and initializes the table. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. 
*/ -int +bool mdinit(void) { - int i; - MdCxt = AllocSetContextCreate(TopMemoryContext, "MdSmgr", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); - Md_fdvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec)); - - MemSet(Md_fdvec, 0, Nfds * sizeof(MdfdVec)); - - /* Set free list */ - for (i = 0; i < Nfds; i++) - { - Md_fdvec[i].mdfd_nextFree = i + 1; - Md_fdvec[i].mdfd_flags = MDFD_FREE; - } - Md_Free = 0; - Md_fdvec[Nfds - 1].mdfd_nextFree = -1; - - return SM_SUCCESS; + return true; } -int -mdcreate(Relation reln) +/* + * mdcreate() -- Create a new relation on magnetic disk. + * + * If isRedo is true, it's okay for the relation to exist already. + */ +bool +mdcreate(SMgrRelation reln, bool isRedo) { char *path; - int fd, - vfd; + File fd; - Assert(reln->rd_fd < 0); + Assert(reln->md_fd == NULL); - path = relpath(reln->rd_node); + path = relpath(reln->smgr_rnode); fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); @@ -134,43 +107,45 @@ mdcreate(Relation reln) * During bootstrap, there are cases where a system relation will * be accessed (by internal backend processes) before the * bootstrap script nominally creates it. Therefore, allow the - * file to exist already, but in bootstrap mode only. (See also + * file to exist already, even if isRedo is not set. 
(See also * mdopen) */ - if (IsBootstrapProcessingMode()) + if (isRedo || IsBootstrapProcessingMode()) fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600); if (fd < 0) { pfree(path); /* be sure to return the error reported by create, not open */ errno = save_errno; - return -1; + return false; } errno = 0; } pfree(path); - vfd = _fdvec_alloc(); - if (vfd < 0) - return -1; + reln->md_fd = _fdvec_alloc(); - Md_fdvec[vfd].mdfd_vfd = fd; - Md_fdvec[vfd].mdfd_flags = (uint16) 0; + reln->md_fd->mdfd_vfd = fd; #ifndef LET_OS_MANAGE_FILESIZE - Md_fdvec[vfd].mdfd_chain = NULL; + reln->md_fd->mdfd_chain = NULL; #endif - return vfd; + return true; } /* * mdunlink() -- Unlink a relation. + * + * Note that we're passed a RelFileNode --- by the time this is called, + * there won't be an SMgrRelation hashtable entry anymore. + * + * If isRedo is true, it's okay for the relation to be already gone. */ -int -mdunlink(RelFileNode rnode) +bool +mdunlink(RelFileNode rnode, bool isRedo) { - int status = SM_SUCCESS; + bool status = true; int save_errno = 0; char *path; @@ -179,13 +154,16 @@ mdunlink(RelFileNode rnode) /* Delete the first segment, or only segment if not doing segmenting */ if (unlink(path) < 0) { - status = SM_FAIL; - save_errno = errno; + if (!isRedo || errno != ENOENT) + { + status = false; + save_errno = errno; + } } #ifndef LET_OS_MANAGE_FILESIZE /* Get the additional segments, if any */ - if (status == SM_SUCCESS) + if (status) { char *segpath = (char *) palloc(strlen(path) + 12); BlockNumber segno; @@ -198,7 +176,7 @@ mdunlink(RelFileNode rnode) /* ENOENT is expected after the last segment... */ if (errno != ENOENT) { - status = SM_FAIL; + status = false; save_errno = errno; } break; @@ -222,16 +200,15 @@ mdunlink(RelFileNode rnode) * relation (ie, blocknum is the current EOF), and so in case of * failure we clean up by truncating. * - * This routine returns SM_FAIL or SM_SUCCESS, with errno set as - * appropriate. 
+ * This routine returns true or false, with errno set as appropriate. * * Note: this routine used to call mdnblocks() to get the block position * to write at, but that's pretty silly since the caller needs to know where * the block will be written, and accordingly must have done mdnblocks() * already. Might as well pass in the position and save a seek. */ -int -mdextend(Relation reln, BlockNumber blocknum, char *buffer) +bool +mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer) { long seekpos; int nbytes; @@ -256,7 +233,7 @@ mdextend(Relation reln, BlockNumber blocknum, char *buffer) * to make room for the new page's buffer. */ if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; + return false; if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { @@ -269,29 +246,32 @@ mdextend(Relation reln, BlockNumber blocknum, char *buffer) FileSeek(v->mdfd_vfd, seekpos, SEEK_SET); errno = save_errno; } - return SM_FAIL; + return false; } #ifndef LET_OS_MANAGE_FILESIZE Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); #endif - return SM_SUCCESS; + return true; } /* - * mdopen() -- Open the specified relation. + * mdopen() -- Open the specified relation. ereport's on failure. + * + * Note we only open the first segment, when there are multiple segments. 
*/ -int -mdopen(Relation reln) +static MdfdVec * +mdopen(SMgrRelation reln) { char *path; - int fd; - int vfd; + File fd; - Assert(reln->rd_fd < 0); + /* No work if already open */ + if (reln->md_fd) + return reln->md_fd; - path = relpath(reln->rd_node); + path = relpath(reln->smgr_rnode); fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600); @@ -309,57 +289,45 @@ mdopen(Relation reln) if (fd < 0) { pfree(path); - return -1; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); } } pfree(path); - vfd = _fdvec_alloc(); - if (vfd < 0) - return -1; + reln->md_fd = _fdvec_alloc(); - Md_fdvec[vfd].mdfd_vfd = fd; - Md_fdvec[vfd].mdfd_flags = (uint16) 0; + reln->md_fd->mdfd_vfd = fd; #ifndef LET_OS_MANAGE_FILESIZE - Md_fdvec[vfd].mdfd_chain = NULL; + reln->md_fd->mdfd_chain = NULL; Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); #endif - return vfd; + return reln->md_fd; } /* * mdclose() -- Close the specified relation, if it isn't closed already. * - * AND FREE fd vector! It may be re-used for other relations! - * reln should be flushed from cache after closing !.. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. + * Returns true or false with errno set as appropriate. 
*/ -int -mdclose(Relation reln) +bool +mdclose(SMgrRelation reln) { - int fd; - - fd = RelationGetFile(reln); - if (fd < 0) - return SM_SUCCESS; /* already closed, so no work */ - - mdclose_fd(fd); - - reln->rd_fd = -1; + MdfdVec *v = reln->md_fd; - return SM_SUCCESS; -} + /* No work if already closed */ + if (v == NULL) + return true; -static void -mdclose_fd(int fd) -{ - MdfdVec *v; + reln->md_fd = NULL; /* prevent dangling pointer after error */ #ifndef LET_OS_MANAGE_FILESIZE - for (v = &Md_fdvec[fd]; v != NULL;) + while (v != NULL) { MdfdVec *ov = v; @@ -368,32 +336,24 @@ mdclose_fd(int fd) FileClose(v->mdfd_vfd); /* Now free vector */ v = v->mdfd_chain; - if (ov != &Md_fdvec[fd]) - pfree(ov); + pfree(ov); } - - Md_fdvec[fd].mdfd_chain = NULL; #else - v = &Md_fdvec[fd]; - if (v != NULL) - { - if (v->mdfd_vfd >= 0) - FileClose(v->mdfd_vfd); - } + if (v->mdfd_vfd >= 0) + FileClose(v->mdfd_vfd); + pfree(v); #endif - _fdvec_free(fd); + return true; } /* * mdread() -- Read the specified block from a relation. - * - * Returns SM_SUCCESS or SM_FAIL. */ -int -mdread(Relation reln, BlockNumber blocknum, char *buffer) +bool +mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - int status; + bool status; long seekpos; int nbytes; MdfdVec *v; @@ -408,9 +368,9 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer) #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; + return false; - status = SM_SUCCESS; + status = true; if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) { /* @@ -425,7 +385,7 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer) (nbytes > 0 && mdnblocks(reln) == blocknum)) MemSet(buffer, 0, BLCKSZ); else - status = SM_FAIL; + status = false; } return status; @@ -433,11 +393,9 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer) /* * mdwrite() -- Write the supplied block at the appropriate location. - * - * Returns SM_SUCCESS or SM_FAIL. 
*/ -int -mdwrite(Relation reln, BlockNumber blocknum, char *buffer) +bool +mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer) { long seekpos; MdfdVec *v; @@ -452,69 +410,12 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer) #endif if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) - return SM_FAIL; + return false; if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ) - return SM_FAIL; + return false; - return SM_SUCCESS; -} - -/* - * mdblindwrt() -- Write a block to disk blind. - * - * We have to be able to do this using only the rnode of the relation - * in which the block belongs. Otherwise this is much like mdwrite(). - */ -int -mdblindwrt(RelFileNode rnode, - BlockNumber blkno, - char *buffer) -{ - int status; - long seekpos; - int fd; - - fd = _mdfd_blind_getseg(rnode, blkno); - - if (fd < 0) - return SM_FAIL; - -#ifndef LET_OS_MANAGE_FILESIZE - seekpos = (long) (BLCKSZ * (blkno % ((BlockNumber) RELSEG_SIZE))); - Assert(seekpos < BLCKSZ * RELSEG_SIZE); -#else - seekpos = (long) (BLCKSZ * (blkno)); -#endif - - errno = 0; - if (lseek(fd, seekpos, SEEK_SET) != seekpos) - { - elog(LOG, "lseek(%ld) failed: %m", seekpos); - close(fd); - return SM_FAIL; - } - - status = SM_SUCCESS; - - /* write the block */ - errno = 0; - if (write(fd, buffer, BLCKSZ) != BLCKSZ) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - elog(LOG, "write() failed: %m"); - status = SM_FAIL; - } - - if (close(fd) < 0) - { - elog(LOG, "close() failed: %m"); - status = SM_FAIL; - } - - return status; + return true; } /* @@ -525,24 +426,16 @@ mdblindwrt(RelFileNode rnode, * called, then only segments up to the last one actually touched * are present in the chain... * - * Returns # of blocks, ereport's on error. + * Returns # of blocks, or InvalidBlockNumber on error. 
*/ BlockNumber -mdnblocks(Relation reln) +mdnblocks(SMgrRelation reln) { - int fd; - MdfdVec *v; + MdfdVec *v = mdopen(reln); #ifndef LET_OS_MANAGE_FILESIZE BlockNumber nblocks; - BlockNumber segno; -#endif - - fd = _mdfd_getrelnfd(reln); - v = &Md_fdvec[fd]; - -#ifndef LET_OS_MANAGE_FILESIZE - segno = 0; + BlockNumber segno = 0; /* * Skip through any segments that aren't the last one, to avoid @@ -583,8 +476,7 @@ mdnblocks(Relation reln) */ v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); if (v->mdfd_chain == NULL) - elog(ERROR, "could not count blocks of \"%s\": %m", - RelationGetRelationName(reln)); + return InvalidBlockNumber; /* failed? */ } v = v->mdfd_chain; @@ -600,9 +492,8 @@ mdnblocks(Relation reln) * Returns # of blocks or InvalidBlockNumber on error. */ BlockNumber -mdtruncate(Relation reln, BlockNumber nblocks) +mdtruncate(SMgrRelation reln, BlockNumber nblocks) { - int fd; MdfdVec *v; BlockNumber curnblk; @@ -615,13 +506,14 @@ mdtruncate(Relation reln, BlockNumber nblocks) * that truncate/delete loop will get them all! */ curnblk = mdnblocks(reln); + if (curnblk == InvalidBlockNumber) + return InvalidBlockNumber; /* mdnblocks failed */ if (nblocks > curnblk) return InvalidBlockNumber; /* bogus request */ if (nblocks == curnblk) return nblocks; /* no work */ - fd = _mdfd_getrelnfd(reln); - v = &Md_fdvec[fd]; + v = mdopen(reln); #ifndef LET_OS_MANAGE_FILESIZE priorblocks = 0; @@ -641,7 +533,7 @@ mdtruncate(Relation reln, BlockNumber nblocks) FileTruncate(v->mdfd_vfd, 0); FileUnlink(v->mdfd_vfd); v = v->mdfd_chain; - Assert(ov != &Md_fdvec[fd]); /* we never drop the 1st + Assert(ov != reln->md_fd); /* we never drop the 1st * segment */ pfree(ov); } @@ -682,115 +574,65 @@ mdtruncate(Relation reln, BlockNumber nblocks) /* * mdcommit() -- Commit a transaction. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. */ -int +bool mdcommit(void) { /* * We don't actually have to do anything here... 
*/ - return SM_SUCCESS; + return true; } /* * mdabort() -- Abort a transaction. */ -int +bool mdabort(void) { /* * We don't actually have to do anything here... */ - return SM_SUCCESS; + return true; } /* * mdsync() -- Sync previous writes to stable storage. */ -int +bool mdsync(void) { sync(); if (IsUnderPostmaster) sleep(2); sync(); - return SM_SUCCESS; + return true; } /* - * _fdvec_alloc() -- Grab a free (or new) md file descriptor vector. + * _fdvec_alloc() -- Make a MdfdVec object. */ -static int +static MdfdVec * _fdvec_alloc(void) { - MdfdVec *nvec; - int fdvec, - i; - - if (Md_Free >= 0) /* get from free list */ - { - fdvec = Md_Free; - Md_Free = Md_fdvec[fdvec].mdfd_nextFree; - Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE); - Md_fdvec[fdvec].mdfd_flags = 0; - if (fdvec >= CurFd) - { - Assert(fdvec == CurFd); - CurFd++; - } - return fdvec; - } - - /* Must allocate more room */ - - if (Nfds != CurFd) - elog(FATAL, "_fdvec_alloc error"); - - Nfds *= 2; - - nvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec)); - MemSet(nvec, 0, Nfds * sizeof(MdfdVec)); - memcpy(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec)); - pfree(Md_fdvec); + MdfdVec *v; - Md_fdvec = nvec; - - /* Set new free list */ - for (i = CurFd; i < Nfds; i++) - { - Md_fdvec[i].mdfd_nextFree = i + 1; - Md_fdvec[i].mdfd_flags = MDFD_FREE; - } - Md_fdvec[Nfds - 1].mdfd_nextFree = -1; - Md_Free = CurFd + 1; - - fdvec = CurFd; - CurFd++; - Md_fdvec[fdvec].mdfd_flags = 0; + v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec)); + v->mdfd_vfd = -1; +#ifndef LET_OS_MANAGE_FILESIZE + v->mdfd_chain = NULL; +#endif - return fdvec; + return v; } +#ifndef LET_OS_MANAGE_FILESIZE /* - * _fdvec_free() -- free md file descriptor vector. - * + * Open the specified segment of the relation, + * and make a MdfdVec object for it. Returns NULL on failure. 
*/ -static -void -_fdvec_free(int fdvec) -{ - - Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE); - Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE); - Md_fdvec[fdvec].mdfd_nextFree = Md_Free; - Md_fdvec[fdvec].mdfd_flags = MDFD_FREE; - Md_Free = fdvec; -} - static MdfdVec * -_mdfd_openseg(Relation reln, BlockNumber segno, int oflags) +_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) { MdfdVec *v; int fd; @@ -798,7 +640,7 @@ _mdfd_openseg(Relation reln, BlockNumber segno, int oflags) *fullpath; /* be sure we have enough space for the '.segno', if any */ - path = relpath(reln->rd_node); + path = relpath(reln->smgr_rnode); if (segno > 0) { @@ -818,61 +660,32 @@ _mdfd_openseg(Relation reln, BlockNumber segno, int oflags) return NULL; /* allocate an mdfdvec entry for it */ - v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec)); + v = _fdvec_alloc(); /* fill the entry */ v->mdfd_vfd = fd; - v->mdfd_flags = (uint16) 0; -#ifndef LET_OS_MANAGE_FILESIZE v->mdfd_chain = NULL; Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE)); -#endif /* all done */ return v; } - -/* - * _mdfd_getrelnfd() -- Get the (virtual) fd for the relation, - * opening it if it's not already open - * - */ -static int -_mdfd_getrelnfd(Relation reln) -{ - int fd; - - fd = RelationGetFile(reln); - if (fd < 0) - { - if ((fd = mdopen(reln)) < 0) - elog(ERROR, "could not open relation \"%s\": %m", - RelationGetRelationName(reln)); - reln->rd_fd = fd; - } - return fd; -} +#endif /* * _mdfd_getseg() -- Find the segment of the relation holding the - * specified block - * + * specified block. ereport's on failure. 
*/ static MdfdVec * -_mdfd_getseg(Relation reln, BlockNumber blkno) +_mdfd_getseg(SMgrRelation reln, BlockNumber blkno) { - MdfdVec *v; - int fd; + MdfdVec *v = mdopen(reln); #ifndef LET_OS_MANAGE_FILESIZE BlockNumber segno; BlockNumber i; -#endif - - fd = _mdfd_getrelnfd(reln); -#ifndef LET_OS_MANAGE_FILESIZE - for (v = &Md_fdvec[fd], segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1; + for (segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1; segno > 0; i++, segno--) { @@ -892,65 +705,24 @@ _mdfd_getseg(Relation reln, BlockNumber blkno) v->mdfd_chain = _mdfd_openseg(reln, i, (segno == 1) ? O_CREAT : 0); if (v->mdfd_chain == NULL) - elog(ERROR, "could not open segment %u of relation \"%s\" (target block %u): %m", - i, RelationGetRelationName(reln), blkno); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open segment %u of relation %u/%u (target block %u): %m", + i, + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode, + blkno))); } v = v->mdfd_chain; } -#else - v = &Md_fdvec[fd]; #endif return v; } /* - * Find the segment of the relation holding the specified block. - * - * This performs the same work as _mdfd_getseg() except that we must work - * "blind" with no Relation struct. We assume that we are not likely to - * touch the same relation again soon, so we do not create an FD entry for - * the relation --- we just open a kernel file descriptor which will be - * used and promptly closed. We also assume that the target block already - * exists, ie, we need not extend the relation. - * - * The return value is the kernel descriptor, or -1 on failure. 
+ * Get number of blocks present in a single disk file */ -static int -_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno) -{ - char *path; - int fd; - -#ifndef LET_OS_MANAGE_FILESIZE - BlockNumber segno; -#endif - - path = relpath(rnode); - -#ifndef LET_OS_MANAGE_FILESIZE - /* append the '.segno', if needed */ - segno = blkno / ((BlockNumber) RELSEG_SIZE); - if (segno > 0) - { - char *segpath = (char *) palloc(strlen(path) + 12); - - sprintf(segpath, "%s.%u", path, segno); - pfree(path); - path = segpath; - } -#endif - - /* call fd.c to allow other FDs to be closed if needed */ - fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600); - if (fd < 0) - elog(LOG, "could not open \"%s\": %m", path); - - pfree(path); - - return fd; -} - static BlockNumber _mdnblocks(File file, Size blcksz) { diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c deleted file mode 100644 index 5043fd66a5..0000000000 --- a/src/backend/storage/smgr/mm.c +++ /dev/null @@ -1,552 +0,0 @@ -/*------------------------------------------------------------------------- - * - * mm.c - * main memory storage manager - * - * This code manages relations that reside in (presumably stable) - * main memory. - * - * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * - * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/mm.c,v 1.36 2004/01/07 18:56:27 neilc Exp $ - * - *------------------------------------------------------------------------- - */ -#include "postgres.h" - -#include - -#include "storage/smgr.h" -#include "miscadmin.h" - - -#ifdef STABLE_MEMORY_STORAGE - -/* - * MMCacheTag -- Unique triplet for blocks stored by the main memory - * storage manager. - */ - -typedef struct MMCacheTag -{ - Oid mmct_dbid; - Oid mmct_relid; - BlockNumber mmct_blkno; -} MMCacheTag; - -/* - * Shared-memory hash table for main memory relations contains - * entries of this form. 
- */ - -typedef struct MMHashEntry -{ - MMCacheTag mmhe_tag; - int mmhe_bufno; -} MMHashEntry; - -/* - * MMRelTag -- Unique identifier for each relation that is stored in the - * main-memory storage manager. - */ - -typedef struct MMRelTag -{ - Oid mmrt_dbid; - Oid mmrt_relid; -} MMRelTag; - -/* - * Shared-memory hash table for # blocks in main memory relations contains - * entries of this form. - */ - -typedef struct MMRelHashEntry -{ - MMRelTag mmrhe_tag; - int mmrhe_nblocks; -} MMRelHashEntry; - -#define MMNBUFFERS 10 -#define MMNRELATIONS 2 - -static int *MMCurTop; -static int *MMCurRelno; -static MMCacheTag *MMBlockTags; -static char *MMBlockCache; -static HTAB *MMCacheHT; -static HTAB *MMRelCacheHT; - -int -mminit(void) -{ - char *mmcacheblk; - int mmsize = 0; - bool found; - HASHCTL info; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS); - mmsize += MAXALIGN(sizeof(*MMCurTop)); - mmsize += MAXALIGN(sizeof(*MMCurRelno)); - mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag))); - mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found); - - if (mmcacheblk == NULL) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - info.keysize = sizeof(MMCacheTag); - info.entrysize = sizeof(MMHashEntry); - info.hash = tag_hash; - - MMCacheHT = ShmemInitHash("Main memory store HT", - MMNBUFFERS, MMNBUFFERS, - &info, (HASH_ELEM | HASH_FUNCTION)); - - if (MMCacheHT == NULL) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - info.keysize = sizeof(MMRelTag); - info.entrysize = sizeof(MMRelHashEntry); - info.hash = tag_hash; - - MMRelCacheHT = ShmemInitHash("Main memory rel HT", - MMNRELATIONS, MMNRELATIONS, - &info, (HASH_ELEM | HASH_FUNCTION)); - - if (MMRelCacheHT == NULL) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - if (IsUnderPostmaster) /* was IsPostmaster bjm */ - { - MemSet(mmcacheblk, 0, mmsize); - LWLockRelease(MMCacheLock); - return SM_SUCCESS; - } - - LWLockRelease(MMCacheLock); - 
- MMCurTop = (int *) mmcacheblk; - mmcacheblk += sizeof(int); - MMCurRelno = (int *) mmcacheblk; - mmcacheblk += sizeof(int); - MMBlockTags = (MMCacheTag *) mmcacheblk; - mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag)); - MMBlockCache = mmcacheblk; - - return SM_SUCCESS; -} - -int -mmshutdown(void) -{ - return SM_SUCCESS; -} - -int -mmcreate(Relation reln) -{ - MMRelHashEntry *entry; - bool found; - MMRelTag tag; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - if (*MMCurRelno == MMNRELATIONS) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - (*MMCurRelno)++; - - tag.mmrt_relid = RelationGetRelid(reln); - if (reln->rd_rel->relisshared) - tag.mmrt_dbid = (Oid) 0; - else - tag.mmrt_dbid = MyDatabaseId; - - entry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &tag, - HASH_ENTER, &found); - - if (entry == NULL) - { - LWLockRelease(MMCacheLock); - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } - - if (found) - { - /* already exists */ - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - - entry->mmrhe_nblocks = 0; - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmunlink() -- Unlink a relation. 
- * - * XXX currently broken: needs to accept RelFileNode, not Relation - */ -int -mmunlink(RelFileNode rnode) -{ - int i; - MMHashEntry *entry; - MMRelHashEntry *rentry; - MMRelTag rtag; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - for (i = 0; i < MMNBUFFERS; i++) - { - if (MMBlockTags[i].mmct_dbid == rnode.tblNode - && MMBlockTags[i].mmct_relid == rnode.relNode) - { - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &MMBlockTags[i], - HASH_REMOVE, NULL); - if (entry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "cache hash table corrupted"); - } - MMBlockTags[i].mmct_dbid = (Oid) 0; - MMBlockTags[i].mmct_relid = (Oid) 0; - MMBlockTags[i].mmct_blkno = (BlockNumber) 0; - } - } - rtag.mmrt_dbid = rnode.tblNode; - rtag.mmrt_relid = rnode.relNode; - - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &rtag, - HASH_REMOVE, NULL); - - if (rentry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "rel cache hash table corrupted"); - } - - (*MMCurRelno)--; - - LWLockRelease(MMCacheLock); - return 1; -} - -/* - * mmextend() -- Add a block to the specified relation. - * - * This routine returns SM_FAIL or SM_SUCCESS, with errno set as - * appropriate. 
- */ -int -mmextend(Relation reln, BlockNumber blocknum, char *buffer) -{ - MMRelHashEntry *rentry; - MMHashEntry *entry; - int i; - Oid reldbid; - int offset; - bool found; - MMRelTag rtag; - MMCacheTag tag; - - if (reln->rd_rel->relisshared) - reldbid = (Oid) 0; - else - reldbid = MyDatabaseId; - - tag.mmct_dbid = rtag.mmrt_dbid = reldbid; - tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln); - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - if (*MMCurTop == MMNBUFFERS) - { - for (i = 0; i < MMNBUFFERS; i++) - { - if (MMBlockTags[i].mmct_dbid == 0 && - MMBlockTags[i].mmct_relid == 0) - break; - } - if (i == MMNBUFFERS) - { - LWLockRelease(MMCacheLock); - return SM_FAIL; - } - } - else - { - i = *MMCurTop; - (*MMCurTop)++; - } - - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &rtag, - HASH_FIND, NULL); - if (rentry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "rel cache hash table corrupted"); - } - - tag.mmct_blkno = rentry->mmrhe_nblocks; - - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &tag, - HASH_ENTER, &found); - if (entry == NULL || found) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "cache hash table corrupted"); - } - - entry->mmhe_bufno = i; - MMBlockTags[i].mmct_dbid = reldbid; - MMBlockTags[i].mmct_relid = RelationGetRelid(reln); - MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks; - - /* page numbers are zero-based, so we increment this at the end */ - (rentry->mmrhe_nblocks)++; - - /* write the extended page */ - offset = (i * BLCKSZ); - memmove(&(MMBlockCache[offset]), buffer, BLCKSZ); - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmopen() -- Open the specified relation. - */ -int -mmopen(Relation reln) -{ - /* automatically successful */ - return 0; -} - -/* - * mmclose() -- Close the specified relation. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. 
- */ -int -mmclose(Relation reln) -{ - /* automatically successful */ - return SM_SUCCESS; -} - -/* - * mmread() -- Read the specified block from a relation. - * - * Returns SM_SUCCESS or SM_FAIL. - */ -int -mmread(Relation reln, BlockNumber blocknum, char *buffer) -{ - MMHashEntry *entry; - int offset; - MMCacheTag tag; - - if (reln->rd_rel->relisshared) - tag.mmct_dbid = (Oid) 0; - else - tag.mmct_dbid = MyDatabaseId; - - tag.mmct_relid = RelationGetRelid(reln); - tag.mmct_blkno = blocknum; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &tag, - HASH_FIND, NULL); - - if (entry == NULL) - { - /* reading nonexistent pages is defined to fill them with zeroes */ - LWLockRelease(MMCacheLock); - MemSet(buffer, 0, BLCKSZ); - return SM_SUCCESS; - } - - offset = (entry->mmhe_bufno * BLCKSZ); - memmove(buffer, &MMBlockCache[offset], BLCKSZ); - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmwrite() -- Write the supplied block at the appropriate location. - * - * Returns SM_SUCCESS or SM_FAIL. - */ -int -mmwrite(Relation reln, BlockNumber blocknum, char *buffer) -{ - MMHashEntry *entry; - int offset; - MMCacheTag tag; - - if (reln->rd_rel->relisshared) - tag.mmct_dbid = (Oid) 0; - else - tag.mmct_dbid = MyDatabaseId; - - tag.mmct_relid = RelationGetRelid(reln); - tag.mmct_blkno = blocknum; - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - entry = (MMHashEntry *) hash_search(MMCacheHT, - (void *) &tag, - HASH_FIND, NULL); - - if (entry == NULL) - { - LWLockRelease(MMCacheLock); - elog(FATAL, "cache hash table missing requested page"); - } - - offset = (entry->mmhe_bufno * BLCKSZ); - memmove(&MMBlockCache[offset], buffer, BLCKSZ); - - LWLockRelease(MMCacheLock); - - return SM_SUCCESS; -} - -/* - * mmblindwrt() -- Write a block to stable storage blind. - * - * We have to be able to do this using only the rnode of the relation - * in which the block belongs. Otherwise this is much like mmwrite(). 
- */ -int -mmblindwrt(RelFileNode rnode, - BlockNumber blkno, - char *buffer) -{ - return SM_FAIL; -} - -/* - * mmnblocks() -- Get the number of blocks stored in a relation. - * - * Returns # of blocks or InvalidBlockNumber on error. - */ -BlockNumber -mmnblocks(Relation reln) -{ - MMRelTag rtag; - MMRelHashEntry *rentry; - BlockNumber nblocks; - - if (reln->rd_rel->relisshared) - rtag.mmrt_dbid = (Oid) 0; - else - rtag.mmrt_dbid = MyDatabaseId; - - rtag.mmrt_relid = RelationGetRelid(reln); - - LWLockAcquire(MMCacheLock, LW_EXCLUSIVE); - - rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT, - (void *) &rtag, - HASH_FIND, NULL); - - if (rentry) - nblocks = rentry->mmrhe_nblocks; - else - nblocks = InvalidBlockNumber; - - LWLockRelease(MMCacheLock); - - return nblocks; -} - -/* - * mmcommit() -- Commit a transaction. - * - * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate. - */ -int -mmcommit(void) -{ - return SM_SUCCESS; -} - -/* - * mmabort() -- Abort a transaction. - */ - -int -mmabort(void) -{ - return SM_SUCCESS; -} - -/* - * MMShmemSize() -- Declare amount of shared memory we require. - * - * The shared memory initialization code creates a block of shared - * memory exactly big enough to hold all the structures it needs to. - * This routine declares how much space the main memory storage - * manager will use. 
- */ -int -MMShmemSize(void) -{ - int size = 0; - - /* - * first compute space occupied by the (dbid,relid,blkno) hash table - */ - size += hash_estimate_size(MMNBUFFERS, sizeof(MMHashEntry)); - - /* - * now do the same for the rel hash table - */ - size += hash_estimate_size(MMNRELATIONS, sizeof(MMRelHashEntry)); - - /* - * finally, add in the memory block we use directly - */ - - size += MAXALIGN(BLCKSZ * MMNBUFFERS); - size += MAXALIGN(sizeof(*MMCurTop)); - size += MAXALIGN(sizeof(*MMCurRelno)); - size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag)); - - return size; -} - -#endif /* STABLE_MEMORY_STORAGE */ diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 0e33af5f28..09ee4144c5 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.68 2004/01/06 18:07:31 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.69 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,72 +21,52 @@ #include "storage/freespace.h" #include "storage/ipc.h" #include "storage/smgr.h" +#include "utils/hsearch.h" #include "utils/memutils.h" -static void smgrshutdown(int code, Datum arg); - +/* + * This struct of function pointers defines the API between smgr.c and + * any individual storage manager module. Note that smgr subfunctions are + * generally expected to return TRUE on success, FALSE on error. (For + * nblocks and truncate we instead say that returning InvalidBlockNumber + * indicates an error.) 
+ */ typedef struct f_smgr { - int (*smgr_init) (void); /* may be NULL */ - int (*smgr_shutdown) (void); /* may be NULL */ - int (*smgr_create) (Relation reln); - int (*smgr_unlink) (RelFileNode rnode); - int (*smgr_extend) (Relation reln, BlockNumber blocknum, + bool (*smgr_init) (void); /* may be NULL */ + bool (*smgr_shutdown) (void); /* may be NULL */ + bool (*smgr_close) (SMgrRelation reln); + bool (*smgr_create) (SMgrRelation reln, bool isRedo); + bool (*smgr_unlink) (RelFileNode rnode, bool isRedo); + bool (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum, char *buffer); - int (*smgr_open) (Relation reln); - int (*smgr_close) (Relation reln); - int (*smgr_read) (Relation reln, BlockNumber blocknum, + bool (*smgr_read) (SMgrRelation reln, BlockNumber blocknum, char *buffer); - int (*smgr_write) (Relation reln, BlockNumber blocknum, + bool (*smgr_write) (SMgrRelation reln, BlockNumber blocknum, char *buffer); - int (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno, - char *buffer); - BlockNumber (*smgr_nblocks) (Relation reln); - BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks); - int (*smgr_commit) (void); /* may be NULL */ - int (*smgr_abort) (void); /* may be NULL */ - int (*smgr_sync) (void); + BlockNumber (*smgr_nblocks) (SMgrRelation reln); + BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks); + bool (*smgr_commit) (void); /* may be NULL */ + bool (*smgr_abort) (void); /* may be NULL */ + bool (*smgr_sync) (void); /* may be NULL */ } f_smgr; -/* - * The weird placement of commas in this init block is to keep the compiler - * happy, regardless of what storage managers we have (or don't have). 
- */ - -static f_smgr smgrsw[] = { +static const f_smgr smgrsw[] = { /* magnetic disk */ - {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, - mdread, mdwrite, mdblindwrt, - mdnblocks, mdtruncate, mdcommit, mdabort, mdsync - }, - -#ifdef STABLE_MEMORY_STORAGE - /* main memory */ - {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose, - mmread, mmwrite, mmblindwrt, - mmnblocks, NULL, mmcommit, mmabort, NULL}, -#endif + {mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend, + mdread, mdwrite, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync + } }; -/* - * This array records which storage managers are write-once, and which - * support overwrite. A 'true' entry means that the storage manager is - * write-once. In the best of all possible worlds, there would be no - * write-once storage managers. - */ +static const int NSmgr = lengthof(smgrsw); -#ifdef NOT_USED -static bool smgrwo[] = { - false, /* magnetic disk */ -#ifdef STABLE_MEMORY_STORAGE - false, /* main memory */ -#endif -}; -#endif -static int NSmgr = lengthof(smgrsw); +/* + * Each backend has a hashtable that stores all extant SMgrRelation objects. + */ +static HTAB *SMgrRelationHash = NULL; /* * We keep a list of all relations (represented as RelFileNode values) @@ -105,7 +85,7 @@ static int NSmgr = lengthof(smgrsw); typedef struct PendingRelDelete { RelFileNode relnode; /* relation that may need to be deleted */ - int16 which; /* which storage manager? */ + int which; /* which storage manager? */ bool isTemp; /* is it a temporary relation? 
*/ bool atCommit; /* T=delete at commit; F=delete at abort */ struct PendingRelDelete *next; /* linked-list link */ @@ -114,12 +94,20 @@ typedef struct PendingRelDelete static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ +/* local function prototypes */ +static void smgrshutdown(int code, Datum arg); +static void smgr_internal_unlink(RelFileNode rnode, int which, + bool isTemp, bool isRedo); + + /* * smgrinit(), smgrshutdown() -- Initialize or shut down all storage * managers. * + * Note: in the normal multiprocess scenario with a postmaster, these are + * called at postmaster start and stop, not per-backend. */ -int +void smgrinit(void) { int i; @@ -128,7 +116,7 @@ smgrinit(void) { if (smgrsw[i].smgr_init) { - if ((*(smgrsw[i].smgr_init)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_init)) ()) elog(FATAL, "smgr initialization failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); @@ -137,8 +125,6 @@ smgrinit(void) /* register the shutdown proc */ on_proc_exit(smgrshutdown, 0); - - return SM_SUCCESS; } static void @@ -150,7 +136,7 @@ smgrshutdown(int code, Datum arg) { if (smgrsw[i].smgr_shutdown) { - if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_shutdown)) ()) elog(FATAL, "smgr shutdown failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); @@ -158,58 +144,178 @@ smgrshutdown(int code, Datum arg) } } +/* + * smgropen() -- Return an SMgrRelation object, creating it if need be. + * + * This does not attempt to actually open the object. 
+ */ +SMgrRelation +smgropen(RelFileNode rnode) +{ + SMgrRelation reln; + bool found; + + if (SMgrRelationHash == NULL) + { + /* First time through: initialize the hash table */ + HASHCTL ctl; + + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(RelFileNode); + ctl.entrysize = sizeof(SMgrRelationData); + ctl.hash = tag_hash; + SMgrRelationHash = hash_create("smgr relation table", 400, + &ctl, HASH_ELEM | HASH_FUNCTION); + } + + /* Look up or create an entry */ + reln = (SMgrRelation) hash_search(SMgrRelationHash, + (void *) &rnode, + HASH_ENTER, &found); + if (reln == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Initialize it if not present before */ + if (!found) + { + /* hash_search already filled in the lookup key */ + reln->smgr_which = 0; /* we only have md.c at present */ + reln->md_fd = NULL; /* mark it not open */ + } + + return reln; +} + +/* + * smgrclose() -- Close and delete an SMgrRelation object. + * + * It is the caller's responsibility not to leave any dangling references + * to the object. (Pointers should be cleared after successful return; + * on the off chance of failure, the SMgrRelation object will still exist.) + */ +void +smgrclose(SMgrRelation reln) +{ + if (! (*(smgrsw[reln->smgr_which].smgr_close)) (reln)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); + + if (hash_search(SMgrRelationHash, + (void *) &(reln->smgr_rnode), + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "SMgrRelation hashtable corrupted"); +} + +/* + * smgrcloseall() -- Close all existing SMgrRelation objects. + * + * It is the caller's responsibility not to leave any dangling references. 
+ */ +void +smgrcloseall(void) +{ + HASH_SEQ_STATUS status; + SMgrRelation reln; + + /* Nothing to do if hashtable not set up */ + if (SMgrRelationHash == NULL) + return; + + hash_seq_init(&status, SMgrRelationHash); + + while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) + { + smgrclose(reln); + } +} + +/* + * smgrclosenode() -- Close SMgrRelation object for given RelFileNode, + * if one exists. + * + * This has the same effects as smgrclose(smgropen(rnode)), but it avoids + * uselessly creating a hashtable entry only to drop it again when no + * such entry exists already. + * + * It is the caller's responsibility not to leave any dangling references. + */ +void +smgrclosenode(RelFileNode rnode) +{ + SMgrRelation reln; + + /* Nothing to do if hashtable not set up */ + if (SMgrRelationHash == NULL) + return; + + reln = (SMgrRelation) hash_search(SMgrRelationHash, + (void *) &rnode, + HASH_FIND, NULL); + if (reln != NULL) + smgrclose(reln); +} + /* * smgrcreate() -- Create a new relation. * - * This routine takes a reldesc, creates the relation on the appropriate - * device, and returns a file descriptor for it. + * Given an already-created (but presumably unused) SMgrRelation, + * cause the underlying disk file or other storage to be created. + * + * If isRedo is true, it is okay for the underlying file to exist + * already because we are in a WAL replay sequence. In this case + * we should make no PendingRelDelete entry; the WAL sequence will + * tell whether to drop the file. */ -int -smgrcreate(int16 which, Relation reln) +void +smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) { - int fd; PendingRelDelete *pending; - if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0) + if (! 
(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create relation \"%s\": %m", - RelationGetRelationName(reln)))); + errmsg("could not create relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); + + if (isRedo) + return; /* Add the relation to the list of stuff to delete at abort */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); - pending->relnode = reln->rd_node; - pending->which = which; - pending->isTemp = reln->rd_istemp; + pending->relnode = reln->smgr_rnode; + pending->which = reln->smgr_which; + pending->isTemp = isTemp; pending->atCommit = false; /* delete if abort */ pending->next = pendingDeletes; pendingDeletes = pending; - - return fd; } /* - * smgrunlink() -- Unlink a relation. + * smgrscheduleunlink() -- Schedule unlinking a relation at xact commit. + * + * The relation is marked to be removed from the store if we + * successfully commit the current transaction. * - * The relation is removed from the store. Actually, we just remember - * that we want to do this at transaction commit. + * This also implies smgrclose() on the SMgrRelation object. 
*/ -int -smgrunlink(int16 which, Relation reln) +void +smgrscheduleunlink(SMgrRelation reln, bool isTemp) { PendingRelDelete *pending; - /* Make sure the file is closed */ - if (reln->rd_fd >= 0) - smgrclose(which, reln); - /* Add the relation to the list of stuff to delete at commit */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); - pending->relnode = reln->rd_node; - pending->which = which; - pending->isTemp = reln->rd_istemp; + pending->relnode = reln->smgr_rnode; + pending->which = reln->smgr_which; + pending->isTemp = isTemp; pending->atCommit = true; /* delete if commit */ pending->next = pendingDeletes; pendingDeletes = pending; @@ -224,78 +330,83 @@ smgrunlink(int16 which, Relation reln) * immediately, but for now I'll keep the logic simple. */ - return SM_SUCCESS; + /* Now close the file and throw away the hashtable entry */ + smgrclose(reln); } /* - * smgrextend() -- Add a new block to a file. + * smgrdounlink() -- Immediately unlink a relation. * - * The semantics are basically the same as smgrwrite(): write at the - * specified position. However, we are expecting to extend the - * relation (ie, blocknum is the current EOF), and so in case of - * failure we clean up by truncating. + * The relation is removed from the store. This should not be used + * during transactional operations, since it can't be undone. * - * Returns SM_SUCCESS on success; aborts the current transaction on - * failure. + * If isRedo is true, it is okay for the underlying file to be gone + * already. (In practice isRedo will always be true.) + * + * This also implies smgrclose() on the SMgrRelation object. 
*/ -int -smgrextend(int16 which, Relation reln, BlockNumber blocknum, char *buffer) +void +smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo) { - int status; + RelFileNode rnode = reln->smgr_rnode; + int which = reln->smgr_which; - status = (*(smgrsw[which].smgr_extend)) (reln, blocknum, buffer); - - if (status == SM_FAIL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not extend relation \"%s\": %m", - RelationGetRelationName(reln)), - errhint("Check free disk space."))); + /* Close the file and throw away the hashtable entry */ + smgrclose(reln); - return status; + smgr_internal_unlink(rnode, which, isTemp, isRedo); } /* - * smgropen() -- Open a relation using a particular storage manager. - * - * Returns the fd for the open relation on success. - * - * On failure, returns -1 if failOK, else aborts the transaction. + * Shared subroutine that actually does the unlink ... */ -int -smgropen(int16 which, Relation reln, bool failOK) +static void +smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) { - int fd; - - if (reln->rd_rel->relkind == RELKIND_VIEW) - return -1; - if (reln->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) - return -1; - if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0) - if (!failOK) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not open file \"%s\": %m", - RelationGetRelationName(reln)))); - - return fd; + /* + * Get rid of any leftover buffers for the rel (shouldn't be any in the + * commit case, but there can be in the abort case). + */ + DropRelFileNodeBuffers(rnode, isTemp); + + /* + * Tell the free space map to forget this relation. It won't be accessed + * any more anyway, but we may as well recycle the map space quickly. + */ + FreeSpaceMapForgetRel(&rnode); + + /* + * And delete the physical files. + * + * Note: we treat deletion failure as a WARNING, not an error, + * because we've already decided to commit or abort the current xact. + */ + if (! 
(*(smgrsw[which].smgr_unlink)) (rnode, isRedo)) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not unlink relation %u/%u: %m", + rnode.tblNode, + rnode.relNode))); } /* - * smgrclose() -- Close a relation. + * smgrextend() -- Add a new block to a file. * - * Returns SM_SUCCESS on success, aborts on failure. + * The semantics are basically the same as smgrwrite(): write at the + * specified position. However, we are expecting to extend the + * relation (ie, blocknum is the current EOF), and so in case of + * failure we clean up by truncating. */ -int -smgrclose(int16 which, Relation reln) +void +smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - if ((*(smgrsw[which].smgr_close)) (reln) == SM_FAIL) + if (! (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not close relation \"%s\": %m", - RelationGetRelationName(reln)))); - - return SM_SUCCESS; + errmsg("could not extend relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode), + errhint("Check free disk space."))); } /* @@ -304,24 +415,18 @@ smgrclose(int16 which, Relation reln) * * This routine is called from the buffer manager in order to * instantiate pages in the shared buffer cache. All storage managers - * return pages in the format that POSTGRES expects. This routine - * dispatches the read. On success, it returns SM_SUCCESS. On failure, - * the current transaction is aborted. + * return pages in the format that POSTGRES expects. */ -int -smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer) +void +smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - int status; - - status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer); - - if (status == SM_FAIL) + if (! 
(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read block %d of relation \"%s\": %m", - blocknum, RelationGetRelationName(reln)))); - - return status; + errmsg("could not read block %u of relation %u/%u: %m", + blocknum, + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); } /* @@ -329,56 +434,17 @@ smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer) * * This is not a synchronous write -- the block is not necessarily * on disk at return, only dumped out to the kernel. - * - * The buffer is written out via the appropriate - * storage manager. This routine returns SM_SUCCESS or aborts - * the current transaction. */ -int -smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer) -{ - int status; - - status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer); - - if (status == SM_FAIL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write block %d of relation \"%s\": %m", - blocknum, RelationGetRelationName(reln)))); - - return status; -} - -/* - * smgrblindwrt() -- Write a page out blind. - * - * In some cases, we may find a page in the buffer cache that we - * can't make a reldesc for. This happens, for example, when we - * want to reuse a dirty page that was written by a transaction - * that has not yet committed, which created a new relation. In - * this case, the buffer manager will call smgrblindwrt() with - * the name and OID of the database and the relation to which the - * buffer belongs. Every storage manager must be able to write - * this page out to stable storage in this circumstance. - */ -int -smgrblindwrt(int16 which, - RelFileNode rnode, - BlockNumber blkno, - char *buffer) +void +smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer) { - int status; - - status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer); - - if (status == SM_FAIL) + if (! 
(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer)) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write block %d of %u/%u blind: %m", - blkno, rnode.tblNode, rnode.relNode))); - - return status; + errmsg("could not write block %u of relation %u/%u: %m", + blocknum, + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); } /* @@ -389,11 +455,11 @@ smgrblindwrt(int16 which, * transaction on failure. */ BlockNumber -smgrnblocks(int16 which, Relation reln) +smgrnblocks(SMgrRelation reln) { BlockNumber nblocks; - nblocks = (*(smgrsw[which].smgr_nblocks)) (reln); + nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln); /* * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would @@ -404,8 +470,9 @@ smgrnblocks(int16 which, Relation reln) if (nblocks == InvalidBlockNumber) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not count blocks of relation \"%s\": %m", - RelationGetRelationName(reln)))); + errmsg("could not count blocks of relation %u/%u: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode))); return nblocks; } @@ -418,27 +485,25 @@ smgrnblocks(int16 which, Relation reln) * transaction on failure. */ BlockNumber -smgrtruncate(int16 which, Relation reln, BlockNumber nblocks) +smgrtruncate(SMgrRelation reln, BlockNumber nblocks) { BlockNumber newblks; - newblks = nblocks; - if (smgrsw[which].smgr_truncate) - { - /* - * Tell the free space map to forget anything it may have stored - * for the about-to-be-deleted blocks. We want to be sure it - * won't return bogus block numbers later on. 
- */ - FreeSpaceMapTruncateRel(&reln->rd_node, nblocks); - - newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks); - if (newblks == InvalidBlockNumber) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not truncate relation \"%s\" to %u blocks: %m", - RelationGetRelationName(reln), nblocks))); - } + /* + * Tell the free space map to forget anything it may have stored + * for the about-to-be-deleted blocks. We want to be sure it + * won't return bogus block numbers later on. + */ + FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks); + + newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks); + if (newblks == InvalidBlockNumber) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not truncate relation %u/%u to %u blocks: %m", + reln->smgr_rnode.tblNode, + reln->smgr_rnode.relNode, + nblocks))); return newblks; } @@ -446,7 +511,7 @@ smgrtruncate(int16 which, Relation reln, BlockNumber nblocks) /* * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact. */ -int +void smgrDoPendingDeletes(bool isCommit) { while (pendingDeletes != NULL) @@ -455,39 +520,12 @@ smgrDoPendingDeletes(bool isCommit) pendingDeletes = pending->next; if (pending->atCommit == isCommit) - { - /* - * Get rid of any leftover buffers for the rel (shouldn't be - * any in the commit case, but there can be in the abort - * case). - */ - DropRelFileNodeBuffers(pending->relnode, pending->isTemp); - - /* - * Tell the free space map to forget this relation. It won't - * be accessed any more anyway, but we may as well recycle the - * map space quickly. - */ - FreeSpaceMapForgetRel(&pending->relnode); - - /* - * And delete the physical files. - * - * Note: we treat deletion failure as a WARNING, not an error, - * because we've already decided to commit or abort the - * current xact. 
- */ - if ((*(smgrsw[pending->which].smgr_unlink)) (pending->relnode) == SM_FAIL) - ereport(WARNING, - (errcode_for_file_access(), - errmsg("could not unlink %u/%u: %m", - pending->relnode.tblNode, - pending->relnode.relNode))); - } + smgr_internal_unlink(pending->relnode, + pending->which, + pending->isTemp, + false); pfree(pending); } - - return SM_SUCCESS; } /* @@ -496,7 +534,7 @@ smgrDoPendingDeletes(bool isCommit) * * This is called before we actually commit. */ -int +void smgrcommit(void) { int i; @@ -505,20 +543,18 @@ smgrcommit(void) { if (smgrsw[i].smgr_commit) { - if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_commit)) ()) elog(FATAL, "transaction commit failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } - - return SM_SUCCESS; } /* * smgrabort() -- Abort changes made during the current transaction. */ -int +void smgrabort(void) { int i; @@ -527,20 +563,18 @@ smgrabort(void) { if (smgrsw[i].smgr_abort) { - if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL) + if (! (*(smgrsw[i].smgr_abort)) ()) elog(FATAL, "transaction abort failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } - - return SM_SUCCESS; } /* * smgrsync() -- Sync files to disk at checkpoint time. */ -int +void smgrsync(void) { int i; @@ -549,26 +583,14 @@ smgrsync(void) { if (smgrsw[i].smgr_sync) { - if ((*(smgrsw[i].smgr_sync)) () == SM_FAIL) + if (! 
(*(smgrsw[i].smgr_sync)) ()) elog(PANIC, "storage sync failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } - - return SM_SUCCESS; } -#ifdef NOT_USED -bool -smgriswo(int16 smgrno) -{ - if (smgrno < 0 || smgrno >= NSmgr) - elog(ERROR, "invalid storage manager id: %d", smgrno); - - return smgrwo[smgrno]; -} -#endif void smgr_redo(XLogRecPtr lsn, XLogRecord *record) diff --git a/src/backend/storage/smgr/smgrtype.c b/src/backend/storage/smgr/smgrtype.c index 10e0845241..60cc305bd1 100644 --- a/src/backend/storage/smgr/smgrtype.c +++ b/src/backend/storage/smgr/smgrtype.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.22 2003/11/29 19:51:57 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.23 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,26 +16,21 @@ #include "storage/smgr.h" + typedef struct smgrid { - char *smgr_name; + const char *smgr_name; } smgrid; /* * StorageManager[] -- List of defined storage managers. - * - * The weird comma placement is to keep compilers happy no matter - * which of these is (or is not) defined. 
*/ - -static smgrid StorageManager[] = { - {"magnetic disk"}, -#ifdef STABLE_MEMORY_STORAGE - {"main memory"} -#endif +static const smgrid StorageManager[] = { + {"magnetic disk"} }; -static int NStorageManagers = lengthof(StorageManager); +static const int NStorageManagers = lengthof(StorageManager); + Datum smgrin(PG_FUNCTION_ARGS) diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 72c9310110..8a23fcc70e 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -10,7 +10,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.209 2003/11/29 19:51:57 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.210 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,6 +48,7 @@ #include "parser/parse_type.h" #include "rewrite/rewriteDefine.h" #include "rewrite/rewriteRemove.h" +#include "storage/fd.h" #include "tcop/pquery.h" #include "tcop/utility.h" #include "utils/acl.h" diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 90577cb6e4..3364322dd5 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -74,7 +74,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.59 2003/11/29 19:52:00 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.60 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -83,6 +83,7 @@ #include "catalog/catalog.h" #include "miscadmin.h" #include "storage/sinval.h" +#include "storage/smgr.h" #include "utils/catcache.h" #include "utils/inval.h" #include "utils/memutils.h" @@ -298,19 +299,22 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr, */ static void AddRelcacheInvalidationMessage(InvalidationListHeader *hdr, - Oid dbId, Oid relId) + Oid dbId, Oid relId, 
RelFileNode physId) { SharedInvalidationMessage msg; /* Don't add a duplicate item */ - /* We assume comparing relId is sufficient, needn't check dbId */ + /* We assume dbId need not be checked because it will never change */ + /* relfilenode fields must be checked to support reassignment */ ProcessMessageList(hdr->rclist, - if (msg->rc.relId == relId) return); + if (msg->rc.relId == relId && + RelFileNodeEquals(msg->rc.physId, physId)) return); /* OK, add the item */ msg.rc.id = SHAREDINVALRELCACHE_ID; msg.rc.dbId = dbId; msg.rc.relId = relId; + msg.rc.physId = physId; AddInvalidationMessage(&hdr->rclist, &msg); } @@ -391,10 +395,10 @@ RegisterCatcacheInvalidation(int cacheId, * As above, but register a relcache invalidation event. */ static void -RegisterRelcacheInvalidation(Oid dbId, Oid relId) +RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId) { AddRelcacheInvalidationMessage(&CurrentCmdInvalidMsgs, - dbId, relId); + dbId, relId, physId); /* * If the relation being invalidated is one of those cached in the @@ -435,9 +439,17 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) } else if (msg->id == SHAREDINVALRELCACHE_ID) { - if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == 0) + /* + * If the message includes a valid relfilenode, we must ensure that + * smgr cache entry gets zapped. The relcache will handle this if + * called, otherwise we must do it directly. 
+ */ + if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid) { - RelationIdInvalidateRelationCacheByRelationId(msg->rc.relId); + if (OidIsValid(msg->rc.physId.relNode)) + RelationCacheInvalidateEntry(msg->rc.relId, &msg->rc.physId); + else + RelationCacheInvalidateEntry(msg->rc.relId, NULL); for (i = 0; i < cache_callback_count; i++) { @@ -447,6 +459,12 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) (*ccitem->function) (ccitem->arg, msg->rc.relId); } } + else + { + /* might have smgr entry even if not in our database */ + if (OidIsValid(msg->rc.physId.relNode)) + smgrclosenode(msg->rc.physId); + } } else elog(FATAL, "unrecognized SI message id: %d", msg->id); @@ -456,7 +474,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) * InvalidateSystemCaches * * This blows away all tuples in the system catalog caches and - * all the cached relation descriptors (and closes their files too). + * all the cached relation descriptors and smgr cache entries. * Relation descriptors that have positive refcounts are then rebuilt. 
* * We call this when we see a shared-inval-queue overflow signal, @@ -469,7 +487,7 @@ InvalidateSystemCaches(void) int i; ResetCatalogCaches(); - RelationCacheInvalidate(); + RelationCacheInvalidate(); /* gets smgr cache too */ for (i = 0; i < cache_callback_count; i++) { @@ -488,11 +506,15 @@ static void PrepareForTupleInvalidation(Relation relation, HeapTuple tuple, void (*CacheIdRegisterFunc) (int, uint32, ItemPointer, Oid), - void (*RelationIdRegisterFunc) (Oid, Oid)) + void (*RelationIdRegisterFunc) (Oid, Oid, + RelFileNode)) { Oid tupleRelId; + Oid databaseId; Oid relationId; + RelFileNode rnode; + /* Do nothing during bootstrap */ if (IsBootstrapProcessingMode()) return; @@ -524,24 +546,49 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple, tupleRelId = RelationGetRelid(relation); if (tupleRelId == RelOid_pg_class) + { + Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple); + relationId = HeapTupleGetOid(tuple); + if (classtup->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + rnode.tblNode = databaseId; /* XXX change for tablespaces */ + rnode.relNode = classtup->relfilenode; + /* + * Note: during a pg_class row update that assigns a new relfilenode + * value, we will be called on both the old and new tuples, and thus + * will broadcast invalidation messages showing both the old and new + * relfilenode values. This ensures that other backends will close + * smgr references to the old relfilenode file. + */ + } else if (tupleRelId == RelOid_pg_attribute) - relationId = ((Form_pg_attribute) GETSTRUCT(tuple))->attrelid; + { + Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple); + + relationId = atttup->attrelid; + /* + * KLUGE ALERT: we always send the relcache event with MyDatabaseId, + * even if the rel in question is shared (which we can't easily tell). + * This essentially means that only backends in this same database + * will react to the relcache flush request. 
This is in fact + * appropriate, since only those backends could see our pg_attribute + * change anyway. It looks a bit ugly though. + */ + databaseId = MyDatabaseId; + /* We assume no smgr cache flush is needed, either */ + rnode.tblNode = InvalidOid; + rnode.relNode = InvalidOid; + } else return; /* - * Yes. We need to register a relcache invalidation event for the - * relation identified by relationId. - * - * KLUGE ALERT: we always send the relcache event with MyDatabaseId, even - * if the rel in question is shared. This essentially means that only - * backends in this same database will react to the relcache flush - * request. This is in fact appropriate, since only those backends - * could see our pg_class or pg_attribute change anyway. It looks a - * bit ugly though. + * Yes. We need to register a relcache invalidation event. */ - (*RelationIdRegisterFunc) (MyDatabaseId, relationId); + (*RelationIdRegisterFunc) (databaseId, relationId, rnode); } @@ -660,7 +707,7 @@ CommandEndInvalidationMessages(bool isCommit) /* * CacheInvalidateHeapTuple * Register the given tuple for invalidation at end of command - * (ie, current command is outdating this tuple). + * (ie, current command is creating or outdating this tuple). */ void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple) @@ -678,12 +725,44 @@ CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple) * This is used in places that need to force relcache rebuild but aren't * changing any of the tuples recognized as contributors to the relcache * entry by PrepareForTupleInvalidation. (An example is dropping an index.) + * We assume in particular that relfilenode isn't changing. 
*/ void -CacheInvalidateRelcache(Oid relationId) +CacheInvalidateRelcache(Relation relation) { - /* See KLUGE ALERT in PrepareForTupleInvalidation */ - RegisterRelcacheInvalidation(MyDatabaseId, relationId); + Oid databaseId; + Oid relationId; + + relationId = RelationGetRelid(relation); + if (relation->rd_rel->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + + RegisterRelcacheInvalidation(databaseId, relationId, relation->rd_node); +} + +/* + * CacheInvalidateRelcacheByTuple + * As above, but relation is identified by passing its pg_class tuple. + */ +void +CacheInvalidateRelcacheByTuple(HeapTuple classTuple) +{ + Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple); + Oid databaseId; + Oid relationId; + RelFileNode rnode; + + relationId = HeapTupleGetOid(classTuple); + if (classtup->relisshared) + databaseId = InvalidOid; + else + databaseId = MyDatabaseId; + rnode.tblNode = databaseId; /* XXX change for tablespaces */ + rnode.relNode = classtup->relfilenode; + + RegisterRelcacheInvalidation(databaseId, relationId, rnode); } /* diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 37b81f1244..8561cff549 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.196 2004/02/02 00:17:21 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.197 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -54,6 +54,7 @@ #include "optimizer/clauses.h" #include "optimizer/planmain.h" #include "optimizer/prep.h" +#include "storage/fd.h" #include "storage/smgr.h" #include "utils/builtins.h" #include "utils/catcache.h" @@ -91,13 +92,6 @@ static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type}; static HTAB *RelationIdCache; static HTAB *RelationSysNameCache; -/* - * Bufmgr uses 
RelFileNode for lookup. Actually, I would like to do - * not pass Relation to bufmgr & beyond at all and keep some cache - * in smgr, but no time to do it right way now. -- vadim 10/22/2000 - */ -static HTAB *RelationNodeCache; - /* * This flag is false until we have prepared the critical relcache entries * that are needed to do indexscans on the tables read by relcache building. @@ -152,18 +146,12 @@ typedef struct relnamecacheent Relation reldesc; } RelNameCacheEnt; -typedef struct relnodecacheent -{ - RelFileNode relnode; - Relation reldesc; -} RelNodeCacheEnt; - /* * macros to manipulate the lookup hashtables */ #define RelationCacheInsert(RELATION) \ do { \ - RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; bool found; \ + RelIdCacheEnt *idhentry; bool found; \ idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \ (void *) &(RELATION->rd_id), \ HASH_ENTER, \ @@ -174,16 +162,6 @@ do { \ errmsg("out of memory"))); \ /* used to give notice if found -- now just keep quiet */ \ idhentry->reldesc = RELATION; \ - nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \ - (void *) &(RELATION->rd_node), \ - HASH_ENTER, \ - &found); \ - if (nodentry == NULL) \ - ereport(ERROR, \ - (errcode(ERRCODE_OUT_OF_MEMORY), \ - errmsg("out of memory"))); \ - /* used to give notice if found -- now just keep quiet */ \ - nodentry->reldesc = RELATION; \ if (IsSystemNamespace(RelationGetNamespace(RELATION))) \ { \ char *relname = RelationGetRelationName(RELATION); \ @@ -223,30 +201,14 @@ do { \ RELATION = NULL; \ } while(0) -#define RelationNodeCacheLookup(NODE, RELATION) \ -do { \ - RelNodeCacheEnt *hentry; \ - hentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \ - (void *)&(NODE), HASH_FIND,NULL); \ - if (hentry) \ - RELATION = hentry->reldesc; \ - else \ - RELATION = NULL; \ -} while(0) - #define RelationCacheDelete(RELATION) \ do { \ - RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; \ + RelIdCacheEnt *idhentry; \ idhentry = 
(RelIdCacheEnt*)hash_search(RelationIdCache, \ (void *)&(RELATION->rd_id), \ HASH_REMOVE, NULL); \ if (idhentry == NULL) \ elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \ - nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \ - (void *)&(RELATION->rd_node), \ - HASH_REMOVE, NULL); \ - if (nodentry == NULL) \ - elog(WARNING, "trying to delete a rd_node reldesc that does not exist"); \ if (IsSystemNamespace(RelationGetNamespace(RELATION))) \ { \ char *relname = RelationGetRelationName(RELATION); \ @@ -423,7 +385,7 @@ AllocateRelationDesc(Relation relation, Form_pg_class relp) relation->rd_targblock = InvalidBlockNumber; /* make sure relation is marked as having no open file yet */ - relation->rd_fd = -1; + relation->rd_smgr = NULL; /* * Copy the relation tuple form @@ -914,7 +876,7 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo, relation->rd_node.relNode = relation->rd_rel->relfilenode; /* make sure relation is marked as having no open file yet */ - relation->rd_fd = -1; + relation->rd_smgr = NULL; /* * Insert newly created relation into relcache hash tables. @@ -1303,7 +1265,7 @@ formrdesc(const char *relationName, relation->rd_targblock = InvalidBlockNumber; /* make sure relation is marked as having no open file yet */ - relation->rd_fd = -1; + relation->rd_smgr = NULL; /* * initialize reference count @@ -1481,30 +1443,6 @@ RelationSysNameCacheGetRelation(const char *relationName) return rd; } -/* - * RelationNodeCacheGetRelation - * - * As above, but lookup by relfilenode. - * - * NOTE: this must NOT try to revalidate invalidated nailed indexes, since - * that could cause us to return an entry with a different relfilenode than - * the caller asked for. Currently this is used only by the buffer manager. - * Really the bufmgr's idea of relations should be separated out from the - * relcache ... 
- */ -Relation -RelationNodeCacheGetRelation(RelFileNode rnode) -{ - Relation rd; - - RelationNodeCacheLookup(rnode, rd); - - if (RelationIsValid(rd)) - RelationIncrementReferenceCount(rd); - - return rd; -} - /* * RelationIdGetRelation * @@ -1635,14 +1573,8 @@ RelationReloadClassinfo(Relation relation) elog(ERROR, "could not find tuple for system relation %u", relation->rd_id); relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); - if (relation->rd_node.relNode != relp->relfilenode) - { - /* We have to re-insert the entry into the relcache indexes */ - RelationCacheDelete(relation); - memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE); - relation->rd_node.relNode = relp->relfilenode; - RelationCacheInsert(relation); - } + memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE); + relation->rd_node.relNode = relp->relfilenode; heap_freetuple(pg_class_tuple); /* Must adjust number of blocks after we know the new relfilenode */ relation->rd_targblock = InvalidBlockNumber; @@ -1672,10 +1604,10 @@ RelationClearRelation(Relation relation, bool rebuild) * ensures that the low-level file access state is updated after, say, * a vacuum truncation. */ - if (relation->rd_fd >= 0) + if (relation->rd_smgr) { - smgrclose(DEFAULT_SMGR, relation); - relation->rd_fd = -1; + smgrclose(relation->rd_smgr); + relation->rd_smgr = NULL; } /* @@ -1866,18 +1798,31 @@ RelationForgetRelation(Oid rid) } /* - * RelationIdInvalidateRelationCacheByRelationId + * RelationCacheInvalidateEntry * * This routine is invoked for SI cache flush messages. * - * We used to skip local relations, on the grounds that they could - * not be targets of cross-backend SI update messages; but it seems - * safer to process them, so that our *own* SI update messages will - * have the same effects during CommandCounterIncrement for both - * local and nonlocal relations. + * Any relcache entry matching the relid must be flushed. 
(Note: caller has + * already determined that the relid belongs to our database or is a shared + * relation.) If rnode isn't NULL, we must also ensure that any smgr cache + * entry matching that rnode is flushed. + * + * Ordinarily, if rnode is supplied then it will match the relfilenode of + * the target relid. However, it's possible for rnode to be different if + * someone is engaged in a relfilenode change. In that case we want to + * make sure we clear the right cache entries. This has to be done here + * to keep things in sync between relcache and smgr cache --- we can't have + * someone flushing an smgr cache entry that a relcache entry still points + * to. + * + * We used to skip local relations, on the grounds that they could + * not be targets of cross-backend SI update messages; but it seems + * safer to process them, so that our *own* SI update messages will + * have the same effects during CommandCounterIncrement for both + * local and nonlocal relations. */ void -RelationIdInvalidateRelationCacheByRelationId(Oid relationId) +RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode) { Relation relation; @@ -1886,14 +1831,27 @@ RelationIdInvalidateRelationCacheByRelationId(Oid relationId) if (PointerIsValid(relation)) { relcacheInvalsReceived++; + if (rnode) + { + /* Need to be sure smgr is flushed, but don't do it twice */ + if (relation->rd_smgr == NULL || + !RelFileNodeEquals(*rnode, relation->rd_node)) + smgrclosenode(*rnode); + } RelationFlushRelation(relation); } + else + { + if (rnode) + smgrclosenode(*rnode); + } } /* * RelationCacheInvalidate * Blow away cached relation descriptors that have zero reference counts, - * and rebuild those with positive reference counts. + * and rebuild those with positive reference counts. Also reset the smgr + * relation cache. 
* * This is currently used only to recover from SI message buffer overflow, * so we do not touch new-in-transaction relations; they cannot be targets @@ -1934,6 +1892,13 @@ RelationCacheInvalidate(void) { relation = idhentry->reldesc; + /* Must close all smgr references to avoid leaving dangling ptrs */ + if (relation->rd_smgr) + { + smgrclose(relation->rd_smgr); + relation->rd_smgr = NULL; + } + /* Ignore new relations, since they are never SI targets */ if (relation->rd_isnew) continue; @@ -1970,6 +1935,13 @@ RelationCacheInvalidate(void) rebuildList = nconc(rebuildFirstList, rebuildList); + /* + * Now zap any remaining smgr cache entries. This must happen before + * we start to rebuild entries, since that may involve catalog fetches + * which will re-open catalog files. + */ + smgrcloseall(); + /* Phase 2: rebuild the items found to need rebuild in phase 1 */ foreach(l, rebuildList) { @@ -2107,7 +2079,7 @@ RelationBuildLocalRelation(const char *relname, rel->rd_targblock = InvalidBlockNumber; /* make sure relation is marked as having no open file yet */ - rel->rd_fd = -1; + rel->rd_smgr = NULL; RelationSetReferenceCount(rel, 1); @@ -2233,12 +2205,6 @@ RelationCacheInitialize(void) RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE, &ctl, HASH_ELEM | HASH_FUNCTION); - ctl.keysize = sizeof(RelFileNode); - ctl.entrysize = sizeof(RelNodeCacheEnt); - ctl.hash = tag_hash; - RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE, - &ctl, HASH_ELEM | HASH_FUNCTION); - /* * Try to load the relcache cache file. If successful, we're done for * now. 
Otherwise, initialize the cache with pre-made descriptors for @@ -2406,65 +2372,6 @@ RelationCacheInitializePhase3(void) } } - -/* used by XLogInitCache */ -void CreateDummyCaches(void); -void DestroyDummyCaches(void); - -void -CreateDummyCaches(void) -{ - MemoryContext oldcxt; - HASHCTL ctl; - - if (!CacheMemoryContext) - CreateCacheMemoryContext(); - - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - - MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(NameData); - ctl.entrysize = sizeof(RelNameCacheEnt); - RelationSysNameCache = hash_create("Relcache by name", INITRELCACHESIZE, - &ctl, HASH_ELEM); - - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(RelIdCacheEnt); - ctl.hash = tag_hash; - RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE, - &ctl, HASH_ELEM | HASH_FUNCTION); - - ctl.keysize = sizeof(RelFileNode); - ctl.entrysize = sizeof(RelNodeCacheEnt); - ctl.hash = tag_hash; - RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE, - &ctl, HASH_ELEM | HASH_FUNCTION); - - MemoryContextSwitchTo(oldcxt); -} - -void -DestroyDummyCaches(void) -{ - MemoryContext oldcxt; - - if (!CacheMemoryContext) - return; - - oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - - if (RelationIdCache) - hash_destroy(RelationIdCache); - if (RelationSysNameCache) - hash_destroy(RelationSysNameCache); - if (RelationNodeCache) - hash_destroy(RelationNodeCache); - - RelationIdCache = RelationSysNameCache = RelationNodeCache = NULL; - - MemoryContextSwitchTo(oldcxt); -} - static void AttrDefaultFetch(Relation relation) { @@ -3125,7 +3032,7 @@ load_relcache_init_file(void) /* * Reset transient-state fields in the relcache entry */ - rel->rd_fd = -1; + rel->rd_smgr = NULL; rel->rd_targblock = InvalidBlockNumber; if (rel->rd_isnailed) RelationSetReferenceCount(rel, 1); diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index e93dcb8a84..075269b4ad 100644 --- a/src/backend/utils/init/miscinit.c +++ 
b/src/backend/utils/init/miscinit.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.122 2004/02/08 22:28:57 neilc Exp $ + * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.123 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,6 +33,7 @@ #include "catalog/pg_shadow.h" #include "libpq/libpq-be.h" #include "miscadmin.h" +#include "storage/fd.h" #include "storage/ipc.h" #include "storage/pg_shmem.h" #include "utils/builtins.h" diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index af113eb66a..226c5c2f99 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.30 2003/11/29 22:40:58 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.31 2004/02/10 01:55:26 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -72,15 +72,6 @@ typedef FormData_pg_database *Form_pg_database; DATA(insert OID = 1 ( template1 PGUID ENCODING t t 0 0 0 "" _null_ _null_ )); DESCR("Default template database"); - #define TemplateDbOid 1 -/* Just to mark OID as used for unused_oid script -:) */ -#define DATAMARKOID(x) - -DATAMARKOID(= 2) -#define RecoveryDb 2 - -#undef DATAMARKOID - #endif /* PG_DATABASE_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index a0b523da3d..84706272de 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.32 2003/11/29 22:41:13 pgsql Exp $ + * 
$PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.33 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,6 +16,7 @@ #include "storage/backendid.h" #include "storage/itemptr.h" +#include "storage/relfilenode.h" /* @@ -27,6 +28,13 @@ * ID field). -1 means a relcache inval message. Other negative values * are available to identify other inval message types. * + * Relcache invalidation messages usually also cause invalidation of entries + * in the smgr's relation cache. This means they must carry both logical + * and physical relation ID info (ie, both dbOID/relOID and RelFileNode). + * In some cases RelFileNode information is not available so the sender fills + * those fields with zeroes --- this is okay so long as no smgr cache flush + * is required. + * * Shared-inval events are initially driven by detecting tuple inserts, * updates and deletions in system catalogs (see CacheInvalidateHeapTuple). * An update generates two inval events, one for the old tuple and one for @@ -63,6 +71,12 @@ typedef struct int16 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 if a shared relation */ Oid relId; /* relation ID */ + RelFileNode physId; /* physical file ID */ + /* + * Note: it is likely that RelFileNode will someday be changed to + * include database ID. In that case the dbId field will be redundant + * and should be removed to save space. 
+ */ } SharedInvalRelcacheMsg; typedef union diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 0fd20fd436..738e436fb7 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.39 2003/11/29 22:41:13 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.40 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,36 +16,54 @@ #include "access/xlog.h" #include "fmgr.h" -#include "storage/relfilenode.h" #include "storage/block.h" -#include "utils/rel.h" +#include "storage/relfilenode.h" + + +/* + * smgr.c maintains a table of SMgrRelation objects, which are essentially + * cached file handles. An SMgrRelation is created (if not already present) + * by smgropen(), and destroyed by smgrclose(). Note that neither of these + * operations implies I/O; they just create or destroy a hashtable entry. + * (But smgrclose() may release associated resources, such as OS-level file + * descriptors.) + */ +typedef struct SMgrRelationData +{ + /* rnode is the hashtable lookup key, so it must be first! */ + RelFileNode smgr_rnode; /* relation physical identifier */ + /* additional public fields may someday exist here */ -#define SM_FAIL 0 -#define SM_SUCCESS 1 + /* + * Fields below here are intended to be private to smgr.c and its + * submodules. Do not touch them from elsewhere.
+ */ + int smgr_which; /* storage manager selector */ -#define DEFAULT_SMGR 0 + struct _MdfdVec *md_fd; /* for md.c; NULL if not open */ +} SMgrRelationData; -extern int smgrinit(void); -extern int smgrcreate(int16 which, Relation reln); -extern int smgrunlink(int16 which, Relation reln); -extern int smgrextend(int16 which, Relation reln, BlockNumber blocknum, - char *buffer); -extern int smgropen(int16 which, Relation reln, bool failOK); -extern int smgrclose(int16 which, Relation reln); -extern int smgrread(int16 which, Relation reln, BlockNumber blocknum, - char *buffer); -extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum, - char *buffer); -extern int smgrblindwrt(int16 which, RelFileNode rnode, - BlockNumber blkno, char *buffer); -extern BlockNumber smgrnblocks(int16 which, Relation reln); -extern BlockNumber smgrtruncate(int16 which, Relation reln, - BlockNumber nblocks); -extern int smgrDoPendingDeletes(bool isCommit); -extern int smgrcommit(void); -extern int smgrabort(void); -extern int smgrsync(void); +typedef SMgrRelationData *SMgrRelation; + + +extern void smgrinit(void); +extern SMgrRelation smgropen(RelFileNode rnode); +extern void smgrclose(SMgrRelation reln); +extern void smgrcloseall(void); +extern void smgrclosenode(RelFileNode rnode); +extern void smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo); +extern void smgrscheduleunlink(SMgrRelation reln, bool isTemp); +extern void smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo); +extern void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern BlockNumber smgrnblocks(SMgrRelation reln); +extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks); +extern void smgrDoPendingDeletes(bool isCommit); +extern void smgrcommit(void); +extern void smgrabort(void); +extern void 
smgrsync(void); extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record); extern void smgr_undo(XLogRecPtr lsn, XLogRecord *record); @@ -55,38 +73,18 @@ extern void smgr_desc(char *buf, uint8 xl_info, char *rec); /* internals: move me elsewhere -- ay 7/94 */ /* in md.c */ -extern int mdinit(void); -extern int mdcreate(Relation reln); -extern int mdunlink(RelFileNode rnode); -extern int mdextend(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdopen(Relation reln); -extern int mdclose(Relation reln); -extern int mdread(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer); -extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer); -extern BlockNumber mdnblocks(Relation reln); -extern BlockNumber mdtruncate(Relation reln, BlockNumber nblocks); -extern int mdcommit(void); -extern int mdabort(void); -extern int mdsync(void); - -/* mm.c */ -extern int mminit(void); -extern int mmcreate(Relation reln); -extern int mmunlink(RelFileNode rnode); -extern int mmextend(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmopen(Relation reln); -extern int mmclose(Relation reln); -extern int mmread(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer); -extern int mmblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer); -extern BlockNumber mmnblocks(Relation reln); -extern BlockNumber mmtruncate(Relation reln, BlockNumber nblocks); -extern int mmcommit(void); -extern int mmabort(void); - -extern int mmshutdown(void); -extern int MMShmemSize(void); +extern bool mdinit(void); +extern bool mdclose(SMgrRelation reln); +extern bool mdcreate(SMgrRelation reln, bool isRedo); +extern bool mdunlink(RelFileNode rnode, bool isRedo); +extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern bool 
mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer); +extern BlockNumber mdnblocks(SMgrRelation reln); +extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks); +extern bool mdcommit(void); +extern bool mdabort(void); +extern bool mdsync(void); /* smgrtype.c */ extern Datum smgrout(PG_FUNCTION_ARGS); diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 467d15ee83..e7052726f2 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.29 2003/11/29 22:41:15 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.30 2004/02/10 01:55:26 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -28,7 +28,9 @@ extern void CommandEndInvalidationMessages(bool isCommit); extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple); -extern void CacheInvalidateRelcache(Oid relationId); +extern void CacheInvalidateRelcache(Relation relation); + +extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple); extern void CacheRegisterSyscacheCallback(int cacheid, CacheCallbackFunction func, diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index dfdb8491e3..8532c5a737 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.72 2004/01/06 18:07:32 neilc Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.73 2004/02/10 01:55:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,7 +20,6 @@ #include "catalog/pg_index.h" #include "rewrite/prs2lock.h" #include "storage/block.h" 
-#include "storage/fd.h" #include "storage/relfilenode.h" @@ -98,16 +97,16 @@ typedef struct PgStat_Info bool index_scan_counted; } PgStat_Info; + /* * Here are the contents of a relation cache entry. */ typedef struct RelationData { - File rd_fd; /* open file descriptor, or -1 if - * none; this is NOT an operating - * system file descriptor */ - RelFileNode rd_node; /* file node (physical identifier) */ + RelFileNode rd_node; /* relation physical identifier */ + /* use "struct" here to avoid needing to include smgr.h: */ + struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */ BlockNumber rd_nblocks; /* number of blocks in rel */ BlockNumber rd_targblock; /* current insertion target block, or * InvalidBlockNumber */ @@ -226,14 +225,6 @@ typedef Relation *RelationPtr; */ #define RelationGetRelid(relation) ((relation)->rd_id) -/* - * RelationGetFile - * Returns the open file descriptor for the rel, or -1 if - * none. This is NOT an operating system file descriptor; see md.c - * for more information - */ -#define RelationGetFile(relation) ((relation)->rd_fd) - /* * RelationGetNumberOfAttributes * Returns the number of attributes in a relation. 
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index c7c6a9231f..848d68b207 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.38 2003/11/29 22:41:16 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.39 2004/02/10 01:55:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,7 +24,6 @@ extern Relation RelationSysNameGetRelation(const char *relationName); /* finds an existing cache entry, but won't make a new one */ extern Relation RelationIdCacheGetRelation(Oid relationId); -extern Relation RelationNodeCacheGetRelation(RelFileNode rnode); extern void RelationClose(Relation relation); @@ -61,7 +60,7 @@ extern Relation RelationBuildLocalRelation(const char *relname, */ extern void RelationForgetRelation(Oid rid); -extern void RelationIdInvalidateRelationCacheByRelationId(Oid relationId); +extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode); extern void RelationCacheInvalidate(void); @@ -73,11 +72,6 @@ extern void AtEOXact_RelationCache(bool commit); extern bool RelationIdIsInInitFile(Oid relationId); extern void RelationCacheInitFileInvalidate(bool beforeSend); -/* XLOG support */ -extern void CreateDummyCaches(void); -extern void DestroyDummyCaches(void); - - /* should be used only by relcache.c and catcache.c */ extern bool criticalRelcachesBuilt;