* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.111 2004/02/06 19:36:17 wieck Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.112 2004/02/10 01:55:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* Do the physical truncation.
*/
- new_pages = smgrtruncate(DEFAULT_SMGR, rel, new_pages);
+ if (rel->rd_smgr == NULL)
+ rel->rd_smgr = smgropen(rel->rd_node);
+ new_pages = smgrtruncate(rel->rd_smgr, new_pages);
rel->rd_nblocks = new_pages; /* update relcache
* immediately */
rel->rd_targblock = InvalidBlockNumber;
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.10 2004/01/28 21:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.11 2004/02/10 01:55:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include <unistd.h>
#include "access/slru.h"
+#include "storage/fd.h"
#include "storage/lwlock.h"
#include "miscadmin.h"
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.161 2004/01/26 22:51:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.162 2004/02/10 01:55:24 tgl Exp $
*
* NOTES
* Transaction aborts can now occur two ways:
#include "executor/spi.h"
#include "libpq/be-fsstubs.h"
#include "miscadmin.h"
+#include "storage/fd.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "storage/smgr.h"
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.133 2004/01/26 22:35:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.134 2004/02/10 01:55:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "storage/bufpage.h"
+#include "storage/fd.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
MyXactMadeTempRelUpdate = false;
CritSectionCount++;
- CreateDummyCaches();
CreateCheckPoint(true, true);
ShutdownCLOG();
CritSectionCount--;
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.28 2003/12/14 00:34:47 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.29 2004/02/10 01:55:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
if (hentry == NULL)
elog(PANIC, "_xl_remove_hash_entry: file was not found in cache");
- if (rdesc->reldata.rd_fd >= 0)
- smgrclose(DEFAULT_SMGR, &(rdesc->reldata));
+ if (rdesc->reldata.rd_smgr != NULL)
+ smgrclose(rdesc->reldata.rd_smgr);
memset(rdesc, 0, sizeof(XLogRelDesc));
memset(tpgc, 0, sizeof(FormData_pg_class));
rdesc->reldata.rd_rel = tpgc;
-
- return;
}
static XLogRelDesc *
void
XLogInitRelationCache(void)
{
- CreateDummyCaches();
_xl_init_rel_cache();
}
HASH_SEQ_STATUS status;
XLogRelCacheEntry *hentry;
- DestroyDummyCaches();
-
if (!_xlrelarr)
return;
sprintf(RelationGetRelationName(&(res->reldata)), "%u", rnode.relNode);
- /* unexisting DB id */
- res->reldata.rd_lockInfo.lockRelId.dbId = RecoveryDb;
- res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
res->reldata.rd_node = rnode;
+ /*
+ * We set up the lockRelId in case anything tries to lock the dummy
+ * relation. Note that this is fairly bogus since relNode may be
+ * different from the relation's OID. It shouldn't really matter
+ * though, since we are presumably running by ourselves and can't
+ * have any lock conflicts ...
+ */
+ res->reldata.rd_lockInfo.lockRelId.dbId = rnode.tblNode;
+ res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
+
hentry = (XLogRelCacheEntry *)
hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &found);
hentry->rdesc = res;
res->reldata.rd_targblock = InvalidBlockNumber;
- res->reldata.rd_fd = -1;
- res->reldata.rd_fd = smgropen(DEFAULT_SMGR, &(res->reldata),
- true /* allow failure */ );
+ res->reldata.rd_smgr = smgropen(res->reldata.rd_node);
+ /*
+ * Create the target file if it doesn't already exist. This lets
+ * us cope if the replay sequence contains writes to a relation
+ * that is later deleted. (The original coding of this routine
+ * would instead return NULL, causing the writes to be suppressed.
+ * But that seems like it risks losing valuable data if the filesystem
+ * loses an inode during a crash. Better to write the data until we
+ * are actually told to delete the file.)
+ */
+ smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true);
}
res->moreRecently = &(_xlrelarr[0]);
_xlrelarr[0].lessRecently = res;
res->lessRecently->moreRecently = res;
- if (res->reldata.rd_fd < 0) /* file doesn't exist */
- return (NULL);
-
return (&(res->reldata));
}
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.175 2004/01/07 18:56:25 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.176 2004/02/10 01:55:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
break;
case BS_XLOG_CHECKPOINT:
- CreateDummyCaches();
CreateCheckPoint(false, false);
SetSavedRedoRecPtr(); /* pass redo ptr back to
* postmaster */
proc_exit(0); /* done */
case BS_XLOG_BGWRITER:
- CreateDummyCaches();
BufferBackgroundWriter();
proc_exit(0); /* done */
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.257 2003/12/28 21:57:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.258 2004/02/10 01:55:24 tgl Exp $
*
*
* INTERFACE ROUTINES
void
heap_storage_create(Relation rel)
{
- Assert(rel->rd_fd < 0);
- rel->rd_fd = smgrcreate(DEFAULT_SMGR, rel);
- Assert(rel->rd_fd >= 0);
+ Assert(rel->rd_smgr == NULL);
+ rel->rd_smgr = smgropen(rel->rd_node);
+ smgrcreate(rel->rd_smgr, rel->rd_istemp, false);
}
/* ----------------------------------------------------------------
*/
if (rel->rd_rel->relkind != RELKIND_VIEW &&
rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE)
- smgrunlink(DEFAULT_SMGR, rel);
+ {
+ if (rel->rd_smgr == NULL)
+ rel->rd_smgr = smgropen(rel->rd_node);
+ smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
+ rel->rd_smgr = NULL;
+ }
/*
* Close relcache entry, but *keep* AccessExclusiveLock on the
else
{
/* Skip the disk update, but force relcache inval anyway */
- CacheInvalidateRelcache(RelationGetRelid(rel));
+ CacheInvalidateRelcache(rel);
}
heap_freetuple(reltup);
DropRelationBuffers(currentIndex);
/* Now truncate the actual data and set blocks to zero */
- smgrtruncate(DEFAULT_SMGR, currentIndex, 0);
+ if (currentIndex->rd_smgr == NULL)
+ currentIndex->rd_smgr = smgropen(currentIndex->rd_node);
+ smgrtruncate(currentIndex->rd_smgr, 0);
currentIndex->rd_nblocks = 0;
currentIndex->rd_targblock = InvalidBlockNumber;
DropRelationBuffers(rel);
/* Now truncate the actual data and set blocks to zero */
- smgrtruncate(DEFAULT_SMGR, rel, 0);
+ if (rel->rd_smgr == NULL)
+ rel->rd_smgr = smgropen(rel->rd_node);
+ smgrtruncate(rel->rd_smgr, 0);
rel->rd_nblocks = 0;
rel->rd_targblock = InvalidBlockNumber;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.226 2004/01/28 21:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.227 2004/02/10 01:55:24 tgl Exp $
*
*
* INTERFACE ROUTINES
if (i < 0)
elog(ERROR, "FlushRelationBuffers returned %d", i);
- smgrunlink(DEFAULT_SMGR, userIndexRelation);
+ if (userIndexRelation->rd_smgr == NULL)
+ userIndexRelation->rd_smgr = smgropen(userIndexRelation->rd_node);
+ smgrscheduleunlink(userIndexRelation->rd_smgr,
+ userIndexRelation->rd_istemp);
+ userIndexRelation->rd_smgr = NULL;
/*
* We are presently too lazy to attempt to compute the new correct
* owning relation to ensure other backends update their relcache
* lists of indexes.
*/
- CacheInvalidateRelcache(heapId);
+ CacheInvalidateRelcache(userHeapRelation);
/*
* Close rels, but keep locks
else
{
/* no need to change tuple, but force relcache rebuild anyway */
- CacheInvalidateRelcache(relid);
+ CacheInvalidateRelcacheByTuple(tuple);
}
if (!pg_class_scan)
setNewRelfilenode(Relation relation)
{
Oid newrelfilenode;
+ RelFileNode newrnode;
+ SMgrRelation srel;
Relation pg_class;
HeapTuple tuple;
Form_pg_class rd_rel;
- RelationData workrel;
/* Can't change relfilenode for nailed tables (indexes ok though) */
Assert(!relation->rd_isnailed ||
/* create another storage file. Is it a little ugly ? */
/* NOTE: any conflict in relfilenode value will be caught here */
- memcpy((char *) &workrel, relation, sizeof(RelationData));
- workrel.rd_fd = -1;
- workrel.rd_node.relNode = newrelfilenode;
- heap_storage_create(&workrel);
- smgrclose(DEFAULT_SMGR, &workrel);
+ newrnode = relation->rd_node;
+ newrnode.relNode = newrelfilenode;
+
+ srel = smgropen(newrnode);
+ smgrcreate(srel, relation->rd_istemp, false);
+ smgrclose(srel);
/* schedule unlinking old relfilenode */
- smgrunlink(DEFAULT_SMGR, relation);
+ if (relation->rd_smgr == NULL)
+ relation->rd_smgr = smgropen(relation->rd_node);
+ smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
+ relation->rd_smgr = NULL;
/* update the pg_class row */
rd_rel->relfilenode = newrelfilenode;
DropRelationBuffers(iRel);
/* Now truncate the actual data and set blocks to zero */
- smgrtruncate(DEFAULT_SMGR, iRel, 0);
+ if (iRel->rd_smgr == NULL)
+ iRel->rd_smgr = smgropen(iRel->rd_node);
+ smgrtruncate(iRel->rd_smgr, 0);
iRel->rd_nblocks = 0;
iRel->rd_targblock = InvalidBlockNumber;
}
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.217 2004/01/28 21:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.218 2004/02/10 01:55:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "parser/parse_coerce.h"
#include "parser/parse_relation.h"
#include "rewrite/rewriteHandler.h"
+#include "storage/fd.h"
#include "tcop/pquery.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.130 2004/01/07 18:56:25 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.131 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "miscadmin.h"
+#include "storage/fd.h"
#include "storage/freespace.h"
#include "storage/sinval.h"
#include "utils/acl.h"
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.97 2004/01/28 21:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.98 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
else
{
/* no need to change tuple, but force relcache rebuild anyway */
- CacheInvalidateRelcache(relationId);
+ CacheInvalidateRelcacheByTuple(tuple);
}
heap_freetuple(tuple);
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.163 2003/11/29 19:51:47 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.164 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* relcache entries. (Ideally this should happen
* automatically...)
*/
- CacheInvalidateRelcache(relid);
+ CacheInvalidateRelcache(targetrel);
}
else
{
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.136 2004/02/02 17:21:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.137 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "commands/user.h"
#include "libpq/crypt.h"
#include "miscadmin.h"
+#include "storage/fd.h"
#include "storage/pmsignal.h"
#include "utils/acl.h"
#include "utils/array.h"
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.271 2004/01/07 18:56:25 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.272 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* truncate relation, if needed */
if (blkno < nblocks)
{
- blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
+ if (onerel->rd_smgr == NULL)
+ onerel->rd_smgr = smgropen(onerel->rd_node);
+ blkno = smgrtruncate(onerel->rd_smgr, blkno);
onerel->rd_nblocks = blkno; /* update relcache immediately */
onerel->rd_targblock = InvalidBlockNumber;
vacrelstats->rel_pages = blkno; /* set new number of blocks */
(errmsg("\"%s\": truncated %u to %u pages",
RelationGetRelationName(onerel),
vacrelstats->rel_pages, relblocks)));
- relblocks = smgrtruncate(DEFAULT_SMGR, onerel, relblocks);
+ if (onerel->rd_smgr == NULL)
+ onerel->rd_smgr = smgropen(onerel->rd_node);
+ relblocks = smgrtruncate(onerel->rd_smgr, relblocks);
onerel->rd_nblocks = relblocks; /* update relcache immediately */
onerel->rd_targblock = InvalidBlockNumber;
vacrelstats->rel_pages = relblocks; /* set new number of
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.35 2004/02/06 19:36:17 wieck Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.36 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
vac_open_indexes(onerel, &nindexes, &Irel);
hasindex = (nindexes > 0);
- /* Turn on vacuum cost accounting */
- if (VacuumCostNaptime > 0)
- VacuumCostActive = true;
+ /* Turn vacuum cost accounting on or off */
+ VacuumCostActive = (VacuumCostNaptime > 0);
VacuumCostBalance = 0;
/* Do the vacuuming */
/*
* Do the physical truncation.
*/
- new_rel_pages = smgrtruncate(DEFAULT_SMGR, onerel, new_rel_pages);
+ if (onerel->rd_smgr == NULL)
+ onerel->rd_smgr = smgropen(onerel->rd_node);
+ new_rel_pages = smgrtruncate(onerel->rd_smgr, new_rel_pages);
onerel->rd_nblocks = new_rel_pages; /* update relcache immediately */
onerel->rd_targblock = InvalidBlockNumber;
vacrelstats->rel_pages = new_rel_pages; /* save new number of
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.69 2003/11/29 19:51:49 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.70 2004/02/10 01:55:25 tgl Exp $
*
* NOTES
* This should be moved to a more appropriate place. It is here
#include "libpq/be-fsstubs.h"
#include "libpq/libpq-fs.h"
#include "miscadmin.h"
+#include "storage/fd.h"
#include "storage/large_object.h"
#include "utils/memutils.h"
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.92 2004/01/14 23:01:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.93 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* XXX what about getting rid of its TOAST table? For now, we don't.
*/
if (RelisBecomingView)
- smgrunlink(DEFAULT_SMGR, event_relation);
+ {
+ if (event_relation->rd_smgr == NULL)
+ event_relation->rd_smgr = smgropen(event_relation->rd_node);
+ smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp);
+ event_relation->rd_smgr = NULL;
+ }
/* Close rel, but keep lock till commit... */
heap_close(event_relation, NoLock);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.57 2003/11/29 19:51:55 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.58 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
else
{
/* no need to change tuple, but force relcache rebuild anyway */
- CacheInvalidateRelcache(relationId);
+ CacheInvalidateRelcacheByTuple(tuple);
}
heap_freetuple(tuple);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.156 2004/02/06 19:36:18 wieck Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.157 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
-static bool BufferReplace(BufferDesc *bufHdr);
+static void BufferReplace(BufferDesc *bufHdr);
#ifdef NOT_USED
void PrintBufferDescs(void);
bool bufferLockHeld)
{
BufferDesc *bufHdr;
- int status;
bool found;
bool isExtend;
bool isLocalBuf;
isExtend = (blockNum == P_NEW);
isLocalBuf = reln->rd_istemp;
+ /* Open it at the smgr level if not already done */
+ if (reln->rd_smgr == NULL)
+ reln->rd_smgr = smgropen(reln->rd_node);
+
if (isLocalBuf)
{
ReadLocalBufferCount++;
if (isExtend)
{
/* must be sure we have accurate file length! */
- blockNum = reln->rd_nblocks = smgrnblocks(DEFAULT_SMGR, reln);
+ blockNum = reln->rd_nblocks = smgrnblocks(reln->rd_smgr);
reln->rd_nblocks++;
}
}
/*
- * if we have gotten to this point, the reln pointer must be ok and
- * the relation file must be open.
+ * if we have gotten to this point, the relation must be open in the smgr.
*/
if (isExtend)
{
/* new buffers are zero-filled */
MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
- status = smgrextend(DEFAULT_SMGR, reln, blockNum,
- (char *) MAKE_PTR(bufHdr->data));
+ smgrextend(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data));
}
else
{
- status = smgrread(DEFAULT_SMGR, reln, blockNum,
- (char *) MAKE_PTR(bufHdr->data));
+ smgrread(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data));
/* check for garbage data */
- if (status == SM_SUCCESS &&
- !PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data)))
+ if (!PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data)))
{
/*
* During WAL recovery, the first access to any data page should
if (isLocalBuf)
{
/* No shared buffer state to update... */
- if (status == SM_FAIL)
- {
- bufHdr->flags |= BM_IO_ERROR;
- return InvalidBuffer;
- }
return BufferDescriptorGetBuffer(bufHdr);
}
/* lock buffer manager again to update IO IN PROGRESS */
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- if (status == SM_FAIL)
- {
- /* IO Failed. cleanup the data structures and go home */
- StrategyInvalidateBuffer(bufHdr);
-
- /* remember that BufferAlloc() pinned the buffer */
- UnpinBuffer(bufHdr);
-
- /*
- * Have to reset the flag so that anyone waiting for the buffer
- * can tell that the contents are invalid.
- */
- bufHdr->flags |= BM_IO_ERROR;
- bufHdr->flags &= ~BM_IO_IN_PROGRESS;
- }
- else
- {
- /* IO Succeeded. clear the flags, finish buffer update */
-
- bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
- }
+ /* IO Succeeded. clear the flags, finish buffer update */
+ bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
/* If anyone was waiting for IO to complete, wake them up now */
TerminateBufferIO(bufHdr);
LWLockRelease(BufMgrLock);
- if (status == SM_FAIL)
- return InvalidBuffer;
-
return BufferDescriptorGetBuffer(bufHdr);
}
if (buf->flags & BM_DIRTY || buf->cntxDirty)
{
- bool replace_ok;
-
/*
* skip write error buffers
*/
* Write the buffer out, being careful to release BufMgrLock
* before starting the I/O.
*/
- replace_ok = BufferReplace(buf);
+ BufferReplace(buf);
- if (replace_ok == false)
+ /*
+ * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
+ * be set by anyone. - vadim 01/17/97
+ */
+ if (buf->flags & BM_JUST_DIRTIED)
{
- ereport(WARNING,
- (errcode(ERRCODE_IO_ERROR),
- errmsg("could not write block %u of %u/%u",
- buf->tag.blockNum,
- buf->tag.rnode.tblNode,
- buf->tag.rnode.relNode)));
- inProgress = FALSE;
- buf->flags |= BM_IO_ERROR;
- buf->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(buf);
- UnpinBuffer(buf);
- buf = NULL;
+ elog(PANIC, "content of block %u of %u/%u changed while flushing",
+ buf->tag.blockNum,
+ buf->tag.rnode.tblNode, buf->tag.rnode.relNode);
}
- else
- {
- /*
- * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
- * be set by anyone. - vadim 01/17/97
- */
- if (buf->flags & BM_JUST_DIRTIED)
- {
- elog(PANIC, "content of block %u of %u/%u changed while flushing",
- buf->tag.blockNum,
- buf->tag.rnode.tblNode, buf->tag.rnode.relNode);
- }
- buf->flags &= ~BM_DIRTY;
- buf->cntxDirty = false;
- }
+ buf->flags &= ~BM_DIRTY;
+ buf->cntxDirty = false;
/*
* Somebody could have pinned the buffer while we were doing
for (i = 0; i < num_buffer_dirty; i++)
{
Buffer buffer;
- int status;
- RelFileNode rnode;
XLogRecPtr recptr;
- Relation reln;
+ SMgrRelation reln;
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
StartBufferIO(bufHdr, false); /* output IO start */
buffer = BufferDescriptorGetBuffer(bufHdr);
- rnode = bufHdr->tag.rnode;
LWLockRelease(BufMgrLock);
- /*
- * Try to find relation for buffer
- */
- reln = RelationNodeCacheGetRelation(rnode);
-
/*
* Protect buffer content against concurrent update
*/
bufHdr->flags &= ~BM_JUST_DIRTIED;
LWLockRelease(BufMgrLock);
- if (reln == NULL)
- {
- status = smgrblindwrt(DEFAULT_SMGR,
- bufHdr->tag.rnode,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
- else
- {
- status = smgrwrite(DEFAULT_SMGR, reln,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
+ /* Find smgr relation for buffer */
+ reln = smgropen(bufHdr->tag.rnode);
- if (status == SM_FAIL) /* disk failure ?! */
- ereport(PANIC,
- (errcode(ERRCODE_IO_ERROR),
- errmsg("could not write block %u of %u/%u",
- bufHdr->tag.blockNum,
- bufHdr->tag.rnode.tblNode,
- bufHdr->tag.rnode.relNode)));
+ /* And write... */
+ smgrwrite(reln,
+ bufHdr->tag.blockNum,
+ (char *) MAKE_PTR(bufHdr->data));
/*
* Note that it's safe to change cntxDirty here because of we
bufHdr->flags &= ~BM_DIRTY;
UnpinBuffer(bufHdr);
LWLockRelease(BufMgrLock);
-
- /* drop refcnt obtained by RelationNodeCacheGetRelation */
- if (reln != NULL)
- RelationDecrementReferenceCount(reln);
}
pfree(buffer_dirty);
n = BufferSync(BgWriterPercent, BgWriterMaxpages);
/*
- * Whatever signal is sent to us, let's just die galantly. If
+ * Whatever signal is sent to us, let's just die gallantly. If
* it wasn't meant that way, the postmaster will reincarnate us.
*/
if (InterruptPending)
return;
+ /*
+ * Whenever we have nothing to do, close all smgr files. This
+ * is so we won't hang onto smgr references to deleted files
+ * indefinitely. XXX this is a bogus, temporary solution. 'Twould
+ * be much better to do this once per checkpoint, but the bgwriter
+ * doesn't yet know anything about checkpoints.
+ */
+ if (n == 0)
+ smgrcloseall();
+
/*
* Nap for the configured time or sleep for 10 seconds if
* there was nothing to do at all.
/*
* BufferReplace
*
- * Write out the buffer corresponding to 'bufHdr'. Returns 'true' if
- * the buffer was successfully written out, 'false' otherwise.
+ * Write out the buffer corresponding to 'bufHdr'.
*
* BufMgrLock must be held at entry, and the buffer must be pinned.
*/
-static bool
+static void
BufferReplace(BufferDesc *bufHdr)
{
- Relation reln;
+ SMgrRelation reln;
XLogRecPtr recptr;
- int status;
ErrorContextCallback errcontext;
/* To check if block content changed while flushing. - vadim 01/17/97 */
recptr = BufferGetLSN(bufHdr);
XLogFlush(recptr);
- reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
+ /* Find smgr relation for buffer */
+ reln = smgropen(bufHdr->tag.rnode);
- if (reln != NULL)
- {
- status = smgrwrite(DEFAULT_SMGR, reln,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
- else
- {
- status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
-
- /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
- if (reln != NULL)
- RelationDecrementReferenceCount(reln);
+ /* And write... */
+ smgrwrite(reln,
+ bufHdr->tag.blockNum,
+ (char *) MAKE_PTR(bufHdr->data));
/* Pop the error context stack */
error_context_stack = errcontext.previous;
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- if (status == SM_FAIL)
- return false;
-
BufferFlushCount++;
-
- return true;
}
/*
*
* Don't call smgr on a view or a composite type, either.
*/
- if (relation->rd_rel->relkind == RELKIND_VIEW)
- relation->rd_nblocks = 0;
- else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+ if (relation->rd_rel->relkind == RELKIND_VIEW ||
+ relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
relation->rd_nblocks = 0;
else if (!relation->rd_isnew && !relation->rd_istemp)
- relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);
+ {
+ /* Open it at the smgr level if not already done */
+ if (relation->rd_smgr == NULL)
+ relation->rd_smgr = smgropen(relation->rd_node);
+
+ relation->rd_nblocks = smgrnblocks(relation->rd_smgr);
+ }
return relation->rd_nblocks;
}
void
RelationUpdateNumberOfBlocks(Relation relation)
{
- if (relation->rd_rel->relkind == RELKIND_VIEW)
- relation->rd_nblocks = 0;
- else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+ if (relation->rd_rel->relkind == RELKIND_VIEW ||
+ relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
relation->rd_nblocks = 0;
else
- relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);
+ {
+ /* Open it at the smgr level if not already done */
+ if (relation->rd_smgr == NULL)
+ relation->rd_smgr = smgropen(relation->rd_node);
+
+ relation->rd_nblocks = smgrnblocks(relation->rd_smgr);
+ }
}
/* ---------------------------------------------------------------------
int i;
BufferDesc *bufHdr;
XLogRecPtr recptr;
- int status;
ErrorContextCallback errcontext;
/* Setup error traceback support for ereport() */
{
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
- status = smgrwrite(DEFAULT_SMGR, rel,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- if (status == SM_FAIL)
- {
- error_context_stack = errcontext.previous;
- elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is dirty, could not flush it",
- RelationGetRelationName(rel), firstDelBlock,
- bufHdr->tag.blockNum);
- return (-1);
- }
+ /* Open it at the smgr level if not already done */
+ if (rel->rd_smgr == NULL)
+ rel->rd_smgr = smgropen(rel->rd_node);
+
+ smgrwrite(rel->rd_smgr,
+ bufHdr->tag.blockNum,
+ (char *) MAKE_PTR(bufHdr->data));
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
}
LWLockRelease(BufMgrLock);
- status = smgrwrite(DEFAULT_SMGR, rel,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
+ /* Open it at the smgr level if not already done */
+ if (rel->rd_smgr == NULL)
+ rel->rd_smgr = smgropen(rel->rd_node);
- if (status == SM_FAIL) /* disk failure ?! */
- ereport(PANIC,
- (errcode(ERRCODE_IO_ERROR),
- errmsg("could not write block %u of %u/%u",
- bufHdr->tag.blockNum,
- bufHdr->tag.rnode.tblNode,
- bufHdr->tag.rnode.relNode)));
+ smgrwrite(rel->rd_smgr,
+ bufHdr->tag.blockNum,
+ (char *) MAKE_PTR(bufHdr->data));
BufferFlushCount++;
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(buf->flags & BM_IO_IN_PROGRESS);
if (IsForInput)
+ {
Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
+ /* Make sure nobody treats this buffer's contents as valid */
+ StrategyInvalidateBuffer(buf);
+ }
else
{
Assert(buf->flags & BM_DIRTY || buf->cntxDirty);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.51 2004/01/07 18:56:27 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.52 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
*/
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
- Relation bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
+ SMgrRelation reln;
- /* flush this page */
- if (bufrel == NULL)
- {
- smgrblindwrt(DEFAULT_SMGR,
- bufHdr->tag.rnode,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
- else
- {
- smgrwrite(DEFAULT_SMGR, bufrel,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- /* drop refcount incremented by RelationNodeCacheGetRelation */
- RelationDecrementReferenceCount(bufrel);
- }
+ /* Find smgr relation for buffer */
+ reln = smgropen(bufHdr->tag.rnode);
+
+ /* And write... */
+ smgrwrite(reln,
+ bufHdr->tag.blockNum,
+ (char *) MAKE_PTR(bufHdr->data));
LocalBufferFlushCount++;
}
/*
* it's all ours now.
- *
- * We need not in tblNode currently but will in future I think, when
- * we'll give up rel->rd_fd to fmgr cache.
*/
bufHdr->tag.rnode = reln->rd_node;
bufHdr->tag.blockNum = blockNum;
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.63 2004/01/26 22:59:53 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.64 2004/02/10 01:55:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
size += FreeSpaceShmemSize();
#ifdef EXEC_BACKEND
size += ShmemBackendArraySize();
-#endif
-#ifdef STABLE_MEMORY_STORAGE
- size += MMShmemSize();
#endif
size += 100000;
/* might as well round it off to a multiple of a typical page size */
# Makefile for storage/smgr
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.14 2003/11/29 19:51:57 pgsql Exp $
+# $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.15 2004/02/10 01:55:26 tgl Exp $
#
#-------------------------------------------------------------------------
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = md.o mm.o smgr.o smgrtype.o
+OBJS = md.o smgr.o smgrtype.o
all: SUBSYS.o
-# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.2 2003/11/29 19:51:57 pgsql Exp $
+# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.3 2004/02/10 01:55:26 tgl Exp $
-This directory contains the code that supports the Postgres storage manager
-switch and all of the installed storage managers. In released systems,
-the only supported storage manager is the magnetic disk manager. At UC
-Berkeley, the Sony WORM optical disk jukebox and persistent main memory are
-also supported.
+In the original Berkeley Postgres system, there were several storage managers,
+of which only the "magnetic disk" manager remains. (At Berkeley there were
+also managers for the Sony WORM optical disk jukebox and persistent main
+memory, but these were never supported in any externally released Postgres,
+nor in any version of PostgreSQL.) However, we retain the notion of a storage
+manager switch in case anyone wants to reintroduce other kinds of storage
+managers.
-As of Postgres Release 3.0, every relation in the system is tagged with the
-storage manager on which it resides. The storage manager switch code turns
-what used to by filesystem operations into operations on the correct store,
-for any given relation.
+In Berkeley Postgres each relation was tagged with the ID of the storage
+manager to use for it. This is gone. It would be more reasonable to
+associate storage managers with tablespaces (a feature not present as this
+text is being written, but one likely to emerge soon).
The files in this directory, and their contents, are
smgrtype.c Storage manager type -- maps string names to storage manager
IDs and provides simple comparison operators. This is the
regproc support for type 'smgr' in the system catalogs.
+ (This is vestigial since no columns of type smgr exist
+ in the catalogs anymore.)
smgr.c The storage manager switch dispatch code. The routines in
this file call the appropriate storage manager to do hardware
- accesses requested by the backend.
+ accesses requested by the backend. smgr.c also manages the
+ file handle cache (SMgrRelation table).
md.c The magnetic disk storage manager.
- mm.c The persistent main memory storage manager (#undef'ed in
- tmp/c.h for all distributed systems).
-
- sj.c The sony jukebox storage manager and cache management code
- (#undef'ed in tmp/c.h for all distributed systems). The
- routines in this file allocate extents, maintain block
- maps, and guarantee the persistence and coherency of a cache
- of jukebox blocks on magnetic disk.
-
- pgjb.c The postgres jukebox interface routines. The routines here
- handle exclusion on the physical device and translate requests
- from the storage manager code (sj.c) into jbaccess calls.
-
- jbaccess.c Access code for the physical Sony jukebox device. This code
- was swiped from Andy McFadden's jblib.a code at UC Berkeley.
+Note that md.c in turn relies on src/backend/storage/file/fd.c.
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.101 2004/01/07 18:56:27 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.102 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "catalog/catalog.h"
#include "miscadmin.h"
+#include "storage/fd.h"
#include "storage/smgr.h"
-#include "utils/inval.h"
#include "utils/memutils.h"
+
/*
* The magnetic disk storage manager keeps track of open file
* descriptors in its own descriptor pool. This is done to make it
* easier to support relations that are larger than the operating
- * system's file size limit (often 2GBytes). In order to do that, we
+ * system's file size limit (often 2GBytes). In order to do that,
* we break relations up into chunks of < 2GBytes and store one chunk
* in each of several files that represent the relation. See the
* BLCKSZ and RELSEG_SIZE configuration constants in
- * include/pg_config.h.
+ * include/pg_config.h. All chunks except the last MUST have size exactly
+ * equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate().
*
- * The file descriptor stored in the relation cache (see RelationGetFile())
- * is actually an index into the Md_fdvec array. -1 indicates not open.
+ * The file descriptor pointer (md_fd field) stored in the SMgrRelation
+ * cache is, therefore, just the head of a list of MdfdVec objects.
+ * But note the md_fd pointer can be NULL, indicating relation not open.
*
- * When a relation is broken into multiple chunks, only the first chunk
- * has its own entry in the Md_fdvec array; the remaining chunks have
- * palloc'd MdfdVec objects that are chained onto the first chunk via the
- * mdfd_chain links. All chunks except the last MUST have size exactly
- * equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate().
+ * All MdfdVec objects are palloc'd in the MdCxt memory context.
*/
typedef struct _MdfdVec
{
- int mdfd_vfd; /* fd number in vfd pool */
- int mdfd_flags; /* fd status flags */
+ File mdfd_vfd; /* fd number in fd.c's pool */
-/* these are the assigned bits in mdfd_flags: */
-#define MDFD_FREE (1 << 0) /* unused entry */
-
- int mdfd_nextFree; /* link to next freelist member, if free */
#ifndef LET_OS_MANAGE_FILESIZE
struct _MdfdVec *mdfd_chain; /* for large relations */
#endif
} MdfdVec;
-static int Nfds = 100; /* initial/current size of Md_fdvec array */
-static MdfdVec *Md_fdvec = NULL;
-static int Md_Free = -1; /* head of freelist of unused fdvec
- * entries */
-static int CurFd = 0; /* first never-used fdvec index */
static MemoryContext MdCxt; /* context for all md.c allocations */
-/* routines declared here */
-static void mdclose_fd(int fd);
-static int _mdfd_getrelnfd(Relation reln);
-static MdfdVec *_mdfd_openseg(Relation reln, BlockNumber segno, int oflags);
-static MdfdVec *_mdfd_getseg(Relation reln, BlockNumber blkno);
-
-static int _mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno);
-static int _fdvec_alloc(void);
-static void _fdvec_free(int);
+/* routines declared here */
+static MdfdVec *mdopen(SMgrRelation reln);
+static MdfdVec *_fdvec_alloc(void);
+#ifndef LET_OS_MANAGE_FILESIZE
+static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
+ int oflags);
+#endif
+static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno);
static BlockNumber _mdnblocks(File file, Size blcksz);
+
/*
* mdinit() -- Initialize private state for magnetic disk storage manager.
- *
- * We keep a private table of all file descriptors. This routine
- * allocates and initializes the table.
- *
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
-int
+bool
mdinit(void)
{
- int i;
-
MdCxt = AllocSetContextCreate(TopMemoryContext,
"MdSmgr",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
- Md_fdvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec));
-
- MemSet(Md_fdvec, 0, Nfds * sizeof(MdfdVec));
-
- /* Set free list */
- for (i = 0; i < Nfds; i++)
- {
- Md_fdvec[i].mdfd_nextFree = i + 1;
- Md_fdvec[i].mdfd_flags = MDFD_FREE;
- }
- Md_Free = 0;
- Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
-
- return SM_SUCCESS;
+ return true;
}
-int
-mdcreate(Relation reln)
+/*
+ * mdcreate() -- Create a new relation on magnetic disk.
+ *
+ * If isRedo is true, it's okay for the relation to exist already.
+ */
+bool
+mdcreate(SMgrRelation reln, bool isRedo)
{
char *path;
- int fd,
- vfd;
+ File fd;
- Assert(reln->rd_fd < 0);
+ Assert(reln->md_fd == NULL);
- path = relpath(reln->rd_node);
+ path = relpath(reln->smgr_rnode);
fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
* During bootstrap, there are cases where a system relation will
* be accessed (by internal backend processes) before the
* bootstrap script nominally creates it. Therefore, allow the
- * file to exist already, but in bootstrap mode only. (See also
+ * file to exist already, even if isRedo is not set. (See also
* mdopen)
*/
- if (IsBootstrapProcessingMode())
+ if (isRedo || IsBootstrapProcessingMode())
fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
if (fd < 0)
{
pfree(path);
/* be sure to return the error reported by create, not open */
errno = save_errno;
- return -1;
+ return false;
}
errno = 0;
}
pfree(path);
- vfd = _fdvec_alloc();
- if (vfd < 0)
- return -1;
+ reln->md_fd = _fdvec_alloc();
- Md_fdvec[vfd].mdfd_vfd = fd;
- Md_fdvec[vfd].mdfd_flags = (uint16) 0;
+ reln->md_fd->mdfd_vfd = fd;
#ifndef LET_OS_MANAGE_FILESIZE
- Md_fdvec[vfd].mdfd_chain = NULL;
+ reln->md_fd->mdfd_chain = NULL;
#endif
- return vfd;
+ return true;
}
/*
* mdunlink() -- Unlink a relation.
+ *
+ * Note that we're passed a RelFileNode --- by the time this is called,
+ * there won't be an SMgrRelation hashtable entry anymore.
+ *
+ * If isRedo is true, it's okay for the relation to be already gone.
*/
-int
-mdunlink(RelFileNode rnode)
+bool
+mdunlink(RelFileNode rnode, bool isRedo)
{
- int status = SM_SUCCESS;
+ bool status = true;
int save_errno = 0;
char *path;
/* Delete the first segment, or only segment if not doing segmenting */
if (unlink(path) < 0)
{
- status = SM_FAIL;
- save_errno = errno;
+ if (!isRedo || errno != ENOENT)
+ {
+ status = false;
+ save_errno = errno;
+ }
}
#ifndef LET_OS_MANAGE_FILESIZE
/* Get the additional segments, if any */
- if (status == SM_SUCCESS)
+ if (status)
{
char *segpath = (char *) palloc(strlen(path) + 12);
BlockNumber segno;
/* ENOENT is expected after the last segment... */
if (errno != ENOENT)
{
- status = SM_FAIL;
+ status = false;
save_errno = errno;
}
break;
* relation (ie, blocknum is the current EOF), and so in case of
* failure we clean up by truncating.
*
- * This routine returns SM_FAIL or SM_SUCCESS, with errno set as
- * appropriate.
+ * This routine returns true or false, with errno set as appropriate.
*
* Note: this routine used to call mdnblocks() to get the block position
* to write at, but that's pretty silly since the caller needs to know where
* the block will be written, and accordingly must have done mdnblocks()
* already. Might as well pass in the position and save a seek.
*/
-int
-mdextend(Relation reln, BlockNumber blocknum, char *buffer)
+bool
+mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
long seekpos;
int nbytes;
* to make room for the new page's buffer.
*/
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
- return SM_FAIL;
+ return false;
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);
errno = save_errno;
}
- return SM_FAIL;
+ return false;
}
#ifndef LET_OS_MANAGE_FILESIZE
Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
#endif
- return SM_SUCCESS;
+ return true;
}
/*
- * mdopen() -- Open the specified relation.
+ * mdopen() -- Open the specified relation. ereport's on failure.
+ *
+ * Note we only open the first segment, when there are multiple segments.
*/
-int
-mdopen(Relation reln)
+static MdfdVec *
+mdopen(SMgrRelation reln)
{
char *path;
- int fd;
- int vfd;
+ File fd;
- Assert(reln->rd_fd < 0);
+ /* No work if already open */
+ if (reln->md_fd)
+ return reln->md_fd;
- path = relpath(reln->rd_node);
+ path = relpath(reln->smgr_rnode);
fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
if (fd < 0)
{
pfree(path);
- return -1;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open relation %u/%u: %m",
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode)));
}
}
pfree(path);
- vfd = _fdvec_alloc();
- if (vfd < 0)
- return -1;
+ reln->md_fd = _fdvec_alloc();
- Md_fdvec[vfd].mdfd_vfd = fd;
- Md_fdvec[vfd].mdfd_flags = (uint16) 0;
+ reln->md_fd->mdfd_vfd = fd;
#ifndef LET_OS_MANAGE_FILESIZE
- Md_fdvec[vfd].mdfd_chain = NULL;
+ reln->md_fd->mdfd_chain = NULL;
Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
#endif
- return vfd;
+ return reln->md_fd;
}
/*
* mdclose() -- Close the specified relation, if it isn't closed already.
*
- * AND FREE fd vector! It may be re-used for other relations!
- * reln should be flushed from cache after closing !..
- *
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
+ * Returns true or false with errno set as appropriate.
*/
-int
-mdclose(Relation reln)
+bool
+mdclose(SMgrRelation reln)
{
- int fd;
-
- fd = RelationGetFile(reln);
- if (fd < 0)
- return SM_SUCCESS; /* already closed, so no work */
-
- mdclose_fd(fd);
-
- reln->rd_fd = -1;
+ MdfdVec *v = reln->md_fd;
- return SM_SUCCESS;
-}
+ /* No work if already closed */
+ if (v == NULL)
+ return true;
-static void
-mdclose_fd(int fd)
-{
- MdfdVec *v;
+ reln->md_fd = NULL; /* prevent dangling pointer after error */
#ifndef LET_OS_MANAGE_FILESIZE
- for (v = &Md_fdvec[fd]; v != NULL;)
+ while (v != NULL)
{
MdfdVec *ov = v;
FileClose(v->mdfd_vfd);
/* Now free vector */
v = v->mdfd_chain;
- if (ov != &Md_fdvec[fd])
- pfree(ov);
+ pfree(ov);
}
-
- Md_fdvec[fd].mdfd_chain = NULL;
#else
- v = &Md_fdvec[fd];
- if (v != NULL)
- {
- if (v->mdfd_vfd >= 0)
- FileClose(v->mdfd_vfd);
- }
+ if (v->mdfd_vfd >= 0)
+ FileClose(v->mdfd_vfd);
+ pfree(v);
#endif
- _fdvec_free(fd);
+ return true;
}
/*
* mdread() -- Read the specified block from a relation.
- *
- * Returns SM_SUCCESS or SM_FAIL.
*/
-int
-mdread(Relation reln, BlockNumber blocknum, char *buffer)
+bool
+mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
- int status;
+ bool status;
long seekpos;
int nbytes;
MdfdVec *v;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
- return SM_FAIL;
+ return false;
- status = SM_SUCCESS;
+ status = true;
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
/*
(nbytes > 0 && mdnblocks(reln) == blocknum))
MemSet(buffer, 0, BLCKSZ);
else
- status = SM_FAIL;
+ status = false;
}
return status;
/*
* mdwrite() -- Write the supplied block at the appropriate location.
- *
- * Returns SM_SUCCESS or SM_FAIL.
*/
-int
-mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
+bool
+mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
long seekpos;
MdfdVec *v;
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
- return SM_FAIL;
+ return false;
if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
- return SM_FAIL;
+ return false;
- return SM_SUCCESS;
-}
-
-/*
- * mdblindwrt() -- Write a block to disk blind.
- *
- * We have to be able to do this using only the rnode of the relation
- * in which the block belongs. Otherwise this is much like mdwrite().
- */
-int
-mdblindwrt(RelFileNode rnode,
- BlockNumber blkno,
- char *buffer)
-{
- int status;
- long seekpos;
- int fd;
-
- fd = _mdfd_blind_getseg(rnode, blkno);
-
- if (fd < 0)
- return SM_FAIL;
-
-#ifndef LET_OS_MANAGE_FILESIZE
- seekpos = (long) (BLCKSZ * (blkno % ((BlockNumber) RELSEG_SIZE)));
- Assert(seekpos < BLCKSZ * RELSEG_SIZE);
-#else
- seekpos = (long) (BLCKSZ * (blkno));
-#endif
-
- errno = 0;
- if (lseek(fd, seekpos, SEEK_SET) != seekpos)
- {
- elog(LOG, "lseek(%ld) failed: %m", seekpos);
- close(fd);
- return SM_FAIL;
- }
-
- status = SM_SUCCESS;
-
- /* write the block */
- errno = 0;
- if (write(fd, buffer, BLCKSZ) != BLCKSZ)
- {
- /* if write didn't set errno, assume problem is no disk space */
- if (errno == 0)
- errno = ENOSPC;
- elog(LOG, "write() failed: %m");
- status = SM_FAIL;
- }
-
- if (close(fd) < 0)
- {
- elog(LOG, "close() failed: %m");
- status = SM_FAIL;
- }
-
- return status;
+ return true;
}
/*
* called, then only segments up to the last one actually touched
* are present in the chain...
*
- * Returns # of blocks, ereport's on error.
+ * Returns # of blocks, or InvalidBlockNumber on error.
*/
BlockNumber
-mdnblocks(Relation reln)
+mdnblocks(SMgrRelation reln)
{
- int fd;
- MdfdVec *v;
+ MdfdVec *v = mdopen(reln);
#ifndef LET_OS_MANAGE_FILESIZE
BlockNumber nblocks;
- BlockNumber segno;
-#endif
-
- fd = _mdfd_getrelnfd(reln);
- v = &Md_fdvec[fd];
-
-#ifndef LET_OS_MANAGE_FILESIZE
- segno = 0;
+ BlockNumber segno = 0;
/*
* Skip through any segments that aren't the last one, to avoid
*/
v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
if (v->mdfd_chain == NULL)
- elog(ERROR, "could not count blocks of \"%s\": %m",
- RelationGetRelationName(reln));
+ return InvalidBlockNumber; /* failed? */
}
v = v->mdfd_chain;
* Returns # of blocks or InvalidBlockNumber on error.
*/
BlockNumber
-mdtruncate(Relation reln, BlockNumber nblocks)
+mdtruncate(SMgrRelation reln, BlockNumber nblocks)
{
- int fd;
MdfdVec *v;
BlockNumber curnblk;
* that truncate/delete loop will get them all!
*/
curnblk = mdnblocks(reln);
+ if (curnblk == InvalidBlockNumber)
+ return InvalidBlockNumber; /* mdnblocks failed */
if (nblocks > curnblk)
return InvalidBlockNumber; /* bogus request */
if (nblocks == curnblk)
return nblocks; /* no work */
- fd = _mdfd_getrelnfd(reln);
- v = &Md_fdvec[fd];
+ v = mdopen(reln);
#ifndef LET_OS_MANAGE_FILESIZE
priorblocks = 0;
FileTruncate(v->mdfd_vfd, 0);
FileUnlink(v->mdfd_vfd);
v = v->mdfd_chain;
- Assert(ov != &Md_fdvec[fd]); /* we never drop the 1st
+ Assert(ov != reln->md_fd); /* we never drop the 1st
* segment */
pfree(ov);
}
/*
* mdcommit() -- Commit a transaction.
- *
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
-int
+bool
mdcommit(void)
{
/*
* We don't actually have to do anything here...
*/
- return SM_SUCCESS;
+ return true;
}
/*
* mdabort() -- Abort a transaction.
*/
-int
+bool
mdabort(void)
{
/*
* We don't actually have to do anything here...
*/
- return SM_SUCCESS;
+ return true;
}
/*
* mdsync() -- Sync previous writes to stable storage.
*/
-int
+bool
mdsync(void)
{
sync();
if (IsUnderPostmaster)
sleep(2);
sync();
- return SM_SUCCESS;
+ return true;
}
/*
- * _fdvec_alloc() -- Grab a free (or new) md file descriptor vector.
+ * _fdvec_alloc() -- Make an MdfdVec object.
*/
-static int
+static MdfdVec *
_fdvec_alloc(void)
{
- MdfdVec *nvec;
- int fdvec,
- i;
-
- if (Md_Free >= 0) /* get from free list */
- {
- fdvec = Md_Free;
- Md_Free = Md_fdvec[fdvec].mdfd_nextFree;
- Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE);
- Md_fdvec[fdvec].mdfd_flags = 0;
- if (fdvec >= CurFd)
- {
- Assert(fdvec == CurFd);
- CurFd++;
- }
- return fdvec;
- }
-
- /* Must allocate more room */
-
- if (Nfds != CurFd)
- elog(FATAL, "_fdvec_alloc error");
-
- Nfds *= 2;
-
- nvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec));
- MemSet(nvec, 0, Nfds * sizeof(MdfdVec));
- memcpy(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec));
- pfree(Md_fdvec);
+ MdfdVec *v;
- Md_fdvec = nvec;
-
- /* Set new free list */
- for (i = CurFd; i < Nfds; i++)
- {
- Md_fdvec[i].mdfd_nextFree = i + 1;
- Md_fdvec[i].mdfd_flags = MDFD_FREE;
- }
- Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
- Md_Free = CurFd + 1;
-
- fdvec = CurFd;
- CurFd++;
- Md_fdvec[fdvec].mdfd_flags = 0;
+ v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
+ v->mdfd_vfd = -1;
+#ifndef LET_OS_MANAGE_FILESIZE
+ v->mdfd_chain = NULL;
+#endif
- return fdvec;
+ return v;
}
+#ifndef LET_OS_MANAGE_FILESIZE
/*
- * _fdvec_free() -- free md file descriptor vector.
- *
+ * Open the specified segment of the relation,
+ * and make an MdfdVec object for it. Returns NULL on failure.
*/
-static
-void
-_fdvec_free(int fdvec)
-{
-
- Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE);
- Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE);
- Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
- Md_fdvec[fdvec].mdfd_flags = MDFD_FREE;
- Md_Free = fdvec;
-}
-
static MdfdVec *
-_mdfd_openseg(Relation reln, BlockNumber segno, int oflags)
+_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
{
MdfdVec *v;
int fd;
*fullpath;
/* be sure we have enough space for the '.segno', if any */
- path = relpath(reln->rd_node);
+ path = relpath(reln->smgr_rnode);
if (segno > 0)
{
return NULL;
/* allocate an mdfdvec entry for it */
- v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
+ v = _fdvec_alloc();
/* fill the entry */
v->mdfd_vfd = fd;
- v->mdfd_flags = (uint16) 0;
-#ifndef LET_OS_MANAGE_FILESIZE
v->mdfd_chain = NULL;
Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
-#endif
/* all done */
return v;
}
-
-/*
- * _mdfd_getrelnfd() -- Get the (virtual) fd for the relation,
- * opening it if it's not already open
- *
- */
-static int
-_mdfd_getrelnfd(Relation reln)
-{
- int fd;
-
- fd = RelationGetFile(reln);
- if (fd < 0)
- {
- if ((fd = mdopen(reln)) < 0)
- elog(ERROR, "could not open relation \"%s\": %m",
- RelationGetRelationName(reln));
- reln->rd_fd = fd;
- }
- return fd;
-}
+#endif
/*
* _mdfd_getseg() -- Find the segment of the relation holding the
- * specified block
- *
+ * specified block. ereport's on failure.
*/
static MdfdVec *
-_mdfd_getseg(Relation reln, BlockNumber blkno)
+_mdfd_getseg(SMgrRelation reln, BlockNumber blkno)
{
- MdfdVec *v;
- int fd;
+ MdfdVec *v = mdopen(reln);
#ifndef LET_OS_MANAGE_FILESIZE
BlockNumber segno;
BlockNumber i;
-#endif
-
- fd = _mdfd_getrelnfd(reln);
-#ifndef LET_OS_MANAGE_FILESIZE
- for (v = &Md_fdvec[fd], segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1;
+ for (segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1;
segno > 0;
i++, segno--)
{
v->mdfd_chain = _mdfd_openseg(reln, i, (segno == 1) ? O_CREAT : 0);
if (v->mdfd_chain == NULL)
- elog(ERROR, "could not open segment %u of relation \"%s\" (target block %u): %m",
- i, RelationGetRelationName(reln), blkno);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open segment %u of relation %u/%u (target block %u): %m",
+ i,
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode,
+ blkno)));
}
v = v->mdfd_chain;
}
-#else
- v = &Md_fdvec[fd];
#endif
return v;
}
/*
- * Find the segment of the relation holding the specified block.
- *
- * This performs the same work as _mdfd_getseg() except that we must work
- * "blind" with no Relation struct. We assume that we are not likely to
- * touch the same relation again soon, so we do not create an FD entry for
- * the relation --- we just open a kernel file descriptor which will be
- * used and promptly closed. We also assume that the target block already
- * exists, ie, we need not extend the relation.
- *
- * The return value is the kernel descriptor, or -1 on failure.
+ * Get number of blocks present in a single disk file
*/
-static int
-_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno)
-{
- char *path;
- int fd;
-
-#ifndef LET_OS_MANAGE_FILESIZE
- BlockNumber segno;
-#endif
-
- path = relpath(rnode);
-
-#ifndef LET_OS_MANAGE_FILESIZE
- /* append the '.segno', if needed */
- segno = blkno / ((BlockNumber) RELSEG_SIZE);
- if (segno > 0)
- {
- char *segpath = (char *) palloc(strlen(path) + 12);
-
- sprintf(segpath, "%s.%u", path, segno);
- pfree(path);
- path = segpath;
- }
-#endif
-
- /* call fd.c to allow other FDs to be closed if needed */
- fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600);
- if (fd < 0)
- elog(LOG, "could not open \"%s\": %m", path);
-
- pfree(path);
-
- return fd;
-}
-
static BlockNumber
_mdnblocks(File file, Size blcksz)
{
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * mm.c
- * main memory storage manager
- *
- * This code manages relations that reside in (presumably stable)
- * main memory.
- *
- * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/mm.c,v 1.36 2004/01/07 18:56:27 neilc Exp $
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <math.h>
-
-#include "storage/smgr.h"
-#include "miscadmin.h"
-
-
-#ifdef STABLE_MEMORY_STORAGE
-
-/*
- * MMCacheTag -- Unique triplet for blocks stored by the main memory
- * storage manager.
- */
-
-typedef struct MMCacheTag
-{
- Oid mmct_dbid;
- Oid mmct_relid;
- BlockNumber mmct_blkno;
-} MMCacheTag;
-
-/*
- * Shared-memory hash table for main memory relations contains
- * entries of this form.
- */
-
-typedef struct MMHashEntry
-{
- MMCacheTag mmhe_tag;
- int mmhe_bufno;
-} MMHashEntry;
-
-/*
- * MMRelTag -- Unique identifier for each relation that is stored in the
- * main-memory storage manager.
- */
-
-typedef struct MMRelTag
-{
- Oid mmrt_dbid;
- Oid mmrt_relid;
-} MMRelTag;
-
-/*
- * Shared-memory hash table for # blocks in main memory relations contains
- * entries of this form.
- */
-
-typedef struct MMRelHashEntry
-{
- MMRelTag mmrhe_tag;
- int mmrhe_nblocks;
-} MMRelHashEntry;
-
-#define MMNBUFFERS 10
-#define MMNRELATIONS 2
-
-static int *MMCurTop;
-static int *MMCurRelno;
-static MMCacheTag *MMBlockTags;
-static char *MMBlockCache;
-static HTAB *MMCacheHT;
-static HTAB *MMRelCacheHT;
-
-int
-mminit(void)
-{
- char *mmcacheblk;
- int mmsize = 0;
- bool found;
- HASHCTL info;
-
- LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
- mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS);
- mmsize += MAXALIGN(sizeof(*MMCurTop));
- mmsize += MAXALIGN(sizeof(*MMCurRelno));
- mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag)));
- mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found);
-
- if (mmcacheblk == NULL)
- {
- LWLockRelease(MMCacheLock);
- return SM_FAIL;
- }
-
- info.keysize = sizeof(MMCacheTag);
- info.entrysize = sizeof(MMHashEntry);
- info.hash = tag_hash;
-
- MMCacheHT = ShmemInitHash("Main memory store HT",
- MMNBUFFERS, MMNBUFFERS,
- &info, (HASH_ELEM | HASH_FUNCTION));
-
- if (MMCacheHT == NULL)
- {
- LWLockRelease(MMCacheLock);
- return SM_FAIL;
- }
-
- info.keysize = sizeof(MMRelTag);
- info.entrysize = sizeof(MMRelHashEntry);
- info.hash = tag_hash;
-
- MMRelCacheHT = ShmemInitHash("Main memory rel HT",
- MMNRELATIONS, MMNRELATIONS,
- &info, (HASH_ELEM | HASH_FUNCTION));
-
- if (MMRelCacheHT == NULL)
- {
- LWLockRelease(MMCacheLock);
- return SM_FAIL;
- }
-
- if (IsUnderPostmaster) /* was IsPostmaster bjm */
- {
- MemSet(mmcacheblk, 0, mmsize);
- LWLockRelease(MMCacheLock);
- return SM_SUCCESS;
- }
-
- LWLockRelease(MMCacheLock);
-
- MMCurTop = (int *) mmcacheblk;
- mmcacheblk += sizeof(int);
- MMCurRelno = (int *) mmcacheblk;
- mmcacheblk += sizeof(int);
- MMBlockTags = (MMCacheTag *) mmcacheblk;
- mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag));
- MMBlockCache = mmcacheblk;
-
- return SM_SUCCESS;
-}
-
-int
-mmshutdown(void)
-{
- return SM_SUCCESS;
-}
-
-int
-mmcreate(Relation reln)
-{
- MMRelHashEntry *entry;
- bool found;
- MMRelTag tag;
-
- LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
- if (*MMCurRelno == MMNRELATIONS)
- {
- LWLockRelease(MMCacheLock);
- return SM_FAIL;
- }
-
- (*MMCurRelno)++;
-
- tag.mmrt_relid = RelationGetRelid(reln);
- if (reln->rd_rel->relisshared)
- tag.mmrt_dbid = (Oid) 0;
- else
- tag.mmrt_dbid = MyDatabaseId;
-
- entry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
- (void *) &tag,
- HASH_ENTER, &found);
-
- if (entry == NULL)
- {
- LWLockRelease(MMCacheLock);
- ereport(FATAL,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
- }
-
- if (found)
- {
- /* already exists */
- LWLockRelease(MMCacheLock);
- return SM_FAIL;
- }
-
- entry->mmrhe_nblocks = 0;
-
- LWLockRelease(MMCacheLock);
-
- return SM_SUCCESS;
-}
-
-/*
- * mmunlink() -- Unlink a relation.
- *
- * XXX currently broken: needs to accept RelFileNode, not Relation
- */
-int
-mmunlink(RelFileNode rnode)
-{
- int i;
- MMHashEntry *entry;
- MMRelHashEntry *rentry;
- MMRelTag rtag;
-
- LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
- for (i = 0; i < MMNBUFFERS; i++)
- {
- if (MMBlockTags[i].mmct_dbid == rnode.tblNode
- && MMBlockTags[i].mmct_relid == rnode.relNode)
- {
- entry = (MMHashEntry *) hash_search(MMCacheHT,
- (void *) &MMBlockTags[i],
- HASH_REMOVE, NULL);
- if (entry == NULL)
- {
- LWLockRelease(MMCacheLock);
- elog(FATAL, "cache hash table corrupted");
- }
- MMBlockTags[i].mmct_dbid = (Oid) 0;
- MMBlockTags[i].mmct_relid = (Oid) 0;
- MMBlockTags[i].mmct_blkno = (BlockNumber) 0;
- }
- }
- rtag.mmrt_dbid = rnode.tblNode;
- rtag.mmrt_relid = rnode.relNode;
-
- rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
- (void *) &rtag,
- HASH_REMOVE, NULL);
-
- if (rentry == NULL)
- {
- LWLockRelease(MMCacheLock);
- elog(FATAL, "rel cache hash table corrupted");
- }
-
- (*MMCurRelno)--;
-
- LWLockRelease(MMCacheLock);
- return 1;
-}
-
-/*
- * mmextend() -- Add a block to the specified relation.
- *
- * This routine returns SM_FAIL or SM_SUCCESS, with errno set as
- * appropriate.
- */
-int
-mmextend(Relation reln, BlockNumber blocknum, char *buffer)
-{
- MMRelHashEntry *rentry;
- MMHashEntry *entry;
- int i;
- Oid reldbid;
- int offset;
- bool found;
- MMRelTag rtag;
- MMCacheTag tag;
-
- if (reln->rd_rel->relisshared)
- reldbid = (Oid) 0;
- else
- reldbid = MyDatabaseId;
-
- tag.mmct_dbid = rtag.mmrt_dbid = reldbid;
- tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln);
-
- LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
- if (*MMCurTop == MMNBUFFERS)
- {
- for (i = 0; i < MMNBUFFERS; i++)
- {
- if (MMBlockTags[i].mmct_dbid == 0 &&
- MMBlockTags[i].mmct_relid == 0)
- break;
- }
- if (i == MMNBUFFERS)
- {
- LWLockRelease(MMCacheLock);
- return SM_FAIL;
- }
- }
- else
- {
- i = *MMCurTop;
- (*MMCurTop)++;
- }
-
- rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
- (void *) &rtag,
- HASH_FIND, NULL);
- if (rentry == NULL)
- {
- LWLockRelease(MMCacheLock);
- elog(FATAL, "rel cache hash table corrupted");
- }
-
- tag.mmct_blkno = rentry->mmrhe_nblocks;
-
- entry = (MMHashEntry *) hash_search(MMCacheHT,
- (void *) &tag,
- HASH_ENTER, &found);
- if (entry == NULL || found)
- {
- LWLockRelease(MMCacheLock);
- elog(FATAL, "cache hash table corrupted");
- }
-
- entry->mmhe_bufno = i;
- MMBlockTags[i].mmct_dbid = reldbid;
- MMBlockTags[i].mmct_relid = RelationGetRelid(reln);
- MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks;
-
- /* page numbers are zero-based, so we increment this at the end */
- (rentry->mmrhe_nblocks)++;
-
- /* write the extended page */
- offset = (i * BLCKSZ);
- memmove(&(MMBlockCache[offset]), buffer, BLCKSZ);
-
- LWLockRelease(MMCacheLock);
-
- return SM_SUCCESS;
-}
-
-/*
- * mmopen() -- Open the specified relation.
- */
-int
-mmopen(Relation reln)
-{
- /* automatically successful */
- return 0;
-}
-
-/*
- * mmclose() -- Close the specified relation.
- *
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
- */
-int
-mmclose(Relation reln)
-{
- /* automatically successful */
- return SM_SUCCESS;
-}
-
-/*
- * mmread() -- Read the specified block from a relation.
- *
- * Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mmread(Relation reln, BlockNumber blocknum, char *buffer)
-{
- MMHashEntry *entry;
- int offset;
- MMCacheTag tag;
-
- if (reln->rd_rel->relisshared)
- tag.mmct_dbid = (Oid) 0;
- else
- tag.mmct_dbid = MyDatabaseId;
-
- tag.mmct_relid = RelationGetRelid(reln);
- tag.mmct_blkno = blocknum;
-
- LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
- entry = (MMHashEntry *) hash_search(MMCacheHT,
- (void *) &tag,
- HASH_FIND, NULL);
-
- if (entry == NULL)
- {
- /* reading nonexistent pages is defined to fill them with zeroes */
- LWLockRelease(MMCacheLock);
- MemSet(buffer, 0, BLCKSZ);
- return SM_SUCCESS;
- }
-
- offset = (entry->mmhe_bufno * BLCKSZ);
- memmove(buffer, &MMBlockCache[offset], BLCKSZ);
-
- LWLockRelease(MMCacheLock);
-
- return SM_SUCCESS;
-}
-
-/*
- * mmwrite() -- Write the supplied block at the appropriate location.
- *
- * Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
-{
- MMHashEntry *entry;
- int offset;
- MMCacheTag tag;
-
- if (reln->rd_rel->relisshared)
- tag.mmct_dbid = (Oid) 0;
- else
- tag.mmct_dbid = MyDatabaseId;
-
- tag.mmct_relid = RelationGetRelid(reln);
- tag.mmct_blkno = blocknum;
-
- LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
- entry = (MMHashEntry *) hash_search(MMCacheHT,
- (void *) &tag,
- HASH_FIND, NULL);
-
- if (entry == NULL)
- {
- LWLockRelease(MMCacheLock);
- elog(FATAL, "cache hash table missing requested page");
- }
-
- offset = (entry->mmhe_bufno * BLCKSZ);
- memmove(&MMBlockCache[offset], buffer, BLCKSZ);
-
- LWLockRelease(MMCacheLock);
-
- return SM_SUCCESS;
-}
-
-/*
- * mmblindwrt() -- Write a block to stable storage blind.
- *
- * We have to be able to do this using only the rnode of the relation
- * in which the block belongs. Otherwise this is much like mmwrite().
- */
-int
-mmblindwrt(RelFileNode rnode,
- BlockNumber blkno,
- char *buffer)
-{
- return SM_FAIL;
-}
-
-/*
- * mmnblocks() -- Get the number of blocks stored in a relation.
- *
- * Returns # of blocks or InvalidBlockNumber on error.
- */
-BlockNumber
-mmnblocks(Relation reln)
-{
- MMRelTag rtag;
- MMRelHashEntry *rentry;
- BlockNumber nblocks;
-
- if (reln->rd_rel->relisshared)
- rtag.mmrt_dbid = (Oid) 0;
- else
- rtag.mmrt_dbid = MyDatabaseId;
-
- rtag.mmrt_relid = RelationGetRelid(reln);
-
- LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
- rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
- (void *) &rtag,
- HASH_FIND, NULL);
-
- if (rentry)
- nblocks = rentry->mmrhe_nblocks;
- else
- nblocks = InvalidBlockNumber;
-
- LWLockRelease(MMCacheLock);
-
- return nblocks;
-}
-
-/*
- * mmcommit() -- Commit a transaction.
- *
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
- */
-int
-mmcommit(void)
-{
- return SM_SUCCESS;
-}
-
-/*
- * mmabort() -- Abort a transaction.
- */
-
-int
-mmabort(void)
-{
- return SM_SUCCESS;
-}
-
-/*
- * MMShmemSize() -- Declare amount of shared memory we require.
- *
- * The shared memory initialization code creates a block of shared
- * memory exactly big enough to hold all the structures it needs to.
- * This routine declares how much space the main memory storage
- * manager will use.
- */
-int
-MMShmemSize(void)
-{
- int size = 0;
-
- /*
- * first compute space occupied by the (dbid,relid,blkno) hash table
- */
- size += hash_estimate_size(MMNBUFFERS, sizeof(MMHashEntry));
-
- /*
- * now do the same for the rel hash table
- */
- size += hash_estimate_size(MMNRELATIONS, sizeof(MMRelHashEntry));
-
- /*
- * finally, add in the memory block we use directly
- */
-
- size += MAXALIGN(BLCKSZ * MMNBUFFERS);
- size += MAXALIGN(sizeof(*MMCurTop));
- size += MAXALIGN(sizeof(*MMCurRelno));
- size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag));
-
- return size;
-}
-
-#endif /* STABLE_MEMORY_STORAGE */
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.68 2004/01/06 18:07:31 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.69 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h"
+#include "utils/hsearch.h"
#include "utils/memutils.h"
-static void smgrshutdown(int code, Datum arg);
-
+/*
+ * This struct of function pointers defines the API between smgr.c and
+ * any individual storage manager module. Note that smgr subfunctions are
+ * generally expected to return TRUE on success, FALSE on error. (For
+ * nblocks and truncate we instead say that returning InvalidBlockNumber
+ * indicates an error.)
+ */
typedef struct f_smgr
{
- int (*smgr_init) (void); /* may be NULL */
- int (*smgr_shutdown) (void); /* may be NULL */
- int (*smgr_create) (Relation reln);
- int (*smgr_unlink) (RelFileNode rnode);
- int (*smgr_extend) (Relation reln, BlockNumber blocknum,
+ bool (*smgr_init) (void); /* may be NULL */
+ bool (*smgr_shutdown) (void); /* may be NULL */
+ bool (*smgr_close) (SMgrRelation reln);
+ bool (*smgr_create) (SMgrRelation reln, bool isRedo);
+ bool (*smgr_unlink) (RelFileNode rnode, bool isRedo);
+ bool (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
char *buffer);
- int (*smgr_open) (Relation reln);
- int (*smgr_close) (Relation reln);
- int (*smgr_read) (Relation reln, BlockNumber blocknum,
+ bool (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
char *buffer);
- int (*smgr_write) (Relation reln, BlockNumber blocknum,
+ bool (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
char *buffer);
- int (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno,
- char *buffer);
- BlockNumber (*smgr_nblocks) (Relation reln);
- BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks);
- int (*smgr_commit) (void); /* may be NULL */
- int (*smgr_abort) (void); /* may be NULL */
- int (*smgr_sync) (void);
+ BlockNumber (*smgr_nblocks) (SMgrRelation reln);
+ BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks);
+ bool (*smgr_commit) (void); /* may be NULL */
+ bool (*smgr_abort) (void); /* may be NULL */
+ bool (*smgr_sync) (void); /* may be NULL */
} f_smgr;
-/*
- * The weird placement of commas in this init block is to keep the compiler
- * happy, regardless of what storage managers we have (or don't have).
- */
-
-static f_smgr smgrsw[] = {
+static const f_smgr smgrsw[] = {
/* magnetic disk */
- {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
- mdread, mdwrite, mdblindwrt,
- mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
- },
-
-#ifdef STABLE_MEMORY_STORAGE
- /* main memory */
- {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
- mmread, mmwrite, mmblindwrt,
- mmnblocks, NULL, mmcommit, mmabort, NULL},
-#endif
+ {mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
+ mdread, mdwrite, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
+ }
};
-/*
- * This array records which storage managers are write-once, and which
- * support overwrite. A 'true' entry means that the storage manager is
- * write-once. In the best of all possible worlds, there would be no
- * write-once storage managers.
- */
+static const int NSmgr = lengthof(smgrsw);
-#ifdef NOT_USED
-static bool smgrwo[] = {
- false, /* magnetic disk */
-#ifdef STABLE_MEMORY_STORAGE
- false, /* main memory */
-#endif
-};
-#endif
-static int NSmgr = lengthof(smgrsw);
+/*
+ * Each backend has a hashtable that stores all extant SMgrRelation objects.
+ */
+static HTAB *SMgrRelationHash = NULL;
/*
* We keep a list of all relations (represented as RelFileNode values)
typedef struct PendingRelDelete
{
RelFileNode relnode; /* relation that may need to be deleted */
- int16 which; /* which storage manager? */
+ int which; /* which storage manager? */
bool isTemp; /* is it a temporary relation? */
bool atCommit; /* T=delete at commit; F=delete at abort */
struct PendingRelDelete *next; /* linked-list link */
static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
+/* local function prototypes */
+static void smgrshutdown(int code, Datum arg);
+static void smgr_internal_unlink(RelFileNode rnode, int which,
+ bool isTemp, bool isRedo);
+
+
/*
* smgrinit(), smgrshutdown() -- Initialize or shut down all storage
* managers.
*
+ * Note: in the normal multiprocess scenario with a postmaster, these are
+ * called at postmaster start and stop, not per-backend.
*/
-int
+void
smgrinit(void)
{
int i;
{
if (smgrsw[i].smgr_init)
{
- if ((*(smgrsw[i].smgr_init)) () == SM_FAIL)
+ if (! (*(smgrsw[i].smgr_init)) ())
elog(FATAL, "smgr initialization failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
Int16GetDatum(i))));
/* register the shutdown proc */
on_proc_exit(smgrshutdown, 0);
-
- return SM_SUCCESS;
}
static void
{
if (smgrsw[i].smgr_shutdown)
{
- if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL)
+ if (! (*(smgrsw[i].smgr_shutdown)) ())
elog(FATAL, "smgr shutdown failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
Int16GetDatum(i))));
}
}
+/*
+ * smgropen() -- Return an SMgrRelation object, creating it if need be.
+ *
+ * This does not attempt to actually open the object.
+ */
+SMgrRelation
+smgropen(RelFileNode rnode)
+{
+ SMgrRelation reln;
+ bool found;
+
+ if (SMgrRelationHash == NULL)
+ {
+ /* First time through: initialize the hash table */
+ HASHCTL ctl;
+
+ MemSet(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(RelFileNode);
+ ctl.entrysize = sizeof(SMgrRelationData);
+ ctl.hash = tag_hash;
+ SMgrRelationHash = hash_create("smgr relation table", 400,
+ &ctl, HASH_ELEM | HASH_FUNCTION);
+ }
+
+ /* Look up or create an entry */
+ reln = (SMgrRelation) hash_search(SMgrRelationHash,
+ (void *) &rnode,
+ HASH_ENTER, &found);
+ if (reln == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+
+ /* Initialize it if not present before */
+ if (!found)
+ {
+ /* hash_search already filled in the lookup key */
+ reln->smgr_which = 0; /* we only have md.c at present */
+ reln->md_fd = NULL; /* mark it not open */
+ }
+
+ return reln;
+}
+
+/*
+ * smgrclose() -- Close and delete an SMgrRelation object.
+ *
+ * It is the caller's responsibility not to leave any dangling references
+ * to the object. (Pointers should be cleared after successful return;
+ * on the off chance of failure, the SMgrRelation object will still exist.)
+ */
+void
+smgrclose(SMgrRelation reln)
+{
+ if (! (*(smgrsw[reln->smgr_which].smgr_close)) (reln))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close relation %u/%u: %m",
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode)));
+
+ if (hash_search(SMgrRelationHash,
+ (void *) &(reln->smgr_rnode),
+ HASH_REMOVE, NULL) == NULL)
+ elog(ERROR, "SMgrRelation hashtable corrupted");
+}
+
+/*
+ * smgrcloseall() -- Close all existing SMgrRelation objects.
+ *
+ * It is the caller's responsibility not to leave any dangling references.
+ */
+void
+smgrcloseall(void)
+{
+ HASH_SEQ_STATUS status;
+ SMgrRelation reln;
+
+ /* Nothing to do if hashtable not set up */
+ if (SMgrRelationHash == NULL)
+ return;
+
+ hash_seq_init(&status, SMgrRelationHash);
+
+ while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
+ {
+ smgrclose(reln);
+ }
+}
+
+/*
+ * smgrclosenode() -- Close SMgrRelation object for given RelFileNode,
+ * if one exists.
+ *
+ * This has the same effects as smgrclose(smgropen(rnode)), but it avoids
+ * uselessly creating a hashtable entry only to drop it again when no
+ * such entry exists already.
+ *
+ * It is the caller's responsibility not to leave any dangling references.
+ */
+void
+smgrclosenode(RelFileNode rnode)
+{
+ SMgrRelation reln;
+
+ /* Nothing to do if hashtable not set up */
+ if (SMgrRelationHash == NULL)
+ return;
+
+ reln = (SMgrRelation) hash_search(SMgrRelationHash,
+ (void *) &rnode,
+ HASH_FIND, NULL);
+ if (reln != NULL)
+ smgrclose(reln);
+}
+
/*
* smgrcreate() -- Create a new relation.
*
- * This routine takes a reldesc, creates the relation on the appropriate
- * device, and returns a file descriptor for it.
+ * Given an already-created (but presumably unused) SMgrRelation,
+ * cause the underlying disk file or other storage to be created.
+ *
+ * If isRedo is true, it is okay for the underlying file to exist
+ * already because we are in a WAL replay sequence. In this case
+ * we should make no PendingRelDelete entry; the WAL sequence will
+ * tell whether to drop the file.
*/
-int
-smgrcreate(int16 which, Relation reln)
+void
+smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
{
- int fd;
PendingRelDelete *pending;
- if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0)
+ if (! (*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo))
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not create relation \"%s\": %m",
- RelationGetRelationName(reln))));
+ errmsg("could not create relation %u/%u: %m",
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode)));
+
+ if (isRedo)
+ return;
/* Add the relation to the list of stuff to delete at abort */
pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
- pending->relnode = reln->rd_node;
- pending->which = which;
- pending->isTemp = reln->rd_istemp;
+ pending->relnode = reln->smgr_rnode;
+ pending->which = reln->smgr_which;
+ pending->isTemp = isTemp;
pending->atCommit = false; /* delete if abort */
pending->next = pendingDeletes;
pendingDeletes = pending;
-
- return fd;
}
/*
- * smgrunlink() -- Unlink a relation.
+ * smgrscheduleunlink() -- Schedule unlinking a relation at xact commit.
+ *
+ * The relation is marked to be removed from the store if we
+ * successfully commit the current transaction.
*
- * The relation is removed from the store. Actually, we just remember
- * that we want to do this at transaction commit.
+ * This also implies smgrclose() on the SMgrRelation object.
*/
-int
-smgrunlink(int16 which, Relation reln)
+void
+smgrscheduleunlink(SMgrRelation reln, bool isTemp)
{
PendingRelDelete *pending;
- /* Make sure the file is closed */
- if (reln->rd_fd >= 0)
- smgrclose(which, reln);
-
/* Add the relation to the list of stuff to delete at commit */
pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
- pending->relnode = reln->rd_node;
- pending->which = which;
- pending->isTemp = reln->rd_istemp;
+ pending->relnode = reln->smgr_rnode;
+ pending->which = reln->smgr_which;
+ pending->isTemp = isTemp;
pending->atCommit = true; /* delete if commit */
pending->next = pendingDeletes;
pendingDeletes = pending;
* immediately, but for now I'll keep the logic simple.
*/
- return SM_SUCCESS;
+ /* Now close the file and throw away the hashtable entry */
+ smgrclose(reln);
}
/*
- * smgrextend() -- Add a new block to a file.
+ * smgrdounlink() -- Immediately unlink a relation.
*
- * The semantics are basically the same as smgrwrite(): write at the
- * specified position. However, we are expecting to extend the
- * relation (ie, blocknum is the current EOF), and so in case of
- * failure we clean up by truncating.
+ * The relation is removed from the store. This should not be used
+ * during transactional operations, since it can't be undone.
*
- * Returns SM_SUCCESS on success; aborts the current transaction on
- * failure.
+ * If isRedo is true, it is okay for the underlying file to be gone
+ * already. (In practice isRedo will always be true.)
+ *
+ * This also implies smgrclose() on the SMgrRelation object.
*/
-int
-smgrextend(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
+void
+smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo)
{
- int status;
+ RelFileNode rnode = reln->smgr_rnode;
+ int which = reln->smgr_which;
- status = (*(smgrsw[which].smgr_extend)) (reln, blocknum, buffer);
-
- if (status == SM_FAIL)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not extend relation \"%s\": %m",
- RelationGetRelationName(reln)),
- errhint("Check free disk space.")));
+ /* Close the file and throw away the hashtable entry */
+ smgrclose(reln);
- return status;
+ smgr_internal_unlink(rnode, which, isTemp, isRedo);
}
/*
- * smgropen() -- Open a relation using a particular storage manager.
- *
- * Returns the fd for the open relation on success.
- *
- * On failure, returns -1 if failOK, else aborts the transaction.
+ * Shared subroutine that actually does the unlink ...
*/
-int
-smgropen(int16 which, Relation reln, bool failOK)
+static void
+smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
{
- int fd;
-
- if (reln->rd_rel->relkind == RELKIND_VIEW)
- return -1;
- if (reln->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
- return -1;
- if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
- if (!failOK)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\": %m",
- RelationGetRelationName(reln))));
-
- return fd;
+ /*
+ * Get rid of any leftover buffers for the rel (shouldn't be any in the
+ * commit case, but there can be in the abort case).
+ */
+ DropRelFileNodeBuffers(rnode, isTemp);
+
+ /*
+ * Tell the free space map to forget this relation. It won't be accessed
+ * any more anyway, but we may as well recycle the map space quickly.
+ */
+ FreeSpaceMapForgetRel(&rnode);
+
+ /*
+ * And delete the physical files.
+ *
+ * Note: we treat deletion failure as a WARNING, not an error,
+ * because we've already decided to commit or abort the current xact.
+ */
+ if (! (*(smgrsw[which].smgr_unlink)) (rnode, isRedo))
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not unlink relation %u/%u: %m",
+ rnode.tblNode,
+ rnode.relNode)));
}
/*
- * smgrclose() -- Close a relation.
+ * smgrextend() -- Add a new block to a file.
*
- * Returns SM_SUCCESS on success, aborts on failure.
+ * The semantics are basically the same as smgrwrite(): write at the
+ * specified position. However, we are expecting to extend the
+ * relation (ie, blocknum is the current EOF), and so in case of
+ * failure we clean up by truncating.
*/
-int
-smgrclose(int16 which, Relation reln)
+void
+smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
- if ((*(smgrsw[which].smgr_close)) (reln) == SM_FAIL)
+ if (! (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer))
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not close relation \"%s\": %m",
- RelationGetRelationName(reln))));
-
- return SM_SUCCESS;
+ errmsg("could not extend relation %u/%u: %m",
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode),
+ errhint("Check free disk space.")));
}
/*
*
* This routine is called from the buffer manager in order to
* instantiate pages in the shared buffer cache. All storage managers
- * return pages in the format that POSTGRES expects. This routine
- * dispatches the read. On success, it returns SM_SUCCESS. On failure,
- * the current transaction is aborted.
+ * return pages in the format that POSTGRES expects.
*/
-int
-smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
+void
+smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
- int status;
-
- status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer);
-
- if (status == SM_FAIL)
+ if (! (*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer))
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not read block %d of relation \"%s\": %m",
- blocknum, RelationGetRelationName(reln))));
-
- return status;
+ errmsg("could not read block %u of relation %u/%u: %m",
+ blocknum,
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode)));
}
/*
*
* This is not a synchronous write -- the block is not necessarily
* on disk at return, only dumped out to the kernel.
- *
- * The buffer is written out via the appropriate
- * storage manager. This routine returns SM_SUCCESS or aborts
- * the current transaction.
*/
-int
-smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
-{
- int status;
-
- status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer);
-
- if (status == SM_FAIL)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not write block %d of relation \"%s\": %m",
- blocknum, RelationGetRelationName(reln))));
-
- return status;
-}
-
-/*
- * smgrblindwrt() -- Write a page out blind.
- *
- * In some cases, we may find a page in the buffer cache that we
- * can't make a reldesc for. This happens, for example, when we
- * want to reuse a dirty page that was written by a transaction
- * that has not yet committed, which created a new relation. In
- * this case, the buffer manager will call smgrblindwrt() with
- * the name and OID of the database and the relation to which the
- * buffer belongs. Every storage manager must be able to write
- * this page out to stable storage in this circumstance.
- */
-int
-smgrblindwrt(int16 which,
- RelFileNode rnode,
- BlockNumber blkno,
- char *buffer)
+void
+smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
- int status;
-
- status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer);
-
- if (status == SM_FAIL)
+ if (! (*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer))
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not write block %d of %u/%u blind: %m",
- blkno, rnode.tblNode, rnode.relNode)));
-
- return status;
+ errmsg("could not write block %u of relation %u/%u: %m",
+ blocknum,
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode)));
}
/*
* transaction on failure.
*/
BlockNumber
-smgrnblocks(int16 which, Relation reln)
+smgrnblocks(SMgrRelation reln)
{
BlockNumber nblocks;
- nblocks = (*(smgrsw[which].smgr_nblocks)) (reln);
+ nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
/*
* NOTE: if a relation ever did grow to 2^32-1 blocks, this code would
if (nblocks == InvalidBlockNumber)
ereport(ERROR,
(errcode_for_file_access(),
- errmsg("could not count blocks of relation \"%s\": %m",
- RelationGetRelationName(reln))));
+ errmsg("could not count blocks of relation %u/%u: %m",
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode)));
return nblocks;
}
* transaction on failure.
*/
BlockNumber
-smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
+smgrtruncate(SMgrRelation reln, BlockNumber nblocks)
{
BlockNumber newblks;
- newblks = nblocks;
- if (smgrsw[which].smgr_truncate)
- {
- /*
- * Tell the free space map to forget anything it may have stored
- * for the about-to-be-deleted blocks. We want to be sure it
- * won't return bogus block numbers later on.
- */
- FreeSpaceMapTruncateRel(&reln->rd_node, nblocks);
-
- newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks);
- if (newblks == InvalidBlockNumber)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not truncate relation \"%s\" to %u blocks: %m",
- RelationGetRelationName(reln), nblocks)));
- }
+ /*
+ * Tell the free space map to forget anything it may have stored
+ * for the about-to-be-deleted blocks. We want to be sure it
+ * won't return bogus block numbers later on.
+ */
+ FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
+
+ newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks);
+ if (newblks == InvalidBlockNumber)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not truncate relation %u/%u to %u blocks: %m",
+ reln->smgr_rnode.tblNode,
+ reln->smgr_rnode.relNode,
+ nblocks)));
return newblks;
}
/*
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
*/
-int
+void
smgrDoPendingDeletes(bool isCommit)
{
while (pendingDeletes != NULL)
pendingDeletes = pending->next;
if (pending->atCommit == isCommit)
- {
- /*
- * Get rid of any leftover buffers for the rel (shouldn't be
- * any in the commit case, but there can be in the abort
- * case).
- */
- DropRelFileNodeBuffers(pending->relnode, pending->isTemp);
-
- /*
- * Tell the free space map to forget this relation. It won't
- * be accessed any more anyway, but we may as well recycle the
- * map space quickly.
- */
- FreeSpaceMapForgetRel(&pending->relnode);
-
- /*
- * And delete the physical files.
- *
- * Note: we treat deletion failure as a WARNING, not an error,
- * because we've already decided to commit or abort the
- * current xact.
- */
- if ((*(smgrsw[pending->which].smgr_unlink)) (pending->relnode) == SM_FAIL)
- ereport(WARNING,
- (errcode_for_file_access(),
- errmsg("could not unlink %u/%u: %m",
- pending->relnode.tblNode,
- pending->relnode.relNode)));
- }
+ smgr_internal_unlink(pending->relnode,
+ pending->which,
+ pending->isTemp,
+ false);
pfree(pending);
}
-
- return SM_SUCCESS;
}
/*
*
* This is called before we actually commit.
*/
-int
+void
smgrcommit(void)
{
int i;
{
if (smgrsw[i].smgr_commit)
{
- if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL)
+ if (! (*(smgrsw[i].smgr_commit)) ())
elog(FATAL, "transaction commit failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
Int16GetDatum(i))));
}
}
-
- return SM_SUCCESS;
}
/*
* smgrabort() -- Abort changes made during the current transaction.
*/
-int
+void
smgrabort(void)
{
int i;
{
if (smgrsw[i].smgr_abort)
{
- if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL)
+ if (! (*(smgrsw[i].smgr_abort)) ())
elog(FATAL, "transaction abort failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
Int16GetDatum(i))));
}
}
-
- return SM_SUCCESS;
}
/*
* smgrsync() -- Sync files to disk at checkpoint time.
*/
-int
+void
smgrsync(void)
{
int i;
{
if (smgrsw[i].smgr_sync)
{
- if ((*(smgrsw[i].smgr_sync)) () == SM_FAIL)
+ if (! (*(smgrsw[i].smgr_sync)) ())
elog(PANIC, "storage sync failed on %s: %m",
DatumGetCString(DirectFunctionCall1(smgrout,
Int16GetDatum(i))));
}
}
-
- return SM_SUCCESS;
}
-#ifdef NOT_USED
-bool
-smgriswo(int16 smgrno)
-{
- if (smgrno < 0 || smgrno >= NSmgr)
- elog(ERROR, "invalid storage manager id: %d", smgrno);
-
- return smgrwo[smgrno];
-}
-#endif
void
smgr_redo(XLogRecPtr lsn, XLogRecord *record)
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.22 2003/11/29 19:51:57 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.23 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "storage/smgr.h"
+
typedef struct smgrid
{
- char *smgr_name;
+ const char *smgr_name;
} smgrid;
/*
* StorageManager[] -- List of defined storage managers.
- *
- * The weird comma placement is to keep compilers happy no matter
- * which of these is (or is not) defined.
*/
-
-static smgrid StorageManager[] = {
- {"magnetic disk"},
-#ifdef STABLE_MEMORY_STORAGE
- {"main memory"}
-#endif
+static const smgrid StorageManager[] = {
+ {"magnetic disk"}
};
-static int NStorageManagers = lengthof(StorageManager);
+static const int NStorageManagers = lengthof(StorageManager);
+
Datum
smgrin(PG_FUNCTION_ARGS)
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.209 2003/11/29 19:51:57 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.210 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "parser/parse_type.h"
#include "rewrite/rewriteDefine.h"
#include "rewrite/rewriteRemove.h"
+#include "storage/fd.h"
#include "tcop/pquery.h"
#include "tcop/utility.h"
#include "utils/acl.h"
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.59 2003/11/29 19:52:00 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.60 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "catalog/catalog.h"
#include "miscadmin.h"
#include "storage/sinval.h"
+#include "storage/smgr.h"
#include "utils/catcache.h"
#include "utils/inval.h"
#include "utils/memutils.h"
*/
static void
AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
- Oid dbId, Oid relId)
+ Oid dbId, Oid relId, RelFileNode physId)
{
SharedInvalidationMessage msg;
/* Don't add a duplicate item */
- /* We assume comparing relId is sufficient, needn't check dbId */
+ /* We assume dbId need not be checked because it will never change */
+ /* relfilenode fields must be checked to support reassignment */
ProcessMessageList(hdr->rclist,
- if (msg->rc.relId == relId) return);
+ if (msg->rc.relId == relId &&
+ RelFileNodeEquals(msg->rc.physId, physId)) return);
/* OK, add the item */
msg.rc.id = SHAREDINVALRELCACHE_ID;
msg.rc.dbId = dbId;
msg.rc.relId = relId;
+ msg.rc.physId = physId;
AddInvalidationMessage(&hdr->rclist, &msg);
}
* As above, but register a relcache invalidation event.
*/
static void
-RegisterRelcacheInvalidation(Oid dbId, Oid relId)
+RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId)
{
AddRelcacheInvalidationMessage(&CurrentCmdInvalidMsgs,
- dbId, relId);
+ dbId, relId, physId);
/*
* If the relation being invalidated is one of those cached in the
}
else if (msg->id == SHAREDINVALRELCACHE_ID)
{
- if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == 0)
+ /*
+ * If the message includes a valid relfilenode, we must ensure that
+ * smgr cache entry gets zapped. The relcache will handle this if
+ * called, otherwise we must do it directly.
+ */
+ if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid)
{
- RelationIdInvalidateRelationCacheByRelationId(msg->rc.relId);
+ if (OidIsValid(msg->rc.physId.relNode))
+ RelationCacheInvalidateEntry(msg->rc.relId, &msg->rc.physId);
+ else
+ RelationCacheInvalidateEntry(msg->rc.relId, NULL);
for (i = 0; i < cache_callback_count; i++)
{
(*ccitem->function) (ccitem->arg, msg->rc.relId);
}
}
+ else
+ {
+ /* might have smgr entry even if not in our database */
+ if (OidIsValid(msg->rc.physId.relNode))
+ smgrclosenode(msg->rc.physId);
+ }
}
else
elog(FATAL, "unrecognized SI message id: %d", msg->id);
* InvalidateSystemCaches
*
* This blows away all tuples in the system catalog caches and
- * all the cached relation descriptors (and closes their files too).
+ * all the cached relation descriptors and smgr cache entries.
* Relation descriptors that have positive refcounts are then rebuilt.
*
* We call this when we see a shared-inval-queue overflow signal,
int i;
ResetCatalogCaches();
- RelationCacheInvalidate();
+ RelationCacheInvalidate(); /* gets smgr cache too */
for (i = 0; i < cache_callback_count; i++)
{
PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
void (*CacheIdRegisterFunc) (int, uint32,
ItemPointer, Oid),
- void (*RelationIdRegisterFunc) (Oid, Oid))
+ void (*RelationIdRegisterFunc) (Oid, Oid,
+ RelFileNode))
{
Oid tupleRelId;
+ Oid databaseId;
Oid relationId;
+ RelFileNode rnode;
+ /* Do nothing during bootstrap */
if (IsBootstrapProcessingMode())
return;
tupleRelId = RelationGetRelid(relation);
if (tupleRelId == RelOid_pg_class)
+ {
+ Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
+
relationId = HeapTupleGetOid(tuple);
+ if (classtup->relisshared)
+ databaseId = InvalidOid;
+ else
+ databaseId = MyDatabaseId;
+ rnode.tblNode = databaseId; /* XXX change for tablespaces */
+ rnode.relNode = classtup->relfilenode;
+ /*
+ * Note: during a pg_class row update that assigns a new relfilenode
+ * value, we will be called on both the old and new tuples, and thus
+ * will broadcast invalidation messages showing both the old and new
+ * relfilenode values. This ensures that other backends will close
+ * smgr references to the old relfilenode file.
+ */
+ }
else if (tupleRelId == RelOid_pg_attribute)
- relationId = ((Form_pg_attribute) GETSTRUCT(tuple))->attrelid;
+ {
+ Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple);
+
+ relationId = atttup->attrelid;
+ /*
+ * KLUGE ALERT: we always send the relcache event with MyDatabaseId,
+ * even if the rel in question is shared (which we can't easily tell).
+ * This essentially means that only backends in this same database
+ * will react to the relcache flush request. This is in fact
+ * appropriate, since only those backends could see our pg_attribute
+ * change anyway. It looks a bit ugly though.
+ */
+ databaseId = MyDatabaseId;
+ /* We assume no smgr cache flush is needed, either */
+ rnode.tblNode = InvalidOid;
+ rnode.relNode = InvalidOid;
+ }
else
return;
/*
- * Yes. We need to register a relcache invalidation event for the
- * relation identified by relationId.
- *
- * KLUGE ALERT: we always send the relcache event with MyDatabaseId, even
- * if the rel in question is shared. This essentially means that only
- * backends in this same database will react to the relcache flush
- * request. This is in fact appropriate, since only those backends
- * could see our pg_class or pg_attribute change anyway. It looks a
- * bit ugly though.
+ * Yes. We need to register a relcache invalidation event.
*/
- (*RelationIdRegisterFunc) (MyDatabaseId, relationId);
+ (*RelationIdRegisterFunc) (databaseId, relationId, rnode);
}
/*
* CacheInvalidateHeapTuple
* Register the given tuple for invalidation at end of command
- * (ie, current command is outdating this tuple).
+ * (ie, current command is creating or outdating this tuple).
*/
void
CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
* This is used in places that need to force relcache rebuild but aren't
* changing any of the tuples recognized as contributors to the relcache
* entry by PrepareForTupleInvalidation. (An example is dropping an index.)
+ * We assume in particular that relfilenode isn't changing.
*/
void
-CacheInvalidateRelcache(Oid relationId)
+CacheInvalidateRelcache(Relation relation)
{
- /* See KLUGE ALERT in PrepareForTupleInvalidation */
- RegisterRelcacheInvalidation(MyDatabaseId, relationId);
+ Oid databaseId;
+ Oid relationId;
+
+ relationId = RelationGetRelid(relation);
+ if (relation->rd_rel->relisshared)
+ databaseId = InvalidOid;
+ else
+ databaseId = MyDatabaseId;
+
+ RegisterRelcacheInvalidation(databaseId, relationId, relation->rd_node);
+}
+
+/*
+ * CacheInvalidateRelcacheByTuple
+ * As above, but relation is identified by passing its pg_class tuple.
+ */
+void
+CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
+{
+ Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple);
+ Oid databaseId;
+ Oid relationId;
+ RelFileNode rnode;
+
+ relationId = HeapTupleGetOid(classTuple);
+ if (classtup->relisshared)
+ databaseId = InvalidOid;
+ else
+ databaseId = MyDatabaseId;
+ rnode.tblNode = databaseId; /* XXX change for tablespaces */
+ rnode.relNode = classtup->relfilenode;
+
+ RegisterRelcacheInvalidation(databaseId, relationId, rnode);
}
/*
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.196 2004/02/02 00:17:21 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.197 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "optimizer/clauses.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
+#include "storage/fd.h"
#include "storage/smgr.h"
#include "utils/builtins.h"
#include "utils/catcache.h"
static HTAB *RelationIdCache;
static HTAB *RelationSysNameCache;
-/*
- * Bufmgr uses RelFileNode for lookup. Actually, I would like to do
- * not pass Relation to bufmgr & beyond at all and keep some cache
- * in smgr, but no time to do it right way now. -- vadim 10/22/2000
- */
-static HTAB *RelationNodeCache;
-
/*
* This flag is false until we have prepared the critical relcache entries
* that are needed to do indexscans on the tables read by relcache building.
Relation reldesc;
} RelNameCacheEnt;
-typedef struct relnodecacheent
-{
- RelFileNode relnode;
- Relation reldesc;
-} RelNodeCacheEnt;
-
/*
* macros to manipulate the lookup hashtables
*/
#define RelationCacheInsert(RELATION) \
do { \
- RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; bool found; \
+ RelIdCacheEnt *idhentry; bool found; \
idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
(void *) &(RELATION->rd_id), \
HASH_ENTER, \
errmsg("out of memory"))); \
/* used to give notice if found -- now just keep quiet */ \
idhentry->reldesc = RELATION; \
- nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \
- (void *) &(RELATION->rd_node), \
- HASH_ENTER, \
- &found); \
- if (nodentry == NULL) \
- ereport(ERROR, \
- (errcode(ERRCODE_OUT_OF_MEMORY), \
- errmsg("out of memory"))); \
- /* used to give notice if found -- now just keep quiet */ \
- nodentry->reldesc = RELATION; \
if (IsSystemNamespace(RelationGetNamespace(RELATION))) \
{ \
char *relname = RelationGetRelationName(RELATION); \
RELATION = NULL; \
} while(0)
-#define RelationNodeCacheLookup(NODE, RELATION) \
-do { \
- RelNodeCacheEnt *hentry; \
- hentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \
- (void *)&(NODE), HASH_FIND,NULL); \
- if (hentry) \
- RELATION = hentry->reldesc; \
- else \
- RELATION = NULL; \
-} while(0)
-
#define RelationCacheDelete(RELATION) \
do { \
- RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; \
+ RelIdCacheEnt *idhentry; \
idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
(void *)&(RELATION->rd_id), \
HASH_REMOVE, NULL); \
if (idhentry == NULL) \
elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
- nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \
- (void *)&(RELATION->rd_node), \
- HASH_REMOVE, NULL); \
- if (nodentry == NULL) \
- elog(WARNING, "trying to delete a rd_node reldesc that does not exist"); \
if (IsSystemNamespace(RelationGetNamespace(RELATION))) \
{ \
char *relname = RelationGetRelationName(RELATION); \
relation->rd_targblock = InvalidBlockNumber;
/* make sure relation is marked as having no open file yet */
- relation->rd_fd = -1;
+ relation->rd_smgr = NULL;
/*
* Copy the relation tuple form
relation->rd_node.relNode = relation->rd_rel->relfilenode;
/* make sure relation is marked as having no open file yet */
- relation->rd_fd = -1;
+ relation->rd_smgr = NULL;
/*
* Insert newly created relation into relcache hash tables.
relation->rd_targblock = InvalidBlockNumber;
/* make sure relation is marked as having no open file yet */
- relation->rd_fd = -1;
+ relation->rd_smgr = NULL;
/*
* initialize reference count
return rd;
}
-/*
- * RelationNodeCacheGetRelation
- *
- * As above, but lookup by relfilenode.
- *
- * NOTE: this must NOT try to revalidate invalidated nailed indexes, since
- * that could cause us to return an entry with a different relfilenode than
- * the caller asked for. Currently this is used only by the buffer manager.
- * Really the bufmgr's idea of relations should be separated out from the
- * relcache ...
- */
-Relation
-RelationNodeCacheGetRelation(RelFileNode rnode)
-{
- Relation rd;
-
- RelationNodeCacheLookup(rnode, rd);
-
- if (RelationIsValid(rd))
- RelationIncrementReferenceCount(rd);
-
- return rd;
-}
-
/*
* RelationIdGetRelation
*
elog(ERROR, "could not find tuple for system relation %u",
relation->rd_id);
relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
- if (relation->rd_node.relNode != relp->relfilenode)
- {
- /* We have to re-insert the entry into the relcache indexes */
- RelationCacheDelete(relation);
- memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
- relation->rd_node.relNode = relp->relfilenode;
- RelationCacheInsert(relation);
- }
+ memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
+ relation->rd_node.relNode = relp->relfilenode;
heap_freetuple(pg_class_tuple);
/* Must adjust number of blocks after we know the new relfilenode */
relation->rd_targblock = InvalidBlockNumber;
* ensures that the low-level file access state is updated after, say,
* a vacuum truncation.
*/
- if (relation->rd_fd >= 0)
+ if (relation->rd_smgr)
{
- smgrclose(DEFAULT_SMGR, relation);
- relation->rd_fd = -1;
+ smgrclose(relation->rd_smgr);
+ relation->rd_smgr = NULL;
}
/*
}
/*
- * RelationIdInvalidateRelationCacheByRelationId
+ * RelationCacheInvalidateEntry
*
* This routine is invoked for SI cache flush messages.
*
- * We used to skip local relations, on the grounds that they could
- * not be targets of cross-backend SI update messages; but it seems
- * safer to process them, so that our *own* SI update messages will
- * have the same effects during CommandCounterIncrement for both
- * local and nonlocal relations.
+ * Any relcache entry matching the relid must be flushed. (Note: caller has
+ * already determined that the relid belongs to our database or is a shared
+ * relation.) If rnode isn't NULL, we must also ensure that any smgr cache
+ * entry matching that rnode is flushed.
+ *
+ * Ordinarily, if rnode is supplied then it will match the relfilenode of
+ * the target relid. However, it's possible for rnode to be different if
+ * someone is engaged in a relfilenode change. In that case we want to
+ * make sure we clear the right cache entries. This has to be done here
+ * to keep things in sync between relcache and smgr cache --- we can't have
+ * someone flushing an smgr cache entry that a relcache entry still points
+ * to.
+ *
+ * We used to skip local relations, on the grounds that they could
+ * not be targets of cross-backend SI update messages; but it seems
+ * safer to process them, so that our *own* SI update messages will
+ * have the same effects during CommandCounterIncrement for both
+ * local and nonlocal relations.
*/
void
-RelationIdInvalidateRelationCacheByRelationId(Oid relationId)
+RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode)
{
Relation relation;
if (PointerIsValid(relation))
{
relcacheInvalsReceived++;
+ if (rnode)
+ {
+ /* Need to be sure smgr is flushed, but don't do it twice */
+ if (relation->rd_smgr == NULL ||
+ !RelFileNodeEquals(*rnode, relation->rd_node))
+ smgrclosenode(*rnode);
+ }
RelationFlushRelation(relation);
}
+ else
+ {
+ if (rnode)
+ smgrclosenode(*rnode);
+ }
}
/*
* RelationCacheInvalidate
* Blow away cached relation descriptors that have zero reference counts,
- * and rebuild those with positive reference counts.
+ * and rebuild those with positive reference counts. Also reset the smgr
+ * relation cache.
*
* This is currently used only to recover from SI message buffer overflow,
* so we do not touch new-in-transaction relations; they cannot be targets
{
relation = idhentry->reldesc;
+ /* Must close all smgr references to avoid leaving dangling ptrs */
+ if (relation->rd_smgr)
+ {
+ smgrclose(relation->rd_smgr);
+ relation->rd_smgr = NULL;
+ }
+
/* Ignore new relations, since they are never SI targets */
if (relation->rd_isnew)
continue;
rebuildList = nconc(rebuildFirstList, rebuildList);
+ /*
+ * Now zap any remaining smgr cache entries. This must happen before
+ * we start to rebuild entries, since that may involve catalog fetches
+ * which will re-open catalog files.
+ */
+ smgrcloseall();
+
/* Phase 2: rebuild the items found to need rebuild in phase 1 */
foreach(l, rebuildList)
{
rel->rd_targblock = InvalidBlockNumber;
/* make sure relation is marked as having no open file yet */
- rel->rd_fd = -1;
+ rel->rd_smgr = NULL;
RelationSetReferenceCount(rel, 1);
RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
&ctl, HASH_ELEM | HASH_FUNCTION);
- ctl.keysize = sizeof(RelFileNode);
- ctl.entrysize = sizeof(RelNodeCacheEnt);
- ctl.hash = tag_hash;
- RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE,
- &ctl, HASH_ELEM | HASH_FUNCTION);
-
/*
* Try to load the relcache cache file. If successful, we're done for
* now. Otherwise, initialize the cache with pre-made descriptors for
}
}
-
-/* used by XLogInitCache */
-void CreateDummyCaches(void);
-void DestroyDummyCaches(void);
-
-void
-CreateDummyCaches(void)
-{
- MemoryContext oldcxt;
- HASHCTL ctl;
-
- if (!CacheMemoryContext)
- CreateCacheMemoryContext();
-
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
-
- MemSet(&ctl, 0, sizeof(ctl));
- ctl.keysize = sizeof(NameData);
- ctl.entrysize = sizeof(RelNameCacheEnt);
- RelationSysNameCache = hash_create("Relcache by name", INITRELCACHESIZE,
- &ctl, HASH_ELEM);
-
- ctl.keysize = sizeof(Oid);
- ctl.entrysize = sizeof(RelIdCacheEnt);
- ctl.hash = tag_hash;
- RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
- &ctl, HASH_ELEM | HASH_FUNCTION);
-
- ctl.keysize = sizeof(RelFileNode);
- ctl.entrysize = sizeof(RelNodeCacheEnt);
- ctl.hash = tag_hash;
- RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE,
- &ctl, HASH_ELEM | HASH_FUNCTION);
-
- MemoryContextSwitchTo(oldcxt);
-}
-
-void
-DestroyDummyCaches(void)
-{
- MemoryContext oldcxt;
-
- if (!CacheMemoryContext)
- return;
-
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
-
- if (RelationIdCache)
- hash_destroy(RelationIdCache);
- if (RelationSysNameCache)
- hash_destroy(RelationSysNameCache);
- if (RelationNodeCache)
- hash_destroy(RelationNodeCache);
-
- RelationIdCache = RelationSysNameCache = RelationNodeCache = NULL;
-
- MemoryContextSwitchTo(oldcxt);
-}
-
static void
AttrDefaultFetch(Relation relation)
{
/*
* Reset transient-state fields in the relcache entry
*/
- rel->rd_fd = -1;
+ rel->rd_smgr = NULL;
rel->rd_targblock = InvalidBlockNumber;
if (rel->rd_isnailed)
RelationSetReferenceCount(rel, 1);
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.122 2004/02/08 22:28:57 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.123 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "catalog/pg_shadow.h"
#include "libpq/libpq-be.h"
#include "miscadmin.h"
+#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "utils/builtins.h"
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.30 2003/11/29 22:40:58 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.31 2004/02/10 01:55:26 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
DATA(insert OID = 1 ( template1 PGUID ENCODING t t 0 0 0 "" _null_ _null_ ));
DESCR("Default template database");
-
#define TemplateDbOid 1
-/* Just to mark OID as used for unused_oid script -:) */
-#define DATAMARKOID(x)
-
-DATAMARKOID(= 2)
-#define RecoveryDb 2
-
-#undef DATAMARKOID
-
#endif /* PG_DATABASE_H */
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.32 2003/11/29 22:41:13 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.33 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "storage/backendid.h"
#include "storage/itemptr.h"
+#include "storage/relfilenode.h"
/*
* ID field). -1 means a relcache inval message. Other negative values
* are available to identify other inval message types.
*
+ * Relcache invalidation messages usually also cause invalidation of entries
+ * in the smgr's relation cache. This means they must carry both logical
+ * and physical relation ID info (ie, both dbOID/relOID and RelFileNode).
+ * In some cases RelFileNode information is not available, so the sender fills
+ * those fields with zeroes --- this is okay so long as no smgr cache flush
+ * is required.
+ *
* Shared-inval events are initially driven by detecting tuple inserts,
* updates and deletions in system catalogs (see CacheInvalidateHeapTuple).
* An update generates two inval events, one for the old tuple and one for
int16 id; /* type field --- must be first */
Oid dbId; /* database ID, or 0 if a shared relation */
Oid relId; /* relation ID */
+ RelFileNode physId; /* physical file ID */
+ /*
+ * Note: it is likely that RelFileNode will someday be changed to
+ * include database ID. In that case the dbId field will be redundant
+ * and should be removed to save space.
+ */
} SharedInvalRelcacheMsg;
typedef union
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.39 2003/11/29 22:41:13 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.40 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "access/xlog.h"
#include "fmgr.h"
-#include "storage/relfilenode.h"
#include "storage/block.h"
-#include "utils/rel.h"
+#include "storage/relfilenode.h"
+
+
+/*
+ * smgr.c maintains a table of SMgrRelation objects, which are essentially
+ * cached file handles. An SMgrRelation is created (if not already present)
+ * by smgropen(), and destroyed by smgrclose(). Note that neither of these
+ * operations implies I/O; they just create or destroy a hashtable entry.
+ * (But smgrclose() may release associated resources, such as OS-level file
+ * descriptors.)
+ */
+typedef struct SMgrRelationData
+{
+ /* rnode is the hashtable lookup key, so it must be first! */
+ RelFileNode smgr_rnode; /* relation physical identifier */
+ /* additional public fields may someday exist here */
-#define SM_FAIL 0
-#define SM_SUCCESS 1
+ /*
+ * Fields below here are intended to be private to smgr.c and its
+ * submodules. Do not touch them from elsewhere.
+ */
+ int smgr_which; /* storage manager selector */
-#define DEFAULT_SMGR 0
+ struct _MdfdVec *md_fd; /* for md.c; NULL if not open */
+} SMgrRelationData;
-extern int smgrinit(void);
-extern int smgrcreate(int16 which, Relation reln);
-extern int smgrunlink(int16 which, Relation reln);
-extern int smgrextend(int16 which, Relation reln, BlockNumber blocknum,
- char *buffer);
-extern int smgropen(int16 which, Relation reln, bool failOK);
-extern int smgrclose(int16 which, Relation reln);
-extern int smgrread(int16 which, Relation reln, BlockNumber blocknum,
- char *buffer);
-extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum,
- char *buffer);
-extern int smgrblindwrt(int16 which, RelFileNode rnode,
- BlockNumber blkno, char *buffer);
-extern BlockNumber smgrnblocks(int16 which, Relation reln);
-extern BlockNumber smgrtruncate(int16 which, Relation reln,
- BlockNumber nblocks);
-extern int smgrDoPendingDeletes(bool isCommit);
-extern int smgrcommit(void);
-extern int smgrabort(void);
-extern int smgrsync(void);
+typedef SMgrRelationData *SMgrRelation;
+
+
+extern void smgrinit(void);
+extern SMgrRelation smgropen(RelFileNode rnode);
+extern void smgrclose(SMgrRelation reln);
+extern void smgrcloseall(void);
+extern void smgrclosenode(RelFileNode rnode);
+extern void smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo);
+extern void smgrscheduleunlink(SMgrRelation reln, bool isTemp);
+extern void smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo);
+extern void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern BlockNumber smgrnblocks(SMgrRelation reln);
+extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks);
+extern void smgrDoPendingDeletes(bool isCommit);
+extern void smgrcommit(void);
+extern void smgrabort(void);
+extern void smgrsync(void);
extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
extern void smgr_undo(XLogRecPtr lsn, XLogRecord *record);
/* internals: move me elsewhere -- ay 7/94 */
/* in md.c */
-extern int mdinit(void);
-extern int mdcreate(Relation reln);
-extern int mdunlink(RelFileNode rnode);
-extern int mdextend(Relation reln, BlockNumber blocknum, char *buffer);
-extern int mdopen(Relation reln);
-extern int mdclose(Relation reln);
-extern int mdread(Relation reln, BlockNumber blocknum, char *buffer);
-extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
-extern int mdblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer);
-extern BlockNumber mdnblocks(Relation reln);
-extern BlockNumber mdtruncate(Relation reln, BlockNumber nblocks);
-extern int mdcommit(void);
-extern int mdabort(void);
-extern int mdsync(void);
-
-/* mm.c */
-extern int mminit(void);
-extern int mmcreate(Relation reln);
-extern int mmunlink(RelFileNode rnode);
-extern int mmextend(Relation reln, BlockNumber blocknum, char *buffer);
-extern int mmopen(Relation reln);
-extern int mmclose(Relation reln);
-extern int mmread(Relation reln, BlockNumber blocknum, char *buffer);
-extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
-extern int mmblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer);
-extern BlockNumber mmnblocks(Relation reln);
-extern BlockNumber mmtruncate(Relation reln, BlockNumber nblocks);
-extern int mmcommit(void);
-extern int mmabort(void);
-
-extern int mmshutdown(void);
-extern int MMShmemSize(void);
+extern bool mdinit(void);
+extern bool mdclose(SMgrRelation reln);
+extern bool mdcreate(SMgrRelation reln, bool isRedo);
+extern bool mdunlink(RelFileNode rnode, bool isRedo);
+extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern BlockNumber mdnblocks(SMgrRelation reln);
+extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks);
+extern bool mdcommit(void);
+extern bool mdabort(void);
+extern bool mdsync(void);
/* smgrtype.c */
extern Datum smgrout(PG_FUNCTION_ARGS);
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.29 2003/11/29 22:41:15 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.30 2004/02/10 01:55:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple);
-extern void CacheInvalidateRelcache(Oid relationId);
+extern void CacheInvalidateRelcache(Relation relation);
+
+extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple);
extern void CacheRegisterSyscacheCallback(int cacheid,
CacheCallbackFunction func,
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.72 2004/01/06 18:07:32 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.73 2004/02/10 01:55:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "catalog/pg_index.h"
#include "rewrite/prs2lock.h"
#include "storage/block.h"
-#include "storage/fd.h"
#include "storage/relfilenode.h"
bool index_scan_counted;
} PgStat_Info;
+
/*
* Here are the contents of a relation cache entry.
*/
typedef struct RelationData
{
- File rd_fd; /* open file descriptor, or -1 if
- * none; this is NOT an operating
- * system file descriptor */
- RelFileNode rd_node; /* file node (physical identifier) */
+ RelFileNode rd_node; /* relation physical identifier */
+ /* use "struct" here to avoid needing to include smgr.h: */
+ struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */
BlockNumber rd_nblocks; /* number of blocks in rel */
BlockNumber rd_targblock; /* current insertion target block, or
* InvalidBlockNumber */
*/
#define RelationGetRelid(relation) ((relation)->rd_id)
-/*
- * RelationGetFile
- * Returns the open file descriptor for the rel, or -1 if
- * none. This is NOT an operating system file descriptor; see md.c
- * for more information
- */
-#define RelationGetFile(relation) ((relation)->rd_fd)
-
/*
* RelationGetNumberOfAttributes
* Returns the number of attributes in a relation.
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.38 2003/11/29 22:41:16 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.39 2004/02/10 01:55:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/* finds an existing cache entry, but won't make a new one */
extern Relation RelationIdCacheGetRelation(Oid relationId);
-extern Relation RelationNodeCacheGetRelation(RelFileNode rnode);
extern void RelationClose(Relation relation);
*/
extern void RelationForgetRelation(Oid rid);
-extern void RelationIdInvalidateRelationCacheByRelationId(Oid relationId);
+extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode);
extern void RelationCacheInvalidate(void);
extern bool RelationIdIsInInitFile(Oid relationId);
extern void RelationCacheInitFileInvalidate(bool beforeSend);
-/* XLOG support */
-extern void CreateDummyCaches(void);
-extern void DestroyDummyCaches(void);
-
-
/* should be used only by relcache.c and catcache.c */
extern bool criticalRelcachesBuilt;