Restructure smgr API as per recent proposal. smgr no longer depends on the relcache.

author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Feb 2004 01:55:27 +0000 (01:55 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 10 Feb 2004 01:55:27 +0000 (01:55 +0000)
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c

index 87a251915a5e0ae8a747ee24e70530bfea72dc71..fa77318ea3fbf97dd1806d5cbfc0232617552e79 100644 (file)
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.111 2004/02/06 19:36:17 wieck Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.112 2004/02/10 01:55:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -821,7 +821,9 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
                         /*
                          * Do the physical truncation.
                          */
-                       new_pages = smgrtruncate(DEFAULT_SMGR, rel, new_pages);
+                       if (rel->rd_smgr == NULL)
+                               rel->rd_smgr = smgropen(rel->rd_node);
+                       new_pages = smgrtruncate(rel->rd_smgr, new_pages);
                         rel->rd_nblocks = new_pages;            /* update relcache
                                                                                                  * immediately */
                         rel->rd_targblock = InvalidBlockNumber;
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c

index ba3054f14bbbbe52e94c2dd5c5f9541702552beb..c92f90f6ca83c70cce12aac1f335e0418fccbcfa 100644 (file)
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.10 2004/01/28 21:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.11 2004/02/10 01:55:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -19,6 +19,7 @@
  #include <unistd.h>
  
  #include "access/slru.h"
+#include "storage/fd.h"
  #include "storage/lwlock.h"
  #include "miscadmin.h"
  
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c

index d5f357bc5ffcb7475be6d3c8118fd480031ba7d1..06e152d1bba55263a3385a338b3207deafe02014 100644 (file)
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.161 2004/01/26 22:51:55 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.162 2004/02/10 01:55:24 tgl Exp $
   *
   * NOTES
   *             Transaction aborts can now occur two ways:
@@ -159,6 +159,7 @@
  #include "executor/spi.h"
  #include "libpq/be-fsstubs.h"
  #include "miscadmin.h"
+#include "storage/fd.h"
  #include "storage/proc.h"
  #include "storage/sinval.h"
  #include "storage/smgr.h"
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index fe1ecd453c5f1dd96db6195f4ffd940018382435..9056f0b454943170ed5293bbf05b8af498e9c0d0 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.133 2004/01/26 22:35:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.134 2004/02/10 01:55:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -30,6 +30,7 @@
  #include "catalog/catversion.h"
  #include "catalog/pg_control.h"
  #include "storage/bufpage.h"
+#include "storage/fd.h"
  #include "storage/lwlock.h"
  #include "storage/pmsignal.h"
  #include "storage/proc.h"
@@ -3126,7 +3127,6 @@ ShutdownXLOG(int code, Datum arg)
         MyXactMadeTempRelUpdate = false;
  
         CritSectionCount++;
-       CreateDummyCaches();
         CreateCheckPoint(true, true);
         ShutdownCLOG();
         CritSectionCount--;
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c

index d200b7e17adb5d8afb620b51884627e87f8d9bf4..0271742ce0ae1b16c600a9637bebd5058b67da84 100644 (file)
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.28 2003/12/14 00:34:47 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.29 2004/02/10 01:55:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -261,14 +261,12 @@ _xl_remove_hash_entry(XLogRelDesc *rdesc)
         if (hentry == NULL)
                 elog(PANIC, "_xl_remove_hash_entry: file was not found in cache");
  
-       if (rdesc->reldata.rd_fd >= 0)
-               smgrclose(DEFAULT_SMGR, &(rdesc->reldata));
+       if (rdesc->reldata.rd_smgr != NULL)
+               smgrclose(rdesc->reldata.rd_smgr);
  
         memset(rdesc, 0, sizeof(XLogRelDesc));
         memset(tpgc, 0, sizeof(FormData_pg_class));
         rdesc->reldata.rd_rel = tpgc;
-
-       return;
  }
  
  static XLogRelDesc *
@@ -296,7 +294,6 @@ _xl_new_reldesc(void)
  void
  XLogInitRelationCache(void)
  {
-       CreateDummyCaches();
         _xl_init_rel_cache();
  }
  
@@ -306,8 +303,6 @@ XLogCloseRelationCache(void)
         HASH_SEQ_STATUS status;
         XLogRelCacheEntry *hentry;
  
-       DestroyDummyCaches();
-
         if (!_xlrelarr)
                 return;
  
@@ -347,11 +342,18 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
  
                 sprintf(RelationGetRelationName(&(res->reldata)), "%u", rnode.relNode);
  
-               /* unexisting DB id */
-               res->reldata.rd_lockInfo.lockRelId.dbId = RecoveryDb;
-               res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
                 res->reldata.rd_node = rnode;
  
+               /*
+                * We set up the lockRelId in case anything tries to lock the dummy
+                * relation.  Note that this is fairly bogus since relNode may be
+                * different from the relation's OID.  It shouldn't really matter
+                * though, since we are presumably running by ourselves and can't
+                * have any lock conflicts ...
+                */
+               res->reldata.rd_lockInfo.lockRelId.dbId = rnode.tblNode;
+               res->reldata.rd_lockInfo.lockRelId.relId = rnode.relNode;
+
                 hentry = (XLogRelCacheEntry *)
                         hash_search(_xlrelcache, (void *) &rnode, HASH_ENTER, &found);
  
@@ -364,9 +366,17 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
                 hentry->rdesc = res;
  
                 res->reldata.rd_targblock = InvalidBlockNumber;
-               res->reldata.rd_fd = -1;
-               res->reldata.rd_fd = smgropen(DEFAULT_SMGR, &(res->reldata),
-                                                                         true /* allow failure */ );
+               res->reldata.rd_smgr = smgropen(res->reldata.rd_node);
+               /*
+                * Create the target file if it doesn't already exist.  This lets
+                * us cope if the replay sequence contains writes to a relation
+                * that is later deleted.  (The original coding of this routine
+                * would instead return NULL, causing the writes to be suppressed.
+                * But that seems like it risks losing valuable data if the filesystem
+                * loses an inode during a crash.  Better to write the data until we
+                * are actually told to delete the file.)
+                */
+               smgrcreate(res->reldata.rd_smgr, res->reldata.rd_istemp, true);
         }
  
         res->moreRecently = &(_xlrelarr[0]);
@@ -374,8 +384,5 @@ XLogOpenRelation(bool redo, RmgrId rmid, RelFileNode rnode)
         _xlrelarr[0].lessRecently = res;
         res->lessRecently->moreRecently = res;
  
-       if (res->reldata.rd_fd < 0) /* file doesn't exist */
-               return (NULL);
-
         return (&(res->reldata));
  }
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c

index 8d29134d39d01cc5209125e927a36a148ef17737..2f67061c48b036d92d520ddbe7885115a6398461 100644 (file)
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.175 2004/01/07 18:56:25 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.176 2004/02/10 01:55:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -465,14 +465,12 @@ BootstrapMain(int argc, char *argv[])
                         break;
  
                 case BS_XLOG_CHECKPOINT:
-                       CreateDummyCaches();
                         CreateCheckPoint(false, false);
                         SetSavedRedoRecPtr();           /* pass redo ptr back to
                                                                                  * postmaster */
                         proc_exit(0);           /* done */
  
                 case BS_XLOG_BGWRITER:
-                       CreateDummyCaches();
                         BufferBackgroundWriter();
                         proc_exit(0);           /* done */
  
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c

index 14c3745e5e91d282bd015f64b723f14c5c4caac7..905aa5b0b222f2efcfdc08204976ce1ebb0d2bce 100644 (file)
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.257 2003/12/28 21:57:36 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.258 2004/02/10 01:55:24 tgl Exp $
   *
   *
   * INTERFACE ROUTINES
@@ -298,9 +298,9 @@ heap_create(const char *relname,
  void
  heap_storage_create(Relation rel)
  {
-       Assert(rel->rd_fd < 0);
-       rel->rd_fd = smgrcreate(DEFAULT_SMGR, rel);
-       Assert(rel->rd_fd >= 0);
+       Assert(rel->rd_smgr == NULL);
+       rel->rd_smgr = smgropen(rel->rd_node);
+       smgrcreate(rel->rd_smgr, rel->rd_istemp, false);
  }
  
  /* ----------------------------------------------------------------
@@ -1210,7 +1210,12 @@ heap_drop_with_catalog(Oid rid)
          */
         if (rel->rd_rel->relkind != RELKIND_VIEW &&
                 rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE)
-               smgrunlink(DEFAULT_SMGR, rel);
+       {
+               if (rel->rd_smgr == NULL)
+                       rel->rd_smgr = smgropen(rel->rd_node);
+               smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
+               rel->rd_smgr = NULL;
+       }
  
         /*
          * Close relcache entry, but *keep* AccessExclusiveLock on the
@@ -1706,7 +1711,7 @@ SetRelationNumChecks(Relation rel, int numchecks)
         else
         {
                 /* Skip the disk update, but force relcache inval anyway */
-               CacheInvalidateRelcache(RelationGetRelid(rel));
+               CacheInvalidateRelcache(rel);
         }
  
         heap_freetuple(reltup);
@@ -1943,7 +1948,9 @@ RelationTruncateIndexes(Oid heapId)
                 DropRelationBuffers(currentIndex);
  
                 /* Now truncate the actual data and set blocks to zero */
-               smgrtruncate(DEFAULT_SMGR, currentIndex, 0);
+               if (currentIndex->rd_smgr == NULL)
+                       currentIndex->rd_smgr = smgropen(currentIndex->rd_node);
+               smgrtruncate(currentIndex->rd_smgr, 0);
                 currentIndex->rd_nblocks = 0;
                 currentIndex->rd_targblock = InvalidBlockNumber;
  
@@ -1990,7 +1997,9 @@ heap_truncate(Oid rid)
         DropRelationBuffers(rel);
  
         /* Now truncate the actual data and set blocks to zero */
-       smgrtruncate(DEFAULT_SMGR, rel, 0);
+       if (rel->rd_smgr == NULL)
+               rel->rd_smgr = smgropen(rel->rd_node);
+       smgrtruncate(rel->rd_smgr, 0);
         rel->rd_nblocks = 0;
         rel->rd_targblock = InvalidBlockNumber;
  
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c

index 4180526301ac50cd73180e22b62777729afef95e..9c92f21740922fbfed4c6be69f4ce5e5617f35a9 100644 (file)
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.226 2004/01/28 21:02:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.227 2004/02/10 01:55:24 tgl Exp $
   *
   *
   * INTERFACE ROUTINES
@@ -808,7 +808,11 @@ index_drop(Oid indexId)
         if (i < 0)
                 elog(ERROR, "FlushRelationBuffers returned %d", i);
  
-       smgrunlink(DEFAULT_SMGR, userIndexRelation);
+       if (userIndexRelation->rd_smgr == NULL)
+               userIndexRelation->rd_smgr = smgropen(userIndexRelation->rd_node);
+       smgrscheduleunlink(userIndexRelation->rd_smgr,
+                                          userIndexRelation->rd_istemp);
+       userIndexRelation->rd_smgr = NULL;
  
         /*
          * We are presently too lazy to attempt to compute the new correct
@@ -818,7 +822,7 @@ index_drop(Oid indexId)
          * owning relation to ensure other backends update their relcache
          * lists of indexes.
          */
-       CacheInvalidateRelcache(heapId);
+       CacheInvalidateRelcache(userHeapRelation);
  
         /*
          * Close rels, but keep locks
@@ -1057,7 +1061,7 @@ setRelhasindex(Oid relid, bool hasindex, bool isprimary, Oid reltoastidxid)
         else
         {
                 /* no need to change tuple, but force relcache rebuild anyway */
-               CacheInvalidateRelcache(relid);
+               CacheInvalidateRelcacheByTuple(tuple);
         }
  
         if (!pg_class_scan)
@@ -1077,10 +1081,11 @@ void
  setNewRelfilenode(Relation relation)
  {
         Oid                     newrelfilenode;
+       RelFileNode newrnode;
+       SMgrRelation srel;
         Relation        pg_class;
         HeapTuple       tuple;
         Form_pg_class rd_rel;
-       RelationData workrel;
  
         /* Can't change relfilenode for nailed tables (indexes ok though) */
         Assert(!relation->rd_isnailed ||
@@ -1107,14 +1112,18 @@ setNewRelfilenode(Relation relation)
  
         /* create another storage file. Is it a little ugly ? */
         /* NOTE: any conflict in relfilenode value will be caught here */
-       memcpy((char *) &workrel, relation, sizeof(RelationData));
-       workrel.rd_fd = -1;
-       workrel.rd_node.relNode = newrelfilenode;
-       heap_storage_create(&workrel);
-       smgrclose(DEFAULT_SMGR, &workrel);
+       newrnode = relation->rd_node;
+       newrnode.relNode = newrelfilenode;
+
+       srel = smgropen(newrnode);
+       smgrcreate(srel, relation->rd_istemp, false);
+       smgrclose(srel);
  
         /* schedule unlinking old relfilenode */
-       smgrunlink(DEFAULT_SMGR, relation);
+       if (relation->rd_smgr == NULL)
+               relation->rd_smgr = smgropen(relation->rd_node);
+       smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
+       relation->rd_smgr = NULL;
  
         /* update the pg_class row */
         rd_rel->relfilenode = newrelfilenode;
@@ -1672,7 +1681,9 @@ reindex_index(Oid indexId)
                 DropRelationBuffers(iRel);
  
                 /* Now truncate the actual data and set blocks to zero */
-               smgrtruncate(DEFAULT_SMGR, iRel, 0);
+               if (iRel->rd_smgr == NULL)
+                       iRel->rd_smgr = smgropen(iRel->rd_node);
+               smgrtruncate(iRel->rd_smgr, 0);
                 iRel->rd_nblocks = 0;
                 iRel->rd_targblock = InvalidBlockNumber;
         }
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c

index 66850d32d56229523972129e428d14b6b9b3d96f..7af8200e0637a1591c18df221731c0c190d24780 100644 (file)
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.217 2004/01/28 21:02:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.218 2004/02/10 01:55:24 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -40,6 +40,7 @@
  #include "parser/parse_coerce.h"
  #include "parser/parse_relation.h"
  #include "rewrite/rewriteHandler.h"
+#include "storage/fd.h"
  #include "tcop/pquery.h"
  #include "tcop/tcopprot.h"
  #include "utils/acl.h"
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c

index 2b8fdb9a2d4abd2717c94a6c15ac04a7883674cc..85f49537efce589c7722f5d7491e455b232ce2dd 100644 (file)
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.130 2004/01/07 18:56:25 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.131 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -30,6 +30,7 @@
  #include "commands/comment.h"
  #include "commands/dbcommands.h"
  #include "miscadmin.h"
+#include "storage/fd.h"
  #include "storage/freespace.h"
  #include "storage/sinval.h"
  #include "utils/acl.h"
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c

index 809f425bc656a72b71a67ee52e283a6efb598aea..6fadd0d4e15f99ea9e9ffd7bf8a615593ecdf870 100644 (file)
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.97 2004/01/28 21:02:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.98 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1010,7 +1010,7 @@ setRelhassubclassInRelation(Oid relationId, bool relhassubclass)
         else
         {
                 /* no need to change tuple, but force relcache rebuild anyway */
-               CacheInvalidateRelcache(relationId);
+               CacheInvalidateRelcacheByTuple(tuple);
         }
  
         heap_freetuple(tuple);
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c

index 34cfc4d10e3d29f6a6555bda26dff9a760aebe2b..bddf3f5ad68b7e97c6de952cc21ce4aca587d74d 100644 (file)
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.163 2003/11/29 19:51:47 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.164 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -691,7 +691,7 @@ renametrig(Oid relid,
                  * relcache entries.  (Ideally this should happen
                  * automatically...)
                  */
-               CacheInvalidateRelcache(relid);
+               CacheInvalidateRelcache(targetrel);
         }
         else
         {
diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c

index d2d1a3c7a950a59ac5616ab744b79dca786573ee..9352aeb0ec03784414a0643acfe5c354f3416ab8 100644 (file)
--- a/src/backend/commands/user.c
+++ b/src/backend/commands/user.c
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.136 2004/02/02 17:21:07 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.137 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -27,6 +27,7 @@
  #include "commands/user.h"
  #include "libpq/crypt.h"
  #include "miscadmin.h"
+#include "storage/fd.h"
  #include "storage/pmsignal.h"
  #include "utils/acl.h"
  #include "utils/array.h"
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index dae8c3f37cddfda321ae03623996c39acbd58f7a..29a2df1ef1d4aee68a01632f9f402d72672d2546 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.271 2004/01/07 18:56:25 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.272 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -2513,7 +2513,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
         /* truncate relation, if needed */
         if (blkno < nblocks)
         {
-               blkno = smgrtruncate(DEFAULT_SMGR, onerel, blkno);
+               if (onerel->rd_smgr == NULL)
+                       onerel->rd_smgr = smgropen(onerel->rd_node);
+               blkno = smgrtruncate(onerel->rd_smgr, blkno);
                 onerel->rd_nblocks = blkno;             /* update relcache immediately */
                 onerel->rd_targblock = InvalidBlockNumber;
                 vacrelstats->rel_pages = blkno; /* set new number of blocks */
@@ -2582,7 +2584,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
                                 (errmsg("\"%s\": truncated %u to %u pages",
                                                 RelationGetRelationName(onerel),
                                                 vacrelstats->rel_pages, relblocks)));
-               relblocks = smgrtruncate(DEFAULT_SMGR, onerel, relblocks);
+               if (onerel->rd_smgr == NULL)
+                       onerel->rd_smgr = smgropen(onerel->rd_node);
+               relblocks = smgrtruncate(onerel->rd_smgr, relblocks);
                 onerel->rd_nblocks = relblocks; /* update relcache immediately */
                 onerel->rd_targblock = InvalidBlockNumber;
                 vacrelstats->rel_pages = relblocks;             /* set new number of
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c

index c271152877054a24d2c94033ec0d75fc7b559a8f..17f91efef7065130b6cf8f573505bf695ea579a3 100644 (file)
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -31,7 +31,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.35 2004/02/06 19:36:17 wieck Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.36 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -148,9 +148,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
         vac_open_indexes(onerel, &nindexes, &Irel);
         hasindex = (nindexes > 0);
  
-       /* Turn on vacuum cost accounting */
-       if (VacuumCostNaptime > 0)
-               VacuumCostActive = true;
+       /* Turn vacuum cost accounting on or off */
+       VacuumCostActive = (VacuumCostNaptime > 0);
         VacuumCostBalance = 0;
  
         /* Do the vacuuming */
@@ -784,7 +783,9 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
         /*
          * Do the physical truncation.
          */
-       new_rel_pages = smgrtruncate(DEFAULT_SMGR, onerel, new_rel_pages);
+       if (onerel->rd_smgr == NULL)
+               onerel->rd_smgr = smgropen(onerel->rd_node);
+       new_rel_pages = smgrtruncate(onerel->rd_smgr, new_rel_pages);
         onerel->rd_nblocks = new_rel_pages; /* update relcache immediately */
         onerel->rd_targblock = InvalidBlockNumber;
         vacrelstats->rel_pages = new_rel_pages;         /* save new number of
diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c

index aa8ba2f884abb86a6253de9687046f34db8e43f0..ed19e76db2ccd3d3ea1ce4b4e4461ea41aaf7bc6 100644 (file)
--- a/src/backend/libpq/be-fsstubs.c
+++ b/src/backend/libpq/be-fsstubs.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.69 2003/11/29 19:51:49 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.70 2004/02/10 01:55:25 tgl Exp $
   *
   * NOTES
   *       This should be moved to a more appropriate place.  It is here
@@ -41,6 +41,7 @@
  #include "libpq/be-fsstubs.h"
  #include "libpq/libpq-fs.h"
  #include "miscadmin.h"
+#include "storage/fd.h"
  #include "storage/large_object.h"
  #include "utils/memutils.h"
  
diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c

index f1cbe96fd2a2c206a3356f992f496b070e7f1f25..995afe5509e850c9de2aa48dd265d96e7d23f440 100644 (file)
--- a/src/backend/rewrite/rewriteDefine.c
+++ b/src/backend/rewrite/rewriteDefine.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.92 2004/01/14 23:01:55 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.93 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -480,7 +480,12 @@ DefineQueryRewrite(RuleStmt *stmt)
          * XXX what about getting rid of its TOAST table?  For now, we don't.
          */
         if (RelisBecomingView)
-               smgrunlink(DEFAULT_SMGR, event_relation);
+       {
+               if (event_relation->rd_smgr == NULL)
+                       event_relation->rd_smgr = smgropen(event_relation->rd_node);
+               smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp);
+               event_relation->rd_smgr = NULL;
+       }
  
         /* Close rel, but keep lock till commit... */
         heap_close(event_relation, NoLock);
diff --git a/src/backend/rewrite/rewriteSupport.c b/src/backend/rewrite/rewriteSupport.c

index 54fdcfcddeb1dd646be727defb2a5abb4a906fbb..6e01de4b5cb293f90f943f771f0698bcebc9f594 100644 (file)
--- a/src/backend/rewrite/rewriteSupport.c
+++ b/src/backend/rewrite/rewriteSupport.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.57 2003/11/29 19:51:55 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/rewrite/rewriteSupport.c,v 1.58 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -84,7 +84,7 @@ SetRelationRuleStatus(Oid relationId, bool relHasRules,
         else
         {
                 /* no need to change tuple, but force relcache rebuild anyway */
-               CacheInvalidateRelcache(relationId);
+               CacheInvalidateRelcacheByTuple(tuple);
         }
  
         heap_freetuple(tuple);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c

index b927b5ea5e7ed164e7b626d965e0095be56248a3..203e03ab059b094aca730f3df3fa0d2c569c7952 100644 (file)
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.156 2004/02/06 19:36:18 wieck Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.157 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -85,7 +85,7 @@ static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
                                    bool bufferLockHeld);
  static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
                         bool *foundPtr);
-static bool BufferReplace(BufferDesc *bufHdr);
+static void BufferReplace(BufferDesc *bufHdr);
  
  #ifdef NOT_USED
  void           PrintBufferDescs(void);
@@ -127,7 +127,6 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
                                    bool bufferLockHeld)
  {
         BufferDesc *bufHdr;
-       int                     status;
         bool            found;
         bool            isExtend;
         bool            isLocalBuf;
@@ -135,6 +134,10 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
         isExtend = (blockNum == P_NEW);
         isLocalBuf = reln->rd_istemp;
  
+       /* Open it at the smgr level if not already done */
+       if (reln->rd_smgr == NULL)
+               reln->rd_smgr = smgropen(reln->rd_node);
+
         if (isLocalBuf)
         {
                 ReadLocalBufferCount++;
@@ -160,7 +163,7 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
                 if (isExtend)
                 {
                         /* must be sure we have accurate file length! */
-                       blockNum = reln->rd_nblocks = smgrnblocks(DEFAULT_SMGR, reln);
+                       blockNum = reln->rd_nblocks = smgrnblocks(reln->rd_smgr);
                         reln->rd_nblocks++;
                 }
  
@@ -207,23 +210,19 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
         }
  
         /*
-        * if we have gotten to this point, the reln pointer must be ok and
-        * the relation file must be open.
+        * if we have gotten to this point, the relation must be open in the smgr.
          */
         if (isExtend)
         {
                 /* new buffers are zero-filled */
                 MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
-               status = smgrextend(DEFAULT_SMGR, reln, blockNum,
-                                                       (char *) MAKE_PTR(bufHdr->data));
+               smgrextend(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data));
         }
         else
         {
-               status = smgrread(DEFAULT_SMGR, reln, blockNum,
-                                                 (char *) MAKE_PTR(bufHdr->data));
+               smgrread(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data));
                 /* check for garbage data */
-               if (status == SM_SUCCESS &&
-                       !PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data)))
+               if (!PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data)))
                 {
                         /*
                          * During WAL recovery, the first access to any data page should
@@ -250,47 +249,20 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
         if (isLocalBuf)
         {
                 /* No shared buffer state to update... */
-               if (status == SM_FAIL)
-               {
-                       bufHdr->flags |= BM_IO_ERROR;
-                       return InvalidBuffer;
-               }
                 return BufferDescriptorGetBuffer(bufHdr);
         }
  
         /* lock buffer manager again to update IO IN PROGRESS */
         LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
  
-       if (status == SM_FAIL)
-       {
-               /* IO Failed.  cleanup the data structures and go home */
-               StrategyInvalidateBuffer(bufHdr);
-
-               /* remember that BufferAlloc() pinned the buffer */
-               UnpinBuffer(bufHdr);
-
-               /*
-                * Have to reset the flag so that anyone waiting for the buffer
-                * can tell that the contents are invalid.
-                */
-               bufHdr->flags |= BM_IO_ERROR;
-               bufHdr->flags &= ~BM_IO_IN_PROGRESS;
-       }
-       else
-       {
-               /* IO Succeeded.  clear the flags, finish buffer update */
-
-               bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
-       }
+       /* IO Succeeded.  clear the flags, finish buffer update */
+       bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
  
         /* If anyone was waiting for IO to complete, wake them up now */
         TerminateBufferIO(bufHdr);
  
         LWLockRelease(BufMgrLock);
  
-       if (status == SM_FAIL)
-               return InvalidBuffer;
-
         return BufferDescriptorGetBuffer(bufHdr);
  }
  
@@ -391,8 +363,6 @@ BufferAlloc(Relation reln,
  
                 if (buf->flags & BM_DIRTY || buf->cntxDirty)
                 {
-                       bool    replace_ok;
-
                         /*
                          * skip write error buffers
                          */
@@ -425,39 +395,21 @@ BufferAlloc(Relation reln,
                          * Write the buffer out, being careful to release BufMgrLock
                          * before starting the I/O.
                          */
-                       replace_ok = BufferReplace(buf);
+                       BufferReplace(buf);
  
-                       if (replace_ok == false)
+                       /*
+                        * BM_JUST_DIRTIED is cleared by BufferReplace and shouldn't
+                        * be set by anyone.            - vadim 01/17/97
+                        */
+                       if (buf->flags & BM_JUST_DIRTIED)
                         {
-                               ereport(WARNING,
-                                               (errcode(ERRCODE_IO_ERROR),
-                                                errmsg("could not write block %u of %u/%u",
-                                                               buf->tag.blockNum,
-                                                               buf->tag.rnode.tblNode,
-                                                               buf->tag.rnode.relNode)));
-                               inProgress = FALSE;
-                               buf->flags |= BM_IO_ERROR;
-                               buf->flags &= ~BM_IO_IN_PROGRESS;
-                               TerminateBufferIO(buf);
-                               UnpinBuffer(buf);
-                               buf = NULL;
+                               elog(PANIC, "content of block %u of %u/%u changed while flushing",
+                                        buf->tag.blockNum,
+                                        buf->tag.rnode.tblNode, buf->tag.rnode.relNode);
                         }
-                       else
-                       {
-                               /*
-                                * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
-                                * be set by anyone.            - vadim 01/17/97
-                                */
-                               if (buf->flags & BM_JUST_DIRTIED)
-                               {
-                                       elog(PANIC, "content of block %u of %u/%u changed while flushing",
-                                                buf->tag.blockNum,
-                                                buf->tag.rnode.tblNode, buf->tag.rnode.relNode);
-                               }
  
-                               buf->flags &= ~BM_DIRTY;
-                               buf->cntxDirty = false;
-                       }
+                       buf->flags &= ~BM_DIRTY;
+                       buf->cntxDirty = false;
  
                         /*
                          * Somebody could have pinned the buffer while we were doing
@@ -721,10 +673,8 @@ BufferSync(int percent, int maxpages)
         for (i = 0; i < num_buffer_dirty; i++)
         {
                 Buffer          buffer;
-               int                     status;
-               RelFileNode rnode;
                 XLogRecPtr      recptr;
-               Relation        reln;
+               SMgrRelation reln;
  
                 LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
  
@@ -775,15 +725,9 @@ BufferSync(int percent, int maxpages)
                 StartBufferIO(bufHdr, false);   /* output IO start */
  
                 buffer = BufferDescriptorGetBuffer(bufHdr);
-               rnode = bufHdr->tag.rnode;
  
                 LWLockRelease(BufMgrLock);
  
-               /*
-                * Try to find relation for buffer
-                */
-               reln = RelationNodeCacheGetRelation(rnode);
-
                 /*
                  * Protect buffer content against concurrent update
                  */
@@ -805,27 +749,13 @@ BufferSync(int percent, int maxpages)
                 bufHdr->flags &= ~BM_JUST_DIRTIED;
                 LWLockRelease(BufMgrLock);
  
-               if (reln == NULL)
-               {
-                       status = smgrblindwrt(DEFAULT_SMGR,
-                                                                 bufHdr->tag.rnode,
-                                                                 bufHdr->tag.blockNum,
-                                                                 (char *) MAKE_PTR(bufHdr->data));
-               }
-               else
-               {
-                       status = smgrwrite(DEFAULT_SMGR, reln,
-                                                          bufHdr->tag.blockNum,
-                                                          (char *) MAKE_PTR(bufHdr->data));
-               }
+               /* Find smgr relation for buffer */
+               reln = smgropen(bufHdr->tag.rnode);
  
-               if (status == SM_FAIL)  /* disk failure ?! */
-                       ereport(PANIC,
-                                       (errcode(ERRCODE_IO_ERROR),
-                                        errmsg("could not write block %u of %u/%u",
-                                                       bufHdr->tag.blockNum,
-                                                       bufHdr->tag.rnode.tblNode,
-                                                       bufHdr->tag.rnode.relNode)));
+               /* And write... */
+               smgrwrite(reln,
+                                 bufHdr->tag.blockNum,
+                                 (char *) MAKE_PTR(bufHdr->data));
  
                 /*
                  * Note that it's safe to change cntxDirty here because of we
@@ -853,10 +783,6 @@ BufferSync(int percent, int maxpages)
                         bufHdr->flags &= ~BM_DIRTY;
                 UnpinBuffer(bufHdr);
                 LWLockRelease(BufMgrLock);
-
-               /* drop refcnt obtained by RelationNodeCacheGetRelation */
-               if (reln != NULL)
-                       RelationDecrementReferenceCount(reln);
         }
  
         pfree(buffer_dirty);
@@ -1026,12 +952,22 @@ BufferBackgroundWriter(void)
                 n = BufferSync(BgWriterPercent, BgWriterMaxpages);
  
                 /*
-                * Whatever signal is sent to us, let's just die galantly. If
+                * Whatever signal is sent to us, let's just die gallantly. If
                  * it wasn't meant that way, the postmaster will reincarnate us.
                  */
                 if (InterruptPending)
                         return;
  
+               /*
+                * Whenever we have nothing to do, close all smgr files.  This
+                * is so we won't hang onto smgr references to deleted files
+                * indefinitely.  XXX this is a bogus, temporary solution.  'Twould
+                * be much better to do this once per checkpoint, but the bgwriter
+                * doesn't yet know anything about checkpoints.
+                */
+               if (n == 0)
+                       smgrcloseall();
+
                 /*
                  * Nap for the configured time or sleep for 10 seconds if
                  * there was nothing to do at all.
@@ -1073,17 +1009,15 @@ BufferGetBlockNumber(Buffer buffer)
  /*
   * BufferReplace
   *
- * Write out the buffer corresponding to 'bufHdr'. Returns 'true' if
- * the buffer was successfully written out, 'false' otherwise.
+ * Write out the buffer corresponding to 'bufHdr'.
   *
   * BufMgrLock must be held at entry, and the buffer must be pinned.
   */
-static bool
+static void
  BufferReplace(BufferDesc *bufHdr)
  {
-       Relation        reln;
+       SMgrRelation reln;
         XLogRecPtr      recptr;
-       int                     status;
         ErrorContextCallback errcontext;
  
         /* To check if block content changed while flushing. - vadim 01/17/97 */
@@ -1104,36 +1038,20 @@ BufferReplace(BufferDesc *bufHdr)
         recptr = BufferGetLSN(bufHdr);
         XLogFlush(recptr);
  
-       reln = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
+       /* Find smgr relation for buffer */
+       reln = smgropen(bufHdr->tag.rnode);
  
-       if (reln != NULL)
-       {
-               status = smgrwrite(DEFAULT_SMGR, reln,
-                                                  bufHdr->tag.blockNum,
-                                                  (char *) MAKE_PTR(bufHdr->data));
-       }
-       else
-       {
-               status = smgrblindwrt(DEFAULT_SMGR, bufHdr->tag.rnode,
-                                                         bufHdr->tag.blockNum,
-                                                         (char *) MAKE_PTR(bufHdr->data));
-       }
-
-       /* drop relcache refcnt incremented by RelationNodeCacheGetRelation */
-       if (reln != NULL)
-               RelationDecrementReferenceCount(reln);
+       /* And write... */
+       smgrwrite(reln,
+                         bufHdr->tag.blockNum,
+                         (char *) MAKE_PTR(bufHdr->data));
  
         /* Pop the error context stack */
         error_context_stack = errcontext.previous;
  
         LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
  
-       if (status == SM_FAIL)
-               return false;
-
         BufferFlushCount++;
-
-       return true;
  }
  
  /*
@@ -1151,12 +1069,17 @@ RelationGetNumberOfBlocks(Relation relation)
          *
          * Don't call smgr on a view or a composite type, either.
          */
-       if (relation->rd_rel->relkind == RELKIND_VIEW)
-               relation->rd_nblocks = 0;
-       else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+       if (relation->rd_rel->relkind == RELKIND_VIEW ||
+               relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
                 relation->rd_nblocks = 0;
         else if (!relation->rd_isnew && !relation->rd_istemp)
-               relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);
+       {
+               /* Open it at the smgr level if not already done */
+               if (relation->rd_smgr == NULL)
+                       relation->rd_smgr = smgropen(relation->rd_node);
+
+               relation->rd_nblocks = smgrnblocks(relation->rd_smgr);
+       }
  
         return relation->rd_nblocks;
  }
@@ -1172,12 +1095,17 @@ RelationGetNumberOfBlocks(Relation relation)
  void
  RelationUpdateNumberOfBlocks(Relation relation)
  {
-       if (relation->rd_rel->relkind == RELKIND_VIEW)
-               relation->rd_nblocks = 0;
-       else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+       if (relation->rd_rel->relkind == RELKIND_VIEW ||
+               relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
                 relation->rd_nblocks = 0;
         else
-               relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);
+       {
+               /* Open it at the smgr level if not already done */
+               if (relation->rd_smgr == NULL)
+                       relation->rd_smgr = smgropen(relation->rd_node);
+
+               relation->rd_nblocks = smgrnblocks(relation->rd_smgr);
+       }
  }
  
  /* ---------------------------------------------------------------------
@@ -1465,7 +1393,6 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
         int                     i;
         BufferDesc *bufHdr;
         XLogRecPtr      recptr;
-       int                     status;
         ErrorContextCallback errcontext;
  
         /* Setup error traceback support for ereport() */
@@ -1484,17 +1411,13 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
                         {
                                 if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
                                 {
-                                       status = smgrwrite(DEFAULT_SMGR, rel,
-                                                                          bufHdr->tag.blockNum,
-                                                                          (char *) MAKE_PTR(bufHdr->data));
-                                       if (status == SM_FAIL)
-                                       {
-                                               error_context_stack = errcontext.previous;
-                                               elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is dirty, could not flush it",
-                                                        RelationGetRelationName(rel), firstDelBlock,
-                                                        bufHdr->tag.blockNum);
-                                               return (-1);
-                                       }
+                                       /* Open it at the smgr level if not already done */
+                                       if (rel->rd_smgr == NULL)
+                                               rel->rd_smgr = smgropen(rel->rd_node);
+
+                                       smgrwrite(rel->rd_smgr,
+                                                         bufHdr->tag.blockNum,
+                                                         (char *) MAKE_PTR(bufHdr->data));
                                         bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
                                         bufHdr->cntxDirty = false;
                                 }
@@ -1553,17 +1476,13 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
  
                                         LWLockRelease(BufMgrLock);
  
-                                       status = smgrwrite(DEFAULT_SMGR, rel,
-                                                                          bufHdr->tag.blockNum,
-                                                                          (char *) MAKE_PTR(bufHdr->data));
+                                       /* Open it at the smgr level if not already done */
+                                       if (rel->rd_smgr == NULL)
+                                               rel->rd_smgr = smgropen(rel->rd_node);
  
-                                       if (status == SM_FAIL)          /* disk failure ?! */
-                                               ereport(PANIC,
-                                                               (errcode(ERRCODE_IO_ERROR),
-                                                         errmsg("could not write block %u of %u/%u",
-                                                                        bufHdr->tag.blockNum,
-                                                                        bufHdr->tag.rnode.tblNode,
-                                                                        bufHdr->tag.rnode.relNode)));
+                                       smgrwrite(rel->rd_smgr,
+                                                         bufHdr->tag.blockNum,
+                                                         (char *) MAKE_PTR(bufHdr->data));
  
                                         BufferFlushCount++;
  
@@ -2046,7 +1965,11 @@ AbortBufferIO(void)
                 LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
                 Assert(buf->flags & BM_IO_IN_PROGRESS);
                 if (IsForInput)
+               {
                         Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
+                       /* Don't think that buffer is valid */
+                       StrategyInvalidateBuffer(buf);
+               }
                 else
                 {
                         Assert(buf->flags & BM_DIRTY || buf->cntxDirty);
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c

index 01c8303928055e49a61bd38a1b983e701b708998..bcbedc9c6517a762cce4c437d80b32f9dd5e4d8b 100644 (file)
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.51 2004/01/07 18:56:27 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.52 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -90,24 +90,15 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
          */
         if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
         {
-               Relation        bufrel = RelationNodeCacheGetRelation(bufHdr->tag.rnode);
+               SMgrRelation reln;
  
-               /* flush this page */
-               if (bufrel == NULL)
-               {
-                       smgrblindwrt(DEFAULT_SMGR,
-                                                bufHdr->tag.rnode,
-                                                bufHdr->tag.blockNum,
-                                                (char *) MAKE_PTR(bufHdr->data));
-               }
-               else
-               {
-                       smgrwrite(DEFAULT_SMGR, bufrel,
-                                         bufHdr->tag.blockNum,
-                                         (char *) MAKE_PTR(bufHdr->data));
-                       /* drop refcount incremented by RelationNodeCacheGetRelation */
-                       RelationDecrementReferenceCount(bufrel);
-               }
+               /* Find smgr relation for buffer */
+               reln = smgropen(bufHdr->tag.rnode);
+
+               /* And write... */
+               smgrwrite(reln,
+                                 bufHdr->tag.blockNum,
+                                 (char *) MAKE_PTR(bufHdr->data));
  
                 LocalBufferFlushCount++;
         }
@@ -143,9 +134,6 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
  
         /*
          * it's all ours now.
-        *
-        * We need not in tblNode currently but will in future I think, when
-        * we'll give up rel->rd_fd to fmgr cache.
          */
         bufHdr->tag.rnode = reln->rd_node;
         bufHdr->tag.blockNum = blockNum;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c

index 7de7d85e74d6e87a345634c1f6a9520eccb4bec9..f95b1b3441057d6d70526f73291061d89b5ec02b 100644 (file)
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.63 2004/01/26 22:59:53 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.64 2004/02/10 01:55:25 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -65,9 +65,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
         size += FreeSpaceShmemSize();
  #ifdef EXEC_BACKEND
         size += ShmemBackendArraySize();
-#endif
-#ifdef STABLE_MEMORY_STORAGE
-       size += MMShmemSize();
  #endif
         size += 100000;
         /* might as well round it off to a multiple of a typical page size */
diff --git a/src/backend/storage/smgr/Makefile b/src/backend/storage/smgr/Makefile

index 7c2a0f62b2af598e12297c87cff591c749d3f6b8..71695f9a749cb924b2c9a4261e629742970f33e0 100644 (file)
--- a/src/backend/storage/smgr/Makefile
+++ b/src/backend/storage/smgr/Makefile
@@ -4,7 +4,7 @@
  #    Makefile for storage/smgr
  #
  # IDENTIFICATION
-#    $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.14 2003/11/29 19:51:57 pgsql Exp $
+#    $PostgreSQL: pgsql/src/backend/storage/smgr/Makefile,v 1.15 2004/02/10 01:55:26 tgl Exp $
  #
  #-------------------------------------------------------------------------
  
@@ -12,7 +12,7 @@ subdir = src/backend/storage/smgr
  top_builddir = ../../../..
  include $(top_builddir)/src/Makefile.global
  
-OBJS = md.o mm.o smgr.o smgrtype.o
+OBJS = md.o smgr.o smgrtype.o
  
  all: SUBSYS.o
  
diff --git a/src/backend/storage/smgr/README b/src/backend/storage/smgr/README

index 606431f926fd9e49b17fcd5b0291a4b164e82145..124d5bcdffc2086bd26cb4bcfdfc321d89edd0a3 100644 (file)
--- a/src/backend/storage/smgr/README
+++ b/src/backend/storage/smgr/README
@@ -1,40 +1,31 @@
-# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.2 2003/11/29 19:51:57 pgsql Exp $
+# $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.3 2004/02/10 01:55:26 tgl Exp $
  
-This directory contains the code that supports the Postgres storage manager
-switch and all of the installed storage managers.  In released systems,
-the only supported storage manager is the magnetic disk manager.  At UC
-Berkeley, the Sony WORM optical disk jukebox and persistent main memory are
-also supported.
+In the original Berkeley Postgres system, there were several storage managers,
+of which only the "magnetic disk" manager remains.  (At Berkeley there were
+also managers for the Sony WORM optical disk jukebox and persistent main
+memory, but these were never supported in any externally released Postgres,
+nor in any version of PostgreSQL.)  However, we retain the notion of a storage
+manager switch in case anyone wants to reintroduce other kinds of storage
+managers.
  
-As of Postgres Release 3.0, every relation in the system is tagged with the
-storage manager on which it resides.  The storage manager switch code turns
-what used to by filesystem operations into operations on the correct store,
-for any given relation.
+In Berkeley Postgres each relation was tagged with the ID of the storage
+manager to use for it.  That tagging is gone.  It would be more reasonable to
+associate storage managers with tablespaces (a feature not present as this
+text is being written, but one likely to emerge soon).
  
  The files in this directory, and their contents, are
  
      smgrtype.c Storage manager type -- maps string names to storage manager
                 IDs and provides simple comparison operators.  This is the
                 regproc support for type 'smgr' in the system catalogs.
+               (This is vestigial since no columns of type smgr exist
+               in the catalogs anymore.)
  
      smgr.c     The storage manager switch dispatch code.  The routines in
                 this file call the appropriate storage manager to do hardware
-               accesses requested by the backend.
+               accesses requested by the backend.  smgr.c also manages the
+               file handle cache (SMgrRelation table).
  
      md.c       The magnetic disk storage manager.
  
-    mm.c       The persistent main memory storage manager (#undef'ed in
-               tmp/c.h for all distributed systems).
-
-    sj.c       The sony jukebox storage manager and cache management code
-               (#undef'ed in tmp/c.h for all distributed systems).  The
-               routines in this file allocate extents, maintain block
-               maps, and guarantee the persistence and coherency of a cache
-               of jukebox blocks on magnetic disk.
-
-    pgjb.c     The postgres jukebox interface routines.  The routines here
-               handle exclusion on the physical device and translate requests
-               from the storage manager code (sj.c) into jbaccess calls.
-
-    jbaccess.c Access code for the physical Sony jukebox device.  This code
-               was swiped from Andy McFadden's jblib.a code at UC Berkeley.
+Note that md.c in turn relies on src/backend/storage/file/fd.c.
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c

index 0405c2849a0b8d201edbec9c4e0cda91e46f2927..58629218a3ceaecb3b2a760d1cd2d03e90615036 100644 (file)
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.101 2004/01/07 18:56:27 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.102 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,108 +21,81 @@
  
  #include "catalog/catalog.h"
  #include "miscadmin.h"
+#include "storage/fd.h"
  #include "storage/smgr.h"
-#include "utils/inval.h"
  #include "utils/memutils.h"
  
+
  /*
   *     The magnetic disk storage manager keeps track of open file
   *     descriptors in its own descriptor pool.  This is done to make it
   *     easier to support relations that are larger than the operating
- *     system's file size limit (often 2GBytes).  In order to do that, we
+ *     system's file size limit (often 2GBytes).  In order to do that,
   *     we break relations up into chunks of < 2GBytes and store one chunk
   *     in each of several files that represent the relation.  See the
   *     BLCKSZ and RELSEG_SIZE configuration constants in
- *     include/pg_config.h.
+ *     include/pg_config.h.  All chunks except the last MUST have size exactly
+ *     equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate().
   *
- *     The file descriptor stored in the relation cache (see RelationGetFile())
- *     is actually an index into the Md_fdvec array.  -1 indicates not open.
+ *     The file descriptor pointer (md_fd field) stored in the SMgrRelation
+ *     cache is, therefore, just the head of a list of MdfdVec objects.
+ *     But note the md_fd pointer can be NULL, indicating relation not open.
   *
- *     When a relation is broken into multiple chunks, only the first chunk
- *     has its own entry in the Md_fdvec array; the remaining chunks have
- *     palloc'd MdfdVec objects that are chained onto the first chunk via the
- *     mdfd_chain links.  All chunks except the last MUST have size exactly
- *     equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate().
+ *     All MdfdVec objects are palloc'd in the MdCxt memory context.
   */
  
  typedef struct _MdfdVec
  {
-       int                     mdfd_vfd;               /* fd number in vfd pool */
-       int                     mdfd_flags;             /* fd status flags */
+       File            mdfd_vfd;                       /* fd number in fd.c's pool */
  
-/* these are the assigned bits in mdfd_flags: */
-#define MDFD_FREE      (1 << 0)        /* unused entry */
-
-       int                     mdfd_nextFree;  /* link to next freelist member, if free */
  #ifndef LET_OS_MANAGE_FILESIZE
         struct _MdfdVec *mdfd_chain;    /* for large relations */
  #endif
  } MdfdVec;
  
-static int     Nfds = 100;                     /* initial/current size of Md_fdvec array */
-static MdfdVec *Md_fdvec = NULL;
-static int     Md_Free = -1;           /* head of freelist of unused fdvec
-                                                                * entries */
-static int     CurFd = 0;                      /* first never-used fdvec index */
  static MemoryContext MdCxt;            /* context for all md.c allocations */
  
-/* routines declared here */
-static void mdclose_fd(int fd);
-static int     _mdfd_getrelnfd(Relation reln);
-static MdfdVec *_mdfd_openseg(Relation reln, BlockNumber segno, int oflags);
-static MdfdVec *_mdfd_getseg(Relation reln, BlockNumber blkno);
-
-static int     _mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno);
  
-static int     _fdvec_alloc(void);
-static void _fdvec_free(int);
+/* routines declared here */
+static MdfdVec *mdopen(SMgrRelation reln);
+static MdfdVec *_fdvec_alloc(void);
+#ifndef LET_OS_MANAGE_FILESIZE
+static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
+                                                         int oflags);
+#endif
+static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno);
  static BlockNumber _mdnblocks(File file, Size blcksz);
  
+
  /*
   *     mdinit() -- Initialize private state for magnetic disk storage manager.
- *
- *             We keep a private table of all file descriptors.  This routine
- *             allocates and initializes the table.
- *
- *             Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
   */
-int
+bool
  mdinit(void)
  {
-       int                     i;
-
         MdCxt = AllocSetContextCreate(TopMemoryContext,
                                                                   "MdSmgr",
                                                                   ALLOCSET_DEFAULT_MINSIZE,
                                                                   ALLOCSET_DEFAULT_INITSIZE,
                                                                   ALLOCSET_DEFAULT_MAXSIZE);
  
-       Md_fdvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec));
-
-       MemSet(Md_fdvec, 0, Nfds * sizeof(MdfdVec));
-
-       /* Set free list */
-       for (i = 0; i < Nfds; i++)
-       {
-               Md_fdvec[i].mdfd_nextFree = i + 1;
-               Md_fdvec[i].mdfd_flags = MDFD_FREE;
-       }
-       Md_Free = 0;
-       Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
-
-       return SM_SUCCESS;
+       return true;
  }
  
-int
-mdcreate(Relation reln)
+/*
+ *     mdcreate() -- Create a new relation on magnetic disk.
+ *
+ * If isRedo is true, it's okay for the relation to exist already.
+ */
+bool
+mdcreate(SMgrRelation reln, bool isRedo)
  {
         char       *path;
-       int                     fd,
-                               vfd;
+       File            fd;
  
-       Assert(reln->rd_fd < 0);
+       Assert(reln->md_fd == NULL);
  
-       path = relpath(reln->rd_node);
+       path = relpath(reln->smgr_rnode);
  
         fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
  
@@ -134,43 +107,45 @@ mdcreate(Relation reln)
                  * During bootstrap, there are cases where a system relation will
                  * be accessed (by internal backend processes) before the
                  * bootstrap script nominally creates it.  Therefore, allow the
-                * file to exist already, but in bootstrap mode only.  (See also
+                * file to exist already, even if isRedo is not set.  (See also
                  * mdopen)
                  */
-               if (IsBootstrapProcessingMode())
+               if (isRedo || IsBootstrapProcessingMode())
                         fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
                 if (fd < 0)
                 {
                         pfree(path);
                         /* be sure to return the error reported by create, not open */
                         errno = save_errno;
-                       return -1;
+                       return false;
                 }
                 errno = 0;
         }
  
         pfree(path);
  
-       vfd = _fdvec_alloc();
-       if (vfd < 0)
-               return -1;
+       reln->md_fd = _fdvec_alloc();
  
-       Md_fdvec[vfd].mdfd_vfd = fd;
-       Md_fdvec[vfd].mdfd_flags = (uint16) 0;
+       reln->md_fd->mdfd_vfd = fd;
  #ifndef LET_OS_MANAGE_FILESIZE
-       Md_fdvec[vfd].mdfd_chain = NULL;
+       reln->md_fd->mdfd_chain = NULL;
  #endif
  
-       return vfd;
+       return true;
  }
  
  /*
   *     mdunlink() -- Unlink a relation.
+ *
+ * Note that we're passed a RelFileNode --- by the time this is called,
+ * there won't be an SMgrRelation hashtable entry anymore.
+ *
+ * If isRedo is true, it's okay for the relation to be already gone.
   */
-int
-mdunlink(RelFileNode rnode)
+bool
+mdunlink(RelFileNode rnode, bool isRedo)
  {
-       int                     status = SM_SUCCESS;
+       bool            status = true;
         int                     save_errno = 0;
         char       *path;
  
@@ -179,13 +154,16 @@ mdunlink(RelFileNode rnode)
         /* Delete the first segment, or only segment if not doing segmenting */
         if (unlink(path) < 0)
         {
-               status = SM_FAIL;
-               save_errno = errno;
+               if (!isRedo || errno != ENOENT)
+               {
+                       status = false;
+                       save_errno = errno;
+               }
         }
  
  #ifndef LET_OS_MANAGE_FILESIZE
         /* Get the additional segments, if any */
-       if (status == SM_SUCCESS)
+       if (status)
         {
                 char       *segpath = (char *) palloc(strlen(path) + 12);
                 BlockNumber segno;
@@ -198,7 +176,7 @@ mdunlink(RelFileNode rnode)
                                 /* ENOENT is expected after the last segment... */
                                 if (errno != ENOENT)
                                 {
-                                       status = SM_FAIL;
+                                       status = false;
                                         save_errno = errno;
                                 }
                                 break;
@@ -222,16 +200,15 @@ mdunlink(RelFileNode rnode)
   *             relation (ie, blocknum is the current EOF), and so in case of
   *             failure we clean up by truncating.
   *
- *             This routine returns SM_FAIL or SM_SUCCESS, with errno set as
- *             appropriate.
+ *             This routine returns true or false, with errno set as appropriate.
   *
   * Note: this routine used to call mdnblocks() to get the block position
   * to write at, but that's pretty silly since the caller needs to know where
   * the block will be written, and accordingly must have done mdnblocks()
   * already.  Might as well pass in the position and save a seek.
   */
-int
-mdextend(Relation reln, BlockNumber blocknum, char *buffer)
+bool
+mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
         long            seekpos;
         int                     nbytes;
@@ -256,7 +233,7 @@ mdextend(Relation reln, BlockNumber blocknum, char *buffer)
          * to make room for the new page's buffer.
          */
         if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               return SM_FAIL;
+               return false;
  
         if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
         {
@@ -269,29 +246,32 @@ mdextend(Relation reln, BlockNumber blocknum, char *buffer)
                         FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);
                         errno = save_errno;
                 }
-               return SM_FAIL;
+               return false;
         }
  
  #ifndef LET_OS_MANAGE_FILESIZE
         Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
  #endif
  
-       return SM_SUCCESS;
+       return true;
  }
  
  /*
- *     mdopen() -- Open the specified relation.
+ *     mdopen() -- Open the specified relation.  ereport's on failure.
+ *
+ * Note we only open the first segment, when there are multiple segments.
   */
-int
-mdopen(Relation reln)
+static MdfdVec *
+mdopen(SMgrRelation reln)
  {
         char       *path;
-       int                     fd;
-       int                     vfd;
+       File            fd;
  
-       Assert(reln->rd_fd < 0);
+       /* No work if already open */
+       if (reln->md_fd)
+               return reln->md_fd;
  
-       path = relpath(reln->rd_node);
+       path = relpath(reln->smgr_rnode);
  
         fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
  
@@ -309,57 +289,45 @@ mdopen(Relation reln)
                 if (fd < 0)
                 {
                         pfree(path);
-                       return -1;
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not open relation %u/%u: %m",
+                                                       reln->smgr_rnode.tblNode,
+                                                       reln->smgr_rnode.relNode)));
                 }
         }
  
         pfree(path);
  
-       vfd = _fdvec_alloc();
-       if (vfd < 0)
-               return -1;
+       reln->md_fd = _fdvec_alloc();
  
-       Md_fdvec[vfd].mdfd_vfd = fd;
-       Md_fdvec[vfd].mdfd_flags = (uint16) 0;
+       reln->md_fd->mdfd_vfd = fd;
  #ifndef LET_OS_MANAGE_FILESIZE
-       Md_fdvec[vfd].mdfd_chain = NULL;
+       reln->md_fd->mdfd_chain = NULL;
         Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
  #endif
  
-       return vfd;
+       return reln->md_fd;
  }
  
  /*
   *     mdclose() -- Close the specified relation, if it isn't closed already.
   *
- *             AND FREE fd vector! It may be re-used for other relations!
- *             reln should be flushed from cache after closing !..
- *
- *             Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
+ *             Returns true or false with errno set as appropriate.
   */
-int
-mdclose(Relation reln)
+bool
+mdclose(SMgrRelation reln)
  {
-       int                     fd;
-
-       fd = RelationGetFile(reln);
-       if (fd < 0)
-               return SM_SUCCESS;              /* already closed, so no work */
-
-       mdclose_fd(fd);
-
-       reln->rd_fd = -1;
+       MdfdVec    *v = reln->md_fd;
  
-       return SM_SUCCESS;
-}
+       /* No work if already closed */
+       if (v == NULL)
+               return true;
  
-static void
-mdclose_fd(int fd)
-{
-       MdfdVec    *v;
+       reln->md_fd = NULL;                     /* prevent dangling pointer after error */
  
  #ifndef LET_OS_MANAGE_FILESIZE
-       for (v = &Md_fdvec[fd]; v != NULL;)
+       while (v != NULL)
         {
                 MdfdVec    *ov = v;
  
@@ -368,32 +336,24 @@ mdclose_fd(int fd)
                         FileClose(v->mdfd_vfd);
                 /* Now free vector */
                 v = v->mdfd_chain;
-               if (ov != &Md_fdvec[fd])
-                       pfree(ov);
+               pfree(ov);
         }
-
-       Md_fdvec[fd].mdfd_chain = NULL;
  #else
-       v = &Md_fdvec[fd];
-       if (v != NULL)
-       {
-               if (v->mdfd_vfd >= 0)
-                       FileClose(v->mdfd_vfd);
-       }
+       if (v->mdfd_vfd >= 0)
+               FileClose(v->mdfd_vfd);
+       pfree(v);
  #endif
  
-       _fdvec_free(fd);
+       return true;
  }
  
  /*
   *     mdread() -- Read the specified block from a relation.
- *
- *             Returns SM_SUCCESS or SM_FAIL.
   */
-int
-mdread(Relation reln, BlockNumber blocknum, char *buffer)
+bool
+mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
-       int                     status;
+       bool            status;
         long            seekpos;
         int                     nbytes;
         MdfdVec    *v;
@@ -408,9 +368,9 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
  #endif
  
         if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               return SM_FAIL;
+               return false;
  
-       status = SM_SUCCESS;
+       status = true;
         if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
         {
                 /*
@@ -425,7 +385,7 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
                         (nbytes > 0 && mdnblocks(reln) == blocknum))
                         MemSet(buffer, 0, BLCKSZ);
                 else
-                       status = SM_FAIL;
+                       status = false;
         }
  
         return status;
@@ -433,11 +393,9 @@ mdread(Relation reln, BlockNumber blocknum, char *buffer)
  
  /*
   *     mdwrite() -- Write the supplied block at the appropriate location.
- *
- *             Returns SM_SUCCESS or SM_FAIL.
   */
-int
-mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
+bool
+mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
         long            seekpos;
         MdfdVec    *v;
@@ -452,69 +410,12 @@ mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
  #endif
  
         if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               return SM_FAIL;
+               return false;
  
         if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
-               return SM_FAIL;
+               return false;
  
-       return SM_SUCCESS;
-}
-
-/*
- *     mdblindwrt() -- Write a block to disk blind.
- *
- *             We have to be able to do this using only the rnode of the relation
- *             in which the block belongs.  Otherwise this is much like mdwrite().
- */
-int
-mdblindwrt(RelFileNode rnode,
-                  BlockNumber blkno,
-                  char *buffer)
-{
-       int                     status;
-       long            seekpos;
-       int                     fd;
-
-       fd = _mdfd_blind_getseg(rnode, blkno);
-
-       if (fd < 0)
-               return SM_FAIL;
-
-#ifndef LET_OS_MANAGE_FILESIZE
-       seekpos = (long) (BLCKSZ * (blkno % ((BlockNumber) RELSEG_SIZE)));
-       Assert(seekpos < BLCKSZ * RELSEG_SIZE);
-#else
-       seekpos = (long) (BLCKSZ * (blkno));
-#endif
-
-       errno = 0;
-       if (lseek(fd, seekpos, SEEK_SET) != seekpos)
-       {
-               elog(LOG, "lseek(%ld) failed: %m", seekpos);
-               close(fd);
-               return SM_FAIL;
-       }
-
-       status = SM_SUCCESS;
-
-       /* write the block */
-       errno = 0;
-       if (write(fd, buffer, BLCKSZ) != BLCKSZ)
-       {
-               /* if write didn't set errno, assume problem is no disk space */
-               if (errno == 0)
-                       errno = ENOSPC;
-               elog(LOG, "write() failed: %m");
-               status = SM_FAIL;
-       }
-
-       if (close(fd) < 0)
-       {
-               elog(LOG, "close() failed: %m");
-               status = SM_FAIL;
-       }
-
-       return status;
+       return true;
  }
  
  /*
@@ -525,24 +426,16 @@ mdblindwrt(RelFileNode rnode,
   *             called, then only segments up to the last one actually touched
   *             are present in the chain...
   *
- *             Returns # of blocks, ereport's on error.
+ *             Returns # of blocks, or InvalidBlockNumber on error.
   */
  BlockNumber
-mdnblocks(Relation reln)
+mdnblocks(SMgrRelation reln)
  {
-       int                     fd;
-       MdfdVec    *v;
+       MdfdVec    *v = mdopen(reln);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         BlockNumber nblocks;
-       BlockNumber segno;
-#endif
-
-       fd = _mdfd_getrelnfd(reln);
-       v = &Md_fdvec[fd];
-
-#ifndef LET_OS_MANAGE_FILESIZE
-       segno = 0;
+       BlockNumber segno = 0;
  
         /*
          * Skip through any segments that aren't the last one, to avoid
@@ -583,8 +476,7 @@ mdnblocks(Relation reln)
                          */
                         v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
                         if (v->mdfd_chain == NULL)
-                               elog(ERROR, "could not count blocks of \"%s\": %m",
-                                        RelationGetRelationName(reln));
+                               return InvalidBlockNumber;              /* failed? */
                 }
  
                 v = v->mdfd_chain;
@@ -600,9 +492,8 @@ mdnblocks(Relation reln)
   *             Returns # of blocks or InvalidBlockNumber on error.
   */
  BlockNumber
-mdtruncate(Relation reln, BlockNumber nblocks)
+mdtruncate(SMgrRelation reln, BlockNumber nblocks)
  {
-       int                     fd;
         MdfdVec    *v;
         BlockNumber curnblk;
  
@@ -615,13 +506,14 @@ mdtruncate(Relation reln, BlockNumber nblocks)
          * that truncate/delete loop will get them all!
          */
         curnblk = mdnblocks(reln);
+       if (curnblk == InvalidBlockNumber)
+               return InvalidBlockNumber;              /* mdnblocks failed */
         if (nblocks > curnblk)
                 return InvalidBlockNumber;              /* bogus request */
         if (nblocks == curnblk)
                 return nblocks;                 /* no work */
  
-       fd = _mdfd_getrelnfd(reln);
-       v = &Md_fdvec[fd];
+       v = mdopen(reln);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         priorblocks = 0;
@@ -641,7 +533,7 @@ mdtruncate(Relation reln, BlockNumber nblocks)
                         FileTruncate(v->mdfd_vfd, 0);
                         FileUnlink(v->mdfd_vfd);
                         v = v->mdfd_chain;
-                       Assert(ov != &Md_fdvec[fd]);            /* we never drop the 1st
+                       Assert(ov != reln->md_fd);                      /* we never drop the 1st
                                                                                                  * segment */
                         pfree(ov);
                 }
@@ -682,115 +574,65 @@ mdtruncate(Relation reln, BlockNumber nblocks)
  
  /*
   *     mdcommit() -- Commit a transaction.
- *
- *             Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
   */
-int
+bool
  mdcommit(void)
  {
         /*
          * We don't actually have to do anything here...
          */
-       return SM_SUCCESS;
+       return true;
  }
  
  /*
   *     mdabort() -- Abort a transaction.
   */
-int
+bool
  mdabort(void)
  {
         /*
          * We don't actually have to do anything here...
          */
-       return SM_SUCCESS;
+       return true;
  }
  
  /*
   *     mdsync() -- Sync previous writes to stable storage.
   */
-int
+bool
  mdsync(void)
  {
         sync();
         if (IsUnderPostmaster)
                 sleep(2);
         sync();
-       return SM_SUCCESS;
+       return true;
  }
  
  /*
- *     _fdvec_alloc() -- Grab a free (or new) md file descriptor vector.
+ *     _fdvec_alloc() -- Make a MdfdVec object.
   */
-static int
+static MdfdVec *
  _fdvec_alloc(void)
  {
-       MdfdVec    *nvec;
-       int                     fdvec,
-                               i;
-
-       if (Md_Free >= 0)                       /* get from free list */
-       {
-               fdvec = Md_Free;
-               Md_Free = Md_fdvec[fdvec].mdfd_nextFree;
-               Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE);
-               Md_fdvec[fdvec].mdfd_flags = 0;
-               if (fdvec >= CurFd)
-               {
-                       Assert(fdvec == CurFd);
-                       CurFd++;
-               }
-               return fdvec;
-       }
-
-       /* Must allocate more room */
-
-       if (Nfds != CurFd)
-               elog(FATAL, "_fdvec_alloc error");
-
-       Nfds *= 2;
-
-       nvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec));
-       MemSet(nvec, 0, Nfds * sizeof(MdfdVec));
-       memcpy(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec));
-       pfree(Md_fdvec);
+       MdfdVec *v;
  
-       Md_fdvec = nvec;
-
-       /* Set new free list */
-       for (i = CurFd; i < Nfds; i++)
-       {
-               Md_fdvec[i].mdfd_nextFree = i + 1;
-               Md_fdvec[i].mdfd_flags = MDFD_FREE;
-       }
-       Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
-       Md_Free = CurFd + 1;
-
-       fdvec = CurFd;
-       CurFd++;
-       Md_fdvec[fdvec].mdfd_flags = 0;
+       v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
+       v->mdfd_vfd = -1;
+#ifndef LET_OS_MANAGE_FILESIZE
+       v->mdfd_chain = NULL;
+#endif
  
-       return fdvec;
+       return v;
  }
  
+#ifndef LET_OS_MANAGE_FILESIZE
  /*
- *     _fdvec_free() -- free md file descriptor vector.
- *
+ * Open the specified segment of the relation,
+ * and make a MdfdVec object for it.  Returns NULL on failure.
   */
-static
-void
-_fdvec_free(int fdvec)
-{
-
-       Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE);
-       Assert(Md_fdvec[fdvec].mdfd_flags != MDFD_FREE);
-       Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
-       Md_fdvec[fdvec].mdfd_flags = MDFD_FREE;
-       Md_Free = fdvec;
-}
-
  static MdfdVec *
-_mdfd_openseg(Relation reln, BlockNumber segno, int oflags)
+_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
  {
         MdfdVec    *v;
         int                     fd;
@@ -798,7 +640,7 @@ _mdfd_openseg(Relation reln, BlockNumber segno, int oflags)
                            *fullpath;
  
         /* be sure we have enough space for the '.segno', if any */
-       path = relpath(reln->rd_node);
+       path = relpath(reln->smgr_rnode);
  
         if (segno > 0)
         {
@@ -818,61 +660,32 @@ _mdfd_openseg(Relation reln, BlockNumber segno, int oflags)
                 return NULL;
  
         /* allocate an mdfdvec entry for it */
-       v = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
+       v = _fdvec_alloc();
  
         /* fill the entry */
         v->mdfd_vfd = fd;
-       v->mdfd_flags = (uint16) 0;
-#ifndef LET_OS_MANAGE_FILESIZE
         v->mdfd_chain = NULL;
         Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
-#endif
  
         /* all done */
         return v;
  }
-
-/*
- *     _mdfd_getrelnfd() -- Get the (virtual) fd for the relation,
- *                                              opening it if it's not already open
- *
- */
-static int
-_mdfd_getrelnfd(Relation reln)
-{
-       int                     fd;
-
-       fd = RelationGetFile(reln);
-       if (fd < 0)
-       {
-               if ((fd = mdopen(reln)) < 0)
-                       elog(ERROR, "could not open relation \"%s\": %m",
-                                RelationGetRelationName(reln));
-               reln->rd_fd = fd;
-       }
-       return fd;
-}
+#endif
  
  /*
   *     _mdfd_getseg() -- Find the segment of the relation holding the
- *                                       specified block
- *
+ *                                       specified block.  ereport's on failure.
   */
  static MdfdVec *
-_mdfd_getseg(Relation reln, BlockNumber blkno)
+_mdfd_getseg(SMgrRelation reln, BlockNumber blkno)
  {
-       MdfdVec    *v;
-       int                     fd;
+       MdfdVec    *v = mdopen(reln);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         BlockNumber segno;
         BlockNumber i;
-#endif
-
-       fd = _mdfd_getrelnfd(reln);
  
-#ifndef LET_OS_MANAGE_FILESIZE
-       for (v = &Md_fdvec[fd], segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1;
+       for (segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1;
                  segno > 0;
                  i++, segno--)
         {
@@ -892,65 +705,24 @@ _mdfd_getseg(Relation reln, BlockNumber blkno)
                         v->mdfd_chain = _mdfd_openseg(reln, i, (segno == 1) ? O_CREAT : 0);
  
                         if (v->mdfd_chain == NULL)
-                               elog(ERROR, "could not open segment %u of relation \"%s\" (target block %u): %m",
-                                        i, RelationGetRelationName(reln), blkno);
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not open segment %u of relation %u/%u (target block %u): %m",
+                                                               i,
+                                                               reln->smgr_rnode.tblNode,
+                                                               reln->smgr_rnode.relNode,
+                                                               blkno)));
                 }
                 v = v->mdfd_chain;
         }
-#else
-       v = &Md_fdvec[fd];
  #endif
  
         return v;
  }
  
  /*
- * Find the segment of the relation holding the specified block.
- *
- * This performs the same work as _mdfd_getseg() except that we must work
- * "blind" with no Relation struct.  We assume that we are not likely to
- * touch the same relation again soon, so we do not create an FD entry for
- * the relation --- we just open a kernel file descriptor which will be
- * used and promptly closed.  We also assume that the target block already
- * exists, ie, we need not extend the relation.
- *
- * The return value is the kernel descriptor, or -1 on failure.
+ * Get number of blocks present in a single disk file
   */
-static int
-_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno)
-{
-       char       *path;
-       int                     fd;
-
-#ifndef LET_OS_MANAGE_FILESIZE
-       BlockNumber segno;
-#endif
-
-       path = relpath(rnode);
-
-#ifndef LET_OS_MANAGE_FILESIZE
-       /* append the '.segno', if needed */
-       segno = blkno / ((BlockNumber) RELSEG_SIZE);
-       if (segno > 0)
-       {
-               char       *segpath = (char *) palloc(strlen(path) + 12);
-
-               sprintf(segpath, "%s.%u", path, segno);
-               pfree(path);
-               path = segpath;
-       }
-#endif
-
-       /* call fd.c to allow other FDs to be closed if needed */
-       fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600);
-       if (fd < 0)
-               elog(LOG, "could not open \"%s\": %m", path);
-
-       pfree(path);
-
-       return fd;
-}
-
  static BlockNumber
  _mdnblocks(File file, Size blcksz)
  {
diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c

deleted file mode 100644 (file)

index 5043fd6..0000000
--- a/src/backend/storage/smgr/mm.c
+++ /dev/null
@@ -1,552 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * mm.c
- *       main memory storage manager
- *
- *       This code manages relations that reside in (presumably stable)
- *       main memory.
- *
- * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/mm.c,v 1.36 2004/01/07 18:56:27 neilc Exp $
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <math.h>
-
-#include "storage/smgr.h"
-#include "miscadmin.h"
-
-
-#ifdef STABLE_MEMORY_STORAGE
-
-/*
- *     MMCacheTag -- Unique triplet for blocks stored by the main memory
- *                               storage manager.
- */
-
-typedef struct MMCacheTag
-{
-       Oid                     mmct_dbid;
-       Oid                     mmct_relid;
-       BlockNumber mmct_blkno;
-}      MMCacheTag;
-
-/*
- *     Shared-memory hash table for main memory relations contains
- *     entries of this form.
- */
-
-typedef struct MMHashEntry
-{
-       MMCacheTag      mmhe_tag;
-       int                     mmhe_bufno;
-}      MMHashEntry;
-
-/*
- * MMRelTag -- Unique identifier for each relation that is stored in the
- *                                     main-memory storage manager.
- */
-
-typedef struct MMRelTag
-{
-       Oid                     mmrt_dbid;
-       Oid                     mmrt_relid;
-}      MMRelTag;
-
-/*
- *     Shared-memory hash table for # blocks in main memory relations contains
- *     entries of this form.
- */
-
-typedef struct MMRelHashEntry
-{
-       MMRelTag        mmrhe_tag;
-       int                     mmrhe_nblocks;
-}      MMRelHashEntry;
-
-#define MMNBUFFERS             10
-#define MMNRELATIONS   2
-
-static int *MMCurTop;
-static int *MMCurRelno;
-static MMCacheTag *MMBlockTags;
-static char *MMBlockCache;
-static HTAB *MMCacheHT;
-static HTAB *MMRelCacheHT;
-
-int
-mminit(void)
-{
-       char       *mmcacheblk;
-       int                     mmsize = 0;
-       bool            found;
-       HASHCTL         info;
-
-       LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
-       mmsize += MAXALIGN(BLCKSZ * MMNBUFFERS);
-       mmsize += MAXALIGN(sizeof(*MMCurTop));
-       mmsize += MAXALIGN(sizeof(*MMCurRelno));
-       mmsize += MAXALIGN((MMNBUFFERS * sizeof(MMCacheTag)));
-       mmcacheblk = (char *) ShmemInitStruct("Main memory smgr", mmsize, &found);
-
-       if (mmcacheblk == NULL)
-       {
-               LWLockRelease(MMCacheLock);
-               return SM_FAIL;
-       }
-
-       info.keysize = sizeof(MMCacheTag);
-       info.entrysize = sizeof(MMHashEntry);
-       info.hash = tag_hash;
-
-       MMCacheHT = ShmemInitHash("Main memory store HT",
-                                                         MMNBUFFERS, MMNBUFFERS,
-                                                         &info, (HASH_ELEM | HASH_FUNCTION));
-
-       if (MMCacheHT == NULL)
-       {
-               LWLockRelease(MMCacheLock);
-               return SM_FAIL;
-       }
-
-       info.keysize = sizeof(MMRelTag);
-       info.entrysize = sizeof(MMRelHashEntry);
-       info.hash = tag_hash;
-
-       MMRelCacheHT = ShmemInitHash("Main memory rel HT",
-                                                                MMNRELATIONS, MMNRELATIONS,
-                                                                &info, (HASH_ELEM | HASH_FUNCTION));
-
-       if (MMRelCacheHT == NULL)
-       {
-               LWLockRelease(MMCacheLock);
-               return SM_FAIL;
-       }
-
-       if (IsUnderPostmaster)          /* was IsPostmaster bjm */
-       {
-               MemSet(mmcacheblk, 0, mmsize);
-               LWLockRelease(MMCacheLock);
-               return SM_SUCCESS;
-       }
-
-       LWLockRelease(MMCacheLock);
-
-       MMCurTop = (int *) mmcacheblk;
-       mmcacheblk += sizeof(int);
-       MMCurRelno = (int *) mmcacheblk;
-       mmcacheblk += sizeof(int);
-       MMBlockTags = (MMCacheTag *) mmcacheblk;
-       mmcacheblk += (MMNBUFFERS * sizeof(MMCacheTag));
-       MMBlockCache = mmcacheblk;
-
-       return SM_SUCCESS;
-}
-
-int
-mmshutdown(void)
-{
-       return SM_SUCCESS;
-}
-
-int
-mmcreate(Relation reln)
-{
-       MMRelHashEntry *entry;
-       bool            found;
-       MMRelTag        tag;
-
-       LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
-       if (*MMCurRelno == MMNRELATIONS)
-       {
-               LWLockRelease(MMCacheLock);
-               return SM_FAIL;
-       }
-
-       (*MMCurRelno)++;
-
-       tag.mmrt_relid = RelationGetRelid(reln);
-       if (reln->rd_rel->relisshared)
-               tag.mmrt_dbid = (Oid) 0;
-       else
-               tag.mmrt_dbid = MyDatabaseId;
-
-       entry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
-                                                                                  (void *) &tag,
-                                                                                  HASH_ENTER, &found);
-
-       if (entry == NULL)
-       {
-               LWLockRelease(MMCacheLock);
-               ereport(FATAL,
-                               (errcode(ERRCODE_OUT_OF_MEMORY),
-                                errmsg("out of memory")));
-       }
-
-       if (found)
-       {
-               /* already exists */
-               LWLockRelease(MMCacheLock);
-               return SM_FAIL;
-       }
-
-       entry->mmrhe_nblocks = 0;
-
-       LWLockRelease(MMCacheLock);
-
-       return SM_SUCCESS;
-}
-
-/*
- *     mmunlink() -- Unlink a relation.
- *
- * XXX currently broken: needs to accept RelFileNode, not Relation
- */
-int
-mmunlink(RelFileNode rnode)
-{
-       int                     i;
-       MMHashEntry *entry;
-       MMRelHashEntry *rentry;
-       MMRelTag        rtag;
-
-       LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
-       for (i = 0; i < MMNBUFFERS; i++)
-       {
-               if (MMBlockTags[i].mmct_dbid == rnode.tblNode
-                       && MMBlockTags[i].mmct_relid == rnode.relNode)
-               {
-                       entry = (MMHashEntry *) hash_search(MMCacheHT,
-                                                                                               (void *) &MMBlockTags[i],
-                                                                                               HASH_REMOVE, NULL);
-                       if (entry == NULL)
-                       {
-                               LWLockRelease(MMCacheLock);
-                               elog(FATAL, "cache hash table corrupted");
-                       }
-                       MMBlockTags[i].mmct_dbid = (Oid) 0;
-                       MMBlockTags[i].mmct_relid = (Oid) 0;
-                       MMBlockTags[i].mmct_blkno = (BlockNumber) 0;
-               }
-       }
-       rtag.mmrt_dbid = rnode.tblNode;
-       rtag.mmrt_relid = rnode.relNode;
-
-       rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
-                                                                                       (void *) &rtag,
-                                                                                       HASH_REMOVE, NULL);
-
-       if (rentry == NULL)
-       {
-               LWLockRelease(MMCacheLock);
-               elog(FATAL, "rel cache hash table corrupted");
-       }
-
-       (*MMCurRelno)--;
-
-       LWLockRelease(MMCacheLock);
-       return 1;
-}
-
-/*
- *     mmextend() -- Add a block to the specified relation.
- *
- *             This routine returns SM_FAIL or SM_SUCCESS, with errno set as
- *             appropriate.
- */
-int
-mmextend(Relation reln, BlockNumber blocknum, char *buffer)
-{
-       MMRelHashEntry *rentry;
-       MMHashEntry *entry;
-       int                     i;
-       Oid                     reldbid;
-       int                     offset;
-       bool            found;
-       MMRelTag        rtag;
-       MMCacheTag      tag;
-
-       if (reln->rd_rel->relisshared)
-               reldbid = (Oid) 0;
-       else
-               reldbid = MyDatabaseId;
-
-       tag.mmct_dbid = rtag.mmrt_dbid = reldbid;
-       tag.mmct_relid = rtag.mmrt_relid = RelationGetRelid(reln);
-
-       LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
-       if (*MMCurTop == MMNBUFFERS)
-       {
-               for (i = 0; i < MMNBUFFERS; i++)
-               {
-                       if (MMBlockTags[i].mmct_dbid == 0 &&
-                               MMBlockTags[i].mmct_relid == 0)
-                               break;
-               }
-               if (i == MMNBUFFERS)
-               {
-                       LWLockRelease(MMCacheLock);
-                       return SM_FAIL;
-               }
-       }
-       else
-       {
-               i = *MMCurTop;
-               (*MMCurTop)++;
-       }
-
-       rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
-                                                                                       (void *) &rtag,
-                                                                                       HASH_FIND, NULL);
-       if (rentry == NULL)
-       {
-               LWLockRelease(MMCacheLock);
-               elog(FATAL, "rel cache hash table corrupted");
-       }
-
-       tag.mmct_blkno = rentry->mmrhe_nblocks;
-
-       entry = (MMHashEntry *) hash_search(MMCacheHT,
-                                                                               (void *) &tag,
-                                                                               HASH_ENTER, &found);
-       if (entry == NULL || found)
-       {
-               LWLockRelease(MMCacheLock);
-               elog(FATAL, "cache hash table corrupted");
-       }
-
-       entry->mmhe_bufno = i;
-       MMBlockTags[i].mmct_dbid = reldbid;
-       MMBlockTags[i].mmct_relid = RelationGetRelid(reln);
-       MMBlockTags[i].mmct_blkno = rentry->mmrhe_nblocks;
-
-       /* page numbers are zero-based, so we increment this at the end */
-       (rentry->mmrhe_nblocks)++;
-
-       /* write the extended page */
-       offset = (i * BLCKSZ);
-       memmove(&(MMBlockCache[offset]), buffer, BLCKSZ);
-
-       LWLockRelease(MMCacheLock);
-
-       return SM_SUCCESS;
-}
-
-/*
- *     mmopen() -- Open the specified relation.
- */
-int
-mmopen(Relation reln)
-{
-       /* automatically successful */
-       return 0;
-}
-
-/*
- *     mmclose() -- Close the specified relation.
- *
- *             Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
- */
-int
-mmclose(Relation reln)
-{
-       /* automatically successful */
-       return SM_SUCCESS;
-}
-
-/*
- *     mmread() -- Read the specified block from a relation.
- *
- *             Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mmread(Relation reln, BlockNumber blocknum, char *buffer)
-{
-       MMHashEntry *entry;
-       int                     offset;
-       MMCacheTag      tag;
-
-       if (reln->rd_rel->relisshared)
-               tag.mmct_dbid = (Oid) 0;
-       else
-               tag.mmct_dbid = MyDatabaseId;
-
-       tag.mmct_relid = RelationGetRelid(reln);
-       tag.mmct_blkno = blocknum;
-
-       LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-       entry = (MMHashEntry *) hash_search(MMCacheHT,
-                                                                               (void *) &tag,
-                                                                               HASH_FIND, NULL);
-
-       if (entry == NULL)
-       {
-               /* reading nonexistent pages is defined to fill them with zeroes */
-               LWLockRelease(MMCacheLock);
-               MemSet(buffer, 0, BLCKSZ);
-               return SM_SUCCESS;
-       }
-
-       offset = (entry->mmhe_bufno * BLCKSZ);
-       memmove(buffer, &MMBlockCache[offset], BLCKSZ);
-
-       LWLockRelease(MMCacheLock);
-
-       return SM_SUCCESS;
-}
-
-/*
- *     mmwrite() -- Write the supplied block at the appropriate location.
- *
- *             Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mmwrite(Relation reln, BlockNumber blocknum, char *buffer)
-{
-       MMHashEntry *entry;
-       int                     offset;
-       MMCacheTag      tag;
-
-       if (reln->rd_rel->relisshared)
-               tag.mmct_dbid = (Oid) 0;
-       else
-               tag.mmct_dbid = MyDatabaseId;
-
-       tag.mmct_relid = RelationGetRelid(reln);
-       tag.mmct_blkno = blocknum;
-
-       LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-       entry = (MMHashEntry *) hash_search(MMCacheHT,
-                                                                               (void *) &tag,
-                                                                               HASH_FIND, NULL);
-
-       if (entry == NULL)
-       {
-               LWLockRelease(MMCacheLock);
-               elog(FATAL, "cache hash table missing requested page");
-       }
-
-       offset = (entry->mmhe_bufno * BLCKSZ);
-       memmove(&MMBlockCache[offset], buffer, BLCKSZ);
-
-       LWLockRelease(MMCacheLock);
-
-       return SM_SUCCESS;
-}
-
-/*
- *     mmblindwrt() -- Write a block to stable storage blind.
- *
- *             We have to be able to do this using only the rnode of the relation
- *             in which the block belongs.  Otherwise this is much like mmwrite().
- */
-int
-mmblindwrt(RelFileNode rnode,
-                  BlockNumber blkno,
-                  char *buffer)
-{
-       return SM_FAIL;
-}
-
-/*
- *     mmnblocks() -- Get the number of blocks stored in a relation.
- *
- *             Returns # of blocks or InvalidBlockNumber on error.
- */
-BlockNumber
-mmnblocks(Relation reln)
-{
-       MMRelTag        rtag;
-       MMRelHashEntry *rentry;
-       BlockNumber nblocks;
-
-       if (reln->rd_rel->relisshared)
-               rtag.mmrt_dbid = (Oid) 0;
-       else
-               rtag.mmrt_dbid = MyDatabaseId;
-
-       rtag.mmrt_relid = RelationGetRelid(reln);
-
-       LWLockAcquire(MMCacheLock, LW_EXCLUSIVE);
-
-       rentry = (MMRelHashEntry *) hash_search(MMRelCacheHT,
-                                                                                       (void *) &rtag,
-                                                                                       HASH_FIND, NULL);
-
-       if (rentry)
-               nblocks = rentry->mmrhe_nblocks;
-       else
-               nblocks = InvalidBlockNumber;
-
-       LWLockRelease(MMCacheLock);
-
-       return nblocks;
-}
-
-/*
- *     mmcommit() -- Commit a transaction.
- *
- *             Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
- */
-int
-mmcommit(void)
-{
-       return SM_SUCCESS;
-}
-
-/*
- *     mmabort() -- Abort a transaction.
- */
-
-int
-mmabort(void)
-{
-       return SM_SUCCESS;
-}
-
-/*
- *     MMShmemSize() -- Declare amount of shared memory we require.
- *
- *             The shared memory initialization code creates a block of shared
- *             memory exactly big enough to hold all the structures it needs to.
- *             This routine declares how much space the main memory storage
- *             manager will use.
- */
-int
-MMShmemSize(void)
-{
-       int                     size = 0;
-
-       /*
-        * first compute space occupied by the (dbid,relid,blkno) hash table
-        */
-       size += hash_estimate_size(MMNBUFFERS, sizeof(MMHashEntry));
-
-       /*
-        * now do the same for the rel hash table
-        */
-       size += hash_estimate_size(MMNRELATIONS, sizeof(MMRelHashEntry));
-
-       /*
-        * finally, add in the memory block we use directly
-        */
-
-       size += MAXALIGN(BLCKSZ * MMNBUFFERS);
-       size += MAXALIGN(sizeof(*MMCurTop));
-       size += MAXALIGN(sizeof(*MMCurRelno));
-       size += MAXALIGN(MMNBUFFERS * sizeof(MMCacheTag));
-
-       return size;
-}
-
-#endif   /* STABLE_MEMORY_STORAGE */
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c

index 0e33af5f28120a5f7fa8315bd891e0b0ed630a5f..09ee4144c50ac4b5dbb6274457bbacb46cc16574 100644 (file)
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.68 2004/01/06 18:07:31 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.69 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -21,72 +21,52 @@
  #include "storage/freespace.h"
  #include "storage/ipc.h"
  #include "storage/smgr.h"
+#include "utils/hsearch.h"
  #include "utils/memutils.h"
  
  
-static void smgrshutdown(int code, Datum arg);
-
+/*
+ * This struct of function pointers defines the API between smgr.c and
+ * any individual storage manager module.  Note that smgr subfunctions are
+ * generally expected to return TRUE on success, FALSE on error.  (For
+ * nblocks and truncate we instead say that returning InvalidBlockNumber
+ * indicates an error.)
+ */
  typedef struct f_smgr
  {
-       int                     (*smgr_init) (void);    /* may be NULL */
-       int                     (*smgr_shutdown) (void);                /* may be NULL */
-       int                     (*smgr_create) (Relation reln);
-       int                     (*smgr_unlink) (RelFileNode rnode);
-       int                     (*smgr_extend) (Relation reln, BlockNumber blocknum,
+       bool            (*smgr_init) (void);                    /* may be NULL */
+       bool            (*smgr_shutdown) (void);                /* may be NULL */
+       bool            (*smgr_close) (SMgrRelation reln);
+       bool            (*smgr_create) (SMgrRelation reln, bool isRedo);
+       bool            (*smgr_unlink) (RelFileNode rnode, bool isRedo);
+       bool            (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
                                                                                         char *buffer);
-       int                     (*smgr_open) (Relation reln);
-       int                     (*smgr_close) (Relation reln);
-       int                     (*smgr_read) (Relation reln, BlockNumber blocknum,
+       bool            (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
                                                                                   char *buffer);
-       int                     (*smgr_write) (Relation reln, BlockNumber blocknum,
+       bool            (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
                                                                                    char *buffer);
-       int                     (*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno,
-                                                                                         char *buffer);
-       BlockNumber (*smgr_nblocks) (Relation reln);
-       BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks);
-       int                     (*smgr_commit) (void);  /* may be NULL */
-       int                     (*smgr_abort) (void);   /* may be NULL */
-       int                     (*smgr_sync) (void);
+       BlockNumber (*smgr_nblocks) (SMgrRelation reln);
+       BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks);
+       bool            (*smgr_commit) (void);                  /* may be NULL */
+       bool            (*smgr_abort) (void);                   /* may be NULL */
+       bool            (*smgr_sync) (void);                    /* may be NULL */
  } f_smgr;
  
-/*
- *     The weird placement of commas in this init block is to keep the compiler
- *     happy, regardless of what storage managers we have (or don't have).
- */
-
-static f_smgr smgrsw[] = {
  
+static const f_smgr smgrsw[] = {
         /* magnetic disk */
-       {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose,
-               mdread, mdwrite, mdblindwrt,
-               mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
-       },
-
-#ifdef STABLE_MEMORY_STORAGE
-       /* main memory */
-       {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose,
-               mmread, mmwrite, mmblindwrt,
-       mmnblocks, NULL, mmcommit, mmabort, NULL},
-#endif
+       {mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
+        mdread, mdwrite, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync
+       }
  };
  
-/*
- *     This array records which storage managers are write-once, and which
- *     support overwrite.      A 'true' entry means that the storage manager is
- *     write-once.  In the best of all possible worlds, there would be no
- *     write-once storage managers.
- */
+static const int       NSmgr = lengthof(smgrsw);
  
-#ifdef NOT_USED
-static bool smgrwo[] = {
-       false,                                          /* magnetic disk */
-#ifdef STABLE_MEMORY_STORAGE
-       false,                                          /* main memory */
-#endif
-};
-#endif
  
-static int     NSmgr = lengthof(smgrsw);
+/*
+ * Each backend has a hashtable that stores all extant SMgrRelation objects.
+ */
+static HTAB *SMgrRelationHash = NULL;
  
  /*
   * We keep a list of all relations (represented as RelFileNode values)
@@ -105,7 +85,7 @@ static int   NSmgr = lengthof(smgrsw);
  typedef struct PendingRelDelete
  {
         RelFileNode relnode;            /* relation that may need to be deleted */
-       int16           which;                  /* which storage manager? */
+       int                     which;                  /* which storage manager? */
         bool            isTemp;                 /* is it a temporary relation? */
         bool            atCommit;               /* T=delete at commit; F=delete at abort */
         struct PendingRelDelete *next;          /* linked-list link */
@@ -114,12 +94,20 @@ typedef struct PendingRelDelete
  static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
  
  
+/* local function prototypes */
+static void smgrshutdown(int code, Datum arg);
+static void smgr_internal_unlink(RelFileNode rnode, int which,
+                                                                bool isTemp, bool isRedo);
+
+
  /*
   *     smgrinit(), smgrshutdown() -- Initialize or shut down all storage
   *                                                               managers.
   *
+ * Note: in the normal multiprocess scenario with a postmaster, these are
+ * called at postmaster start and stop, not per-backend.
   */
-int
+void
  smgrinit(void)
  {
         int                     i;
@@ -128,7 +116,7 @@ smgrinit(void)
         {
                 if (smgrsw[i].smgr_init)
                 {
-                       if ((*(smgrsw[i].smgr_init)) () == SM_FAIL)
+                       if (! (*(smgrsw[i].smgr_init)) ())
                                 elog(FATAL, "smgr initialization failed on %s: %m",
                                          DatumGetCString(DirectFunctionCall1(smgrout,
                                                                                                          Int16GetDatum(i))));
@@ -137,8 +125,6 @@ smgrinit(void)
  
         /* register the shutdown proc */
         on_proc_exit(smgrshutdown, 0);
-
-       return SM_SUCCESS;
  }
  
  static void
@@ -150,7 +136,7 @@ smgrshutdown(int code, Datum arg)
         {
                 if (smgrsw[i].smgr_shutdown)
                 {
-                       if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL)
+                       if (! (*(smgrsw[i].smgr_shutdown)) ())
                                 elog(FATAL, "smgr shutdown failed on %s: %m",
                                          DatumGetCString(DirectFunctionCall1(smgrout,
                                                                                                          Int16GetDatum(i))));
@@ -158,58 +144,178 @@ smgrshutdown(int code, Datum arg)
         }
  }
  
+/*
+ *     smgropen() -- Return an SMgrRelation object, creating it if need be.
+ *
+ *             This does not attempt to actually open the object.
+ *
+ * rnode is the RelFileNode used as the lookup key in SMgrRelationHash.
+ * The hashtable itself is created lazily on the first call, keyed by
+ * RelFileNode, with entries of size SMgrRelationData, hashed via tag_hash.
+ *
+ * Returns the hashtable entry for rnode.  An entry that did not exist
+ * before is initialized with smgr_which = 0 and md_fd = NULL; an entry
+ * that already existed is returned without being modified here.
+ * Reports ERROR ("out of memory") if hash_search returns NULL.
+ */
+SMgrRelation
+smgropen(RelFileNode rnode)
+{
+       SMgrRelation    reln;
+       bool            found;
+
+       if (SMgrRelationHash == NULL)
+       {
+               /* First time through: initialize the hash table */
+               HASHCTL         ctl;
+
+               MemSet(&ctl, 0, sizeof(ctl));
+               ctl.keysize = sizeof(RelFileNode);
+               ctl.entrysize = sizeof(SMgrRelationData);
+               ctl.hash = tag_hash;
+               SMgrRelationHash = hash_create("smgr relation table", 400,
+                                                                          &ctl, HASH_ELEM | HASH_FUNCTION);
+       }
+
+       /*
+        * Look up or create an entry.  "found" tells us afterwards whether
+        * the entry already existed, which decides whether we must
+        * initialize it below.
+        */
+       reln = (SMgrRelation) hash_search(SMgrRelationHash,
+                                                                         (void *) &rnode,
+                                                                         HASH_ENTER, &found);
+       if (reln == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+
+       /* Initialize it if not present before */
+       if (!found)
+       {
+               /* hash_search already filled in the lookup key */
+               reln->smgr_which = 0;   /* we only have md.c at present */
+               reln->md_fd = NULL;             /* mark it not open */
+       }
+
+       return reln;
+}
+
+/*
+ *     smgrclose() -- Close and delete an SMgrRelation object.
+ *
+ * It is the caller's responsibility not to leave any dangling references
+ * to the object.  (Pointers should be cleared after successful return;
+ * on the off chance of failure, the SMgrRelation object will still exist.)
+ *
+ * Two steps are performed, in this order:
+ *
+ * 1. The smgr_close callback of the storage manager selected by
+ *    reln->smgr_which is invoked.  If it returns false, ERROR is reported
+ *    (naming the relation by tblNode/relNode) and the hashtable entry is
+ *    not removed.
+ *
+ * 2. The entry keyed by reln->smgr_rnode is removed from SMgrRelationHash.
+ *    If hash_search returns NULL for the removal, ERROR is reported
+ *    ("SMgrRelation hashtable corrupted").
+ *
+ * NOTE(review): SMgrRelationHash is used here without a NULL check;
+ * presumably reln always comes from the hashtable (i.e. from smgropen),
+ * so the table exists -- confirm against callers.
+ */
+void
+smgrclose(SMgrRelation reln)
+{
+       if (! (*(smgrsw[reln->smgr_which].smgr_close)) (reln))
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not close relation %u/%u: %m",
+                                               reln->smgr_rnode.tblNode,
+                                               reln->smgr_rnode.relNode)));
+
+       if (hash_search(SMgrRelationHash,
+                                       (void *) &(reln->smgr_rnode),
+                                       HASH_REMOVE, NULL) == NULL)
+               elog(ERROR, "SMgrRelation hashtable corrupted");
+}
+
+/*
+ *     smgrcloseall() -- Close all existing SMgrRelation objects.
+ *
+ * It is the caller's responsibility not to leave any dangling references.
+ *
+ * Does nothing if the hashtable has not been created yet.  Otherwise it
+ * walks SMgrRelationHash sequentially and calls smgrclose() on every
+ * entry returned by the scan.
+ *
+ * NOTE(review): smgrclose() removes the entry from the same hashtable
+ * that is being scanned, so this loop relies on the sequential scan
+ * tolerating deletion of the entry it has just returned -- confirm
+ * against the hash table implementation.
+ *
+ * NOTE(review): smgrclose() reports ERROR on failure; presumably that
+ * abandons the scan and leaves the not-yet-visited entries in place --
+ * confirm against the error-handling semantics.
+ */
+void
+smgrcloseall(void)
+{
+       HASH_SEQ_STATUS status;
+       SMgrRelation reln;
+
+       /* Nothing to do if hashtable not set up */
+       if (SMgrRelationHash == NULL)
+               return;
+
+       hash_seq_init(&status, SMgrRelationHash);
+
+       while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
+       {
+               smgrclose(reln);
+       }
+}
+
+/*
+ *     smgrclosenode() -- Close SMgrRelation object for given RelFileNode,
+ *                                        if one exists.
+ *
+ * This has the same effects as smgrclose(smgropen(rnode)), but it avoids
+ * uselessly creating a hashtable entry only to drop it again when no
+ * such entry exists already.
+ *
+ * It is the caller's responsibility not to leave any dangling references.
+ *
+ * rnode is the lookup key.  The function returns silently, without
+ * touching anything, in two cases: the hashtable has not been created
+ * yet, or the HASH_FIND lookup finds no entry for rnode.  Only when an
+ * entry is found is it handed to smgrclose().
+ */
+void
+smgrclosenode(RelFileNode rnode)
+{
+       SMgrRelation    reln;
+
+       /* Nothing to do if hashtable not set up */
+       if (SMgrRelationHash == NULL)
+               return;
+
+       reln = (SMgrRelation) hash_search(SMgrRelationHash,
+                                                                         (void *) &rnode,
+                                                                         HASH_FIND, NULL);
+       if (reln != NULL)
+               smgrclose(reln);
+}
+
  /*
   *     smgrcreate() -- Create a new relation.
   *
- *             This routine takes a reldesc, creates the relation on the appropriate
- *             device, and returns a file descriptor for it.
+ *             Given an already-created (but presumably unused) SMgrRelation,
+ *             cause the underlying disk file or other storage to be created.
+ *
+ *             If isRedo is true, it is okay for the underlying file to exist
+ *             already because we are in a WAL replay sequence.  In this case
+ *             we should make no PendingRelDelete entry; the WAL sequence will
+ *             tell whether to drop the file.
   */
-int
-smgrcreate(int16 which, Relation reln)
+void
+smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
  {
-       int                     fd;
         PendingRelDelete *pending;
  
-       if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0)
+       if (! (*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo))
                 ereport(ERROR,
                                 (errcode_for_file_access(),
-                                errmsg("could not create relation \"%s\": %m",
-                                               RelationGetRelationName(reln))));
+                                errmsg("could not create relation %u/%u: %m",
+                                               reln->smgr_rnode.tblNode,
+                                               reln->smgr_rnode.relNode)));
+
+       if (isRedo)
+               return;
  
         /* Add the relation to the list of stuff to delete at abort */
         pending = (PendingRelDelete *)
                 MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
-       pending->relnode = reln->rd_node;
-       pending->which = which;
-       pending->isTemp = reln->rd_istemp;
+       pending->relnode = reln->smgr_rnode;
+       pending->which = reln->smgr_which;
+       pending->isTemp = isTemp;
         pending->atCommit = false;      /* delete if abort */
         pending->next = pendingDeletes;
         pendingDeletes = pending;
-
-       return fd;
  }
  
  /*
- *     smgrunlink() -- Unlink a relation.
+ *     smgrscheduleunlink() -- Schedule unlinking a relation at xact commit.
+ *
+ *             The relation is marked to be removed from the store if we
+ *             successfully commit the current transaction.
   *
- *             The relation is removed from the store.  Actually, we just remember
- *             that we want to do this at transaction commit.
+ * This also implies smgrclose() on the SMgrRelation object.
   */
-int
-smgrunlink(int16 which, Relation reln)
+void
+smgrscheduleunlink(SMgrRelation reln, bool isTemp)
  {
         PendingRelDelete *pending;
  
-       /* Make sure the file is closed */
-       if (reln->rd_fd >= 0)
-               smgrclose(which, reln);
-
         /* Add the relation to the list of stuff to delete at commit */
         pending = (PendingRelDelete *)
                 MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
-       pending->relnode = reln->rd_node;
-       pending->which = which;
-       pending->isTemp = reln->rd_istemp;
+       pending->relnode = reln->smgr_rnode;
+       pending->which = reln->smgr_which;
+       pending->isTemp = isTemp;
         pending->atCommit = true;       /* delete if commit */
         pending->next = pendingDeletes;
         pendingDeletes = pending;
@@ -224,78 +330,83 @@ smgrunlink(int16 which, Relation reln)
          * immediately, but for now I'll keep the logic simple.
          */
  
-       return SM_SUCCESS;
+       /* Now close the file and throw away the hashtable entry */
+       smgrclose(reln);
  }
  
  /*
- *     smgrextend() -- Add a new block to a file.
+ *     smgrdounlink() -- Immediately unlink a relation.
   *
- *             The semantics are basically the same as smgrwrite(): write at the
- *             specified position.  However, we are expecting to extend the
- *             relation (ie, blocknum is the current EOF), and so in case of
- *             failure we clean up by truncating.
+ *             The relation is removed from the store.  This should not be used
+ *             during transactional operations, since it can't be undone.
   *
- *             Returns SM_SUCCESS on success; aborts the current transaction on
- *             failure.
+ *             If isRedo is true, it is okay for the underlying file to be gone
+ *             already.  (In practice isRedo will always be true.)
+ *
+ * This also implies smgrclose() on the SMgrRelation object.
   */
-int
-smgrextend(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
+void
+smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo)
  {
-       int                     status;
+       RelFileNode     rnode = reln->smgr_rnode;
+       int                     which = reln->smgr_which;
  
-       status = (*(smgrsw[which].smgr_extend)) (reln, blocknum, buffer);
-
-       if (status == SM_FAIL)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not extend relation \"%s\": %m",
-                                               RelationGetRelationName(reln)),
-                                errhint("Check free disk space.")));
+       /* Close the file and throw away the hashtable entry */
+       smgrclose(reln);
  
-       return status;
+       smgr_internal_unlink(rnode, which, isTemp, isRedo);
  }
  
  /*
- *     smgropen() -- Open a relation using a particular storage manager.
- *
- *             Returns the fd for the open relation on success.
- *
- *             On failure, returns -1 if failOK, else aborts the transaction.
+ * Shared subroutine that actually does the unlink ...
   */
-int
-smgropen(int16 which, Relation reln, bool failOK)
+static void
+smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
  {
-       int                     fd;
-
-       if (reln->rd_rel->relkind == RELKIND_VIEW)
-               return -1;
-       if (reln->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
-               return -1;
-       if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0)
-               if (!failOK)
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open file \"%s\": %m",
-                                                       RelationGetRelationName(reln))));
-
-       return fd;
+       /*
+        * Get rid of any leftover buffers for the rel (shouldn't be any in the
+        * commit case, but there can be in the abort case).
+        */
+       DropRelFileNodeBuffers(rnode, isTemp);
+
+       /*
+        * Tell the free space map to forget this relation.  It won't be accessed
+        * any more anyway, but we may as well recycle the map space quickly.
+        */
+       FreeSpaceMapForgetRel(&rnode);
+
+       /*
+        * And delete the physical files.
+        *
+        * Note: we treat deletion failure as a WARNING, not an error,
+        * because we've already decided to commit or abort the current xact.
+        */
+       if (! (*(smgrsw[which].smgr_unlink)) (rnode, isRedo))
+               ereport(WARNING,
+                               (errcode_for_file_access(),
+                                errmsg("could not unlink relation %u/%u: %m",
+                                               rnode.tblNode,
+                                               rnode.relNode)));
  }
  
  /*
- *     smgrclose() -- Close a relation.
+ *     smgrextend() -- Add a new block to a file.
   *
- *             Returns SM_SUCCESS on success, aborts on failure.
+ *             The semantics are basically the same as smgrwrite(): write at the
+ *             specified position.  However, we are expecting to extend the
+ *             relation (ie, blocknum is the current EOF), and so in case of
+ *             failure we clean up by truncating.
   */
-int
-smgrclose(int16 which, Relation reln)
+void
+smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
-       if ((*(smgrsw[which].smgr_close)) (reln) == SM_FAIL)
+       if (! (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer))
                 ereport(ERROR,
                                 (errcode_for_file_access(),
-                                errmsg("could not close relation \"%s\": %m",
-                                               RelationGetRelationName(reln))));
-
-       return SM_SUCCESS;
+                                errmsg("could not extend relation %u/%u: %m",
+                                               reln->smgr_rnode.tblNode,
+                                               reln->smgr_rnode.relNode),
+                                errhint("Check free disk space.")));
  }
  
  /*
@@ -304,24 +415,18 @@ smgrclose(int16 which, Relation reln)
   *
   *             This routine is called from the buffer manager in order to
   *             instantiate pages in the shared buffer cache.  All storage managers
- *             return pages in the format that POSTGRES expects.  This routine
- *             dispatches the read.  On success, it returns SM_SUCCESS.  On failure,
- *             the current transaction is aborted.
+ *             return pages in the format that POSTGRES expects.
   */
-int
-smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
+void
+smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
-       int                     status;
-
-       status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer);
-
-       if (status == SM_FAIL)
+       if (! (*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer))
                 ereport(ERROR,
                                 (errcode_for_file_access(),
-                                errmsg("could not read block %d of relation \"%s\": %m",
-                                               blocknum, RelationGetRelationName(reln))));
-
-       return status;
+                                errmsg("could not read block %u of relation %u/%u: %m",
+                                               blocknum,
+                                               reln->smgr_rnode.tblNode,
+                                               reln->smgr_rnode.relNode)));
  }
  
  /*
@@ -329,56 +434,17 @@ smgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
   *
   *             This is not a synchronous write -- the block is not necessarily
   *             on disk at return, only dumped out to the kernel.
- *
- *             The buffer is written out via the appropriate
- *             storage manager.  This routine returns SM_SUCCESS or aborts
- *             the current transaction.
   */
-int
-smgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
-{
-       int                     status;
-
-       status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer);
-
-       if (status == SM_FAIL)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not write block %d of relation \"%s\": %m",
-                                               blocknum, RelationGetRelationName(reln))));
-
-       return status;
-}
-
-/*
- *     smgrblindwrt() -- Write a page out blind.
- *
- *             In some cases, we may find a page in the buffer cache that we
- *             can't make a reldesc for.  This happens, for example, when we
- *             want to reuse a dirty page that was written by a transaction
- *             that has not yet committed, which created a new relation.  In
- *             this case, the buffer manager will call smgrblindwrt() with
- *             the name and OID of the database and the relation to which the
- *             buffer belongs.  Every storage manager must be able to write
- *             this page out to stable storage in this circumstance.
- */
-int
-smgrblindwrt(int16 which,
-                        RelFileNode rnode,
-                        BlockNumber blkno,
-                        char *buffer)
+void
+smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
-       int                     status;
-
-       status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer);
-
-       if (status == SM_FAIL)
+       if (! (*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer))
                 ereport(ERROR,
                                 (errcode_for_file_access(),
-                                errmsg("could not write block %d of %u/%u blind: %m",
-                                               blkno, rnode.tblNode, rnode.relNode)));
-
-       return status;
+                                errmsg("could not write block %u of relation %u/%u: %m",
+                                               blocknum,
+                                               reln->smgr_rnode.tblNode,
+                                               reln->smgr_rnode.relNode)));
  }
  
  /*
@@ -389,11 +455,11 @@ smgrblindwrt(int16 which,
   *             transaction on failure.
   */
  BlockNumber
-smgrnblocks(int16 which, Relation reln)
+smgrnblocks(SMgrRelation reln)
  {
         BlockNumber nblocks;
  
-       nblocks = (*(smgrsw[which].smgr_nblocks)) (reln);
+       nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
  
         /*
          * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would
@@ -404,8 +470,9 @@ smgrnblocks(int16 which, Relation reln)
         if (nblocks == InvalidBlockNumber)
                 ereport(ERROR,
                                 (errcode_for_file_access(),
-                                errmsg("could not count blocks of relation \"%s\": %m",
-                                               RelationGetRelationName(reln))));
+                                errmsg("could not count blocks of relation %u/%u: %m",
+                                               reln->smgr_rnode.tblNode,
+                                               reln->smgr_rnode.relNode)));
  
         return nblocks;
  }
@@ -418,27 +485,25 @@ smgrnblocks(int16 which, Relation reln)
   *             transaction on failure.
   */
  BlockNumber
-smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
+smgrtruncate(SMgrRelation reln, BlockNumber nblocks)
  {
         BlockNumber newblks;
  
-       newblks = nblocks;
-       if (smgrsw[which].smgr_truncate)
-       {
-               /*
-                * Tell the free space map to forget anything it may have stored
-                * for the about-to-be-deleted blocks.  We want to be sure it
-                * won't return bogus block numbers later on.
-                */
-               FreeSpaceMapTruncateRel(&reln->rd_node, nblocks);
-
-               newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks);
-               if (newblks == InvalidBlockNumber)
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not truncate relation \"%s\" to %u blocks: %m",
-                                                       RelationGetRelationName(reln), nblocks)));
-       }
+       /*
+        * Before truncating, tell the free space map to forget anything it
+        * may have stored for the about-to-be-deleted blocks.  We want to be
+        * sure it won't return bogus block numbers later on.
+        */
+       FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
+
+       newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks);
+       if (newblks == InvalidBlockNumber)      /* manager signalled failure */
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not truncate relation %u/%u to %u blocks: %m",
+                                               reln->smgr_rnode.tblNode,
+                                               reln->smgr_rnode.relNode,
+                                               nblocks)));
  
         return newblks;
  }
@@ -446,7 +511,7 @@ smgrtruncate(int16 which, Relation reln, BlockNumber nblocks)
  /*
   *     smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
   */
-int
+void
  smgrDoPendingDeletes(bool isCommit)
  {
         while (pendingDeletes != NULL)
@@ -455,39 +520,12 @@ smgrDoPendingDeletes(bool isCommit)
  
                 pendingDeletes = pending->next;
                 if (pending->atCommit == isCommit)
-               {
-                       /*
-                        * Get rid of any leftover buffers for the rel (shouldn't be
-                        * any in the commit case, but there can be in the abort
-                        * case).
-                        */
-                       DropRelFileNodeBuffers(pending->relnode, pending->isTemp);
-
-                       /*
-                        * Tell the free space map to forget this relation.  It won't
-                        * be accessed any more anyway, but we may as well recycle the
-                        * map space quickly.
-                        */
-                       FreeSpaceMapForgetRel(&pending->relnode);
-
-                       /*
-                        * And delete the physical files.
-                        *
-                        * Note: we treat deletion failure as a WARNING, not an error,
-                        * because we've already decided to commit or abort the
-                        * current xact.
-                        */
-                       if ((*(smgrsw[pending->which].smgr_unlink)) (pending->relnode) == SM_FAIL)
-                               ereport(WARNING,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not unlink %u/%u: %m",
-                                                               pending->relnode.tblNode,
-                                                               pending->relnode.relNode)));
-               }
+                       smgr_internal_unlink(pending->relnode,
+                                                                pending->which,
+                                                                pending->isTemp,
+                                                                false);
                 pfree(pending);
         }
-
-       return SM_SUCCESS;
  }
  
  /*
@@ -496,7 +534,7 @@ smgrDoPendingDeletes(bool isCommit)
   *
   *             This is called before we actually commit.
   */
-int
+void
  smgrcommit(void)
  {
         int                     i;
@@ -505,20 +543,18 @@ smgrcommit(void)
         {
                 if (smgrsw[i].smgr_commit)
                 {
-                       if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL)
+                       if (! (*(smgrsw[i].smgr_commit)) ())
                                 elog(FATAL, "transaction commit failed on %s: %m",
                                          DatumGetCString(DirectFunctionCall1(smgrout,
                                                                                                          Int16GetDatum(i))));
                 }
         }
-
-       return SM_SUCCESS;
  }
  
  /*
   *     smgrabort() -- Abort changes made during the current transaction.
   */
-int
+void
  smgrabort(void)
  {
         int                     i;
@@ -527,20 +563,18 @@ smgrabort(void)
         {
                 if (smgrsw[i].smgr_abort)
                 {
-                       if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL)
+                       if (! (*(smgrsw[i].smgr_abort)) ())
                                 elog(FATAL, "transaction abort failed on %s: %m",
                                          DatumGetCString(DirectFunctionCall1(smgrout,
                                                                                                          Int16GetDatum(i))));
                 }
         }
-
-       return SM_SUCCESS;
  }
  
  /*
   *     smgrsync() -- Sync files to disk at checkpoint time.
   */
-int
+void
  smgrsync(void)
  {
         int                     i;
@@ -549,26 +583,14 @@ smgrsync(void)
         {
                 if (smgrsw[i].smgr_sync)
                 {
-                       if ((*(smgrsw[i].smgr_sync)) () == SM_FAIL)
+                       if (! (*(smgrsw[i].smgr_sync)) ())
                                 elog(PANIC, "storage sync failed on %s: %m",
                                          DatumGetCString(DirectFunctionCall1(smgrout,
                                                                                                          Int16GetDatum(i))));
                 }
         }
-
-       return SM_SUCCESS;
  }
  
-#ifdef NOT_USED
-bool
-smgriswo(int16 smgrno)
-{
-       if (smgrno < 0 || smgrno >= NSmgr)
-               elog(ERROR, "invalid storage manager id: %d", smgrno);
-
-       return smgrwo[smgrno];
-}
-#endif
  
  void
  smgr_redo(XLogRecPtr lsn, XLogRecord *record)
diff --git a/src/backend/storage/smgr/smgrtype.c b/src/backend/storage/smgr/smgrtype.c

index 10e084524188f12d0ce058044e1af1eca697d082..60cc305bd1b1030ce972b5e529ac957da23c17e7 100644 (file)
--- a/src/backend/storage/smgr/smgrtype.c
+++ b/src/backend/storage/smgr/smgrtype.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.22 2003/11/29 19:51:57 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgrtype.c,v 1.23 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,26 +16,21 @@
  
  #include "storage/smgr.h"
  
+
  typedef struct smgrid
  {
-       char       *smgr_name;
+       const char         *smgr_name;          /* name of the storage manager */
  } smgrid;
  
  /*
   *     StorageManager[] -- List of defined storage managers.
- *
- *             The weird comma placement is to keep compilers happy no matter
- *             which of these is (or is not) defined.
   */
-
-static smgrid StorageManager[] = {
-       {"magnetic disk"},
-#ifdef STABLE_MEMORY_STORAGE
-       {"main memory"}
-#endif
+static const smgrid StorageManager[] = {
+       {"magnetic disk"}               /* the only manager listed here */
  };
  
-static int     NStorageManagers = lengthof(StorageManager);
+static const int       NStorageManagers = lengthof(StorageManager);    /* entries in StorageManager[] */
+
  
  Datum
  smgrin(PG_FUNCTION_ARGS)
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c

index 72c9310110625831f6192b77e81d7a36967802be..8a23fcc70ef66908f409d8bc2ee0a232d0ad115e 100644 (file)
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.209 2003/11/29 19:51:57 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.210 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -48,6 +48,7 @@
  #include "parser/parse_type.h"
  #include "rewrite/rewriteDefine.h"
  #include "rewrite/rewriteRemove.h"
+#include "storage/fd.h"
  #include "tcop/pquery.h"
  #include "tcop/utility.h"
  #include "utils/acl.h"
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c

index 90577cb6e4021a892d8250c4242aa3cf7a7edf7f..3364322dd5b89eff163493c56642475b0f016eb0 100644 (file)
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -74,7 +74,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.59 2003/11/29 19:52:00 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.60 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -83,6 +83,7 @@
  #include "catalog/catalog.h"
  #include "miscadmin.h"
  #include "storage/sinval.h"
+#include "storage/smgr.h"
  #include "utils/catcache.h"
  #include "utils/inval.h"
  #include "utils/memutils.h"
@@ -298,19 +299,22 @@ AddCatcacheInvalidationMessage(InvalidationListHeader *hdr,
   */
  static void
  AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
-                                                          Oid dbId, Oid relId)
+                                                          Oid dbId, Oid relId, RelFileNode physId)
  {
         SharedInvalidationMessage msg;
  
         /* Don't add a duplicate item */
-       /* We assume comparing relId is sufficient, needn't check dbId */
+       /* dbId is deliberately not compared; we assume it will never change */
+       /* physId (relfilenode) must match too, to support relfilenode reassignment */
         ProcessMessageList(hdr->rclist,
-                                          if (msg->rc.relId == relId) return);
+                                          if (msg->rc.relId == relId && 
+                                                  RelFileNodeEquals(msg->rc.physId, physId)) return);
  
         /* OK, add the item */
         msg.rc.id = SHAREDINVALRELCACHE_ID;
         msg.rc.dbId = dbId;
         msg.rc.relId = relId;
+       msg.rc.physId = physId;         /* lets receivers flush the matching smgr entry */
         AddInvalidationMessage(&hdr->rclist, &msg);
  }
  
@@ -391,10 +395,10 @@ RegisterCatcacheInvalidation(int cacheId,
   * As above, but register a relcache invalidation event.
   */
  static void
-RegisterRelcacheInvalidation(Oid dbId, Oid relId)
+RegisterRelcacheInvalidation(Oid dbId, Oid relId, RelFileNode physId)
  {
         AddRelcacheInvalidationMessage(&CurrentCmdInvalidMsgs,
-                                                                  dbId, relId);
+                                                                  dbId, relId, physId);
  
         /*
          * If the relation being invalidated is one of those cached in the
@@ -435,9 +439,17 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
         }
         else if (msg->id == SHAREDINVALRELCACHE_ID)
         {
-               if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == 0)
+               /*
+                * If the message includes a valid relfilenode, we must ensure that
+                * smgr cache entry gets zapped.  The relcache will handle this if
+                * called, otherwise we must do it directly.
+                */
+               if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid)
                 {
-                       RelationIdInvalidateRelationCacheByRelationId(msg->rc.relId);
+                       if (OidIsValid(msg->rc.physId.relNode))
+                               RelationCacheInvalidateEntry(msg->rc.relId, &msg->rc.physId);
+                       else
+                               RelationCacheInvalidateEntry(msg->rc.relId, NULL);
  
                         for (i = 0; i < cache_callback_count; i++)
                         {
@@ -447,6 +459,12 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
                                         (*ccitem->function) (ccitem->arg, msg->rc.relId);
                         }
                 }
+               else
+               {
+                       /* might have smgr entry even if not in our database */
+                       if (OidIsValid(msg->rc.physId.relNode))
+                               smgrclosenode(msg->rc.physId);
+               }
         }
         else
                 elog(FATAL, "unrecognized SI message id: %d", msg->id);
@@ -456,7 +474,7 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
   *             InvalidateSystemCaches
   *
   *             This blows away all tuples in the system catalog caches and
- *             all the cached relation descriptors (and closes their files too).
+ *             all the cached relation descriptors and smgr cache entries.
   *             Relation descriptors that have positive refcounts are then rebuilt.
   *
   *             We call this when we see a shared-inval-queue overflow signal,
@@ -469,7 +487,7 @@ InvalidateSystemCaches(void)
         int                     i;
  
         ResetCatalogCaches();
-       RelationCacheInvalidate();
+       RelationCacheInvalidate();      /* gets smgr cache too */
  
         for (i = 0; i < cache_callback_count; i++)
         {
@@ -488,11 +506,15 @@ static void
  PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
                                                         void (*CacheIdRegisterFunc) (int, uint32,
                                                                                                            ItemPointer, Oid),
-                                                       void (*RelationIdRegisterFunc) (Oid, Oid))
+                                                       void (*RelationIdRegisterFunc) (Oid, Oid,
+                                                                                                                       RelFileNode))
  {
         Oid                     tupleRelId;
+       Oid                     databaseId;
         Oid                     relationId;
+       RelFileNode     rnode;
  
+       /* Do nothing during bootstrap */
         if (IsBootstrapProcessingMode())
                 return;
  
@@ -524,24 +546,49 @@ PrepareForTupleInvalidation(Relation relation, HeapTuple tuple,
         tupleRelId = RelationGetRelid(relation);
  
         if (tupleRelId == RelOid_pg_class)
+       {
+               Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
+
                 relationId = HeapTupleGetOid(tuple);
+               if (classtup->relisshared)
+                       databaseId = InvalidOid;
+               else
+                       databaseId = MyDatabaseId;
+               rnode.tblNode = databaseId;                     /* XXX change for tablespaces */
+               rnode.relNode = classtup->relfilenode;
+               /*
+                * Note: during a pg_class row update that assigns a new relfilenode
+                * value, we will be called on both the old and new tuples, and thus
+                * will broadcast invalidation messages showing both the old and new
+                * relfilenode values.  This ensures that other backends will close
+                * smgr references to the old relfilenode file.
+                */
+       }
         else if (tupleRelId == RelOid_pg_attribute)
-               relationId = ((Form_pg_attribute) GETSTRUCT(tuple))->attrelid;
+       {
+               Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple);
+
+               relationId = atttup->attrelid;
+               /*
+                * KLUGE ALERT: we always send the relcache event with MyDatabaseId,
+                * even if the rel in question is shared (which we can't easily tell).
+                * This essentially means that only backends in this same database
+                * will react to the relcache flush request.  This is in fact
+                * appropriate, since only those backends could see our pg_attribute
+                * change anyway.  It looks a bit ugly though.
+                */
+               databaseId = MyDatabaseId;
+               /* We assume no smgr cache flush is needed, either */
+               rnode.tblNode = InvalidOid;
+               rnode.relNode = InvalidOid;
+       }
         else
                 return;
  
         /*
-        * Yes.  We need to register a relcache invalidation event for the
-        * relation identified by relationId.
-        *
-        * KLUGE ALERT: we always send the relcache event with MyDatabaseId, even
-        * if the rel in question is shared.  This essentially means that only
-        * backends in this same database will react to the relcache flush
-        * request.  This is in fact appropriate, since only those backends
-        * could see our pg_class or pg_attribute change anyway.  It looks a
-        * bit ugly though.
+        * Yes.  We need to register a relcache invalidation event.
          */
-       (*RelationIdRegisterFunc) (MyDatabaseId, relationId);
+       (*RelationIdRegisterFunc) (databaseId, relationId, rnode);
  }
  
  
@@ -660,7 +707,7 @@ CommandEndInvalidationMessages(bool isCommit)
  /*
   * CacheInvalidateHeapTuple
   *             Register the given tuple for invalidation at end of command
- *             (ie, current command is outdating this tuple).
+ *             (ie, current command is creating or outdating this tuple).
   */
  void
  CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
@@ -678,12 +725,44 @@ CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
   * This is used in places that need to force relcache rebuild but aren't
   * changing any of the tuples recognized as contributors to the relcache
   * entry by PrepareForTupleInvalidation.  (An example is dropping an index.)
+ * We assume in particular that relfilenode isn't changing.
   */
  void
-CacheInvalidateRelcache(Oid relationId)
+CacheInvalidateRelcache(Relation relation)
  {
-       /* See KLUGE ALERT in PrepareForTupleInvalidation */
-       RegisterRelcacheInvalidation(MyDatabaseId, relationId);
+       Oid                     databaseId;             /* InvalidOid when the rel is shared */
+       Oid                     relationId;
+
+       relationId = RelationGetRelid(relation);
+       if (relation->rd_rel->relisshared)
+               databaseId = InvalidOid;        /* shared rel: not tied to our database */
+       else
+               databaseId = MyDatabaseId;      /* ordinary rel: only our database */
+
+       RegisterRelcacheInvalidation(databaseId, relationId, relation->rd_node);
+}
+
+/*
+ * CacheInvalidateRelcacheByTuple
+ *             Register a relcache invalidation for the rel described by a pg_class tuple.
+ */
+void
+CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
+{
+       Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple);
+       Oid                     databaseId;             /* InvalidOid when the rel is shared */
+       Oid                     relationId;
+       RelFileNode     rnode;                  /* built by hand from the tuple's fields */
+
+       relationId = HeapTupleGetOid(classTuple);       /* tuple's OID serves as the relation id */
+       if (classtup->relisshared)
+               databaseId = InvalidOid;        /* shared rel: not tied to our database */
+       else
+               databaseId = MyDatabaseId;      /* ordinary rel: only our database */
+       rnode.tblNode = databaseId;                     /* XXX change for tablespaces */
+       rnode.relNode = classtup->relfilenode;
+
+       RegisterRelcacheInvalidation(databaseId, relationId, rnode);
  }
  
  /*
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c

index 37b81f1244f8021ffb3671ca234a7dc2ca45584d..8561cff549abe3b978be060d3af618812b895d58 100644 (file)
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.196 2004/02/02 00:17:21 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.197 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -54,6 +54,7 @@
  #include "optimizer/clauses.h"
  #include "optimizer/planmain.h"
  #include "optimizer/prep.h"
+#include "storage/fd.h"
  #include "storage/smgr.h"
  #include "utils/builtins.h"
  #include "utils/catcache.h"
@@ -91,13 +92,6 @@ static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
  static HTAB *RelationIdCache;
  static HTAB *RelationSysNameCache;
  
-/*
- * Bufmgr uses RelFileNode for lookup. Actually, I would like to do
- * not pass Relation to bufmgr & beyond at all and keep some cache
- * in smgr, but no time to do it right way now.                -- vadim 10/22/2000
- */
-static HTAB *RelationNodeCache;
-
  /*
   * This flag is false until we have prepared the critical relcache entries
   * that are needed to do indexscans on the tables read by relcache building.
@@ -152,18 +146,12 @@ typedef struct relnamecacheent
         Relation        reldesc;
  } RelNameCacheEnt;
  
-typedef struct relnodecacheent
-{
-       RelFileNode relnode;
-       Relation        reldesc;
-} RelNodeCacheEnt;
-
  /*
   *             macros to manipulate the lookup hashtables
   */
  #define RelationCacheInsert(RELATION)  \
  do { \
-       RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; bool found; \
+       RelIdCacheEnt *idhentry; bool found; \
         idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
                                                                                    (void *) &(RELATION->rd_id), \
                                                                                    HASH_ENTER, \
@@ -174,16 +162,6 @@ do { \
                                  errmsg("out of memory"))); \
         /* used to give notice if found -- now just keep quiet */ \
         idhentry->reldesc = RELATION; \
-       nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \
-                                                                                  (void *) &(RELATION->rd_node), \
-                                                                                  HASH_ENTER, \
-                                                                                  &found); \
-       if (nodentry == NULL) \
-               ereport(ERROR, \
-                               (errcode(ERRCODE_OUT_OF_MEMORY), \
-                                errmsg("out of memory"))); \
-       /* used to give notice if found -- now just keep quiet */ \
-       nodentry->reldesc = RELATION; \
         if (IsSystemNamespace(RelationGetNamespace(RELATION))) \
         { \
                 char *relname = RelationGetRelationName(RELATION); \
@@ -223,30 +201,14 @@ do { \
                 RELATION = NULL; \
  } while(0)
  
-#define RelationNodeCacheLookup(NODE, RELATION) \
-do { \
-       RelNodeCacheEnt *hentry; \
-       hentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \
-                                                                                  (void *)&(NODE), HASH_FIND,NULL); \
-       if (hentry) \
-               RELATION = hentry->reldesc; \
-       else \
-               RELATION = NULL; \
-} while(0)
-
  #define RelationCacheDelete(RELATION) \
  do { \
-       RelIdCacheEnt *idhentry; RelNodeCacheEnt *nodentry; \
+       RelIdCacheEnt *idhentry; \
         idhentry = (RelIdCacheEnt*)hash_search(RelationIdCache, \
                                                                                    (void *)&(RELATION->rd_id), \
                                                                                    HASH_REMOVE, NULL); \
         if (idhentry == NULL) \
                 elog(WARNING, "trying to delete a rd_id reldesc that does not exist"); \
-       nodentry = (RelNodeCacheEnt*)hash_search(RelationNodeCache, \
-                                                                                  (void *)&(RELATION->rd_node), \
-                                                                                  HASH_REMOVE, NULL); \
-       if (nodentry == NULL) \
-               elog(WARNING, "trying to delete a rd_node reldesc that does not exist"); \
         if (IsSystemNamespace(RelationGetNamespace(RELATION))) \
         { \
                 char *relname = RelationGetRelationName(RELATION); \
@@ -423,7 +385,7 @@ AllocateRelationDesc(Relation relation, Form_pg_class relp)
         relation->rd_targblock = InvalidBlockNumber;
  
         /* make sure relation is marked as having no open file yet */
-       relation->rd_fd = -1;
+       relation->rd_smgr = NULL;
  
         /*
          * Copy the relation tuple form
@@ -914,7 +876,7 @@ RelationBuildDesc(RelationBuildDescInfo buildinfo,
         relation->rd_node.relNode = relation->rd_rel->relfilenode;
  
         /* make sure relation is marked as having no open file yet */
-       relation->rd_fd = -1;
+       relation->rd_smgr = NULL;
  
         /*
          * Insert newly created relation into relcache hash tables.
@@ -1303,7 +1265,7 @@ formrdesc(const char *relationName,
         relation->rd_targblock = InvalidBlockNumber;
  
         /* make sure relation is marked as having no open file yet */
-       relation->rd_fd = -1;
+       relation->rd_smgr = NULL;
  
         /*
          * initialize reference count
@@ -1481,30 +1443,6 @@ RelationSysNameCacheGetRelation(const char *relationName)
         return rd;
  }
  
-/*
- *             RelationNodeCacheGetRelation
- *
- *             As above, but lookup by relfilenode.
- *
- * NOTE: this must NOT try to revalidate invalidated nailed indexes, since
- * that could cause us to return an entry with a different relfilenode than
- * the caller asked for.  Currently this is used only by the buffer manager.
- * Really the bufmgr's idea of relations should be separated out from the
- * relcache ...
- */
-Relation
-RelationNodeCacheGetRelation(RelFileNode rnode)
-{
-       Relation        rd;
-
-       RelationNodeCacheLookup(rnode, rd);
-
-       if (RelationIsValid(rd))
-               RelationIncrementReferenceCount(rd);
-
-       return rd;
-}
-
  /*
   *             RelationIdGetRelation
   *
@@ -1635,14 +1573,8 @@ RelationReloadClassinfo(Relation relation)
                 elog(ERROR, "could not find tuple for system relation %u",
                          relation->rd_id);
         relp = (Form_pg_class) GETSTRUCT(pg_class_tuple);
-       if (relation->rd_node.relNode != relp->relfilenode)
-       {
-               /* We have to re-insert the entry into the relcache indexes */
-               RelationCacheDelete(relation);
-               memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
-               relation->rd_node.relNode = relp->relfilenode;
-               RelationCacheInsert(relation);
-       }
+       memcpy((char *) relation->rd_rel, (char *) relp, CLASS_TUPLE_SIZE);
+       relation->rd_node.relNode = relp->relfilenode;
         heap_freetuple(pg_class_tuple);
         /* Must adjust number of blocks after we know the new relfilenode */
         relation->rd_targblock = InvalidBlockNumber;
@@ -1672,10 +1604,10 @@ RelationClearRelation(Relation relation, bool rebuild)
          * ensures that the low-level file access state is updated after, say,
          * a vacuum truncation.
          */
-       if (relation->rd_fd >= 0)
+       if (relation->rd_smgr)
         {
-               smgrclose(DEFAULT_SMGR, relation);
-               relation->rd_fd = -1;
+               smgrclose(relation->rd_smgr);
+               relation->rd_smgr = NULL;
         }
  
         /*
@@ -1866,18 +1798,31 @@ RelationForgetRelation(Oid rid)
  }
  
  /*
- *             RelationIdInvalidateRelationCacheByRelationId
+ *             RelationCacheInvalidateEntry
   *
   *             This routine is invoked for SI cache flush messages.
   *
- *             We used to skip local relations, on the grounds that they could
- *             not be targets of cross-backend SI update messages; but it seems
- *             safer to process them, so that our *own* SI update messages will
- *             have the same effects during CommandCounterIncrement for both
- *             local and nonlocal relations.
+ * Any relcache entry matching the relid must be flushed.  (Note: caller has
+ * already determined that the relid belongs to our database or is a shared
+ * relation.)  If rnode isn't NULL, we must also ensure that any smgr cache
+ * entry matching that rnode is flushed.
+ *
+ * Ordinarily, if rnode is supplied then it will match the relfilenode of
+ * the target relid.  However, it's possible for rnode to be different if
+ * someone is engaged in a relfilenode change.  In that case we want to
+ * make sure we clear the right cache entries.  This has to be done here
+ * to keep things in sync between relcache and smgr cache --- we can't have
+ * someone flushing an smgr cache entry that a relcache entry still points
+ * to.
+ *
+ * We used to skip local relations, on the grounds that they could
+ * not be targets of cross-backend SI update messages; but it seems
+ * safer to process them, so that our *own* SI update messages will
+ * have the same effects during CommandCounterIncrement for both
+ * local and nonlocal relations.
   */
  void
-RelationIdInvalidateRelationCacheByRelationId(Oid relationId)
+RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode)
  {
         Relation        relation;
  
@@ -1886,14 +1831,27 @@ RelationIdInvalidateRelationCacheByRelationId(Oid relationId)
         if (PointerIsValid(relation))
         {
                 relcacheInvalsReceived++;
+               if (rnode)
+               {
+                       /* Need to be sure smgr is flushed, but don't do it twice */
+                       if (relation->rd_smgr == NULL ||
+                               !RelFileNodeEquals(*rnode, relation->rd_node))
+                               smgrclosenode(*rnode);
+               }
                 RelationFlushRelation(relation);
         }
+       else
+       {
+               if (rnode)
+                       smgrclosenode(*rnode);
+       }
  }
  
  /*
   * RelationCacheInvalidate
   *      Blow away cached relation descriptors that have zero reference counts,
- *      and rebuild those with positive reference counts.
+ *      and rebuild those with positive reference counts.  Also reset the smgr
+ *      relation cache.
   *
   *      This is currently used only to recover from SI message buffer overflow,
   *      so we do not touch new-in-transaction relations; they cannot be targets
@@ -1934,6 +1892,13 @@ RelationCacheInvalidate(void)
         {
                 relation = idhentry->reldesc;
  
+               /* Must close all smgr references to avoid leaving dangling ptrs */
+               if (relation->rd_smgr)
+               {
+                       smgrclose(relation->rd_smgr);
+                       relation->rd_smgr = NULL;
+               }
+
                 /* Ignore new relations, since they are never SI targets */
                 if (relation->rd_isnew)
                         continue;
@@ -1970,6 +1935,13 @@ RelationCacheInvalidate(void)
  
         rebuildList = nconc(rebuildFirstList, rebuildList);
  
+       /*
+        * Now zap any remaining smgr cache entries.  This must happen before
+        * we start to rebuild entries, since that may involve catalog fetches
+        * which will re-open catalog files.
+        */
+       smgrcloseall();
+
         /* Phase 2: rebuild the items found to need rebuild in phase 1 */
         foreach(l, rebuildList)
         {
@@ -2107,7 +2079,7 @@ RelationBuildLocalRelation(const char *relname,
         rel->rd_targblock = InvalidBlockNumber;
  
         /* make sure relation is marked as having no open file yet */
-       rel->rd_fd = -1;
+       rel->rd_smgr = NULL;
  
         RelationSetReferenceCount(rel, 1);
  
@@ -2233,12 +2205,6 @@ RelationCacheInitialize(void)
         RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
                                                                   &ctl, HASH_ELEM | HASH_FUNCTION);
  
-       ctl.keysize = sizeof(RelFileNode);
-       ctl.entrysize = sizeof(RelNodeCacheEnt);
-       ctl.hash = tag_hash;
-       RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE,
-                                                                       &ctl, HASH_ELEM | HASH_FUNCTION);
-
         /*
          * Try to load the relcache cache file.  If successful, we're done for
          * now.  Otherwise, initialize the cache with pre-made descriptors for
@@ -2406,65 +2372,6 @@ RelationCacheInitializePhase3(void)
         }
  }
  
-
-/* used by XLogInitCache */
-void           CreateDummyCaches(void);
-void           DestroyDummyCaches(void);
-
-void
-CreateDummyCaches(void)
-{
-       MemoryContext oldcxt;
-       HASHCTL         ctl;
-
-       if (!CacheMemoryContext)
-               CreateCacheMemoryContext();
-
-       oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
-
-       MemSet(&ctl, 0, sizeof(ctl));
-       ctl.keysize = sizeof(NameData);
-       ctl.entrysize = sizeof(RelNameCacheEnt);
-       RelationSysNameCache = hash_create("Relcache by name", INITRELCACHESIZE,
-                                                                          &ctl, HASH_ELEM);
-
-       ctl.keysize = sizeof(Oid);
-       ctl.entrysize = sizeof(RelIdCacheEnt);
-       ctl.hash = tag_hash;
-       RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
-                                                                 &ctl, HASH_ELEM | HASH_FUNCTION);
-
-       ctl.keysize = sizeof(RelFileNode);
-       ctl.entrysize = sizeof(RelNodeCacheEnt);
-       ctl.hash = tag_hash;
-       RelationNodeCache = hash_create("Relcache by rnode", INITRELCACHESIZE,
-                                                                       &ctl, HASH_ELEM | HASH_FUNCTION);
-
-       MemoryContextSwitchTo(oldcxt);
-}
-
-void
-DestroyDummyCaches(void)
-{
-       MemoryContext oldcxt;
-
-       if (!CacheMemoryContext)
-               return;
-
-       oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
-
-       if (RelationIdCache)
-               hash_destroy(RelationIdCache);
-       if (RelationSysNameCache)
-               hash_destroy(RelationSysNameCache);
-       if (RelationNodeCache)
-               hash_destroy(RelationNodeCache);
-
-       RelationIdCache = RelationSysNameCache = RelationNodeCache = NULL;
-
-       MemoryContextSwitchTo(oldcxt);
-}
-
  static void
  AttrDefaultFetch(Relation relation)
  {
@@ -3125,7 +3032,7 @@ load_relcache_init_file(void)
                 /*
                  * Reset transient-state fields in the relcache entry
                  */
-               rel->rd_fd = -1;
+               rel->rd_smgr = NULL;
                 rel->rd_targblock = InvalidBlockNumber;
                 if (rel->rd_isnailed)
                         RelationSetReferenceCount(rel, 1);
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c

index e93dcb8a84c7eb50d02bf1685dcaca7f0da18238..075269b4ad07cbea0b2c8a57c043ee2ead6308d6 100644 (file)
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.122 2004/02/08 22:28:57 neilc Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.123 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -33,6 +33,7 @@
  #include "catalog/pg_shadow.h"
  #include "libpq/libpq-be.h"
  #include "miscadmin.h"
+#include "storage/fd.h"
  #include "storage/ipc.h"
  #include "storage/pg_shmem.h"
  #include "utils/builtins.h"
diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h

index af113eb66a0ffbe64ca54413d2760afed072f035..226c5c2f99c045b217d2ba258259d95f9579be2b 100644 (file)
--- a/src/include/catalog/pg_database.h
+++ b/src/include/catalog/pg_database.h
@@ -8,7 +8,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.30 2003/11/29 22:40:58 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_database.h,v 1.31 2004/02/10 01:55:26 tgl Exp $
   *
   * NOTES
   *       the genbki.sh script reads this file and generates .bki
@@ -72,15 +72,6 @@ typedef FormData_pg_database *Form_pg_database;
  
  DATA(insert OID = 1 (  template1 PGUID ENCODING t t 0 0 0 "" _null_ _null_ ));
  DESCR("Default template database");
-
  #define TemplateDbOid                  1
  
-/* Just to mark OID as used for unused_oid script -:) */
-#define DATAMARKOID(x)
-
-DATAMARKOID(= 2)
-#define RecoveryDb     2
-
-#undef DATAMARKOID
-
  #endif   /* PG_DATABASE_H */
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h

index a0b523da3d41c14f17e96a1c6d9bf45a567ac85a..84706272decba1d241e2f366b8922f0050c0a989 100644 (file)
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.32 2003/11/29 22:41:13 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/sinval.h,v 1.33 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,6 +16,7 @@
  
  #include "storage/backendid.h"
  #include "storage/itemptr.h"
+#include "storage/relfilenode.h"
  
  
  /*
@@ -27,6 +28,13 @@
   * ID field).  -1 means a relcache inval message.  Other negative values
   * are available to identify other inval message types.
   *
+ * Relcache invalidation messages usually also cause invalidation of entries
+ * in the smgr's relation cache.  This means they must carry both logical
+ * and physical relation ID info (ie, both dbOID/relOID and RelFileNode).
+ * In some cases RelFileNode information is not available so the sender fills
+ * those fields with zeroes --- this is okay so long as no smgr cache flush
+ * is required.
+ *
   * Shared-inval events are initially driven by detecting tuple inserts,
   * updates and deletions in system catalogs (see CacheInvalidateHeapTuple).
   * An update generates two inval events, one for the old tuple and one for
@@ -63,6 +71,12 @@ typedef struct
         int16           id;                             /* type field --- must be first */
         Oid                     dbId;                   /* database ID, or 0 if a shared relation */
         Oid                     relId;                  /* relation ID */
+       RelFileNode     physId;                 /* physical file ID */
+       /*
+        * Note: it is likely that RelFileNode will someday be changed to
+        * include database ID.  In that case the dbId field will be redundant
+        * and should be removed to save space.
+        */
  } SharedInvalRelcacheMsg;
  
  typedef union
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h

index 0fd20fd436f3a0d8d49017490f1f3dcb7d49e7e7..738e436fb7d3320c8d3c22c841cedeb68fd2442d 100644 (file)
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.39 2003/11/29 22:41:13 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.40 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -16,36 +16,54 @@
  
  #include "access/xlog.h"
  #include "fmgr.h"
-#include "storage/relfilenode.h"
  #include "storage/block.h"
-#include "utils/rel.h"
+#include "storage/relfilenode.h"
+
+
+/*
+ * smgr.c maintains a table of SMgrRelation objects, which are essentially
+ * cached file handles.  An SMgrRelation is created (if not already present)
+ * by smgropen(), and destroyed by smgrclose().  Note that neither of these
+ * operations implies I/O; they just create or destroy a hashtable entry.
+ * (But smgrclose() may release associated resources, such as OS-level file
+ * descriptors.)
+ */
+typedef struct SMgrRelationData
+{
+       /* rnode is the hashtable lookup key, so it must be first! */
+       RelFileNode     smgr_rnode;             /* relation physical identifier */
  
+       /* additional public fields may someday exist here */
  
-#define SM_FAIL                        0
-#define SM_SUCCESS             1
+       /*
+        * Fields below here are intended to be private to smgr.c and its
+        * submodules.  Do not touch them from elsewhere.
+        */
+       int                     smgr_which;             /* storage manager selector */
  
-#define DEFAULT_SMGR   0
+       struct _MdfdVec *md_fd;         /* for md.c; NULL if not open */
+} SMgrRelationData;
  
-extern int     smgrinit(void);
-extern int     smgrcreate(int16 which, Relation reln);
-extern int     smgrunlink(int16 which, Relation reln);
-extern int smgrextend(int16 which, Relation reln, BlockNumber blocknum,
-                  char *buffer);
-extern int     smgropen(int16 which, Relation reln, bool failOK);
-extern int     smgrclose(int16 which, Relation reln);
-extern int smgrread(int16 which, Relation reln, BlockNumber blocknum,
-                char *buffer);
-extern int smgrwrite(int16 which, Relation reln, BlockNumber blocknum,
-                 char *buffer);
-extern int smgrblindwrt(int16 which, RelFileNode rnode,
-                        BlockNumber blkno, char *buffer);
-extern BlockNumber smgrnblocks(int16 which, Relation reln);
-extern BlockNumber smgrtruncate(int16 which, Relation reln,
-                        BlockNumber nblocks);
-extern int     smgrDoPendingDeletes(bool isCommit);
-extern int     smgrcommit(void);
-extern int     smgrabort(void);
-extern int     smgrsync(void);
+typedef SMgrRelationData *SMgrRelation;
+
+
+extern void smgrinit(void);
+extern SMgrRelation smgropen(RelFileNode rnode);
+extern void smgrclose(SMgrRelation reln);
+extern void smgrcloseall(void);
+extern void smgrclosenode(RelFileNode rnode);
+extern void smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo);
+extern void smgrscheduleunlink(SMgrRelation reln, bool isTemp);
+extern void smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo);
+extern void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern BlockNumber smgrnblocks(SMgrRelation reln);
+extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks);
+extern void smgrDoPendingDeletes(bool isCommit);
+extern void smgrcommit(void);
+extern void smgrabort(void);
+extern void smgrsync(void);
  
  extern void smgr_redo(XLogRecPtr lsn, XLogRecord *record);
  extern void smgr_undo(XLogRecPtr lsn, XLogRecord *record);
@@ -55,38 +73,18 @@ extern void smgr_desc(char *buf, uint8 xl_info, char *rec);
  /* internals: move me elsewhere -- ay 7/94 */
  
  /* in md.c */
-extern int     mdinit(void);
-extern int     mdcreate(Relation reln);
-extern int     mdunlink(RelFileNode rnode);
-extern int     mdextend(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mdopen(Relation reln);
-extern int     mdclose(Relation reln);
-extern int     mdread(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mdblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer);
-extern BlockNumber mdnblocks(Relation reln);
-extern BlockNumber mdtruncate(Relation reln, BlockNumber nblocks);
-extern int     mdcommit(void);
-extern int     mdabort(void);
-extern int     mdsync(void);
-
-/* mm.c */
-extern int     mminit(void);
-extern int     mmcreate(Relation reln);
-extern int     mmunlink(RelFileNode rnode);
-extern int     mmextend(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mmopen(Relation reln);
-extern int     mmclose(Relation reln);
-extern int     mmread(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
-extern int     mmblindwrt(RelFileNode rnode, BlockNumber blkno, char *buffer);
-extern BlockNumber mmnblocks(Relation reln);
-extern BlockNumber mmtruncate(Relation reln, BlockNumber nblocks);
-extern int     mmcommit(void);
-extern int     mmabort(void);
-
-extern int     mmshutdown(void);
-extern int     MMShmemSize(void);
+extern bool mdinit(void);
+extern bool mdclose(SMgrRelation reln);
+extern bool mdcreate(SMgrRelation reln, bool isRedo);
+extern bool mdunlink(RelFileNode rnode, bool isRedo);
+extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern BlockNumber mdnblocks(SMgrRelation reln);
+extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks);
+extern bool mdcommit(void);
+extern bool mdabort(void);
+extern bool mdsync(void);
  
  /* smgrtype.c */
  extern Datum smgrout(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h

index 467d15ee8395160de2c91b7cd04db52bc6757948..e7052726f27c9772b5f714b5fe429bc525329e13 100644 (file)
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.29 2003/11/29 22:41:15 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.30 2004/02/10 01:55:26 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -28,7 +28,9 @@ extern void CommandEndInvalidationMessages(bool isCommit);
  
  extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple);
  
-extern void CacheInvalidateRelcache(Oid relationId);
+extern void CacheInvalidateRelcache(Relation relation);
+
+extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple);
  
  extern void CacheRegisterSyscacheCallback(int cacheid,
                                                           CacheCallbackFunction func,
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h

index dfdb8491e3c4f70b7b1415d2569f59300d489abc..8532c5a737abf689a959cc904375167881e2b827 100644 (file)
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.72 2004/01/06 18:07:32 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.73 2004/02/10 01:55:27 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,7 +20,6 @@
  #include "catalog/pg_index.h"
  #include "rewrite/prs2lock.h"
  #include "storage/block.h"
-#include "storage/fd.h"
  #include "storage/relfilenode.h"
  
  
@@ -98,16 +97,16 @@ typedef struct PgStat_Info
         bool            index_scan_counted;
  } PgStat_Info;
  
+
  /*
   * Here are the contents of a relation cache entry.
   */
  
  typedef struct RelationData
  {
-       File            rd_fd;                  /* open file descriptor, or -1 if
-                                                                * none; this is NOT an operating
-                                                                * system file descriptor */
-       RelFileNode rd_node;            /* file node (physical identifier) */
+       RelFileNode rd_node;            /* relation physical identifier */
+       /* use "struct" here to avoid needing to include smgr.h: */
+       struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */
         BlockNumber rd_nblocks;         /* number of blocks in rel */
         BlockNumber rd_targblock;       /* current insertion target block, or
                                                                  * InvalidBlockNumber */
@@ -226,14 +225,6 @@ typedef Relation *RelationPtr;
   */
  #define RelationGetRelid(relation) ((relation)->rd_id)
  
-/*
- * RelationGetFile
- *       Returns the open file descriptor for the rel, or -1 if
- *       none. This is NOT an operating system file descriptor; see md.c
- *       for more information
- */
-#define RelationGetFile(relation) ((relation)->rd_fd)
-
  /*
   * RelationGetNumberOfAttributes
   *             Returns the number of attributes in a relation.
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h

index c7c6a9231f5d7447ce4f283a6ebdd48ed93ac5df..848d68b2077c0b4cca11fd1c382467c799b066c8 100644 (file)
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.38 2003/11/29 22:41:16 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/utils/relcache.h,v 1.39 2004/02/10 01:55:27 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -24,7 +24,6 @@ extern Relation RelationSysNameGetRelation(const char *relationName);
  
  /* finds an existing cache entry, but won't make a new one */
  extern Relation RelationIdCacheGetRelation(Oid relationId);
-extern Relation RelationNodeCacheGetRelation(RelFileNode rnode);
  
  extern void RelationClose(Relation relation);
  
@@ -61,7 +60,7 @@ extern Relation RelationBuildLocalRelation(const char *relname,
   */
  extern void RelationForgetRelation(Oid rid);
  
-extern void RelationIdInvalidateRelationCacheByRelationId(Oid relationId);
+extern void RelationCacheInvalidateEntry(Oid relationId, RelFileNode *rnode);
  
  extern void RelationCacheInvalidate(void);
  
@@ -73,11 +72,6 @@ extern void AtEOXact_RelationCache(bool commit);
  extern bool RelationIdIsInInitFile(Oid relationId);
  extern void RelationCacheInitFileInvalidate(bool beforeSend);
  
-/* XLOG support */
-extern void CreateDummyCaches(void);
-extern void DestroyDummyCaches(void);
-
-
  /* should be used only by relcache.c and catcache.c */
  extern bool criticalRelcachesBuilt;
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 10 Feb 2004 01:55:27 +0000 (01:55 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 10 Feb 2004 01:55:27 +0000 (01:55 +0000)
src/backend/access/nbtree/nbtree.c		patch \| blob \| history
src/backend/access/transam/slru.c		patch \| blob \| history
src/backend/access/transam/xact.c		patch \| blob \| history
src/backend/access/transam/xlog.c		patch \| blob \| history
src/backend/access/transam/xlogutils.c		patch \| blob \| history
src/backend/bootstrap/bootstrap.c		patch \| blob \| history
src/backend/catalog/heap.c		patch \| blob \| history
src/backend/catalog/index.c		patch \| blob \| history
src/backend/commands/copy.c		patch \| blob \| history
src/backend/commands/dbcommands.c		patch \| blob \| history
src/backend/commands/tablecmds.c		patch \| blob \| history
src/backend/commands/trigger.c		patch \| blob \| history
src/backend/commands/user.c		patch \| blob \| history
src/backend/commands/vacuum.c		patch \| blob \| history
src/backend/commands/vacuumlazy.c		patch \| blob \| history
src/backend/libpq/be-fsstubs.c		patch \| blob \| history
src/backend/rewrite/rewriteDefine.c		patch \| blob \| history
src/backend/rewrite/rewriteSupport.c		patch \| blob \| history
src/backend/storage/buffer/bufmgr.c		patch \| blob \| history
src/backend/storage/buffer/localbuf.c		patch \| blob \| history
src/backend/storage/ipc/ipci.c		patch \| blob \| history
src/backend/storage/smgr/Makefile		patch \| blob \| history
src/backend/storage/smgr/README		patch \| blob \| history
src/backend/storage/smgr/md.c		patch \| blob \| history
src/backend/storage/smgr/mm.c	[deleted file]	patch \| blob \| history
src/backend/storage/smgr/smgr.c		patch \| blob \| history
src/backend/storage/smgr/smgrtype.c		patch \| blob \| history
src/backend/tcop/utility.c		patch \| blob \| history
src/backend/utils/cache/inval.c		patch \| blob \| history
src/backend/utils/cache/relcache.c		patch \| blob \| history
src/backend/utils/init/miscinit.c		patch \| blob \| history
src/include/catalog/pg_database.h		patch \| blob \| history
src/include/storage/sinval.h		patch \| blob \| history
src/include/storage/smgr.h		patch \| blob \| history
src/include/utils/inval.h		patch \| blob \| history
src/include/utils/rel.h		patch \| blob \| history
src/include/utils/relcache.h		patch \| blob \| history