granicus.if.org Git - postgresql/commitdiff
Tweak smgrblindwrt per advice from Vadim: add parameter indicating
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 10 Apr 2000 23:41:52 +0000 (23:41 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 10 Apr 2000 23:41:52 +0000 (23:41 +0000)
whether to do fsync or not, and if so (which should be seldom) just
do the fsync immediately.  This way we need not build data structures
in md.c/fd.c for blind writes.

src/backend/storage/buffer/bufmgr.c
src/backend/storage/smgr/md.c
src/backend/storage/smgr/mm.c
src/backend/storage/smgr/smgr.c
src/include/storage/smgr.h

index 0887f3d1ecda533c6988fc129e41ceb02724a18a..b5eb53b03a345df99eb74a8453a0e30673207131 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.78 2000/04/09 04:43:18 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.79 2000/04/10 23:41:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1127,7 +1127,8 @@ BufferSync()
                                                                                          bufHdr->blind.relname,
                                                                                          bufdb, bufrel,
                                                                                          bufHdr->tag.blockNum,
-                                                                                         (char *) MAKE_PTR(bufHdr->data));
+                                                                                         (char *) MAKE_PTR(bufHdr->data),
+                                                                                         true); /* must fsync */
                                        }
                                        else
                                        {
@@ -1529,7 +1530,8 @@ BufferReplace(BufferDesc *bufHdr)
                status = smgrblindwrt(DEFAULT_SMGR, bufHdr->blind.dbname,
                                                          bufHdr->blind.relname, bufdb, bufrel,
                                                          bufHdr->tag.blockNum,
-                                                         (char *) MAKE_PTR(bufHdr->data));
+                                                         (char *) MAKE_PTR(bufHdr->data),
+                                                         false); /* no fsync */
        }
 
 #ifndef OPTIMIZE_SINGLE
@@ -1544,9 +1546,11 @@ BufferReplace(BufferDesc *bufHdr)
                return FALSE;
 
        /* If we had marked this buffer as needing to be fsync'd, we can forget
-        * about that, because it's now the storage manager's responsibility.
+        * about that, because it's now the storage manager's responsibility
+        * (but only if we called smgrwrite, not smgrblindwrt).
         */
-       ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
+       if (reln != (Relation) NULL)
+               ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
 
        BufferFlushCount++;
 
index 233bbb0ac25ef9a11630bdd0f5f820c9c2079d64..b30b0386af8fc482fd00ff8490eea6d46449c7f9 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.65 2000/04/09 04:43:20 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.66 2000/04/10 23:41:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 typedef struct _MdfdVec
 {
        int                     mdfd_vfd;               /* fd number in vfd pool */
-       int                     mdfd_flags;             /* free, temporary */
+       int                     mdfd_flags;             /* fd status flags */
 
 /* these are the assigned bits in mdfd_flags: */
 #define MDFD_FREE              (1 << 0)/* unused entry */
-#define MDFD_TEMP              (1 << 1)/* close this entry at transaction end */
 
        int                     mdfd_lstbcnt;   /* most recent block count */
        int                     mdfd_nextFree;  /* next free vector */
@@ -72,8 +71,8 @@ static void mdclose_fd(int fd);
 static int _mdfd_getrelnfd(Relation reln);
 static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
 static MdfdVec *_mdfd_getseg(Relation reln, int blkno);
-static MdfdVec *_mdfd_blind_getseg(char *dbname, char *relname,
-                                                                  Oid dbid, Oid relid, int blkno);
+static int _mdfd_blind_getseg(char *dbname, char *relname,
+                                                         Oid dbid, Oid relid, int blkno);
 static int     _fdvec_alloc(void);
 static void _fdvec_free(int);
 static BlockNumber _mdnblocks(File file, Size blcksz);
@@ -572,7 +571,8 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
  *
  *             We have to be able to do this using only the name and OID of
  *             the database and relation in which the block belongs.  Otherwise
- *             this is just like mdwrite().
+ *             this is much like mdwrite().  If dofsync is TRUE, then we fsync
+ *             the file, making it more like mdflush().
  */
 int
 mdblindwrt(char *dbname,
@@ -580,15 +580,16 @@ mdblindwrt(char *dbname,
                   Oid dbid,
                   Oid relid,
                   BlockNumber blkno,
-                  char *buffer)
+                  char *buffer,
+                  bool dofsync)
 {
        int                     status;
        long            seekpos;
-       MdfdVec    *v;
+       int                     fd;
 
-       v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
+       fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
 
-       if (v == NULL)
+       if (fd < 0)
                return SM_FAIL;
 
 #ifndef LET_OS_MANAGE_FILESIZE
@@ -601,11 +602,22 @@ mdblindwrt(char *dbname,
        seekpos = (long) (BLCKSZ * (blkno));
 #endif
 
-       if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
+       if (lseek(fd, seekpos, SEEK_SET) != seekpos)
+       {
+               close(fd);
                return SM_FAIL;
+       }
 
        status = SM_SUCCESS;
-       if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
+
+       /* write and optionally sync the block */
+       if (write(fd, buffer, BLCKSZ) != BLCKSZ)
+               status = SM_FAIL;
+       else if (dofsync &&
+                        pg_fsync(fd) < 0)
+               status = SM_FAIL;
+
+       if (close(fd) < 0)
                status = SM_FAIL;
 
        return status;
@@ -633,7 +645,8 @@ mdmarkdirty(Relation reln, BlockNumber blkno)
  *
  *             We have to be able to do this using only the name and OID of
  *             the database and relation in which the block belongs.  Otherwise
- *             this is just like mdmarkdirty().
+ *             this is much like mdmarkdirty().  However, we do the fsync immediately
+ *             rather than building md/fd datastructures to postpone it till later.
  */
 int
 mdblindmarkdirty(char *dbname,
@@ -642,16 +655,23 @@ mdblindmarkdirty(char *dbname,
                                 Oid relid,
                                 BlockNumber blkno)
 {
-       MdfdVec    *v;
+       int                     status;
+       int                     fd;
 
-       v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
+       fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
 
-       if (v == NULL)
+       if (fd < 0)
                return SM_FAIL;
 
-       FileMarkDirty(v->mdfd_vfd);
+       status = SM_SUCCESS;
 
-       return SM_SUCCESS;
+       if (pg_fsync(fd) < 0)
+               status = SM_FAIL;
+
+       if (close(fd) < 0)
+               status = SM_FAIL;
+
+       return status;
 }
 
 /*
@@ -820,24 +840,15 @@ mdcommit()
                v = &Md_fdvec[i];
                if (v->mdfd_flags & MDFD_FREE)
                        continue;
-               if (v->mdfd_flags & MDFD_TEMP)
-               {
-                       /* Sync and close the file */
-                       mdclose_fd(i);
-               }
-               else
-               {
-                       /* Sync, but keep the file entry */
-
+               /* Sync the file entry */
 #ifndef LET_OS_MANAGE_FILESIZE
-                       for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
+               for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
 #else
-                       if (v != (MdfdVec *) NULL)
+               if (v != (MdfdVec *) NULL)
 #endif
-                       {
-                               if (FileSync(v->mdfd_vfd) < 0)
-                                       return SM_FAIL;
-                       }
+               {
+                       if (FileSync(v->mdfd_vfd) < 0)
+                               return SM_FAIL;
                }
        }
 
@@ -854,21 +865,9 @@ mdcommit()
 int
 mdabort()
 {
-       int                     i;
-       MdfdVec    *v;
-
-       for (i = 0; i < CurFd; i++)
-       {
-               v = &Md_fdvec[i];
-               if (v->mdfd_flags & MDFD_FREE)
-                       continue;
-               if (v->mdfd_flags & MDFD_TEMP)
-               {
-                       /* Close the file */
-                       mdclose_fd(i);
-               }
-       }
-
+       /* We don't actually have to do anything here.  fd.c will discard
+        * fsync-needed bits in its AtEOXact_Files() routine.
+        */
        return SM_SUCCESS;
 }
 
@@ -1057,102 +1056,52 @@ _mdfd_getseg(Relation reln, int blkno)
        return v;
 }
 
-/* Find the segment of the relation holding the specified block.
- * This is the same as _mdfd_getseg() except that we must work
- * "blind" with no Relation struct.
+/*
+ * Find the segment of the relation holding the specified block.
  *
- * NOTE: we have no easy way to tell whether a FD already exists for the
- * target relation, so we always make a new one.  This should probably
- * be improved somehow, but I doubt it's a significant performance issue
- * under normal circumstances.  The FD is marked to be closed at end of xact
- * so that we don't accumulate a lot of dead FDs.
+ * This performs the same work as _mdfd_getseg() except that we must work
+ * "blind" with no Relation struct.  We assume that we are not likely to
+ * touch the same relation again soon, so we do not create an FD entry for
+ * the relation --- we just open a kernel file descriptor which will be
+ * used and promptly closed.  The return value is the kernel descriptor,
+ * or -1 on failure.
  */
 
-static MdfdVec *
+static int
 _mdfd_blind_getseg(char *dbname, char *relname, Oid dbid, Oid relid,
                                   int blkno)
 {
-       MdfdVec    *v;
        char       *path;
        int                     fd;
-       int                     vfd;
 #ifndef LET_OS_MANAGE_FILESIZE
        int                     segno;
-       int                     targsegno;
 #endif
 
-       /* construct the path to the file and open it */
+       /* construct the path to the relation */
        path = relpath_blind(dbname, relname, dbid, relid);
 
-#ifndef __CYGWIN32__
-       fd = FileNameOpenFile(path, O_RDWR, 0600);
-#else
-       fd = FileNameOpenFile(path, O_RDWR | O_BINARY, 0600);
-#endif
-
-       if (fd < 0)
-               return NULL;
-
-       vfd = _fdvec_alloc();
-       if (vfd < 0)
-               return NULL;
-
-       Md_fdvec[vfd].mdfd_vfd = fd;
-       Md_fdvec[vfd].mdfd_flags = MDFD_TEMP;
-       Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
 #ifndef LET_OS_MANAGE_FILESIZE
-       Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
-
-#ifdef DIAGNOSTIC
-       if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE)
-               elog(FATAL, "segment too big on relopen!");
-#endif
-
-       targsegno = blkno / RELSEG_SIZE;
-       for (v = &Md_fdvec[vfd], segno = 1; segno <= targsegno; segno++)
+       /* append the '.segno', if needed */
+       segno = blkno / RELSEG_SIZE;
+       if (segno > 0)
        {
-               char       *segpath;
-               MdfdVec    *newv;
-               MemoryContext oldcxt;
+               char   *segpath = (char *) palloc(strlen(path) + 12);
 
-               segpath = (char *) palloc(strlen(path) + 12);
                sprintf(segpath, "%s.%d", path, segno);
-
-#ifndef __CYGWIN32__
-               fd = FileNameOpenFile(segpath, O_RDWR | O_CREAT, 0600);
-#else
-               fd = FileNameOpenFile(segpath, O_RDWR | O_BINARY | O_CREAT, 0600);
+               pfree(path);
+               path = segpath;
+       }
 #endif
 
-               pfree(segpath);
-
-               if (fd < 0)
-                       return (MdfdVec *) NULL;
-
-               /* allocate an mdfdvec entry for it */
-               oldcxt = MemoryContextSwitchTo(MdCxt);
-               newv = (MdfdVec *) palloc(sizeof(MdfdVec));
-               MemoryContextSwitchTo(oldcxt);
-
-               /* fill the entry */
-               newv->mdfd_vfd = fd;
-               newv->mdfd_flags = MDFD_TEMP;
-               newv->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
-               newv->mdfd_chain = (MdfdVec *) NULL;
-#ifdef DIAGNOSTIC
-               if (newv->mdfd_lstbcnt > RELSEG_SIZE)
-                       elog(FATAL, "segment too big on open!");
-#endif
-               v->mdfd_chain = newv;
-               v = newv;
-       }
+#ifndef __CYGWIN32__
+       fd = open(path, O_RDWR, 0600);
 #else
-       v = &Md_fdvec[vfd];
+       fd = open(path, O_RDWR | O_BINARY, 0600);
 #endif
 
        pfree(path);
 
-       return v;
+       return fd;
 }
 
 static BlockNumber
index fc3acead661545150b549b9423795fa7b2797393..a5b22cbcc5ca38e3a9582da1cfa6c225a9964a4c 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.18 2000/01/26 05:57:05 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.19 2000/04/10 23:41:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -478,7 +478,8 @@ mmblindwrt(char *dbstr,
                   Oid dbid,
                   Oid relid,
                   BlockNumber blkno,
-                  char *buffer)
+                  char *buffer,
+                  bool dofsync)
 {
        return SM_FAIL;
 }
index 839636b118b930a413e8d407c7a4eaa887627ff5..27cad952aeb57149eea765e86b49bbb38efe7634 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.33 2000/04/09 04:43:20 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.34 2000/04/10 23:41:52 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,7 +38,8 @@ typedef struct f_smgr
                                                           char *buffer);
        int                     (*smgr_blindwrt) (char *dbname, char *relname,
                                                                  Oid dbid, Oid relid,
-                                                                 BlockNumber blkno, char *buffer);
+                                                                 BlockNumber blkno, char *buffer,
+                                                                 bool dofsync);
        int                     (*smgr_markdirty) (Relation reln, BlockNumber blkno);
        int                     (*smgr_blindmarkdirty) (char *dbname, char *relname,
                                                                                Oid dbid, Oid relid,
@@ -293,7 +294,8 @@ smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
  *             this case, the buffer manager will call smgrblindwrt() with
  *             the name and OID of the database and the relation to which the
  *             buffer belongs.  Every storage manager must be able to force
- *             this page down to stable storage in this circumstance.
+ *             this page down to stable storage in this circumstance.  The
+ *             write should be synchronous if dofsync is true.
  */
 int
 smgrblindwrt(int16 which,
@@ -302,7 +304,8 @@ smgrblindwrt(int16 which,
                         Oid dbid,
                         Oid relid,
                         BlockNumber blkno,
-                        char *buffer)
+                        char *buffer,
+                        bool dofsync)
 {
        char       *dbstr;
        char       *relstr;
@@ -313,7 +316,7 @@ smgrblindwrt(int16 which,
        relstr = pstrdup(relname);
 
        status = (*(smgrsw[which].smgr_blindwrt)) (dbstr, relstr, dbid, relid,
-                                                                                          blkno, buffer);
+                                                                                          blkno, buffer, dofsync);
 
        if (status == SM_FAIL)
                elog(ERROR, "cannot write block %d of %s [%s] blind",
index 053a63196e5b23ce36e1ba4f09808294586d4331..bc0ec04bb2b5bb7fd92f934749440d9987f8b965 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: smgr.h,v 1.18 2000/04/09 04:43:18 tgl Exp $
+ * $Id: smgr.h,v 1.19 2000/04/10 23:41:45 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,7 +37,8 @@ extern int    smgrflush(int16 which, Relation reln, BlockNumber blocknum,
                                          char *buffer);
 extern int     smgrblindwrt(int16 which, char *dbname, char *relname,
                                                 Oid dbid, Oid relid,
-                                                BlockNumber blkno, char *buffer);
+                                                BlockNumber blkno, char *buffer,
+                                                bool dofsync);
 extern int     smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno);
 extern int     smgrblindmarkdirty(int16 which, char *dbname, char *relname,
                                                           Oid dbid, Oid relid,
@@ -62,7 +63,8 @@ extern int    mdread(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mdflush(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mdblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
-                                          BlockNumber blkno, char *buffer);
+                                          BlockNumber blkno, char *buffer,
+                                          bool dofsync);
 extern int     mdmarkdirty(Relation reln, BlockNumber blkno);
 extern int     mdblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
                                                         BlockNumber blkno);
@@ -84,7 +86,8 @@ extern int    mmread(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mmflush(Relation reln, BlockNumber blocknum, char *buffer);
 extern int     mmblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
-                                          BlockNumber blkno, char *buffer);
+                                          BlockNumber blkno, char *buffer,
+                                          bool dofsync);
 extern int     mmmarkdirty(Relation reln, BlockNumber blkno);
 extern int     mmblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
                                                         BlockNumber blkno);