Extend yesterday's patch so that the bgwriter is also told to forget

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 17 Jan 2007 16:25:01 +0000 (16:25 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 17 Jan 2007 16:25:01 +0000 (16:25 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Jan 2007 16:25:01 +0000 (16:25 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Jan 2007 16:25:01 +0000 (16:25 +0000)
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c

index 18340481d238175d8294641e7d08323aa2ccb1f9..38db4ae3b9247aa06783f3414b01f6825254bb9d 100644 (file)
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -13,7 +13,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.189 2007/01/16 13:28:56 alvherre Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.190 2007/01/17 16:25:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -40,6 +40,7 @@
  #include "postmaster/bgwriter.h"
  #include "storage/freespace.h"
  #include "storage/procarray.h"
+#include "storage/smgr.h"
  #include "utils/acl.h"
  #include "utils/builtins.h"
  #include "utils/flatfiles.h"
@@ -643,6 +644,12 @@ dropdb(const char *dbname, bool missing_ok)
          */
         FreeSpaceMapForgetDatabase(db_id);
  
+       /*
+        * Tell bgwriter to forget any pending fsync requests for files in the
+        * database; else it'll fail at next checkpoint.
+        */
+       ForgetDatabaseFsyncRequests(db_id);
+
         /*
          * On Windows, force a checkpoint so that the bgwriter doesn't hold any
          * open files, which would cause rmdir() to fail.
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c

index 26650253361333d132b1e6cf8a5f8f1ae04b4f86..1224f556e8328e66f261de118d0bcff010ba6477 100644 (file)
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -37,7 +37,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.35 2007/01/17 00:17:20 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.36 2007/01/17 16:25:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -103,7 +103,7 @@
  typedef struct
  {
         RelFileNode rnode;
-       BlockNumber segno;                      /* InvalidBlockNumber means "revoke" */
+       BlockNumber segno;                      /* see md.c for special values */
         /* might add a real request-type field later; not needed yet */
  } BgWriterRequest;
  
@@ -695,16 +695,16 @@ RequestCheckpoint(bool waitforit, bool warnontime)
   * ForwardFsyncRequest
   *             Forward a file-fsync request from a backend to the bgwriter
   *
- * segno specifies which segment (not block!) of the relation needs to be
- * fsync'd.  If segno == InvalidBlockNumber, the meaning is to revoke any
- * pending fsync requests for the entire relation (this message is sent
- * when the relation is about to be deleted).
- *
   * Whenever a backend is compelled to write directly to a relation
   * (which should be seldom, if the bgwriter is getting its job done),
   * the backend calls this routine to pass over knowledge that the relation
   * is dirty and must be fsync'd before next checkpoint.
   *
+ * segno specifies which segment (not block!) of the relation needs to be
+ * fsync'd.  (Since the valid range is much less than BlockNumber, we can
+ * use high values for special flags; that's all internal to md.c, which
+ * see for details.)
+ *
   * If we are unable to pass over the request (at present, this can happen
   * if the shared memory queue is full), we return false.  That forces
   * the backend to do its own fsync.  We hope that will be even more seldom.
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c

index e3cc00ca1a92fc1633267741f2315d2d341e4eed..e8396b698d4036fcfc8dbbbcdfe99a3aaa31cd12 100644 (file)
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.126 2007/01/17 00:17:21 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.127 2007/01/17 16:25:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -31,6 +31,10 @@
  /* interval for calling AbsorbFsyncRequests in mdsync */
  #define FSYNCS_PER_ABSORB              10
  
+/* special values for the segno arg to RememberFsyncRequest */
+#define FORGET_RELATION_FSYNC  (InvalidBlockNumber)
+#define FORGET_DATABASE_FSYNC  (InvalidBlockNumber-1)
+
  /*
   * On Windows, we have to interpret EACCES as possibly meaning the same as
   * ENOENT, because if a file is unlinked-but-not-yet-gone on that platform,
@@ -258,30 +262,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
          * We have to clean out any pending fsync requests for the doomed relation,
          * else the next mdsync() will fail.
          */
-       if (pendingOpsTable)
-       {
-               /* standalone backend or startup process: fsync state is local */
-               RememberFsyncRequest(rnode, InvalidBlockNumber);
-       }
-       else if (IsUnderPostmaster)
-       {
-               /*
-                * Notify the bgwriter about it.  If we fail to queue the revoke
-                * message, we have to sleep and try again ... ugly, but hopefully
-                * won't happen often.
-                *
-                * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with
-                * an error would leave the no-longer-used file still present on
-                * disk, which would be bad, so I'm inclined to assume that the
-                * bgwriter will always empty the queue soon.
-                */
-               while (!ForwardFsyncRequest(rnode, InvalidBlockNumber))
-                       pg_usleep(10000L);      /* 10 msec seems a good number */
-               /*
-                * Note we don't wait for the bgwriter to actually absorb the
-                * revoke message; see mdsync() for the implications.
-                */
-       }
+       ForgetRelationFsyncRequests(rnode);
  
         path = relpath(rnode);
  
@@ -894,7 +875,8 @@ mdsync(void)
          * what we will do is retry the whole process after absorbing fsync
          * request messages again.  Since mdunlink() queues a "revoke" message
          * before actually unlinking, the fsync request is guaranteed to be gone
-        * the second time if it really was this case.
+        * the second time if it really was this case.  DROP DATABASE likewise
+        * has to tell us to forget fsync requests before it starts deletions.
          */
         do {
                 HASH_SEQ_STATUS hstat;
@@ -1043,17 +1025,58 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
   * We stuff the fsync request into the local hash table for execution
   * during the bgwriter's next checkpoint.
   *
- * segno == InvalidBlockNumber is a "revoke" request: remove any pending
- * fsync requests for the whole relation.
+ * The range of possible segment numbers is way less than the range of
+ * BlockNumber, so we can reserve high values of segno for special purposes.
+ * We define two: FORGET_RELATION_FSYNC means to drop pending fsyncs for
+ * a relation, and FORGET_DATABASE_FSYNC means to drop pending fsyncs for
+ * a whole database.  (These are a tad slow because the hash table has to be
+ * searched linearly, but it doesn't seem worth rethinking the table structure
+ * for them.)
   */
  void
  RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
  {
         Assert(pendingOpsTable);
  
-       if (segno != InvalidBlockNumber)
+       if (segno == FORGET_RELATION_FSYNC)
+       {
+               /* Remove any pending requests for the entire relation */
+               HASH_SEQ_STATUS hstat;
+               PendingOperationEntry *entry;
+
+               hash_seq_init(&hstat, pendingOpsTable);
+               while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
+               {
+                       if (RelFileNodeEquals(entry->tag.rnode, rnode))
+                       {
+                               /* Okay, delete this entry */
+                               if (hash_search(pendingOpsTable, &entry->tag,
+                                                               HASH_REMOVE, NULL) == NULL)
+                                       elog(ERROR, "pendingOpsTable corrupted");
+                       }
+               }
+       }
+       else if (segno == FORGET_DATABASE_FSYNC)
+       {
+               /* Remove any pending requests for the entire database */
+               HASH_SEQ_STATUS hstat;
+               PendingOperationEntry *entry;
+
+               hash_seq_init(&hstat, pendingOpsTable);
+               while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
+               {
+                       if (entry->tag.rnode.dbNode == rnode.dbNode)
+                       {
+                               /* Okay, delete this entry */
+                               if (hash_search(pendingOpsTable, &entry->tag,
+                                                               HASH_REMOVE, NULL) == NULL)
+                                       elog(ERROR, "pendingOpsTable corrupted");
+                       }
+               }
+       }
+       else
         {
-               /* Enter a request to fsync this segment */
+               /* Normal case: enter a request to fsync this segment */
                 PendingOperationTag key;
                 PendingOperationEntry *entry;
                 bool            found;
@@ -1070,29 +1093,66 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
                 if (!found)                             /* new entry, so initialize it */
                         entry->failures = 0;
         }
-       else
+}
+
+/*
+ * ForgetRelationFsyncRequests -- ensure any fsyncs for a rel are forgotten
+ */
+void
+ForgetRelationFsyncRequests(RelFileNode rnode)
+{
+       if (pendingOpsTable)
+       {
+               /* standalone backend or startup process: fsync state is local */
+               RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC);
+       }
+       else if (IsUnderPostmaster)
         {
                 /*
-                * Remove any pending requests for the entire relation.  (This is a
-                * tad slow but it doesn't seem worth rethinking the table structure.)
+                * Notify the bgwriter about it.  If we fail to queue the revoke
+                * message, we have to sleep and try again ... ugly, but hopefully
+                * won't happen often.
+                *
+                * XXX should we CHECK_FOR_INTERRUPTS in this loop?  Escaping with
+                * an error would leave the no-longer-used file still present on
+                * disk, which would be bad, so I'm inclined to assume that the
+                * bgwriter will always empty the queue soon.
                  */
-               HASH_SEQ_STATUS hstat;
-               PendingOperationEntry *entry;
+               while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))
+                       pg_usleep(10000L);      /* 10 msec seems a good number */
+               /*
+                * Note we don't wait for the bgwriter to actually absorb the
+                * revoke message; see mdsync() for the implications.
+                */
+       }
+}
  
-               hash_seq_init(&hstat, pendingOpsTable);
-               while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
-               {
-                       if (RelFileNodeEquals(entry->tag.rnode, rnode))
-                       {
-                               /* Okay, delete this entry */
-                               if (hash_search(pendingOpsTable, &entry->tag,
-                                                               HASH_REMOVE, NULL) == NULL)
-                                       elog(ERROR, "pendingOpsTable corrupted");
-                       }
-               }
+/*
+ * ForgetDatabaseFsyncRequests -- ensure any fsyncs for a DB are forgotten
+ */
+void
+ForgetDatabaseFsyncRequests(Oid dbid)
+{
+       RelFileNode rnode;
+
+       rnode.dbNode = dbid;
+       rnode.spcNode = 0;
+       rnode.relNode = 0;
+
+       if (pendingOpsTable)
+       {
+               /* standalone backend or startup process: fsync state is local */
+               RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC);
+       }
+       else if (IsUnderPostmaster)
+       {
+               /* see notes in ForgetRelationFsyncRequests */
+               while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC))
+                       pg_usleep(10000L);      /* 10 msec seems a good number */
         }
  }
  
+
  /*
   *     _fdvec_alloc() -- Make a MdfdVec object.
   */
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h

index ffa135c028446b7cbc292edb7629ea66c350f4e6..3beb14febaf13bca85a620287a921167942efa55 100644 (file)
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.57 2007/01/05 22:19:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.58 2007/01/17 16:25:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -106,6 +106,8 @@ extern void mdimmedsync(SMgrRelation reln);
  extern void mdsync(void);
  
  extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
+extern void ForgetRelationFsyncRequests(RelFileNode rnode);
+extern void ForgetDatabaseFsyncRequests(Oid dbid);
  
  /* smgrtype.c */
  extern Datum smgrout(PG_FUNCTION_ARGS);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 17 Jan 2007 16:25:01 +0000 (16:25 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 17 Jan 2007 16:25:01 +0000 (16:25 +0000)
src/backend/commands/dbcommands.c		patch \| blob \| history
src/backend/postmaster/bgwriter.c		patch \| blob \| history
src/backend/storage/smgr/md.c		patch \| blob \| history
src/include/storage/smgr.h		patch \| blob \| history