From eddbf39756fb7d58f1bdc7582af7d6462410a3e1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 17 Jan 2007 16:25:01 +0000 Subject: [PATCH] Extend yesterday's patch so that the bgwriter is also told to forget pending fsyncs during DROP DATABASE. Obviously necessary in hindsight :-( --- src/backend/commands/dbcommands.c | 9 +- src/backend/postmaster/bgwriter.c | 14 +-- src/backend/storage/smgr/md.c | 152 +++++++++++++++++++++--------- src/include/storage/smgr.h | 4 +- 4 files changed, 124 insertions(+), 55 deletions(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 18340481d2..38db4ae3b9 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.189 2007/01/16 13:28:56 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.190 2007/01/17 16:25:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -40,6 +40,7 @@ #include "postmaster/bgwriter.h" #include "storage/freespace.h" #include "storage/procarray.h" +#include "storage/smgr.h" #include "utils/acl.h" #include "utils/builtins.h" #include "utils/flatfiles.h" @@ -643,6 +644,12 @@ dropdb(const char *dbname, bool missing_ok) */ FreeSpaceMapForgetDatabase(db_id); + /* + * Tell bgwriter to forget any pending fsync requests for files in the + * database; else it'll fail at next checkpoint. + */ + ForgetDatabaseFsyncRequests(db_id); + /* * On Windows, force a checkpoint so that the bgwriter doesn't hold any * open files, which would cause rmdir() to fail. diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 2665025336..1224f556e8 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -37,7 +37,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.35 2007/01/17 00:17:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.36 2007/01/17 16:25:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -103,7 +103,7 @@ typedef struct { RelFileNode rnode; - BlockNumber segno; /* InvalidBlockNumber means "revoke" */ + BlockNumber segno; /* see md.c for special values */ /* might add a real request-type field later; not needed yet */ } BgWriterRequest; @@ -695,16 +695,16 @@ RequestCheckpoint(bool waitforit, bool warnontime) * ForwardFsyncRequest * Forward a file-fsync request from a backend to the bgwriter * - * segno specifies which segment (not block!) of the relation needs to be - * fsync'd. If segno == InvalidBlockNumber, the meaning is to revoke any - * pending fsync requests for the entire relation (this message is sent - * when the relation is about to be deleted). - * * Whenever a backend is compelled to write directly to a relation * (which should be seldom, if the bgwriter is getting its job done), * the backend calls this routine to pass over knowledge that the relation * is dirty and must be fsync'd before next checkpoint. * + * segno specifies which segment (not block!) of the relation needs to be + * fsync'd. (Since the valid range is much less than BlockNumber, we can + * use high values for special flags; that's all internal to md.c, which + * see for details.) + * * If we are unable to pass over the request (at present, this can happen * if the shared memory queue is full), we return false. That forces * the backend to do its own fsync. We hope that will be even more seldom. diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index e3cc00ca1a..e8396b698d 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.126 2007/01/17 00:17:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.127 2007/01/17 16:25:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,10 @@ /* interval for calling AbsorbFsyncRequests in mdsync */ #define FSYNCS_PER_ABSORB 10 +/* special values for the segno arg to RememberFsyncRequest */ +#define FORGET_RELATION_FSYNC (InvalidBlockNumber) +#define FORGET_DATABASE_FSYNC (InvalidBlockNumber-1) + /* * On Windows, we have to interpret EACCES as possibly meaning the same as * ENOENT, because if a file is unlinked-but-not-yet-gone on that platform, @@ -258,30 +262,7 @@ mdunlink(RelFileNode rnode, bool isRedo) * We have to clean out any pending fsync requests for the doomed relation, * else the next mdsync() will fail. */ - if (pendingOpsTable) - { - /* standalone backend or startup process: fsync state is local */ - RememberFsyncRequest(rnode, InvalidBlockNumber); - } - else if (IsUnderPostmaster) - { - /* - * Notify the bgwriter about it. If we fail to queue the revoke - * message, we have to sleep and try again ... ugly, but hopefully - * won't happen often. - * - * XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with - * an error would leave the no-longer-used file still present on - * disk, which would be bad, so I'm inclined to assume that the - * bgwriter will always empty the queue soon. - */ - while (!ForwardFsyncRequest(rnode, InvalidBlockNumber)) - pg_usleep(10000L); /* 10 msec seems a good number */ - /* - * Note we don't wait for the bgwriter to actually absorb the - * revoke message; see mdsync() for the implications. - */ - } + ForgetRelationFsyncRequests(rnode); path = relpath(rnode); @@ -894,7 +875,8 @@ mdsync(void) * what we will do is retry the whole process after absorbing fsync * request messages again. Since mdunlink() queues a "revoke" message * before actually unlinking, the fsync request is guaranteed to be gone - * the second time if it really was this case. + * the second time if it really was this case. DROP DATABASE likewise + * has to tell us to forget fsync requests before it starts deletions. */ do { HASH_SEQ_STATUS hstat; @@ -1043,17 +1025,58 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg) * We stuff the fsync request into the local hash table for execution * during the bgwriter's next checkpoint. * - * segno == InvalidBlockNumber is a "revoke" request: remove any pending - * fsync requests for the whole relation. + * The range of possible segment numbers is way less than the range of + * BlockNumber, so we can reserve high values of segno for special purposes. + * We define two: FORGET_RELATION_FSYNC means to drop pending fsyncs for + * a relation, and FORGET_DATABASE_FSYNC means to drop pending fsyncs for + * a whole database. (These are a tad slow because the hash table has to be + * searched linearly, but it doesn't seem worth rethinking the table structure + * for them.) */ void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) { Assert(pendingOpsTable); - if (segno != InvalidBlockNumber) + if (segno == FORGET_RELATION_FSYNC) + { + /* Remove any pending requests for the entire relation */ + HASH_SEQ_STATUS hstat; + PendingOperationEntry *entry; + + hash_seq_init(&hstat, pendingOpsTable); + while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) + { + if (RelFileNodeEquals(entry->tag.rnode, rnode)) + { + /* Okay, delete this entry */ + if (hash_search(pendingOpsTable, &entry->tag, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "pendingOpsTable corrupted"); + } + } + } + else if (segno == FORGET_DATABASE_FSYNC) + { + /* Remove any pending requests for the entire database */ + HASH_SEQ_STATUS hstat; + PendingOperationEntry *entry; + + hash_seq_init(&hstat, pendingOpsTable); + while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) + { + if (entry->tag.rnode.dbNode == rnode.dbNode) + { + /* Okay, delete this entry */ + if (hash_search(pendingOpsTable, &entry->tag, + HASH_REMOVE, NULL) == NULL) + elog(ERROR, "pendingOpsTable corrupted"); + } + } + } + else { - /* Enter a request to fsync this segment */ + /* Normal case: enter a request to fsync this segment */ PendingOperationTag key; PendingOperationEntry *entry; bool found; @@ -1070,29 +1093,66 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) if (!found) /* new entry, so initialize it */ entry->failures = 0; } - else +} + +/* + * ForgetRelationFsyncRequests -- ensure any fsyncs for a rel are forgotten + */ +void +ForgetRelationFsyncRequests(RelFileNode rnode) +{ + if (pendingOpsTable) + { + /* standalone backend or startup process: fsync state is local */ + RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC); + } + else if (IsUnderPostmaster) { /* - * Remove any pending requests for the entire relation. (This is a - * tad slow but it doesn't seem worth rethinking the table structure.) + * Notify the bgwriter about it. If we fail to queue the revoke + * message, we have to sleep and try again ... ugly, but hopefully + * won't happen often. + * + * XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with + * an error would leave the no-longer-used file still present on + * disk, which would be bad, so I'm inclined to assume that the + * bgwriter will always empty the queue soon. */ - HASH_SEQ_STATUS hstat; - PendingOperationEntry *entry; + while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC)) + pg_usleep(10000L); /* 10 msec seems a good number */ + /* + * Note we don't wait for the bgwriter to actually absorb the + * revoke message; see mdsync() for the implications. + */ + } +} - hash_seq_init(&hstat, pendingOpsTable); - while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) - { - if (RelFileNodeEquals(entry->tag.rnode, rnode)) - { - /* Okay, delete this entry */ - if (hash_search(pendingOpsTable, &entry->tag, - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "pendingOpsTable corrupted"); - } - } +/* + * ForgetDatabaseFsyncRequests -- ensure any fsyncs for a DB are forgotten + */ +void +ForgetDatabaseFsyncRequests(Oid dbid) +{ + RelFileNode rnode; + + rnode.dbNode = dbid; + rnode.spcNode = 0; + rnode.relNode = 0; + + if (pendingOpsTable) + { + /* standalone backend or startup process: fsync state is local */ + RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC); + } + else if (IsUnderPostmaster) + { + /* see notes in ForgetRelationFsyncRequests */ + while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC)) + pg_usleep(10000L); /* 10 msec seems a good number */ } } + /* * _fdvec_alloc() -- Make a MdfdVec object. */ diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index ffa135c028..3beb14feba 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.57 2007/01/05 22:19:58 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.58 2007/01/17 16:25:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -106,6 +106,8 @@ extern void mdimmedsync(SMgrRelation reln); extern void mdsync(void); extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno); +extern void ForgetRelationFsyncRequests(RelFileNode rnode); +extern void ForgetDatabaseFsyncRequests(Oid dbid); /* smgrtype.c */ extern Datum smgrout(PG_FUNCTION_ARGS); -- 2.40.0