From 9cb91f90c95906347cd4d42fcf24a12a6e6a679e Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 18 Apr 2008 06:48:38 +0000 Subject: [PATCH] Fix two race conditions between the pending unlink mechanism that was put in place to prevent reusing relation OIDs before next checkpoint, and DROP DATABASE. First, if a database was dropped, bgwriter would still try to unlink the files that the rmtree() call by the DROP DATABASE command has already deleted, or is just about to delete. Second, if a database is dropped, and another database is created with the same OID, bgwriter would in the worst case delete a relation in the new database that happened to get the same OID as a dropped relation in the old database. To fix these race conditions: - make rmtree() ignore ENOENT errors. This fixes the 1st race condition. - make ForgetDatabaseFsyncRequests forget unlink requests as well. - force checkpoint on in dropdb on all platforms Since ForgetDatabaseFsyncRequests() is asynchronous, the 2nd change isn't enough on its own to fix the problem of dropping and creating a database with same OID, but forcing a checkpoint on DROP DATABASE makes it sufficient. Per Tom Lane's bug report and proposal. Backpatch to 8.3. --- src/backend/commands/dbcommands.c | 16 ++++++++------- src/backend/storage/smgr/md.c | 33 ++++++++++++++++++++++++++----- src/port/dirmod.c | 25 ++++++++++++++++++++--- 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 857db388a8..7055b0fe98 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -13,7 +13,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.206 2008/04/16 23:59:40 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.207 2008/04/18 06:48:38 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -715,18 +715,20 @@ dropdb(const char *dbname, bool missing_ok) pgstat_drop_database(db_id); /* - * Tell bgwriter to forget any pending fsync requests for files in the - * database; else it'll fail at next checkpoint. + * Tell bgwriter to forget any pending fsync and unlink requests for files + * in the database; else the fsyncs will fail at next checkpoint, or worse, + * it will delete files that belong to a newly created database with the + * same OID. */ ForgetDatabaseFsyncRequests(db_id); /* - * On Windows, force a checkpoint so that the bgwriter doesn't hold any - * open files, which would cause rmdir() to fail. + * Force a checkpoint to make sure the bgwriter has received the message + * sent by ForgetDatabaseFsyncRequests. On Windows, this also ensures that + * the bgwriter doesn't hold any open files, which would cause rmdir() to + * fail. */ -#ifdef WIN32 RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); -#endif /* * Remove all tablespace subdirs belonging to the database. diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 6ea4a00b01..fdc7c7d072 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.136 2008/03/10 20:06:27 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.137 2008/04/18 06:48:38 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -1196,8 +1196,11 @@ mdpostckpt(void) if (unlink(path) < 0) { /* - * ENOENT shouldn't happen either, but it doesn't really matter - * because we would've deleted it now anyway. + * There's a race condition, when the database is dropped at the + * same time that we process the pending unlink requests. If the + * DROP DATABASE deletes the file before we do, we will get ENOENT + * here. rmtree() also has to ignore ENOENT errors, to deal with + * the possibility that we delete the file first. */ if (errno != ENOENT) ereport(WARNING, @@ -1321,7 +1324,11 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) /* Remove any pending requests for the entire database */ HASH_SEQ_STATUS hstat; PendingOperationEntry *entry; + ListCell *cell, + *prev, + *next; + /* Remove fsync requests */ hash_seq_init(&hstat, pendingOpsTable); while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) { @@ -1331,6 +1338,22 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) entry->canceled = true; } } + + /* Remove unlink requests */ + prev = NULL; + for (cell = list_head(pendingUnlinks); cell; cell = next) + { + PendingUnlinkEntry *entry = (PendingUnlinkEntry *) lfirst(cell); + + next = lnext(cell); + if (entry->rnode.dbNode == rnode.dbNode) + { + pendingUnlinks = list_delete_cell(pendingUnlinks, cell, prev); + pfree(entry); + } + else + prev = cell; + } } else if (segno == UNLINK_RELATION_REQUEST) { @@ -1386,7 +1409,7 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) } /* - * ForgetRelationFsyncRequests -- ensure any fsyncs for a rel are forgotten + * ForgetRelationFsyncRequests -- forget any fsyncs for a rel */ void ForgetRelationFsyncRequests(RelFileNode rnode) @@ -1419,7 +1442,7 @@ ForgetRelationFsyncRequests(RelFileNode rnode) } /* - * ForgetDatabaseFsyncRequests -- ensure any fsyncs for a DB are forgotten + * ForgetDatabaseFsyncRequests -- forget any fsyncs and unlinks for a DB */ void ForgetDatabaseFsyncRequests(Oid dbid) diff --git a/src/port/dirmod.c b/src/port/dirmod.c index 087116c42c..fa95254966 100644 --- a/src/port/dirmod.c +++ b/src/port/dirmod.c @@ -10,7 +10,7 @@ * Win32 (NT, Win2k, XP). replace() doesn't work on Win95/98/Me. * * IDENTIFICATION - * $PostgreSQL: pgsql/src/port/dirmod.c,v 1.53 2008/04/11 23:53:00 tgl Exp $ + * $PostgreSQL: pgsql/src/port/dirmod.c,v 1.54 2008/04/18 06:48:38 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -406,8 +406,24 @@ rmtree(char *path, bool rmtopdir) { snprintf(filepath, MAXPGPATH, "%s/%s", path, *filename); + /* + * It's ok if the file is not there anymore; we were just about to + * delete it anyway. + * + * This is not an academic possibility. One scenario where this + * happens is when bgwriter has a pending unlink request for a file + * in a database that's being dropped. In dropdb(), we call + * ForgetDatabaseFsyncRequests() to flush out any such pending unlink + * requests, but because that's asynchronous, it's not guaranteed + * that the bgwriter receives the message in time. + */ if (lstat(filepath, &statbuf) != 0) - goto report_and_fail; + { + if (errno != ENOENT) + goto report_and_fail; + else + continue; + } if (S_ISDIR(statbuf.st_mode)) { @@ -422,7 +438,10 @@ rmtree(char *path, bool rmtopdir) else { if (unlink(filepath) != 0) - goto report_and_fail; + { + if (errno != ENOENT) + goto report_and_fail; + } } } -- 2.40.0