]> granicus.if.org Git - postgresql/commitdiff
Treat directory open failures as hard errors in ResetUnloggedRelations().
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 5 Dec 2017 01:52:48 +0000 (20:52 -0500)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 5 Dec 2017 01:52:59 +0000 (20:52 -0500)
Previously, this code just reported such problems at LOG level and kept
going.  The problem with this approach is that transient failures (e.g.,
ENFILE) could prevent us from resetting unlogged relations to empty,
yet allow recovery to appear to complete successfully.  That seems like
a data corruption hazard large enough to treat such problems as reasons
to fail startup.

For the same reason, treat unlink failures for unlogged files as hard
errors not just LOG messages.  It's a little odd that we did it like that
when file-level errors in other steps (copy_file, fsync_fname) are ERRORs.

The sole case that I left alone is that ENOENT failure on a tablespace
(not database) directory is not an error, though it will now be logged
rather than just silently ignored.  This is to cover the scenario where
a previous DROP TABLESPACE removed the tablespace directory but failed
before removing the pg_tblspc symlink.  I'm not sure that that's very
likely in practice, but that seems like the only real excuse for the
old behavior here, so let's allow for it.  (As coded, this will also
allow ENOENT on $PGDATA/base/.  But since we'll fail soon enough if
that's gone, I don't think we need to complicate this code by
distinguishing that from a true tablespace case.)

Discussion: https://postgr.es/m/21040.1512418508@sss.pgh.pa.us

src/backend/storage/file/reinit.c

index 99c443c75361dfefdd212bfad31b55f17da39511..00678cb182885e17d1bb441bf7c9586f47402462 100644 (file)
@@ -98,7 +98,9 @@ ResetUnloggedRelations(int op)
        MemoryContextDelete(tmpctx);
 }
 
-/* Process one tablespace directory for ResetUnloggedRelations */
+/*
+ * Process one tablespace directory for ResetUnloggedRelations
+ */
 static void
 ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 {
@@ -107,29 +109,32 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
        char            dbspace_path[MAXPGPATH * 2];
 
        ts_dir = AllocateDir(tsdirname);
-       if (ts_dir == NULL)
+
+       /*
+        * If we get ENOENT on a tablespace directory, log it and return.  This
+        * can happen if a previous DROP TABLESPACE crashed between removing the
+        * tablespace directory and removing the symlink in pg_tblspc.  We don't
+        * really want to prevent database startup in that scenario, so let it
+        * pass instead.  Any other type of error will be reported by ReadDir
+        * (causing a startup failure).
+        */
+       if (ts_dir == NULL && errno == ENOENT)
        {
-               /* anything except ENOENT is fishy */
-               if (errno != ENOENT)
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open directory \"%s\": %m",
-                                                       tsdirname)));
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not open directory \"%s\": %m",
+                                               tsdirname)));
                return;
        }
 
        while ((de = ReadDir(ts_dir, tsdirname)) != NULL)
        {
-               int                     i = 0;
-
                /*
                 * We're only interested in the per-database directories, which have
                 * numeric names.  Note that this code will also (properly) ignore "."
                 * and "..".
                 */
-               while (isdigit((unsigned char) de->d_name[i]))
-                       ++i;
-               if (de->d_name[i] != '\0' || i == 0)
+               if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
                        continue;
 
                snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
@@ -140,7 +145,9 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
        FreeDir(ts_dir);
 }
 
-/* Process one per-dbspace directory for ResetUnloggedRelations */
+/*
+ * Process one per-dbspace directory for ResetUnloggedRelations
+ */
 static void
 ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 {
@@ -158,20 +165,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
         */
        if ((op & UNLOGGED_RELATION_CLEANUP) != 0)
        {
-               HTAB       *hash = NULL;
+               HTAB       *hash;
                HASHCTL         ctl;
 
-               /* Open the directory. */
-               dbspace_dir = AllocateDir(dbspacedirname);
-               if (dbspace_dir == NULL)
-               {
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open directory \"%s\": %m",
-                                                       dbspacedirname)));
-                       return;
-               }
-
                /*
                 * It's possible that someone could create a ton of unlogged relations
                 * in the same database & tablespace, so we'd better use a hash table
@@ -179,11 +175,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                 * need to be reset.  Otherwise, this cleanup operation would be
                 * O(n^2).
                 */
+               memset(&ctl, 0, sizeof(ctl));
                ctl.keysize = sizeof(unlogged_relation_entry);
                ctl.entrysize = sizeof(unlogged_relation_entry);
                hash = hash_create("unlogged hash", 32, &ctl, HASH_ELEM);
 
                /* Scan the directory. */
+               dbspace_dir = AllocateDir(dbspacedirname);
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
                        ForkNumber      forkNum;
@@ -222,21 +220,9 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                }
 
                /*
-                * Now, make a second pass and remove anything that matches. First,
-                * reopen the directory.
+                * Now, make a second pass and remove anything that matches.
                 */
                dbspace_dir = AllocateDir(dbspacedirname);
-               if (dbspace_dir == NULL)
-               {
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open directory \"%s\": %m",
-                                                       dbspacedirname)));
-                       hash_destroy(hash);
-                       return;
-               }
-
-               /* Scan the directory. */
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
                        ForkNumber      forkNum;
@@ -266,15 +252,11 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                        {
                                snprintf(rm_path, sizeof(rm_path), "%s/%s",
                                                 dbspacedirname, de->d_name);
-
-                               /*
-                                * It's tempting to actually throw an error here, but since
-                                * this code gets run during database startup, that could
-                                * result in the database failing to start.  (XXX Should we do
-                                * it anyway?)
-                                */
-                               if (unlink(rm_path))
-                                       elog(LOG, "could not unlink file \"%s\": %m", rm_path);
+                               if (unlink(rm_path) < 0)
+                                       ereport(ERROR,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("could not remove file \"%s\": %m",
+                                                                       rm_path)));
                                else
                                        elog(DEBUG2, "unlinked file \"%s\"", rm_path);
                        }
@@ -294,19 +276,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
         */
        if ((op & UNLOGGED_RELATION_INIT) != 0)
        {
-               /* Open the directory. */
-               dbspace_dir = AllocateDir(dbspacedirname);
-               if (dbspace_dir == NULL)
-               {
-                       /* we just saw this directory, so it really ought to be there */
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open directory \"%s\": %m",
-                                                       dbspacedirname)));
-                       return;
-               }
-
                /* Scan the directory. */
+               dbspace_dir = AllocateDir(dbspacedirname);
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
                        ForkNumber      forkNum;
@@ -350,16 +321,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
                 * (especially the metadata ones) at once.
                 */
                dbspace_dir = AllocateDir(dbspacedirname);
-               if (dbspace_dir == NULL)
-               {
-                       /* we just saw this directory, so it really ought to be there */
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not open directory \"%s\": %m",
-                                                       dbspacedirname)));
-                       return;
-               }
-
                while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
                {
                        ForkNumber      forkNum;
@@ -388,6 +349,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
 
                FreeDir(dbspace_dir);
 
+               /*
+                * Lastly, fsync the database directory itself, ensuring the
+                * filesystem remembers the file creations and deletions we've done.
+                * We don't bother with this during a call that does only
+                * UNLOGGED_RELATION_CLEANUP, because if recovery crashes before we
+                * get to doing UNLOGGED_RELATION_INIT, we'll redo the cleanup step
+                * too at the next startup attempt.
+                */
                fsync_fname(dbspacedirname, true);
        }
 }