granicus.if.org Git - zfs/commitdiff
Illumos 3749 - zfs event processing should work on R/O root filesystems
author: Will Andrews <willa@spectralogic.com>
Thu, 31 Dec 2015 16:38:59 +0000 (17:38 +0100)
committer: Brian Behlendorf <behlendorf1@llnl.gov>
Tue, 12 Jan 2016 22:42:32 +0000 (14:42 -0800)
3749 zfs event processing should work on R/O root filesystems
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Approved by: Christopher Siden <christopher.siden@delphix.com>

References:
  https://www.illumos.org/issues/3749
  https://github.com/illumos/illumos-gate/commit/3cb69f7

Porting notes:
- [include/sys/spa_impl.h]
  - ffe9d38 Add generic errata infrastructure
  - 1421c89 Add visibility in to arc_read
- [include/sys/fm/fs/zfs.h]
  - 2668527 Add linux events
  - 6283f55 Support custom build directories and move includes
- [module/zfs/spa_config.c]
  - Updated spa_config_sync() to match illumos with the exception
    of a Linux specific block.

Ported-by: kernelOfTruth kerneloftruth@gmail.com
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
include/sys/fm/fs/zfs.h
include/sys/spa_impl.h
module/zfs/spa.c
module/zfs/spa_config.c

index 0d7eadd4f445903b6af4e2f046406252daa5ff3f..26f400303d80bea90a5f85acea7b761e2dcc6f97 100644 (file)
@@ -56,6 +56,7 @@ extern "C" {
 #define        FM_EREPORT_ZFS_IO_FAILURE               "io_failure"
 #define        FM_EREPORT_ZFS_PROBE_FAILURE            "probe_failure"
 #define        FM_EREPORT_ZFS_LOG_REPLAY               "log_replay"
+#define        FM_EREPORT_ZFS_CONFIG_CACHE_WRITE       "config_cache_write"
 #define        FM_EREPORT_ZFS_RESILVER_START           "resilver.start"
 #define        FM_EREPORT_ZFS_RESILVER_FINISH          "resilver.finish"
 #define        FM_EREPORT_ZFS_SCRUB_START              "scrub.start"
index 0b49c7147b1086c840c06b1bcbfb93bcfd28fed9..0bb6dccdc2f9d22767f2761da1f5ed13afffa901 100644 (file)
@@ -252,6 +252,7 @@ struct spa {
        uint64_t        spa_deadman_synctime;   /* deadman expiration timer */
        uint64_t        spa_errata;             /* errata issues detected */
        spa_stats_t     spa_stats;              /* assorted spa statistics */
+       hrtime_t        spa_ccw_fail_time;      /* Conf cache write fail time */
 
        /*
         * spa_refcount & spa_config_lock must be the last elements
index d7b800adfa61f8809329cd2e779c3bae2155168c..ffefbd3458104d272fab7b8ac370a9b57549ac16 100644 (file)
 #include "zfs_prop.h"
 #include "zfs_comutil.h"
 
+/*
+ * The interval, in seconds, at which failed configuration cache file writes
+ * should be retried; spa_async_tasks_pending() holds retries back until then.
+ */
+static int zfs_ccw_retry_interval = 300;
+
 typedef enum zti_modes {
        ZTI_MODE_FIXED,                 /* value is # of threads (min 1) */
        ZTI_MODE_BATCH,                 /* cpu-intensive; value is ignored */
@@ -5912,13 +5918,34 @@ spa_async_resume(spa_t *spa)
        mutex_exit(&spa->spa_async_lock);
 }
 
+/*
+ * Are any async tasks runnable?  Config updates pause after a failed write.
+ */
+static boolean_t
+spa_async_tasks_pending(spa_t *spa)
+{
+       uint_t non_config_tasks, config_task;
+       boolean_t config_task_suspended = B_FALSE;
+
+       non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE;
+       config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE;
+       if (spa->spa_ccw_fail_time != 0) {
+               /* Scale the interval in hrtime_t; int math may overflow. */
+               config_task_suspended =
+                   (gethrtime() - spa->spa_ccw_fail_time) <
+                   ((hrtime_t)zfs_ccw_retry_interval * NANOSEC);
+       }
+       return (non_config_tasks || (config_task && !config_task_suspended));
+}
+
 static void
 spa_async_dispatch(spa_t *spa)
 {
        mutex_enter(&spa->spa_async_lock);
-       if (spa->spa_async_tasks && !spa->spa_async_suspended &&
+       if (spa_async_tasks_pending(spa) &&
+           !spa->spa_async_suspended &&
            spa->spa_async_thread == NULL &&
-           rootdir != NULL && !vn_is_readonly(rootdir))
+           rootdir != NULL)
                spa->spa_async_thread = thread_create(NULL, 0,
                    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
        mutex_exit(&spa->spa_async_lock);
index 19432e0a024ea46788564dd6cbc4f33e6d682174..a62d25bd3ababd1aab20b305104c97e82a85ec3a 100644 (file)
@@ -26,6 +26,7 @@
  */
 
 #include <sys/spa.h>
+#include <sys/fm/fs/zfs.h>
 #include <sys/spa_impl.h>
 #include <sys/nvpair.h>
 #include <sys/uio.h>
@@ -145,22 +146,22 @@ out:
        kobj_close_file(file);
 }
 
-static void
+static int
 spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 {
        size_t buflen;
        char *buf;
        vnode_t *vp;
        int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
-       int error;
        char *temp;
+       int err;
 
        /*
         * If the nvlist is empty (NULL), then remove the old cachefile.
         */
        if (nvl == NULL) {
-               (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
-               return;
+               err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
+               return (err);
        }
 
        /*
@@ -181,16 +182,16 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
         * and overwritten in place.  In the event of an error the file is
         * unlinked to make sure we always have a consistent view of the data.
         */
-       error = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0);
-       if (error == 0) {
-               error = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0,
+       err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0);
+       if (err == 0) {
+               err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0,
                    UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, NULL);
-               if (error == 0)
-                       error = VOP_FSYNC(vp, FSYNC, kcred, NULL);
+               if (err == 0)
+                       err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
 
                (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
 
-               if (error)
+               if (err)
                        (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
        }
 #else
@@ -201,13 +202,14 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
         */
        (void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path);
 
-       error = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
-       if (error == 0) {
-               if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
-                   0, RLIM64_INFINITY, kcred, NULL) == 0 &&
-                   VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) {
-                       (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
-               }
+       err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
+       if (err == 0) {
+               err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
+                   0, RLIM64_INFINITY, kcred, NULL);
+               if (err == 0)
+                       err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
+               if (err == 0)
+                       err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
                (void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
        }
 
@@ -216,6 +218,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
 
        vmem_free(buf, buflen);
        kmem_free(temp, MAXPATHLEN);
+       return (err);
 }
 
 /*
@@ -233,6 +236,8 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
        spa_config_dirent_t *dp, *tdp;
        nvlist_t *nvl;
        char *pool_name;
+       boolean_t ccw_failure;
+       int error = 0;
 
        ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
@@ -244,6 +249,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
         * cachefile is changed, the new one is pushed onto this list, allowing
         * us to update previous cachefiles that no longer contain this pool.
         */
+       ccw_failure = B_FALSE;
        for (dp = list_head(&target->spa_config_list); dp != NULL;
            dp = list_next(&target->spa_config_list, dp)) {
                spa_t *spa = NULL;
@@ -290,10 +296,32 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
                        mutex_exit(&spa->spa_props_lock);
                }
 
-               spa_config_write(dp, nvl);
+               error = spa_config_write(dp, nvl);
+               if (error != 0)
+                       ccw_failure = B_TRUE;
                nvlist_free(nvl);
        }
 
+       if (ccw_failure) {
+               /*
+                * Keep trying so that configuration data is
+                * written if/when any temporary filesystem
+                * resource issues are resolved.
+                */
+               if (target->spa_ccw_fail_time == 0) {
+                       zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
+                           target, NULL, NULL, 0, 0);
+               }
+               target->spa_ccw_fail_time = gethrtime();
+               spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
+       } else {
+               /*
+                * Do not rate limit future attempts to update
+                * the config cache.
+                */
+               target->spa_ccw_fail_time = 0;
+       }
+
        /*
         * Remove any config entries older than the current one.
         */