From ca95f70dff2915a1a5d838ecafc28646f5f5a42e Mon Sep 17 00:00:00 2001
From: Olaf Faaland
Date: Thu, 9 May 2019 10:08:05 -0700
Subject: [PATCH] zpool import progress kstat

When an import requires a long MMP activity check, or when the user
requests pool recovery, the import may take a long time.  The user may
not know why, or be able to tell whether the import is progressing or
is hung.

Add a kstat which lists all imports currently being processed by the
kernel (currently only one at a time is possible, but the kstat allows
for more than one).  The kstat is /proc/spl/kstat/zfs/import_progress.

The kstat contents are as follows:

pool_guid            load_state     multihost_secs max_txg      pool_name
16667015954387398    3              15             0            tank3

load_state: the value of spa_load_state
multihost_secs: seconds until the end of the multihost activity
    check; if over, or none required, this is 0
max_txg: current spa_load_max_txg, if rewind is occurring

This could be used by outside tools, such as a pacemaker resource
agent, to report import progress, or as a part of manual
troubleshooting.  The zpool import subcommand could also be modified
to report this information.

Reviewed-by: Brian Behlendorf
Signed-off-by: Olaf Faaland
Closes #8696
---
 include/sys/spa.h     |   8 ++
 module/zfs/spa.c      |  26 +++++-
 module/zfs/spa_misc.c | 229 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 261 insertions(+), 2 deletions(-)

diff --git a/include/sys/spa.h b/include/sys/spa.h
index 343977b30..23434edbc 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -965,6 +965,14 @@ extern void spa_iostats_trim_add(spa_t *spa, trim_type_t type,
     uint64_t extents_written, uint64_t bytes_written,
     uint64_t extents_skipped, uint64_t bytes_skipped,
     uint64_t extents_failed, uint64_t bytes_failed);
+extern void spa_import_progress_add(spa_t *spa);
+extern void spa_import_progress_remove(uint64_t spa_guid);
+extern int spa_import_progress_set_mmp_check(uint64_t pool_guid,
+    uint64_t mmp_sec_remaining);
+extern int spa_import_progress_set_max_txg(uint64_t pool_guid,
+    uint64_t max_txg);
+extern int spa_import_progress_set_state(uint64_t pool_guid,
+    spa_load_state_t spa_load_state);
 
 /* Pool configuration locks */
 extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
diff --git a/module/zfs/spa.c b/module/zfs/spa.c
index 4d26d698e..eb3ff91a0 100644
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -1437,6 +1437,7 @@ spa_unload(spa_t *spa)
 
 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
 
+	spa_import_progress_remove(spa_guid(spa));
 	spa_load_note(spa, "UNLOADING");
 
 	/*
@@ -2375,6 +2376,8 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
 	int error;
 
 	spa->spa_load_state = state;
+	(void) spa_import_progress_set_state(spa_guid(spa),
+	    spa_load_state(spa));
 
 	gethrestime(&spa->spa_loaded_ts);
 	error = spa_load_impl(spa, type, &ereport);
@@ -2397,6 +2400,9 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type)
 	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
 	spa->spa_ena = 0;
 
+	(void) spa_import_progress_set_state(spa_guid(spa),
+	    spa_load_state(spa));
+
 	return (error);
 }
 
@@ -2469,6 +2475,7 @@ spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label,
 	 */
 	if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0)
 		return (B_FALSE);
+
 	/*
 	 * If the tryconfig_ values are nonzero, they are the results of an
 	 * earlier tryimport.  If they all match the uberblock we just found,
@@ -2617,10 +2624,14 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
 	import_delay = spa_activity_check_duration(spa, ub);
 
 	/* Add a small random factor in case of simultaneous imports (0-25%) */
-	import_expire = gethrtime() + import_delay +
-	    (import_delay * spa_get_random(250) / 1000);
+	import_delay += import_delay * spa_get_random(250) / 1000;
+
+	import_expire = gethrtime() + import_delay;
 
 	while (gethrtime() < import_expire) {
+		(void) spa_import_progress_set_mmp_check(spa_guid(spa),
+		    NSEC2SEC(import_expire - gethrtime()));
+
 		vdev_uberblock_load(rvd, ub, &mmp_label);
 
 		if (txg != ub->ub_txg || timestamp != ub->ub_timestamp ||
@@ -2987,6 +2998,10 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
 	}
 
+	if (spa->spa_load_max_txg != UINT64_MAX) {
+		(void) spa_import_progress_set_max_txg(spa_guid(spa),
+		    (u_longlong_t)spa->spa_load_max_txg);
+	}
 	spa_load_note(spa, "using uberblock with txg=%llu",
 	    (u_longlong_t)ub->ub_txg);
 
@@ -3916,6 +3931,8 @@ spa_ld_mos_init(spa_t *spa, spa_import_type_t type)
 	if (error != 0)
 		return (error);
 
+	spa_import_progress_add(spa);
+
 	/*
 	 * Now that we have the vdev tree, try to open each vdev.  This involves
 	 * opening the underlying physical device, retrieving its geometry and
@@ -4346,6 +4363,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
 		spa_config_exit(spa, SCL_CONFIG, FTAG);
 	}
 
+	spa_import_progress_remove(spa_guid(spa));
 	spa_load_note(spa, "LOADED");
 
 	return (0);
@@ -4406,6 +4424,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
 		 * from previous txgs when spa_load fails.
 		 */
 		ASSERT(spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT);
+		spa_import_progress_remove(spa_guid(spa));
 		return (load_error);
 	}
 
@@ -4417,6 +4436,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
 
 	if (rewind_flags & ZPOOL_NEVER_REWIND) {
 		nvlist_free(config);
+		spa_import_progress_remove(spa_guid(spa));
 		return (load_error);
 	}
 
@@ -4459,6 +4479,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
 
 	if (state == SPA_LOAD_RECOVER) {
 		ASSERT3P(loadinfo, ==, NULL);
+		spa_import_progress_remove(spa_guid(spa));
 		return (rewind_error);
 	} else {
 		/* Store the rewind info as part of the initial load info */
@@ -4469,6 +4490,7 @@ spa_load_best(spa_t *spa, spa_load_state_t state, uint64_t max_request,
 		fnvlist_free(spa->spa_load_info);
 		spa->spa_load_info = loadinfo;
 
+		spa_import_progress_remove(spa_guid(spa));
 		return (load_error);
 	}
 }
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index f4497ca1c..e2d1ae3fc 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -2019,6 +2019,233 @@ spa_dirty_data(spa_t *spa)
 	return (spa->spa_dsl_pool->dp_dirty_total);
 }
 
+/*
+ * ==========================================================================
+ * SPA Import Progress Routines
+ * ==========================================================================
+ */
+
+/*
+ * One entry per import in progress.  Entries are linked on
+ * spa_import_progress_list through smh_node.
+ */
+typedef struct spa_import_progress {
+	uint64_t		pool_guid;	/* unique id for updates */
+	char			*pool_name;
+	spa_load_state_t	spa_load_state;
+	uint64_t		mmp_sec_remaining;	/* MMP activity check */
+	uint64_t		spa_load_max_txg;	/* rewind txg */
+	procfs_list_node_t	smh_node;
+} spa_import_progress_t;
+
+spa_history_list_t *spa_import_progress_list = NULL;
+
+/* Emit the column headings for the import_progress kstat. */
+static int
+spa_import_progress_show_header(struct seq_file *f)
+{
+	seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid",
+	    "load_state", "multihost_secs", "max_txg",
+	    "pool_name");
+	return (0);
+}
+
+/* Emit one row describing the import entry passed in 'data'. */
+static int
+spa_import_progress_show(struct seq_file *f, void *data)
+{
+	spa_import_progress_t *sip = (spa_import_progress_t *)data;
+
+	seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n",
+	    (u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state,
+	    (u_longlong_t)sip->mmp_sec_remaining,
+	    (u_longlong_t)sip->spa_load_max_txg,
+	    (sip->pool_name ? sip->pool_name : "-"));
+
+	return (0);
+}
+
+/* Remove oldest elements from list until there are no more than 'size' left */
+static void
+spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size)
+{
+	spa_import_progress_t *sip;
+	while (shl->size > size) {
+		sip = list_remove_head(&shl->procfs_list.pl_list);
+		if (sip->pool_name)
+			spa_strfree(sip->pool_name);
+		kmem_free(sip, sizeof (spa_import_progress_t));
+		shl->size--;
+	}
+
+	IMPLY(size == 0, list_is_empty(&shl->procfs_list.pl_list));
+}
+
+/* Allocate the global list and install it as zfs/import_progress. */
+static void
+spa_import_progress_init(void)
+{
+	spa_import_progress_list = kmem_zalloc(sizeof (spa_history_list_t),
+	    KM_SLEEP);
+
+	spa_import_progress_list->size = 0;
+
+	spa_import_progress_list->procfs_list.pl_private =
+	    spa_import_progress_list;
+
+	procfs_list_install("zfs",
+	    "import_progress",
+	    0644,
+	    &spa_import_progress_list->procfs_list,
+	    spa_import_progress_show,
+	    spa_import_progress_show_header,
+	    NULL,
+	    offsetof(spa_import_progress_t, smh_node));
+}
+
+/*
+ * Tear down the import progress list.  The procfs list is embedded in
+ * *shl, so it must be destroyed before shl itself is freed.
+ */
+static void
+spa_import_progress_destroy(void)
+{
+	spa_history_list_t *shl = spa_import_progress_list;
+	procfs_list_uninstall(&shl->procfs_list);
+	spa_import_progress_truncate(shl, 0);
+	procfs_list_destroy(&shl->procfs_list);
+	kmem_free(shl, sizeof (spa_history_list_t));
+}
+
+/*
+ * Record 'load_state' for the import identified by pool_guid.  Returns 0
+ * on success or when no imports are being tracked, ENOENT if pool_guid
+ * has no entry.
+ */
+int
+spa_import_progress_set_state(uint64_t pool_guid,
+    spa_load_state_t load_state)
+{
+	spa_history_list_t *shl = spa_import_progress_list;
+	spa_import_progress_t *sip;
+	int error = ENOENT;
+
+	if (shl->size == 0)
+		return (0);
+
+	mutex_enter(&shl->procfs_list.pl_lock);
+	for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL;
+	    sip = list_prev(&shl->procfs_list.pl_list, sip)) {
+		if (sip->pool_guid == pool_guid) {
+			sip->spa_load_state = load_state;
+			error = 0;
+			break;
+		}
+	}
+	mutex_exit(&shl->procfs_list.pl_lock);
+
+	return (error);
+}
+
+/* As spa_import_progress_set_state(), but records the rewind max txg. */
+int
+spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg)
+{
+	spa_history_list_t *shl = spa_import_progress_list;
+	spa_import_progress_t *sip;
+	int error = ENOENT;
+
+	if (shl->size == 0)
+		return (0);
+
+	mutex_enter(&shl->procfs_list.pl_lock);
+	for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL;
+	    sip = list_prev(&shl->procfs_list.pl_list, sip)) {
+		if (sip->pool_guid == pool_guid) {
+			sip->spa_load_max_txg = load_max_txg;
+			error = 0;
+			break;
+		}
+	}
+	mutex_exit(&shl->procfs_list.pl_lock);
+
+	return (error);
+}
+
+/* As spa_import_progress_set_state(), but records MMP seconds remaining. */
+int
+spa_import_progress_set_mmp_check(uint64_t pool_guid,
+    uint64_t mmp_sec_remaining)
+{
+	spa_history_list_t *shl = spa_import_progress_list;
+	spa_import_progress_t *sip;
+	int error = ENOENT;
+
+	if (shl->size == 0)
+		return (0);
+
+	mutex_enter(&shl->procfs_list.pl_lock);
+	for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL;
+	    sip = list_prev(&shl->procfs_list.pl_list, sip)) {
+		if (sip->pool_guid == pool_guid) {
+			sip->mmp_sec_remaining = mmp_sec_remaining;
+			error = 0;
+			break;
+		}
+	}
+	mutex_exit(&shl->procfs_list.pl_lock);
+
+	return (error);
+}
+
+/*
+ * A new import is in progress, add an entry.
+ */
+void
+spa_import_progress_add(spa_t *spa)
+{
+	spa_history_list_t *shl = spa_import_progress_list;
+	spa_import_progress_t *sip;
+	char *poolname = NULL;
+
+	sip = kmem_zalloc(sizeof (spa_import_progress_t), KM_SLEEP);
+	sip->pool_guid = spa_guid(spa);
+
+	(void) nvlist_lookup_string(spa->spa_config, ZPOOL_CONFIG_POOL_NAME,
+	    &poolname);
+	if (poolname == NULL)
+		poolname = spa_name(spa);
+	sip->pool_name = spa_strdup(poolname);
+	sip->spa_load_state = spa_load_state(spa);
+
+	mutex_enter(&shl->procfs_list.pl_lock);
+	procfs_list_add(&shl->procfs_list, sip);
+	shl->size++;
+	mutex_exit(&shl->procfs_list.pl_lock);
+}
+
+/* Drop the entry for pool_guid, if any, and free its resources. */
+void
+spa_import_progress_remove(uint64_t pool_guid)
+{
+	spa_history_list_t *shl = spa_import_progress_list;
+	spa_import_progress_t *sip;
+
+	mutex_enter(&shl->procfs_list.pl_lock);
+	for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL;
+	    sip = list_prev(&shl->procfs_list.pl_list, sip)) {
+		if (sip->pool_guid == pool_guid) {
+			if (sip->pool_name)
+				spa_strfree(sip->pool_name);
+			list_remove(&shl->procfs_list.pl_list, sip);
+			shl->size--;
+			kmem_free(sip, sizeof (spa_import_progress_t));
+			break;
+		}
+	}
+	mutex_exit(&shl->procfs_list.pl_lock);
+}
+
 /*
  * ==========================================================================
  * Initialization and Termination
@@ -2099,6 +2326,7 @@ spa_init(int mode)
 	l2arc_start();
 	scan_init();
 	qat_init();
+	spa_import_progress_init();
 }
 
 void
@@ -2123,6 +2351,7 @@ spa_fini(void)
 	fm_fini();
 	scan_fini();
 	qat_fini();
+	spa_import_progress_destroy();
 
 	avl_destroy(&spa_namespace_avl);
 	avl_destroy(&spa_spare_avl);
-- 
2.40.0