]> granicus.if.org Git - zfs/commitdiff
Prevent race in blkptr_verify against device removal
authorPaul Dagnelie <pcd@delphix.com>
Wed, 14 Aug 2019 03:24:43 +0000 (20:24 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Wed, 14 Aug 2019 03:24:43 +0000 (21:24 -0600)
When we check the vdev of the blkptr in zfs_blkptr_verify, we can run
into a race condition where that vdev is temporarily unavailable. This
happens when a device removal operation and the old vdev_t has been
removed from the array, but the new indirect vdev has not yet been
inserted.

We hold the spa_config_lock while doing our sensitive verification.
To ensure that we don't deadlock, we only grab the lock if we don't
have config_writer held. In addition, I had to const the tags of the
refcounts and the spa_config_lock arguments.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #9112

include/sys/refcount.h
include/sys/spa.h
module/zfs/refcount.c
module/zfs/spa_misc.c
module/zfs/zio.c

index e982faeba0f29031b2f5ad035da40374bd25f783..c8f58623039279a33e9e8d9b697af06244b695eb 100644 (file)
@@ -44,7 +44,7 @@ extern "C" {
 #ifdef ZFS_DEBUG
 typedef struct reference {
        list_node_t ref_link;
-       void *ref_holder;
+       const void *ref_holder;
        uint64_t ref_number;
        uint8_t *ref_removed;
 } reference_t;
@@ -70,16 +70,17 @@ void zfs_refcount_destroy(zfs_refcount_t *);
 void zfs_refcount_destroy_many(zfs_refcount_t *, uint64_t);
 int zfs_refcount_is_zero(zfs_refcount_t *);
 int64_t zfs_refcount_count(zfs_refcount_t *);
-int64_t zfs_refcount_add(zfs_refcount_t *, void *);
-int64_t zfs_refcount_remove(zfs_refcount_t *, void *);
-int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, void *);
-int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, void *);
+int64_t zfs_refcount_add(zfs_refcount_t *, const void *);
+int64_t zfs_refcount_remove(zfs_refcount_t *, const void *);
+int64_t zfs_refcount_add_many(zfs_refcount_t *, uint64_t, const void *);
+int64_t zfs_refcount_remove_many(zfs_refcount_t *, uint64_t, const void *);
 void zfs_refcount_transfer(zfs_refcount_t *, zfs_refcount_t *);
-void zfs_refcount_transfer_ownership(zfs_refcount_t *, void *, void *);
+void zfs_refcount_transfer_ownership(zfs_refcount_t *, const void *,
+    const void *);
 void zfs_refcount_transfer_ownership_many(zfs_refcount_t *, uint64_t,
-    void *, void *);
-boolean_t zfs_refcount_held(zfs_refcount_t *, void *);
-boolean_t zfs_refcount_not_held(zfs_refcount_t *, void *);
+    const void *, const void *);
+boolean_t zfs_refcount_held(zfs_refcount_t *, const void *);
+boolean_t zfs_refcount_not_held(zfs_refcount_t *, const void *);
 
 void zfs_refcount_init(void);
 void zfs_refcount_fini(void);
index e6431378327453986e66140e0fc4920643e496af..49432684346ed5e1d9e1b5b502fb9f5ed156d6a6 100644 (file)
@@ -1007,8 +1007,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid,
 
 /* Pool configuration locks */
 extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
-extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw);
-extern void spa_config_exit(spa_t *spa, int locks, void *tag);
+extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
+extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
 extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
 
 /* Pool vdev add/remove lock */
@@ -1123,7 +1123,6 @@ extern boolean_t spa_has_checkpoint(spa_t *spa);
 extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa);
 extern boolean_t spa_suspend_async_destroy(spa_t *spa);
 extern uint64_t spa_min_claim_txg(spa_t *spa);
-extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp);
 extern boolean_t zfs_dva_valid(spa_t *spa, const dva_t *dva,
     const blkptr_t *bp);
 typedef void (*spa_remap_cb_t)(uint64_t vdev, uint64_t offset, uint64_t size,
index 89528e6d3637c09fbd4ea494ad5f640f6d3bd381..a612b2f40c05a9158d28db229c8a1022c3339e4d 100644 (file)
@@ -121,7 +121,7 @@ zfs_refcount_count(zfs_refcount_t *rc)
 }
 
 int64_t
-zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder)
+zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, const void *holder)
 {
        reference_t *ref = NULL;
        int64_t count;
@@ -143,13 +143,14 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder)
 }
 
 int64_t
-zfs_refcount_add(zfs_refcount_t *rc, void *holder)
+zfs_refcount_add(zfs_refcount_t *rc, const void *holder)
 {
        return (zfs_refcount_add_many(rc, 1, holder));
 }
 
 int64_t
-zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder)
+zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number,
+    const void *holder)
 {
        reference_t *ref;
        int64_t count;
@@ -197,7 +198,7 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder)
 }
 
 int64_t
-zfs_refcount_remove(zfs_refcount_t *rc, void *holder)
+zfs_refcount_remove(zfs_refcount_t *rc, const void *holder)
 {
        return (zfs_refcount_remove_many(rc, 1, holder));
 }
@@ -235,7 +236,7 @@ zfs_refcount_transfer(zfs_refcount_t *dst, zfs_refcount_t *src)
 
 void
 zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
-    void *current_holder, void *new_holder)
+    const void *current_holder, const void *new_holder)
 {
        reference_t *ref;
        boolean_t found = B_FALSE;
@@ -260,8 +261,8 @@ zfs_refcount_transfer_ownership_many(zfs_refcount_t *rc, uint64_t number,
 }
 
 void
-zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
-    void *new_holder)
+zfs_refcount_transfer_ownership(zfs_refcount_t *rc, const void *current_holder,
+    const void *new_holder)
 {
        return (zfs_refcount_transfer_ownership_many(rc, 1, current_holder,
            new_holder));
@@ -273,7 +274,7 @@ zfs_refcount_transfer_ownership(zfs_refcount_t *rc, void *current_holder,
  * might be held.
  */
 boolean_t
-zfs_refcount_held(zfs_refcount_t *rc, void *holder)
+zfs_refcount_held(zfs_refcount_t *rc, const void *holder)
 {
        reference_t *ref;
 
@@ -301,7 +302,7 @@ zfs_refcount_held(zfs_refcount_t *rc, void *holder)
  * since the reference might not be held.
  */
 boolean_t
-zfs_refcount_not_held(zfs_refcount_t *rc, void *holder)
+zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder)
 {
        reference_t *ref;
 
index 3d6c0ee3eb352a135be2292a309e7244ac20bbaa..d998fe2255cc239a77d73bb30541f241d2ea7166 100644 (file)
@@ -484,7 +484,7 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
 }
 
 void
-spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
+spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
 {
        int wlocks_held = 0;
 
@@ -517,7 +517,7 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
 }
 
 void
-spa_config_exit(spa_t *spa, int locks, void *tag)
+spa_config_exit(spa_t *spa, int locks, const void *tag)
 {
        for (int i = SCL_LOCKS - 1; i >= 0; i--) {
                spa_config_lock_t *scl = &spa->spa_config_lock[i];
index b740afde6757a08732376c475423341872252139..7b4ab341f9c56960cd768c9a4c316427630d3f8f 100644 (file)
@@ -881,8 +881,8 @@ zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags)
        return (zio_null(NULL, spa, NULL, done, private, flags));
 }
 
-void
-zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
+static void
+zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp, boolean_t config_held)
 {
        if (!DMU_OT_IS_VALID(BP_GET_TYPE(bp))) {
                zfs_panic_recover("blkptr at %p has invalid TYPE %llu",
@@ -921,6 +921,10 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
        if (!spa->spa_trust_config)
                return;
 
+       if (!config_held)
+               spa_config_enter(spa, SCL_VDEV, bp, RW_READER);
+       else
+               ASSERT(spa_config_held(spa, SCL_VDEV, RW_WRITER));
        /*
         * Pool-specific checks.
         *
@@ -969,6 +973,8 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
                            bp, i, (longlong_t)offset);
                }
        }
+       if (!config_held)
+               spa_config_exit(spa, SCL_VDEV, bp);
 }
 
 boolean_t
@@ -1008,7 +1014,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 {
        zio_t *zio;
 
-       zfs_blkptr_verify(spa, bp);
+       zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER);
 
        zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
            data, size, size, done, private,
@@ -1101,7 +1107,7 @@ void
 zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
 {
 
-       zfs_blkptr_verify(spa, bp);
+       zfs_blkptr_verify(spa, bp, B_FALSE);
 
        /*
         * The check for EMBEDDED is a performance optimization.  We
@@ -1171,7 +1177,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
 {
        zio_t *zio;
 
-       zfs_blkptr_verify(spa, bp);
+       zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER);
 
        if (BP_IS_EMBEDDED(bp))
                return (zio_null(pio, spa, NULL, NULL, NULL, 0));