]> granicus.if.org Git - zfs/commitdiff
OpenZFS 6676 - Race between unique_insert() and unique_remove() causes ZFS fsid change
authorGeorge Melikov <mail@gmelikov.ru>
Thu, 26 Jan 2017 22:43:28 +0000 (01:43 +0300)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Thu, 26 Jan 2017 22:43:28 +0000 (14:43 -0800)
Authored by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Dan Vatca <dan.vatca@gmail.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/6676
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/40510e8
Closes #5667

include/sys/dmu.h
include/sys/zap_impl.h
module/zfs/dbuf.c
module/zfs/dnode.c
module/zfs/dsl_dataset.c
module/zfs/dsl_dir.c
module/zfs/sa.c
module/zfs/zap.c
module/zfs/zap_micro.c

index 3c57de32e192b960a549598f2cca22808abd5a65..9c8ca7c3668621b1e8b26cacfb3009b41d732223 100644 (file)
@@ -549,8 +549,14 @@ typedef struct dmu_buf_user {
         */
        taskq_ent_t     dbu_tqent;
 
-       /* This instance's eviction function pointer. */
-       dmu_buf_evict_func_t *dbu_evict_func;
+       /*
+        * This instance's eviction function pointers.
+        *
+        * dbu_evict_func_sync is called synchronously and then
+        * dbu_evict_func_async is executed asynchronously on a taskq.
+        */
+       dmu_buf_evict_func_t *dbu_evict_func_sync;
+       dmu_buf_evict_func_t *dbu_evict_func_async;
 #ifdef ZFS_DEBUG
        /*
         * Pointer to user's dbuf pointer.  NULL for clients that do
@@ -572,12 +578,16 @@ typedef struct dmu_buf_user {
  */
 /*ARGSUSED*/
 static inline void
-dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
-    dmu_buf_t **clear_on_evict_dbufp)
+dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync,
+    dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp)
 {
-       ASSERT(dbu->dbu_evict_func == NULL);
-       ASSERT(evict_func != NULL);
-       dbu->dbu_evict_func = evict_func;
+       ASSERT(dbu->dbu_evict_func_sync == NULL);
+       ASSERT(dbu->dbu_evict_func_async == NULL);
+
+       /* must have at least one evict func */
+       IMPLY(evict_func_sync == NULL, evict_func_async != NULL);
+       dbu->dbu_evict_func_sync = evict_func_sync;
+       dbu->dbu_evict_func_async = evict_func_async;
        taskq_init_ent(&dbu->dbu_tqent);
 #ifdef ZFS_DEBUG
        dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;
index 13a208faff949225d286611d807e018b7577e1a2..cbe7f3c5b9d4031a60f6974d55a2eaf20ec9da23 100644 (file)
@@ -198,7 +198,7 @@ boolean_t zap_match(zap_name_t *zn, const char *matchname);
 int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
     krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp);
 void zap_unlockdir(zap_t *zap, void *tag);
-void zap_evict(void *dbu);
+void zap_evict_sync(void *dbu);
 zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
 void zap_name_free(zap_name_t *zn);
 int zap_hashbits(zap_t *zap);
index ca1a443032c8f7779b5d3138afdf398779112296..907a849413a4fe9afa095e974962ccfb6835a71d 100644 (file)
@@ -85,7 +85,9 @@ static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
 
 #ifndef __lint
 extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
-    dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp);
+    dmu_buf_evict_func_t *evict_func_sync,
+    dmu_buf_evict_func_t *evict_func_async,
+    dmu_buf_t **clear_on_evict_dbufp);
 #endif /* ! __lint */
 
 /*
@@ -385,6 +387,7 @@ static void
 dbuf_evict_user(dmu_buf_impl_t *db)
 {
        dmu_buf_user_t *dbu = db->db_user;
+       boolean_t has_async;
 
        ASSERT(MUTEX_HELD(&db->db_mtx));
 
@@ -400,11 +403,24 @@ dbuf_evict_user(dmu_buf_impl_t *db)
 #endif
 
        /*
-        * Invoke the callback from a taskq to avoid lock order reversals
-        * and limit stack depth.
+        * There are two eviction callbacks - one that we call synchronously
+        * and one that we invoke via a taskq.  The async one is useful for
+        * avoiding lock order reversals and limiting stack depth.
+        *
+        * Note that if we have a sync callback but no async callback,
+        * it's likely that the sync callback will free the structure
+        * containing the dbu.  In that case we need to take care to not
+        * dereference dbu after calling the sync evict func.
         */
-       taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
-           &dbu->dbu_tqent);
+       has_async = (dbu->dbu_evict_func_async != NULL);
+
+       if (dbu->dbu_evict_func_sync != NULL)
+               dbu->dbu_evict_func_sync(dbu);
+
+       if (has_async) {
+               taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func_async,
+                   dbu, 0, &dbu->dbu_tqent);
+       }
 }
 
 boolean_t
index cbd54be04dbe15ac45882f69115514c440a2165e..4c37ce383897f6ef099b9142366fe4b3d21a52a6 100644 (file)
@@ -1021,7 +1021,7 @@ dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
 }
 
 static void
-dnode_buf_pageout(void *dbu)
+dnode_buf_evict_async(void *dbu)
 {
        dnode_children_t *children_dnodes = dbu;
        int i;
@@ -1271,8 +1271,8 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
                for (i = 0; i < epb; i++) {
                        zrl_init(&dnh[i].dnh_zrlock);
                }
-               dmu_buf_init_user(&children_dnodes->dnc_dbu,
-                   dnode_buf_pageout, NULL);
+               dmu_buf_init_user(&children_dnodes->dnc_dbu, NULL,
+                   dnode_buf_evict_async, NULL);
                winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
                if (winner != NULL) {
 
index ddab3eddbea6937461eea429d34d52d73f44e249..fb6feeec9d58f48b0042b5367df0fc9c0fdfca05 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 RackTop Systems.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -273,16 +273,30 @@ dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
        return (B_TRUE);
 }
 
+/*
+ * We have to release the fsid syncronously or we risk that a subsequent
+ * mount of the same dataset will fail to unique_insert the fsid.  This
+ * failure would manifest itself as the fsid of this dataset changing
+ * between mounts which makes NFS clients quite unhappy.
+ */
 static void
-dsl_dataset_evict(void *dbu)
+dsl_dataset_evict_sync(void *dbu)
 {
        dsl_dataset_t *ds = dbu;
 
        ASSERT(ds->ds_owner == NULL);
 
-       ds->ds_dbuf = NULL;
-
        unique_remove(ds->ds_fsid_guid);
+}
+
+static void
+dsl_dataset_evict_async(void *dbu)
+{
+       dsl_dataset_t *ds = dbu;
+
+       ASSERT(ds->ds_owner == NULL);
+
+       ds->ds_dbuf = NULL;
 
        if (ds->ds_objset != NULL)
                dmu_objset_evict(ds->ds_objset);
@@ -525,7 +539,8 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
                        ds->ds_reserved = ds->ds_quota = 0;
                }
 
-               dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
+               dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync,
+                   dsl_dataset_evict_async, &ds->ds_dbuf);
                if (err == 0)
                        winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
 
@@ -548,6 +563,16 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
                } else {
                        ds->ds_fsid_guid =
                            unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
+                       if (ds->ds_fsid_guid !=
+                           dsl_dataset_phys(ds)->ds_fsid_guid) {
+                               zfs_dbgmsg("ds_fsid_guid changed from "
+                                   "%llx to %llx for pool %s dataset id %llu",
+                                   (long long)
+                                   dsl_dataset_phys(ds)->ds_fsid_guid,
+                                   (long long)ds->ds_fsid_guid,
+                                   spa_name(dp->dp_spa),
+                                   dsobj);
+                       }
                }
        }
        ASSERT3P(ds->ds_dbuf, ==, dbuf);
index b2173523590831e18debb99250ce746b7959eba9..305a87ed9a31fa09498ba0976a00e45deca4327a 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2013 Martin Matuska. All rights reserved.
  * Copyright (c) 2014 Joyent, Inc. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -129,7 +129,7 @@ extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
 
 static void
-dsl_dir_evict(void *dbu)
+dsl_dir_evict_async(void *dbu)
 {
        dsl_dir_t *dd = dbu;
        int t;
@@ -237,7 +237,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
                        dmu_buf_rele(origin_bonus, FTAG);
                }
 
-               dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf);
+               dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
+                   &dd->dd_dbuf);
                winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
                if (winner != NULL) {
                        if (dd->dd_parent)
index aa8f3192d89091a1606ce01eb361b39006a3a048..dda51529c3bcaf18353f66401560780cdcab17be 100644 (file)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
@@ -1297,7 +1297,7 @@ sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
 
 /*ARGSUSED*/
 static void
-sa_evict(void *dbu)
+sa_evict_sync(void *dbu)
 {
        panic("evicting sa dbuf\n");
 }
@@ -1394,7 +1394,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
                sa_handle_t *winner = NULL;
 
                handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
-               handle->sa_dbu.dbu_evict_func = NULL;
+               handle->sa_dbu.dbu_evict_func_sync = NULL;
+               handle->sa_dbu.dbu_evict_func_async = NULL;
                handle->sa_userp = userp;
                handle->sa_bonus = db;
                handle->sa_os = os;
@@ -1405,7 +1406,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
                error = sa_build_index(handle, SA_BONUS);
 
                if (hdl_type == SA_HDL_SHARED) {
-                       dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
+                       dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL,
+                           NULL);
                        winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
                }
 
index 17107eb2882fbe17b2edcef4f2d75edd1ec7c459..907ab2aa5021fddea236c0e7cb8fe0a06b2edb41 100644 (file)
@@ -81,7 +81,8 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
        ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
        zap->zap_ismicro = FALSE;
 
-       zap->zap_dbu.dbu_evict_func = zap_evict;
+       zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
+       zap->zap_dbu.dbu_evict_func_async = NULL;
 
        mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
        zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
@@ -399,7 +400,7 @@ zap_allocate_blocks(zap_t *zap, int nblocks)
 }
 
 static void
-zap_leaf_pageout(void *dbu)
+zap_leaf_evict_sync(void *dbu)
 {
        zap_leaf_t *l = dbu;
 
@@ -423,7 +424,7 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
        VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
            l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
            DMU_READ_NO_PREFETCH));
-       dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+       dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
        winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
        ASSERT(winner == NULL);
        dmu_buf_will_dirty(l->l_dbuf, tx);
@@ -470,13 +471,13 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
        l->l_bs = highbit64(db->db_size) - 1;
        l->l_dbuf = db;
 
-       dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+       dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
        winner = dmu_buf_set_user(db, &l->l_dbu);
 
        rw_exit(&l->l_rwlock);
        if (winner != NULL) {
                /* someone else set it first */
-               zap_leaf_pageout(&l->l_dbu);
+               zap_leaf_evict_sync(&l->l_dbu);
                l = winner;
        }
 
index 97c0fff654422b7ab32f9723eb4729e2eb51e610..cf552ed10ec0002e920db96c81b1019d14168f75 100644 (file)
@@ -392,7 +392,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
         * it, because zap_lockdir() checks zap_ismicro without the lock
         * held.
         */
-       dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
+       dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
        winner = dmu_buf_set_user(db, &zap->zap_dbu);
 
        if (winner != NULL)
@@ -774,7 +774,7 @@ zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
 }
 
 void
-zap_evict(void *dbu)
+zap_evict_sync(void *dbu)
 {
        zap_t *zap = dbu;