]> granicus.if.org Git - zfs/commitdiff
Improved error handling for extreme rewinds
authorBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 9 Oct 2018 22:42:42 +0000 (15:42 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Fri, 12 Oct 2018 18:24:04 +0000 (11:24 -0700)
The vdev_checkpoint_sm_object(), vdev_obsolete_sm_object(), and
vdev_obsolete_counts_are_precise() functions assume that the
only way a zap_lookup() can fail is if the requested entry is
missing.  While this is the most common cause, it's not the only
cause.  Attemping to access a damaged ZAP will result in other
errors.

The most likely scenario for accessing a damaged ZAP is during
an extreme rewind pool import.  Under these conditions the pool
is expected to contain damaged objects and the import code was
updated to handle this gracefully.  Getting an ECKSUM error from
these ZAPs after the pool in import a far less likely, therefore
the behavior for call paths was not modified.

Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #7809
Closes #7921

cmd/zdb/zdb.c
include/sys/vdev_impl.h
module/zfs/spa.c
module/zfs/vdev.c
module/zfs/vdev_indirect.c
module/zfs/vdev_removal.c

index 21113da2f03cf666a12601ea529714c2d21abeda..a558f60c095c1102c05be79d3390c88155daeeee 100644 (file)
@@ -696,19 +696,20 @@ get_metaslab_refcount(vdev_t *vd)
 static int
 get_obsolete_refcount(vdev_t *vd)
 {
+       uint64_t obsolete_sm_object;
        int refcount = 0;
 
-       uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
-       if (vd->vdev_top == vd && obsolete_sm_obj != 0) {
+       VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
+       if (vd->vdev_top == vd && obsolete_sm_object != 0) {
                dmu_object_info_t doi;
                VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
-                   obsolete_sm_obj, &doi));
+                   obsolete_sm_object, &doi));
                if (doi.doi_bonus_size == sizeof (space_map_phys_t)) {
                        refcount++;
                }
        } else {
                ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
-               ASSERT3U(obsolete_sm_obj, ==, 0);
+               ASSERT3U(obsolete_sm_object, ==, 0);
        }
        for (unsigned c = 0; c < vd->vdev_children; c++) {
                refcount += get_obsolete_refcount(vd->vdev_child[c]);
@@ -1051,7 +1052,8 @@ print_vdev_indirect(vdev_t *vd)
        }
        (void) printf("\n");
 
-       uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
+       uint64_t obsolete_sm_object;
+       VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
        if (obsolete_sm_object != 0) {
                objset_t *mos = vd->vdev_spa->spa_meta_objset;
                (void) printf("obsolete space map object %llu:\n",
@@ -3595,9 +3597,11 @@ zdb_load_obsolete_counts(vdev_t *vd)
        spa_t *spa = vd->vdev_spa;
        spa_condensing_indirect_phys_t *scip =
            &spa->spa_condensing_indirect_phys;
+       uint64_t obsolete_sm_object;
        uint32_t *counts;
 
-       EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL);
+       VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
+       EQUIV(obsolete_sm_object != 0, vd->vdev_obsolete_sm != NULL);
        counts = vdev_indirect_mapping_load_obsolete_counts(vim);
        if (vd->vdev_obsolete_sm != NULL) {
                vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
@@ -3972,6 +3976,7 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
        boolean_t leaks = B_FALSE;
        vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
        uint64_t total_leaked = 0;
+       boolean_t are_precise = B_FALSE;
 
        ASSERT(vim != NULL);
 
@@ -3999,9 +4004,9 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
                    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
                ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
                    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);
-               if (bytes_leaked != 0 &&
-                   (vdev_obsolete_counts_are_precise(vd) ||
-                   dump_opt['d'] >= 5)) {
+
+               VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
+               if (bytes_leaked != 0 && (are_precise || dump_opt['d'] >= 5)) {
                        (void) printf("obsolete indirect mapping count "
                            "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
                            (u_longlong_t)vd->vdev_id,
@@ -4012,7 +4017,8 @@ zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
                total_leaked += ABS(bytes_leaked);
        }
 
-       if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) {
+       VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
+       if (!are_precise && total_leaked > 0) {
                int pct_leaked = total_leaked * 100 /
                    vdev_indirect_mapping_bytes_mapped(vim);
                (void) printf("cannot verify obsolete indirect mapping "
@@ -4576,11 +4582,17 @@ verify_device_removal_feature_counts(spa_t *spa)
                                obsolete_counts_count++;
                        }
                }
-               if (vdev_obsolete_counts_are_precise(vd)) {
+
+               boolean_t are_precise;
+               VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
+               if (are_precise) {
                        ASSERT(vic->vic_mapping_object != 0);
                        precise_vdev_count++;
                }
-               if (vdev_obsolete_sm_object(vd) != 0) {
+
+               uint64_t obsolete_sm_object;
+               VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
+               if (obsolete_sm_object != 0) {
                        ASSERT(vic->vic_mapping_object != 0);
                        obsolete_sm_count++;
                }
index e055161e83743d4bc75e7eaa777254e83e602afa..c5b0c0cd8f4ad0ac45a418c72ac9961d48ac7fce 100644 (file)
@@ -493,13 +493,13 @@ extern int zfs_vdev_cache_size;
 extern void vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx);
 extern boolean_t vdev_indirect_should_condense(vdev_t *vd);
 extern void spa_condense_indirect_start_sync(vdev_t *vd, dmu_tx_t *tx);
-extern int vdev_obsolete_sm_object(vdev_t *vd);
-extern boolean_t vdev_obsolete_counts_are_precise(vdev_t *vd);
+extern int vdev_obsolete_sm_object(vdev_t *vd, uint64_t *sm_obj);
+extern int vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise);
 
 /*
  * Other miscellaneous functions
  */
-int vdev_checkpoint_sm_object(vdev_t *vd);
+int vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj);
 
 #ifdef __cplusplus
 }
index a1851bca25abeb34cb4694935e06103db770ecaf..fdce49c40c75d735bb1f92c424ed295bfdfb7142 100644 (file)
@@ -7614,14 +7614,15 @@ vdev_indirect_state_sync_verify(vdev_t *vd)
                ASSERT(vib != NULL);
        }
 
-       if (vdev_obsolete_sm_object(vd) != 0) {
+       uint64_t obsolete_sm_object = 0;
+       ASSERT0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
+       if (obsolete_sm_object != 0) {
                ASSERT(vd->vdev_obsolete_sm != NULL);
                ASSERT(vd->vdev_removing ||
                    vd->vdev_ops == &vdev_indirect_ops);
                ASSERT(vdev_indirect_mapping_num_entries(vim) > 0);
                ASSERT(vdev_indirect_mapping_bytes_mapped(vim) > 0);
-
-               ASSERT3U(vdev_obsolete_sm_object(vd), ==,
+               ASSERT3U(obsolete_sm_object, ==,
                    space_map_object(vd->vdev_obsolete_sm));
                ASSERT3U(vdev_indirect_mapping_bytes_mapped(vim), >=,
                    space_map_allocated(vd->vdev_obsolete_sm));
index 5f4ff86207123feb4be75f1115c55d043a3a5e54..2c95626c4d78a07f134649fb7cd7afc79b3e6c71 100644 (file)
@@ -2895,26 +2895,28 @@ vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp)
 }
 
 /*
- * Gets the checkpoint space map object from the vdev's ZAP.
- * Returns the spacemap object, or 0 if it wasn't in the ZAP
- * or the ZAP doesn't exist yet.
+ * Gets the checkpoint space map object from the vdev's ZAP.  On success sm_obj
+ * will contain either the checkpoint spacemap object or zero if none exists.
+ * All other errors are returned to the caller.
  */
 int
-vdev_checkpoint_sm_object(vdev_t *vd)
+vdev_checkpoint_sm_object(vdev_t *vd, uint64_t *sm_obj)
 {
        ASSERT0(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
+
        if (vd->vdev_top_zap == 0) {
+               *sm_obj = 0;
                return (0);
        }
 
-       uint64_t sm_obj = 0;
-       int err = zap_lookup(spa_meta_objset(vd->vdev_spa), vd->vdev_top_zap,
-           VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1, &sm_obj);
-
-       if (err != 0)
-               VERIFY3S(err, ==, ENOENT);
+       int error = zap_lookup(spa_meta_objset(vd->vdev_spa), vd->vdev_top_zap,
+           VDEV_TOP_ZAP_POOL_CHECKPOINT_SM, sizeof (uint64_t), 1, sm_obj);
+       if (error == ENOENT) {
+               *sm_obj = 0;
+               error = 0;
+       }
 
-       return (sm_obj);
+       return (error);
 }
 
 int
@@ -2970,8 +2972,9 @@ vdev_load(vdev_t *vd)
                        return (error);
                }
 
-               uint64_t checkpoint_sm_obj = vdev_checkpoint_sm_object(vd);
-               if (checkpoint_sm_obj != 0) {
+               uint64_t checkpoint_sm_obj;
+               error = vdev_checkpoint_sm_object(vd, &checkpoint_sm_obj);
+               if (error == 0 && checkpoint_sm_obj != 0) {
                        objset_t *mos = spa_meta_objset(vd->vdev_spa);
                        ASSERT(vd->vdev_asize != 0);
                        ASSERT3P(vd->vdev_checkpoint_sm, ==, NULL);
@@ -2991,12 +2994,17 @@ vdev_load(vdev_t *vd)
                        /*
                         * Since the checkpoint_sm contains free entries
                         * exclusively we can use sm_alloc to indicate the
-                        * culmulative checkpointed space that has been freed.
+                        * cumulative checkpointed space that has been freed.
                         */
                        vd->vdev_stat.vs_checkpoint_space =
                            -vd->vdev_checkpoint_sm->sm_alloc;
                        vd->vdev_spa->spa_checkpoint_info.sci_dspace +=
                            vd->vdev_stat.vs_checkpoint_space;
+               } else if (error != 0) {
+                       vdev_dbgmsg(vd, "vdev_load: failed to retrieve "
+                           "checkpoint space map object from vdev ZAP "
+                           "[error=%d]", error);
+                       return (error);
                }
        }
 
@@ -3011,8 +3019,9 @@ vdev_load(vdev_t *vd)
                return (error);
        }
 
-       uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
-       if (obsolete_sm_object != 0) {
+       uint64_t obsolete_sm_object;
+       error = vdev_obsolete_sm_object(vd, &obsolete_sm_object);
+       if (error == 0 && obsolete_sm_object != 0) {
                objset_t *mos = vd->vdev_spa->spa_meta_objset;
                ASSERT(vd->vdev_asize != 0);
                ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
@@ -3027,6 +3036,10 @@ vdev_load(vdev_t *vd)
                        return (error);
                }
                space_map_update(vd->vdev_obsolete_sm);
+       } else if (error != 0) {
+               vdev_dbgmsg(vd, "vdev_load: failed to retrieve obsolete "
+                   "space map object from vdev ZAP [error=%d]", error);
+               return (error);
        }
 
        return (0);
index d57bb717a6852d4ec0121d02904e38fe86dfb21d..097be6f0e3123418cbf5a196fc54a10e26ee618e 100644 (file)
@@ -420,15 +420,16 @@ vdev_indirect_should_condense(vdev_t *vd)
         * If nothing new has been marked obsolete, there is no
         * point in condensing.
         */
+       ASSERTV(uint64_t obsolete_sm_obj);
+       ASSERT0(vdev_obsolete_sm_object(vd, &obsolete_sm_obj));
        if (vd->vdev_obsolete_sm == NULL) {
-               ASSERT0(vdev_obsolete_sm_object(vd));
+               ASSERT0(obsolete_sm_obj);
                return (B_FALSE);
        }
 
        ASSERT(vd->vdev_obsolete_sm != NULL);
 
-       ASSERT3U(vdev_obsolete_sm_object(vd), ==,
-           space_map_object(vd->vdev_obsolete_sm));
+       ASSERT3U(obsolete_sm_obj, ==, space_map_object(vd->vdev_obsolete_sm));
 
        uint64_t bytes_mapped = vdev_indirect_mapping_bytes_mapped(vim);
        uint64_t bytes_obsolete = space_map_allocated(vd->vdev_obsolete_sm);
@@ -770,8 +771,9 @@ spa_condense_indirect_start_sync(vdev_t *vd, dmu_tx_t *tx)
        ASSERT(spa_feature_is_active(spa, SPA_FEATURE_OBSOLETE_COUNTS));
        ASSERT(vdev_indirect_mapping_num_entries(vd->vdev_indirect_mapping));
 
-       uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
-       ASSERT(obsolete_sm_obj != 0);
+       uint64_t obsolete_sm_obj;
+       VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_obj));
+       ASSERT3U(obsolete_sm_obj, !=, 0);
 
        scip->scip_vdev = vd->vdev_id;
        scip->scip_next_mapping_object =
@@ -822,16 +824,18 @@ vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx)
        ASSERT(vd->vdev_removing || vd->vdev_ops == &vdev_indirect_ops);
        ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS));
 
-       if (vdev_obsolete_sm_object(vd) == 0) {
-               uint64_t obsolete_sm_object =
-                   space_map_alloc(spa->spa_meta_objset,
+       uint64_t obsolete_sm_object;
+       VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
+       if (obsolete_sm_object == 0) {
+               obsolete_sm_object = space_map_alloc(spa->spa_meta_objset,
                    vdev_standard_sm_blksz, tx);
 
                ASSERT(vd->vdev_top_zap != 0);
                VERIFY0(zap_add(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
                    VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM,
                    sizeof (obsolete_sm_object), 1, &obsolete_sm_object, tx));
-               ASSERT3U(vdev_obsolete_sm_object(vd), !=, 0);
+               ASSERT0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
+               ASSERT3U(obsolete_sm_object, !=, 0);
 
                spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
                VERIFY0(space_map_open(&vd->vdev_obsolete_sm,
@@ -841,7 +845,7 @@ vdev_indirect_sync_obsolete(vdev_t *vd, dmu_tx_t *tx)
        }
 
        ASSERT(vd->vdev_obsolete_sm != NULL);
-       ASSERT3U(vdev_obsolete_sm_object(vd), ==,
+       ASSERT3U(obsolete_sm_object, ==,
            space_map_object(vd->vdev_obsolete_sm));
 
        space_map_write(vd->vdev_obsolete_sm,
@@ -889,44 +893,56 @@ spa_start_indirect_condensing_thread(spa_t *spa)
 }
 
 /*
- * Gets the obsolete spacemap object from the vdev's ZAP.
- * Returns the spacemap object, or 0 if it wasn't in the ZAP or the ZAP doesn't
- * exist yet.
+ * Gets the obsolete spacemap object from the vdev's ZAP.  On success sm_obj
+ * will contain either the obsolete spacemap object or zero if none exists.
+ * All other errors are returned to the caller.
  */
 int
-vdev_obsolete_sm_object(vdev_t *vd)
+vdev_obsolete_sm_object(vdev_t *vd, uint64_t *sm_obj)
 {
        ASSERT0(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
+
        if (vd->vdev_top_zap == 0) {
+               *sm_obj = 0;
                return (0);
        }
 
-       uint64_t sm_obj = 0;
-       int err;
-       err = zap_lookup(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
-           VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM, sizeof (sm_obj), 1, &sm_obj);
-
-       ASSERT(err == 0 || err == ENOENT);
+       int error = zap_lookup(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
+           VDEV_TOP_ZAP_INDIRECT_OBSOLETE_SM, sizeof (sm_obj), 1, sm_obj);
+       if (error == ENOENT) {
+               *sm_obj = 0;
+               error = 0;
+       }
 
-       return (sm_obj);
+       return (error);
 }
 
-boolean_t
-vdev_obsolete_counts_are_precise(vdev_t *vd)
+/*
+ * Gets the obsolete count are precise spacemap object from the vdev's ZAP.
+ * On success are_precise will be set to reflect if the counts are precise.
+ * All other errors are returned to the caller.
+ */
+int
+vdev_obsolete_counts_are_precise(vdev_t *vd, boolean_t *are_precise)
 {
        ASSERT0(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
+
        if (vd->vdev_top_zap == 0) {
-               return (B_FALSE);
+               *are_precise = B_FALSE;
+               return (0);
        }
 
        uint64_t val = 0;
-       int err;
-       err = zap_lookup(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
+       int error = zap_lookup(vd->vdev_spa->spa_meta_objset, vd->vdev_top_zap,
            VDEV_TOP_ZAP_OBSOLETE_COUNTS_ARE_PRECISE, sizeof (val), 1, &val);
+       if (error == 0) {
+               *are_precise = (val != 0);
+       } else if (error == ENOENT) {
+               *are_precise = B_FALSE;
+               error = 0;
+       }
 
-       ASSERT(err == 0 || err == ENOENT);
-
-       return (val != 0);
+       return (error);
 }
 
 /* ARGSUSED */
index 9db6fe37b4db4346b25cf2a5ab8ded4070230471..4e4a6c4f5a250d0554e411860cf3fc7efd853dbd 100644 (file)
@@ -251,7 +251,9 @@ vdev_remove_initiate_sync(void *arg, dmu_tx_t *tx)
                VERIFY0(zap_add(spa->spa_meta_objset, vd->vdev_top_zap,
                    VDEV_TOP_ZAP_OBSOLETE_COUNTS_ARE_PRECISE, sizeof (one), 1,
                    &one, tx));
-               ASSERT3U(vdev_obsolete_counts_are_precise(vd), !=, 0);
+               ASSERTV(boolean_t are_precise);
+               ASSERT0(vdev_obsolete_counts_are_precise(vd, &are_precise));
+               ASSERT3B(are_precise, ==, B_TRUE);
        }
 
        vic->vic_mapping_object = vdev_indirect_mapping_alloc(mos, tx);
@@ -1563,15 +1565,20 @@ spa_vdev_remove_cancel_sync(void *arg, dmu_tx_t *tx)
        ASSERT3P(svr->svr_thread, ==, NULL);
 
        spa_feature_decr(spa, SPA_FEATURE_DEVICE_REMOVAL, tx);
-       if (vdev_obsolete_counts_are_precise(vd)) {
+
+       boolean_t are_precise;
+       VERIFY0(vdev_obsolete_counts_are_precise(vd, &are_precise));
+       if (are_precise) {
                spa_feature_decr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
                VERIFY0(zap_remove(spa->spa_meta_objset, vd->vdev_top_zap,
                    VDEV_TOP_ZAP_OBSOLETE_COUNTS_ARE_PRECISE, tx));
        }
 
-       if (vdev_obsolete_sm_object(vd) != 0) {
+       uint64_t obsolete_sm_object;
+       VERIFY0(vdev_obsolete_sm_object(vd, &obsolete_sm_object));
+       if (obsolete_sm_object != 0) {
                ASSERT(vd->vdev_obsolete_sm != NULL);
-               ASSERT3U(vdev_obsolete_sm_object(vd), ==,
+               ASSERT3U(obsolete_sm_object, ==,
                    space_map_object(vd->vdev_obsolete_sm));
 
                space_map_free(vd->vdev_obsolete_sm, tx);