From 0aa5916a30745c131c4e19464027ea793e66603d Mon Sep 17 00:00:00 2001
From: Matthew Ahrens
Date: Fri, 12 Oct 2018 11:28:26 -0700
Subject: [PATCH] OpenZFS 9847 - leaking dd_clones (DMU_OT_DSL_CLONES) objects
 (#7979)

OpenZFS 9847 - leaking dd_clones (DMU_OT_DSL_CLONES) objects

We're leaking the dd_clones objects in dsl_dir_destroy_sync. This bug
appears to have been around forever. Thankfully the amount of space
typically involved is tiny.

In addition this adds a mechanism in ZDB to find objects in the MOS
which are leaked (not referenced anywhere).

Porting notes:
* Added dd_crypto_obj to ZDB MOS object leak tracking

Authored by: Matthew Ahrens
Reviewed-by: George Wilson
Reviewed-by: Serapheim Dimitropoulos
Reviewed-by: Brian Behlendorf
Ported-by: Matthew Ahrens
OpenZFS-issue: https://illumos.org/issues/9847
Closes #7979
---
 cmd/zdb/zdb.c                      | 256 ++++++++++++++++++++++++++++-
 module/zfs/dsl_destroy.c           |   2 +
 module/zfs/vdev_indirect_mapping.c |   1 -
 3 files changed, 253 insertions(+), 6 deletions(-)

diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index a558f60c0..6072783c3 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -65,6 +65,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -108,8 +109,11 @@ static unsigned zopt_objects = 0;
 libzfs_handle_t *g_zfs;
 uint64_t max_inflight = 1000;
 static int leaked_objects = 0;
+static range_tree_t *mos_refd_objs;
 
 static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
+static void mos_obj_refd(uint64_t);
+static void mos_obj_refd_multiple(uint64_t);
 
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -1592,6 +1596,8 @@ dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
 	DO(CHILD_RSRV);
 	DO(REFRSRV);
 #undef DO
+	(void) printf("\t\tclones = %llu\n",
+	    (u_longlong_t)dd->dd_clones);
 }
 
 /*ARGSUSED*/
@@ -1774,6 +1780,33 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
 	}
 }
 
+static void
+bpobj_count_refd(bpobj_t *bpo)
+{
+	mos_obj_refd(bpo->bpo_object);
+
+	if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
+		mos_obj_refd(bpo->bpo_phys->bpo_subobjs);
+		for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
+			uint64_t subobj;
+			bpobj_t subbpo;
+			int error;
+			VERIFY0(dmu_read(bpo->bpo_os,
+			    bpo->bpo_phys->bpo_subobjs,
+			    i * sizeof (subobj), sizeof (subobj), &subobj, 0));
+			error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
+			if (error != 0) {
+				(void) printf("ERROR %u while trying to open "
+				    "subobj id %llu\n",
+				    error, (u_longlong_t)subobj);
+				continue;
+			}
+			bpobj_count_refd(&subbpo);
+			bpobj_close(&subbpo);
+		}
+	}
+}
+
 static void
 dump_deadlist(dsl_deadlist_t *dl)
 {
@@ -1782,6 +1815,23 @@ dump_deadlist(dsl_deadlist_t *dl)
 	char bytes[32];
 	char comp[32];
 	char uncomp[32];
+	uint64_t empty_bpobj =
+	    dmu_objset_spa(dl->dl_os)->spa_dsl_pool->dp_empty_bpobj;
+
+	/* force the tree to be loaded */
+	dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
+
+	if (dl->dl_oldfmt) {
+		if (dl->dl_bpobj.bpo_object != empty_bpobj)
+			bpobj_count_refd(&dl->dl_bpobj);
+	} else {
+		mos_obj_refd(dl->dl_object);
+		for (dle = avl_first(&dl->dl_tree); dle;
+		    dle = AVL_NEXT(&dl->dl_tree, dle)) {
+			if (dle->dle_bpobj.bpo_object != empty_bpobj)
+				bpobj_count_refd(&dle->dle_bpobj);
+		}
+	}
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
@@ -1807,9 +1857,6 @@ dump_deadlist(dsl_deadlist_t *dl)
 
 	(void) printf("\n");
 
-	/* force the tree to be loaded */
-	dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
-
 	for (dle = avl_first(&dl->dl_tree); dle;
 	    dle = AVL_NEXT(&dl->dl_tree, dle)) {
 		if (dump_opt['d'] >= 5) {
@@ -1824,7 +1871,6 @@ dump_deadlist(dsl_deadlist_t *dl)
 			(void) printf("mintxg %llu -> obj %llu\n",
 			    (longlong_t)dle->dle_mintxg,
 			    (longlong_t)dle->dle_bpobj.bpo_object);
-
 		}
 	}
 }
@@ -2322,6 +2368,36 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
 	dnode_rele(dn, FTAG);
 }
 
+static void
+count_dir_mos_objects(dsl_dir_t *dd)
+{
+	mos_obj_refd(dd->dd_object);
+	mos_obj_refd(dsl_dir_phys(dd)->dd_child_dir_zapobj);
+	mos_obj_refd(dsl_dir_phys(dd)->dd_deleg_zapobj);
+	mos_obj_refd(dsl_dir_phys(dd)->dd_props_zapobj);
+	mos_obj_refd(dsl_dir_phys(dd)->dd_clones);
+
+	/*
+	 * The dd_crypto_obj can be referenced by multiple dsl_dir's.
+	 * Ignore the references after the first one.
+	 */
+	mos_obj_refd_multiple(dd->dd_crypto_obj);
+}
+
+static void
+count_ds_mos_objects(dsl_dataset_t *ds)
+{
+	mos_obj_refd(ds->ds_object);
+	mos_obj_refd(dsl_dataset_phys(ds)->ds_next_clones_obj);
+	mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj);
+	mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj);
+	mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
+
+	if (!dsl_dataset_is_snapshot(ds)) {
+		count_dir_mos_objects(ds->ds_dir);
+	}
+}
+
 static const char *objset_types[DMU_OST_NUMTYPES] = {
 	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
 
@@ -2401,6 +2477,7 @@ dump_dir(objset_t *os)
 			(void) printf("ds_remap_deadlist:\n");
 			dump_deadlist(&ds->ds_remap_deadlist);
 		}
+		count_ds_mos_objects(ds);
 	}
 
 	if (verbosity < 2)
@@ -5055,6 +5132,170 @@ verify_checkpoint(spa_t *spa)
 	return (error);
 }
 
+/* ARGSUSED */
+static void
+mos_leaks_cb(void *arg, uint64_t start, uint64_t size)
+{
+	for (uint64_t i = start; i < size; i++) {
+		(void) printf("MOS object %llu referenced but not allocated\n",
+		    (u_longlong_t)i);
+	}
+}
+
+static void
+mos_obj_refd(uint64_t obj)
+{
+	if (obj != 0 && mos_refd_objs != NULL)
+		range_tree_add(mos_refd_objs, obj, 1);
+}
+
+/*
+ * Call on a MOS object that may already have been referenced.
+ */
+static void
+mos_obj_refd_multiple(uint64_t obj)
+{
+	if (obj != 0 && mos_refd_objs != NULL &&
+	    !range_tree_contains(mos_refd_objs, obj, 1))
+		range_tree_add(mos_refd_objs, obj, 1);
+}
+
+static void
+mos_leak_vdev(vdev_t *vd)
+{
+	mos_obj_refd(vd->vdev_dtl_object);
+	mos_obj_refd(vd->vdev_ms_array);
+	mos_obj_refd(vd->vdev_top_zap);
+	mos_obj_refd(vd->vdev_indirect_config.vic_births_object);
+	mos_obj_refd(vd->vdev_indirect_config.vic_mapping_object);
+	mos_obj_refd(vd->vdev_leaf_zap);
+	if (vd->vdev_checkpoint_sm != NULL)
+		mos_obj_refd(vd->vdev_checkpoint_sm->sm_object);
+	if (vd->vdev_indirect_mapping != NULL) {
+		mos_obj_refd(vd->vdev_indirect_mapping->
+		    vim_phys->vimp_counts_object);
+	}
+	if (vd->vdev_obsolete_sm != NULL)
+		mos_obj_refd(vd->vdev_obsolete_sm->sm_object);
+
+	for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
+		metaslab_t *ms = vd->vdev_ms[m];
+		mos_obj_refd(space_map_object(ms->ms_sm));
+	}
+
+	for (uint64_t c = 0; c < vd->vdev_children; c++) {
+		mos_leak_vdev(vd->vdev_child[c]);
+	}
+}
+
+static int
+dump_mos_leaks(spa_t *spa)
+{
+	int rv = 0;
+	objset_t *mos = spa->spa_meta_objset;
+	dsl_pool_t *dp = spa->spa_dsl_pool;
+
+	/* Visit and mark all referenced objects in the MOS */
+
+	mos_obj_refd(DMU_POOL_DIRECTORY_OBJECT);
+	mos_obj_refd(spa->spa_pool_props_object);
+	mos_obj_refd(spa->spa_config_object);
+	mos_obj_refd(spa->spa_ddt_stat_object);
+	mos_obj_refd(spa->spa_feat_desc_obj);
+	mos_obj_refd(spa->spa_feat_enabled_txg_obj);
+	mos_obj_refd(spa->spa_feat_for_read_obj);
+	mos_obj_refd(spa->spa_feat_for_write_obj);
+	mos_obj_refd(spa->spa_history);
+	mos_obj_refd(spa->spa_errlog_last);
+	mos_obj_refd(spa->spa_errlog_scrub);
+	mos_obj_refd(spa->spa_all_vdev_zaps);
+	mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj);
+	mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj);
+	mos_obj_refd(spa->spa_dsl_pool->dp_scan->scn_phys.scn_queue_obj);
+	bpobj_count_refd(&spa->spa_deferred_bpobj);
+	mos_obj_refd(dp->dp_empty_bpobj);
+	bpobj_count_refd(&dp->dp_obsolete_bpobj);
+	bpobj_count_refd(&dp->dp_free_bpobj);
+	mos_obj_refd(spa->spa_l2cache.sav_object);
+	mos_obj_refd(spa->spa_spares.sav_object);
+
+	mos_obj_refd(spa->spa_condensing_indirect_phys.
+	    scip_next_mapping_object);
+	mos_obj_refd(spa->spa_condensing_indirect_phys.
+	    scip_prev_obsolete_sm_object);
+	if (spa->spa_condensing_indirect_phys.scip_next_mapping_object != 0) {
+		vdev_indirect_mapping_t *vim =
+		    vdev_indirect_mapping_open(mos,
+		    spa->spa_condensing_indirect_phys.scip_next_mapping_object);
+		mos_obj_refd(vim->vim_phys->vimp_counts_object);
+		vdev_indirect_mapping_close(vim);
+	}
+
+	if (dp->dp_origin_snap != NULL) {
+		dsl_dataset_t *ds;
+
+		dsl_pool_config_enter(dp, FTAG);
+		VERIFY0(dsl_dataset_hold_obj(dp,
+		    dsl_dataset_phys(dp->dp_origin_snap)->ds_next_snap_obj,
+		    FTAG, &ds));
+		count_ds_mos_objects(ds);
+		dump_deadlist(&ds->ds_deadlist);
+		dsl_dataset_rele(ds, FTAG);
+		dsl_pool_config_exit(dp, FTAG);
+
+		count_ds_mos_objects(dp->dp_origin_snap);
+		dump_deadlist(&dp->dp_origin_snap->ds_deadlist);
+	}
+	count_dir_mos_objects(dp->dp_mos_dir);
+	if (dp->dp_free_dir != NULL)
+		count_dir_mos_objects(dp->dp_free_dir);
+	if (dp->dp_leak_dir != NULL)
+		count_dir_mos_objects(dp->dp_leak_dir);
+
+	mos_leak_vdev(spa->spa_root_vdev);
+
+	for (uint64_t class = 0; class < DDT_CLASSES; class++) {
+		for (uint64_t type = 0; type < DDT_TYPES; type++) {
+			for (uint64_t cksum = 0;
+			    cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
+				ddt_t *ddt = spa->spa_ddt[cksum];
+				mos_obj_refd(ddt->ddt_object[type][class]);
+			}
+		}
+	}
+
+	/*
+	 * Visit all allocated objects and make sure they are referenced.
+	 */
+	uint64_t object = 0;
+	while (dmu_object_next(mos, &object, B_FALSE, 0) == 0) {
+		if (range_tree_contains(mos_refd_objs, object, 1)) {
+			range_tree_remove(mos_refd_objs, object, 1);
+		} else {
+			dmu_object_info_t doi;
+			const char *name;
+			dmu_object_info(mos, object, &doi);
+			if (doi.doi_type & DMU_OT_NEWTYPE) {
+				dmu_object_byteswap_t bswap =
+				    DMU_OT_BYTESWAP(doi.doi_type);
+				name = dmu_ot_byteswap[bswap].ob_name;
+			} else {
+				name = dmu_ot[doi.doi_type].ot_name;
+			}
+
+			(void) printf("MOS object %llu (%s) leaked\n",
+			    (u_longlong_t)object, name);
+			rv = 2;
+		}
+	}
+	(void) range_tree_walk(mos_refd_objs, mos_leaks_cb, NULL);
+	if (!range_tree_is_empty(mos_refd_objs))
+		rv = 2;
+	range_tree_vacate(mos_refd_objs, NULL, NULL);
+	range_tree_destroy(mos_refd_objs);
+	return (rv);
+}
+
 static void
 dump_zpool(spa_t *spa)
 {
@@ -5087,8 +5328,9 @@ dump_zpool(spa_t *spa)
 
 	if (dump_opt['d'] || dump_opt['i']) {
 		spa_feature_t f;
-
+		mos_refd_objs = range_tree_create(NULL, NULL);
 		dump_dir(dp->dp_meta_objset);
+
 		if (dump_opt['d'] >= 3) {
 			dsl_pool_t *dp = spa->spa_dsl_pool;
 			dump_full_bpobj(&spa->spa_deferred_bpobj,
@@ -5115,6 +5357,9 @@ dump_zpool(spa_t *spa)
 		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
 		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
 
+		if (rc == 0 && !dump_opt['L'])
+			rc = dump_mos_leaks(spa);
+
 		for (f = 0; f < SPA_FEATURES; f++) {
 			uint64_t refcount;
 
@@ -5146,6 +5391,7 @@ dump_zpool(spa_t *spa)
 			rc = verify_device_removal_feature_counts(spa);
 		}
 	}
+
 	if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
 		rc = dump_block_stats(spa);
 
diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c
index daa583040..b80d34672 100644
--- a/module/zfs/dsl_destroy.c
+++ b/module/zfs/dsl_destroy.c
@@ -823,6 +823,8 @@ dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
 
 	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
 	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
+	if (dsl_dir_phys(dd)->dd_clones != 0)
+		VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_clones, tx));
 	VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
 	VERIFY0(zap_remove(mos,
 	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
diff --git a/module/zfs/vdev_indirect_mapping.c b/module/zfs/vdev_indirect_mapping.c
index a2766bd0d..c02a4f5a4 100644
--- a/module/zfs/vdev_indirect_mapping.c
+++ b/module/zfs/vdev_indirect_mapping.c
@@ -282,7 +282,6 @@ vdev_indirect_mapping_entry_for_offset_or_next(vdev_indirect_mapping_t *vim,
 	    B_TRUE));
 }
 
-
 void
 vdev_indirect_mapping_close(vdev_indirect_mapping_t *vim)
 {
-- 
2.40.0
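
Editor's note (not part of the patch): the ZDB mechanism added above is essentially a
mark-and-sweep check. dump_mos_leaks() marks every object number reachable from the
pool's metadata in a range tree, then walks every allocated MOS object; anything
allocated but never marked is reported as leaked, and anything marked but not allocated
is reported as referenced-but-unallocated. The standalone C sketch below reproduces only
that two-phase logic with plain arrays; MAX_OBJ, the referenced/allocated arrays,
mark_referenced(), and the example object numbers are all hypothetical stand-ins for the
real range_tree_t and dmu_object_next() interfaces used in the patch.

/*
 * Simplified, self-contained sketch of the mark-and-sweep leak check.
 * Everything here is illustrative; zdb itself tracks references in a
 * range_tree and walks the MOS with dmu_object_next().
 */
#include <stdio.h>
#include <stdint.h>

#define	MAX_OBJ	64

static int referenced[MAX_OBJ];	/* marked during the "mark" phase */
static int allocated[MAX_OBJ];	/* what the object set actually holds */

static void
mark_referenced(uint64_t obj)
{
	/* Object number 0 means "no object", same convention as mos_obj_refd(). */
	if (obj != 0 && obj < MAX_OBJ)
		referenced[obj] = 1;
}

int
main(void)
{
	/* Pretend objects 1-5 are allocated, but only 1-4 are referenced. */
	for (uint64_t obj = 1; obj <= 5; obj++)
		allocated[obj] = 1;
	for (uint64_t obj = 1; obj <= 4; obj++)
		mark_referenced(obj);
	mark_referenced(9);	/* referenced but never allocated */

	int rv = 0;
	for (uint64_t obj = 1; obj < MAX_OBJ; obj++) {
		if (allocated[obj] && !referenced[obj]) {
			(void) printf("object %llu leaked\n",
			    (unsigned long long)obj);
			rv = 2;
		}
		if (referenced[obj] && !allocated[obj]) {
			(void) printf("object %llu referenced but not "
			    "allocated\n", (unsigned long long)obj);
			rv = 2;
		}
	}
	return (rv);
}

Returning 2 when an inconsistency is found mirrors what dump_mos_leaks() does with its
rv variable in the patch; a clean pool yields no output and a zero exit status.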