]> granicus.if.org Git - zfs/commitdiff
Fix zdb -c traverse stop on damaged objset root
author Chunwei Chen <david.chen@nutanix.com>
Tue, 30 Jan 2018 21:39:11 +0000 (13:39 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Fri, 9 Feb 2018 18:05:25 +0000 (10:05 -0800)
If corruption occurs in the root block of an objset, zdb -c will not
correctly report the error, and it will not traverse the datasets that
come after it. This is because traverse_visitbp, which invokes the
callback and resets the error for TRAVERSE_HARD, is skipped when
traversing the ZIL fails in traverse_impl.

Here's an example of what the output of the 'zdb -eLcc' command looks
like on a pool with a damaged objset root:

== before patch:

Traversing all blocks to verify checksums ...

Error counts:

errno  count
block traversal size 379392 != alloc 33987072 (unreachable 33607680)

bp count:             172
ganged count:           0
bp logical:       1678336      avg:   9757
bp physical:       130560      avg:    759     compression:  12.85
bp allocated:      379392      avg:   2205     compression:   4.42
bp deduped:             0    ref>1:      0   deduplication:   1.00
SPA allocated:   33987072     used:  0.80%

additional, non-pointer bps of type 0:         71
Dittoed blocks on same vdev: 101

== after patch:

Traversing all blocks to verify checksums ...

zdb_blkptr_cb: Got error 52 reading <54, 0, -1, 0>  -- skipping

Error counts:

errno  count
   52  1
block traversal size 33963520 != alloc 33987072 (unreachable 23552)

bp count:             447
ganged count:           0
bp logical:      36093440      avg:  80745
bp physical:     33699840      avg:  75391     compression:   1.07
bp allocated:    33963520      avg:  75981     compression:   1.06
bp deduped:             0    ref>1:      0   deduplication:   1.00
SPA allocated:   33987072     used:  0.80%

additional, non-pointer bps of type 0:         76
Dittoed blocks on same vdev: 115

==

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Signed-off-by: Chunwei Chen <david.chen@nutanix.com>
Closes #7099

module/zfs/dmu_traverse.c

index 280e0ee347e9c71881bdf4a6e15f9ce1c0a5d7d9..15d29198fb7e6ada7b722e9ede6a2c186e68ec34 100644 (file)
@@ -634,12 +634,20 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
 
                err = arc_read(NULL, td->td_spa, rootbp, arc_getbuf_func,
                    &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, czb);
-               if (err != 0)
-                       return (err);
-
-               osp = buf->b_data;
-               traverse_zil(td, &osp->os_zil_header);
-               arc_buf_destroy(buf, &buf);
+               if (err != 0) {
+                       /*
+                        * If both TRAVERSE_HARD and TRAVERSE_PRE are set,
+                        * continue to visitbp so that td_func can be called
+                        * in pre stage, and err will reset to zero.
+                        */
+                       if (!(td->td_flags & TRAVERSE_HARD) ||
+                           !(td->td_flags & TRAVERSE_PRE))
+                               return (err);
+               } else {
+                       osp = buf->b_data;
+                       traverse_zil(td, &osp->os_zil_header);
+                       arc_buf_destroy(buf, &buf);
+               }
        }
 
        if (!(flags & TRAVERSE_PREFETCH_DATA) ||