]> granicus.if.org Git - zfs/commitdiff
Illumos 5347 - idle pool may run itself out of space
authorMatthew Ahrens <mahrens@delphix.com>
Sat, 11 Jul 2015 00:19:41 +0000 (02:19 +0200)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 14 Jul 2015 17:35:21 +0000 (10:35 -0700)
5347 idle pool may run itself out of space
Reviewed by: Alex Reece <alex.reece@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Steven Hartland <killing@multiplay.co.uk>
Reviewed by: Richard Elling <richard.elling@richardelling.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://github.com/illumos/illumos-gate/commit/231aab8
  https://github.com/illumos/illumos-gate/commit/4a92375 3642
  https://www.illumos.org/issues/5347
  https://github.com/zfsonlinux/zfs/commit/89b1cd6 (partial commit & fix)
  https://github.com/zfsonlinux/zfs/commit/fbeddd6 Illumos 4390
  https://github.com/zfsonlinux/zfs/commit/2696dfa Illumos 3642, 3643

Porting notes:
This is completing the partial fix from FreeBSD

Ported-by: kernelOfTruth kerneloftruth@gmail.com
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3586

include/sys/uberblock.h
module/zfs/dsl_scan.c
module/zfs/spa.c
module/zfs/uberblock.c

index b5bb91573145273eeb6c22cf2896690aa119b0f8..21e7ae0de7a7cfa76c63d8985a0b8be682f4f1da 100644 (file)
@@ -22,6 +22,9 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
 
 #ifndef _SYS_UBERBLOCK_H
 #define        _SYS_UBERBLOCK_H
@@ -36,8 +39,8 @@ extern "C" {
 
 typedef struct uberblock uberblock_t;
 
-extern int uberblock_verify(uberblock_t *ub);
-extern int uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg);
+extern int uberblock_verify(uberblock_t *);
+extern boolean_t uberblock_update(uberblock_t *, vdev_t *, uint64_t);
 
 #ifdef __cplusplus
 }
index 0489359710b8be72e19b1c7fb934347b11ebcdee..b989e763386b2759420841a4eb637340307e812e 100644 (file)
@@ -1528,11 +1528,15 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
                            dp->dp_bptree_obj, tx));
                        dp->dp_bptree_obj = 0;
                        scn->scn_async_destroying = B_FALSE;
+                       scn->scn_async_stalled = B_FALSE;
                } else {
                        /*
-                        * If we didn't make progress, mark the async destroy as
-                        * stalled, so that we will not initiate a spa_sync() on
-                        * its behalf.
+                        * If we didn't make progress, mark the async
+                        * destroy as stalled, so that we will not initiate
+                        * a spa_sync() on its behalf.  Note that we only
+                        * check this if we are not finished, because if the
+                        * bptree had no blocks for us to visit, we can
+                        * finish without "making progress".
                         */
                        scn->scn_async_stalled =
                            (scn->scn_visited_this_txg == 0);
index b5e024c3f2911b069b3c1b46bb5e16f13662576c..d8eaf9979fafefd53eb6f868c2d43fcd207cd476 100644 (file)
@@ -6393,21 +6393,6 @@ spa_sync(spa_t *spa, uint64_t txg)
                }
        }
 
-       /*
-        * If anything has changed in this txg, or if someone is waiting
-        * for this txg to sync (eg, spa_vdev_remove()), push the
-        * deferred frees from the previous txg.  If not, leave them
-        * alone so that we don't generate work on an otherwise idle
-        * system.
-        */
-       if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
-           !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
-           !txg_list_empty(&dp->dp_sync_tasks, txg) ||
-           ((dsl_scan_active(dp->dp_scan) ||
-           txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
-               spa_sync_deferred_frees(spa, tx);
-       }
-
        /*
         * Iterate to convergence.
         */
@@ -6425,6 +6410,11 @@ spa_sync(spa_t *spa, uint64_t txg)
                if (pass < zfs_sync_pass_deferred_free) {
                        spa_sync_frees(spa, free_bpl, tx);
                } else {
+                       /*
+                        * We can not defer frees in pass 1, because
+                        * we sync the deferred frees later in pass 1.
+                        */
+                       ASSERT3U(pass, >, 1);
                        bplist_iterate(free_bpl, bpobj_enqueue_cb,
                            &spa->spa_deferred_bpobj, tx);
                }
@@ -6435,8 +6425,37 @@ spa_sync(spa_t *spa, uint64_t txg)
                while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)))
                        vdev_sync(vd, txg);
 
-               if (pass == 1)
+               if (pass == 1) {
                        spa_sync_upgrades(spa, tx);
+                       ASSERT3U(txg, >=,
+                           spa->spa_uberblock.ub_rootbp.blk_birth);
+                       /*
+                        * Note: We need to check if the MOS is dirty
+                        * because we could have marked the MOS dirty
+                        * without updating the uberblock (e.g. if we
+                        * have sync tasks but no dirty user data).  We
+                        * need to check the uberblock's rootbp because
+                        * it is updated if we have synced out dirty
+                        * data (though in this case the MOS will most
+                        * likely also be dirty due to second order
+                        * effects, we don't want to rely on that here).
+                        */
+                       if (spa->spa_uberblock.ub_rootbp.blk_birth < txg &&
+                           !dmu_objset_is_dirty(mos, txg)) {
+                               /*
+                                * Nothing changed on the first pass,
+                                * therefore this TXG is a no-op.  Avoid
+                                * syncing deferred frees, so that we
+                                * can keep this TXG as a no-op.
+                                */
+                               ASSERT(txg_list_empty(&dp->dp_dirty_datasets,
+                                   txg));
+                               ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
+                               ASSERT(txg_list_empty(&dp->dp_sync_tasks, txg));
+                               break;
+                       }
+                       spa_sync_deferred_frees(spa, tx);
+               }
 
        } while (dmu_objset_is_dirty(mos, txg));
 
index a07dc00ae19a84ee787445176e7c5e38ac13a452..f8bdecdf57497c7e797bc71b2f21c7185fab1e49 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -40,10 +40,10 @@ uberblock_verify(uberblock_t *ub)
 }
 
 /*
- * Update the uberblock and return a boolean value indicating whether
- * anything changed in this transaction group.
+ * Update the uberblock and return TRUE if anything changed in this
+ * transaction group.
  */
-int
+boolean_t
 uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg)
 {
        ASSERT(ub->ub_txg < txg);