Illumos 4754, 4755

Author:    George Wilson <george.wilson@delphix.com>
           Fri, 18 Apr 2014 16:35:03 +0000 (08:35 -0800)
Committer: Brian Behlendorf <behlendorf1@llnl.gov>
           Wed, 30 Jul 2014 17:30:05 +0000 (10:30 -0700)

diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h

index 3e9f32e1c6d5cb91af962b696dfdae6bbe5e71d2..3cd27d75e3e93b059de8b4564228599696f7295a 100644 (file)
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -24,7 +24,7 @@
   */
  
  /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
   */
  
  #ifndef _SYS_METASLAB_IMPL_H
@@ -58,7 +58,6 @@ struct metaslab_group {
         kmutex_t                mg_lock;
         avl_tree_t              mg_metaslab_tree;
         uint64_t                mg_aliquot;
-       uint64_t                mg_alloc_failures;
         boolean_t               mg_allocatable;         /* can we allocate? */
         uint64_t                mg_free_capacity;       /* percentage free */
         int64_t                 mg_bias;
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c

index 06d7f7cd203c4dd3b9443751402410fb685a24cd..2dfdafb3ab8c25dae7a37ba9da3b1ac7fd9b6caa 100644 (file)
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -40,7 +40,7 @@
   * avoid having to load lots of space_maps in a given txg. There are,
   * however, some cases where we want to avoid "fast" ganging and instead
   * we want to do an exhaustive search of all metaslabs on this device.
- * Currently we don't allow any gang, zil, or dump device related allocations
+ * Currently we don't allow any gang, slog, or dump device related allocations
   * to "fast" gang.
   */
  #define        CAN_FASTGANG(flags) \
@@ -63,14 +63,6 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1;  /* force gang blocks */
   */
  int zfs_condense_pct = 200;
  
-/*
- * This value defines the number of allowed allocation failures per vdev.
- * If a device reaches this threshold in a given txg then we consider skipping
- * allocations on that device. The value of zfs_mg_alloc_failures is computed
- * in zio_init() unless it has been overridden in /etc/system.
- */
-int zfs_mg_alloc_failures = 0;
-
  /*
   * The zfs_mg_noalloc_threshold defines which metaslab groups should
   * be eligible for allocation. The value is defined as a percentage of
@@ -1660,10 +1652,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
  void
  metaslab_sync_reassess(metaslab_group_t *mg)
  {
-       int64_t failures = mg->mg_alloc_failures;
-
         metaslab_group_alloc_update(mg);
-       atomic_add_64(&mg->mg_alloc_failures, -failures);
  
         /*
          * Preload the next potential metaslabs
@@ -1690,7 +1679,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
  
  static uint64_t
  metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
-    uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
+    uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
  {
         spa_t *spa = mg->mg_vd->vdev_spa;
         metaslab_t *msp = NULL;
@@ -1717,10 +1706,9 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
                                 spa_dbgmsg(spa, "%s: failed to meet weight "
                                     "requirement: vdev %llu, txg %llu, mg %p, "
                                     "msp %p, psize %llu, asize %llu, "
-                                   "failures %llu, weight %llu",
-                                   spa_name(spa), mg->mg_vd->vdev_id, txg,
-                                   mg, msp, psize, asize,
-                                   mg->mg_alloc_failures, msp->ms_weight);
+                                   "weight %llu", spa_name(spa),
+                                   mg->mg_vd->vdev_id, txg,
+                                   mg, msp, psize, asize, msp->ms_weight);
                                 mutex_exit(&mg->mg_lock);
                                 return (-1ULL);
                         }
@@ -1752,27 +1740,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
  
                 mutex_enter(&msp->ms_lock);
  
-               /*
-                * If we've already reached the allowable number of failed
-                * allocation attempts on this metaslab group then we
-                * consider skipping it. We skip it only if we're allowed
-                * to "fast" gang, the physical size is larger than
-                * a gang block, and we're attempting to allocate from
-                * the primary metaslab.
-                */
-               if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
-                   CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
-                   activation_weight == METASLAB_WEIGHT_PRIMARY) {
-                       spa_dbgmsg(spa, "%s: skipping metaslab group: "
-                           "vdev %llu, txg %llu, mg %p, msp[%llu] %p, "
-                           "psize %llu, asize %llu, failures %llu",
-                           spa_name(spa), mg->mg_vd->vdev_id, txg, mg,
-                           msp->ms_id, msp, psize, asize,
-                           mg->mg_alloc_failures);
-                       mutex_exit(&msp->ms_lock);
-                       return (-1ULL);
-               }
-
                 /*
                  * Ensure that the metaslab we have selected is still
                  * capable of handling our request. It's possible that
@@ -1812,8 +1779,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
                 if ((offset = metaslab_block_alloc(msp, asize)) != -1ULL)
                         break;
  
-               atomic_inc_64(&mg->mg_alloc_failures);
-
                 metaslab_passivate(msp, metaslab_block_maxsize(msp));
                 mutex_exit(&msp->ms_lock);
         }
@@ -1980,7 +1945,7 @@ top:
                 ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
  
                 offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
-                   dva, d, flags);
+                   dva, d);
                 if (offset != -1ULL) {
                         /*
                          * If we've just selected this metaslab group,
diff --git a/module/zfs/zio.c b/module/zfs/zio.c

index 7e440a381c1a6b0216f8ee1862398e238771a801..6352ab3a3fe27b3231a8d897b0abafcaef9918ba 100644 (file)
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -60,8 +60,6 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
  int zio_bulk_flags = 0;
  int zio_delay_max = ZIO_DELAY_MAX;
  
-extern int zfs_mg_alloc_failures;
-
  /*
   * The following actions directly effect the spa's sync-to-convergence logic.
   * The values below define the sync pass when we start performing the action.
@@ -193,13 +191,6 @@ zio_init(void)
                         zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
         }
  
-       /*
-        * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
-        * to fail 3 times per txg or 8 failures, whichever is greater.
-        */
-       if (zfs_mg_alloc_failures == 0)
-               zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
-
         zio_inject_init();
  
         lz4_init();
author	George Wilson <george.wilson@delphix.com>
	Fri, 18 Apr 2014 16:35:03 +0000 (08:35 -0800)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Wed, 30 Jul 2014 17:30:05 +0000 (10:30 -0700)
include/sys/metaslab_impl.h		patch \| blob \| history
module/zfs/metaslab.c		patch \| blob \| history
module/zfs/zio.c		patch \| blob \| history