granicus.if.org Git - zfs/commitdiff
Disable LBA weighting on files and SSDs
author: Richard Yao <ryao@gentoo.org>
Sat, 29 Aug 2015 16:01:07 +0000 (12:01 -0400)
committer: Brian Behlendorf <behlendorf1@llnl.gov>
Tue, 1 Sep 2015 22:22:07 +0000 (15:22 -0700)
The LBA weighting makes sense on rotational media, where the outer tracks
have twice the bandwidth of the inner tracks. However, it is
counterproductive on non-rotational media such as solid state disks, where
its only effect is to push metaslabs into the best-fit allocation behavior
sooner, which hurts performance. It also makes no sense on file-backed
vdevs, where the underlying filesystem can arrange things however it
wants.

Signed-off-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #3712

include/sys/vdev_impl.h
module/zfs/metaslab.c
module/zfs/vdev.c
module/zfs/vdev_disk.c
module/zfs/vdev_file.c

index 6b27e75ae54d6e4b7ce0f646626f9cc1c28b3ecf..1371a3f0391f17c1e3fd072bbdc67d40405040e0 100644 (file)
@@ -151,6 +151,7 @@ struct vdev {
        vdev_stat_t     vdev_stat;      /* virtual device statistics    */
        boolean_t       vdev_expanding; /* expand the vdev?             */
        boolean_t       vdev_reopening; /* reopen in progress?          */
+       boolean_t       vdev_nonrot;    /* true if solid state          */
        int             vdev_open_error; /* error on last open          */
        kthread_t       *vdev_open_thread; /* thread opening children   */
        uint64_t        vdev_crtxg;     /* txg when top-level was added */
index b328cbb0a106262e81a7b71444e6b6005fc18102..59bcefd346c0e41a0cb68cfad9f8879416fa7dae 100644 (file)
@@ -1518,7 +1518,7 @@ metaslab_weight(metaslab_t *msp)
         * In effect, this means that we'll select the metaslab with the most
         * free bandwidth rather than simply the one with the most free space.
         */
-       if (metaslab_lba_weighting_enabled) {
+       if (!vd->vdev_nonrot && metaslab_lba_weighting_enabled) {
                weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count;
                ASSERT(weight >= space && weight <= 2 * space);
        }
index 8e50ababc13e28bc4408184e47942129dd88c882..7aff5455b10b8a871c4869a3999146ae746333ce 100644 (file)
@@ -1108,6 +1108,7 @@ vdev_open_child(void *arg)
        vd->vdev_open_thread = curthread;
        vd->vdev_open_error = vdev_open(vd);
        vd->vdev_open_thread = NULL;
+       vd->vdev_parent->vdev_nonrot &= vd->vdev_nonrot;
 }
 
 static boolean_t
@@ -1134,15 +1135,19 @@ vdev_open_children(vdev_t *vd)
        int children = vd->vdev_children;
        int c;
 
+       vd->vdev_nonrot = B_TRUE;
+
        /*
         * in order to handle pools on top of zvols, do the opens
         * in a single thread so that the same thread holds the
         * spa_namespace_lock
         */
        if (vdev_uses_zvols(vd)) {
-               for (c = 0; c < children; c++)
+               for (c = 0; c < children; c++) {
                        vd->vdev_child[c]->vdev_open_error =
                            vdev_open(vd->vdev_child[c]);
+                       vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
+               }
                return;
        }
        tq = taskq_create("vdev_open", children, minclsyspri,
@@ -1153,6 +1158,9 @@ vdev_open_children(vdev_t *vd)
                    TQ_SLEEP) != 0);
 
        taskq_destroy(tq);
+
+       for (c = 0; c < children; c++)
+               vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
 }
 
 /*
index eb77c269c2480680c2762f72008f90f99790624c..380ede35b517cf640d7e3a6b4e9e299f02c4b8f3 100644 (file)
@@ -301,6 +301,9 @@ skip_open:
        /* Clear the nowritecache bit, causes vdev_reopen() to try again. */
        v->vdev_nowritecache = B_FALSE;
 
+       /* Record whether the underlying block device is non-rotational */
+       v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
+
        /* Physical volume size in bytes */
        *psize = bdev_capacity(vd->vd_bdev);
 
index e61240fdcc5481369331875c9ec730832a64eb72..a29ea7bf9515a2b5106417b4fd036e0cfc3fe616 100644 (file)
@@ -57,6 +57,9 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
        vattr_t vattr;
        int error;
 
+       /* Rotational optimizations only make sense on block devices */
+       vd->vdev_nonrot = B_TRUE;
+
        /*
         * We must have a pathname, and it must be absolute.
         */