granicus.if.org Git - zfs/commitdiff
Enable Linux read-ahead for a single page on ZVOLs
author Richard Yao <ryao@gentoo.org>
Fri, 11 Jul 2014 18:35:58 +0000 (14:35 -0400)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 4 May 2017 22:00:27 +0000 (18:00 -0400)
Linux has read-ahead logic designed to accelerate sequential workloads.
ZFS has its own read-ahead logic called zprefetch that operates on both
ZVOLs and datasets. Having two prefetchers active at the same time can
cause overprefetching, which unnecessarily reduces IOPS performance on
CoW filesystems like ZFS.

Testing shows that entirely disabling the Linux prefetch results in
a significant performance penalty for reads while commensurate benefits
are seen in random writes. It appears that read-ahead benefits are
inversely proportional to random write benefits, and so a single page
of Linux-layer read-ahead appears to offer the middle ground for both
workloads.

Reviewed-by: Chunwei Chen <david.chen@osnexus.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Issue #5902

config/kernel-blk-queue-bdi.m4 [new file with mode: 0644]
config/kernel.m4
include/linux/blkdev_compat.h
module/zfs/zvol.c

diff --git a/config/kernel-blk-queue-bdi.m4 b/config/kernel-blk-queue-bdi.m4
new file mode 100644 (file)
index 0000000..8164711
--- /dev/null
@@ -0,0 +1,20 @@
+dnl #
+dnl # 2.6.32 - 4.11, statically allocated bdi embedded in request_queue
+dnl # 4.12 - x.y, dynamically allocated bdi referenced by pointer
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [
+       AC_MSG_CHECKING([whether blk_queue bdi is dynamic])
+       ZFS_LINUX_TRY_COMPILE([
+               #include <linux/blkdev.h>
+       ],[
+               struct request_queue q;
+               struct backing_dev_info bdi;
+               q.backing_dev_info = &bdi;
+       ],[
+               AC_MSG_RESULT(yes)
+               AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1,
+                   [blk queue backing_dev_info is dynamic])
+       ],[
+               AC_MSG_RESULT(no)
+       ])
+])
index 638d9e143bba8bae23cac6a1d05209c77f7b88c8..57dad7b3aa88837f1318347b23f0fffc8987ad7c 100644 (file)
@@ -32,6 +32,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
        ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
        ZFS_AC_KERNEL_BIO_RW_BARRIER
        ZFS_AC_KERNEL_BIO_RW_DISCARD
+       ZFS_AC_KERNEL_BLK_QUEUE_BDI
        ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
        ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
        ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
index 15824c0b85fede18613143d0f4e95713055f88a0..822e964a71377e8cc943c464c9aa0a0ce96ddb54 100644 (file)
@@ -31,6 +31,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
+#include <linux/backing-dev.h>
 
 #ifndef HAVE_FMODE_T
 typedef unsigned __bitwise__ fmode_t;
@@ -128,6 +129,16 @@ __blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
 }
 #endif
 
+static inline void
+blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
+{      /* Store ra_pages as the read-ahead limit in q's backing_dev_info. */
+#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
+       q->backing_dev_info->ra_pages = ra_pages; /* member is a pointer */
+#else
+       q->backing_dev_info.ra_pages = ra_pages; /* member is embedded */
+#endif
+}
+
 #ifndef HAVE_GET_DISK_RO
 static inline int
 get_disk_ro(struct gendisk *disk)
index d0f7b9912b31f6737108f3af6c4597cf02093faa..fef1d299a0c15068aaaabd43de2c45c8864270ac 100644 (file)
@@ -1468,6 +1468,9 @@ zvol_alloc(dev_t dev, const char *name)
        blk_queue_make_request(zv->zv_queue, zvol_request);
        blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE);
 
+       /* Limit read-ahead to a single page to prevent over-prefetching. */
+       blk_queue_set_read_ahead(zv->zv_queue, 1);
+
        /* Disable write merging in favor of the ZIO pipeline. */
        queue_flag_set(QUEUE_FLAG_NOMERGES, zv->zv_queue);