granicus.if.org Git - zfs/commitdiff
Add l2arc_max_block_size tunable
author Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 10 Feb 2016 18:42:01 +0000 (10:42 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 25 Feb 2016 17:44:00 +0000 (09:44 -0800)
Set a limit for the largest compressed block which can be written
to an L2ARC device.  By default this limit is set to 16M so there
is no change in behavior.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Elling <Richard.Elling@RichardElling.com>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes #4323

man/man5/zfs-module-parameters.5
module/zfs/arc.c

index f801f257b566dad6f2d3ec25e9b02259d8e76b2d..6cba7f02c2b9eec3acbaed4bbf78d811628df6ec 100644 (file)
@@ -79,6 +79,20 @@ Compressed l2arc_headroom multiplier
 Default value: \fB200\fR.
 .RE
 
+.sp
+.ne 2
+.na
+\fBl2arc_max_block_size\fR (ulong)
+.ad
+.RS 12n
+The maximum block size which may be written to an L2ARC device, after
+compression and other factors.  This setting is used to prevent a small
+number of large blocks from pushing a larger number of small blocks out
+of the cache.
+.sp
+Default value: \fB16,777,216\fR.
+.RE
+
 .sp
 .ne 2
 .na
index 4d7bb8f8f2b6d14936e05dab26f84dcf195ca6ef..faed67aa4ae5481fe3a9c51db6356705eb657dfc 100644 (file)
@@ -446,6 +446,7 @@ typedef struct arc_stats {
        kstat_named_t arcstat_l2_writes_done;
        kstat_named_t arcstat_l2_writes_error;
        kstat_named_t arcstat_l2_writes_lock_retry;
+       kstat_named_t arcstat_l2_writes_skip_toobig;
        kstat_named_t arcstat_l2_evict_lock_retry;
        kstat_named_t arcstat_l2_evict_reading;
        kstat_named_t arcstat_l2_evict_l1cached;
@@ -542,6 +543,7 @@ static arc_stats_t arc_stats = {
        { "l2_writes_done",             KSTAT_DATA_UINT64 },
        { "l2_writes_error",            KSTAT_DATA_UINT64 },
        { "l2_writes_lock_retry",       KSTAT_DATA_UINT64 },
+       { "l2_writes_skip_toobig",      KSTAT_DATA_UINT64 },
        { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
        { "l2_evict_reading",           KSTAT_DATA_UINT64 },
        { "l2_evict_l1cached",          KSTAT_DATA_UINT64 },
@@ -726,6 +728,8 @@ uint64_t zfs_crc64_table[256];
 
 #define        L2ARC_WRITE_SIZE        (8 * 1024 * 1024)       /* initial write max */
 #define        L2ARC_HEADROOM          2                       /* num of writes */
+#define        L2ARC_MAX_BLOCK_SIZE    (16 * 1024 * 1024)      /* max compress size */
+
 /*
  * If we discover during ARC scan any buffers to be compressed, we boost
  * our headroom for the next scanning cycle by this percentage multiple.
@@ -734,6 +738,7 @@ uint64_t zfs_crc64_table[256];
 #define        L2ARC_FEED_SECS         1               /* caching interval secs */
 #define        L2ARC_FEED_MIN_MS       200             /* min caching interval ms */
 
+
 /*
  * Used to distinguish headers that are being processed by
  * l2arc_write_buffers(), but have yet to be assigned to a l2arc disk
@@ -752,6 +757,7 @@ unsigned long l2arc_write_max = L2ARC_WRITE_SIZE;   /* def max write size */
 unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE;    /* extra warmup write */
 unsigned long l2arc_headroom = L2ARC_HEADROOM;         /* # of dev writes */
 unsigned long l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
+unsigned long l2arc_max_block_size = L2ARC_MAX_BLOCK_SIZE;
 unsigned long l2arc_feed_secs = L2ARC_FEED_SECS;       /* interval seconds */
 unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS;   /* min interval msecs */
 int l2arc_noprefetch = B_TRUE;                 /* don't cache prefetch bufs */
@@ -6020,7 +6026,20 @@ top:
                 */
                l2arc_release_cdata_buf(hdr);
 
-               if (zio->io_error != 0) {
+               /*
+                * Skipped - drop L2ARC entry and mark the header as no
+                * longer L2 eligible.
+                */
+               if (hdr->b_l2hdr.b_daddr == L2ARC_ADDR_UNSET) {
+                       list_remove(buflist, hdr);
+                       hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
+                       hdr->b_flags &= ~ARC_FLAG_L2CACHE;
+
+                       ARCSTAT_BUMP(arcstat_l2_writes_skip_toobig);
+
+                       (void) refcount_remove_many(&dev->l2ad_alloc,
+                           hdr->b_l2hdr.b_asize, hdr);
+               } else if (zio->io_error != 0) {
                        /*
                         * Error - drop L2ARC entry.
                         */
@@ -6567,6 +6586,16 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
                if (buf_sz != 0) {
                        uint64_t buf_a_sz;
 
+                       /*
+                        * Buffers which are larger than l2arc_max_block_size
+                        * after compression are skipped and removed from L2
+                        * eligibility.
+                        */
+                       if (buf_sz > l2arc_max_block_size) {
+                               hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET;
+                               continue;
+                       }
+
                        wzio = zio_write_phys(pio, dev->l2ad_vdev,
                            dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
                            NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
@@ -7129,6 +7158,9 @@ MODULE_PARM_DESC(l2arc_headroom, "Number of max device writes to precache");
 module_param(l2arc_headroom_boost, ulong, 0644);
 MODULE_PARM_DESC(l2arc_headroom_boost, "Compressed l2arc_headroom multiplier");
 
+module_param(l2arc_max_block_size, ulong, 0644);
+MODULE_PARM_DESC(l2arc_max_block_size, "Skip L2ARC buffers larger than N");
+
 module_param(l2arc_feed_secs, ulong, 0644);
 MODULE_PARM_DESC(l2arc_feed_secs, "Seconds between L2ARC writing");