granicus.if.org Git - zfs/commitdiff
Illumos 5408 - managing ZFS cache devices requires lots of RAM
author: Chris Williamson <Chris.Williamson@delphix.com>
Tue, 30 Dec 2014 03:12:23 +0000 (19:12 -0800)
committer: Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 11 Jun 2015 17:27:25 +0000 (10:27 -0700)
5408 managing ZFS cache devices requires lots of RAM
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Don Brady <dev.fs.zfs@gmail.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Approved by: Garrett D'Amore <garrett@damore.org>

Porting notes:

Due to the restructuring of the ARC-related structures, this
patch conflicts with at least the following existing ZoL commits:

    6e1d7276c94cbd7c2e19f9232f6ba4bafa62dbe0
    Fix inaccurate arcstat_l2_hdr_size calculations

        The ARC_SPACE_HDRS constant no longer exists and has been
        somewhat equivalently replaced by HDR_L2ONLY_SIZE.

    e0b0ca983d6897bcddf05af2c0e5d01ff66f90db
    Add visibility in to cached dbufs

        The new layering of l{1,2}arc_buf_hdr_t within the arc_buf_hdr
        struct requires additional structure member names to be used
        when referencing the inner items.  Also, the presence of L1 or L2
        inner member is indicated by flags using the new HDR_HAS_L{1,2}HDR
        macros.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
cmd/ztest/ztest.c
include/sys/arc.h
include/sys/arc_impl.h
include/sys/trace_arc.h
module/zfs/arc.c

index f1a8ff61d7ee2c5aefd73f2feea3f02778e6cca0..642cab5f2f33960c36752673887f4d44163deb17 100644 (file)
@@ -4042,7 +4042,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
                 * assign an arcbuf to a dbuf.
                 */
                for (j = 0; j < s; j++) {
-                       if (i != 5) {
+                       if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
                                bigbuf_arcbufs[j] =
                                    dmu_request_arcbuf(bonus_db, chunksize);
                        } else {
@@ -4066,7 +4066,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
                        umem_free(packbuf, packsize);
                        umem_free(bigbuf, bigsize);
                        for (j = 0; j < s; j++) {
-                               if (i != 5) {
+                               if (i != 5 ||
+                                   chunksize < (SPA_MINBLOCKSIZE * 2)) {
                                        dmu_return_arcbuf(bigbuf_arcbufs[j]);
                                } else {
                                        dmu_return_arcbuf(
@@ -4111,7 +4112,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
                }
                for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
                        dmu_buf_t *dbt;
-                       if (i != 5) {
+                       if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
                                bcopy((caddr_t)bigbuf + (off - bigoff),
                                    bigbuf_arcbufs[j]->b_data, chunksize);
                        } else {
@@ -4128,7 +4129,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
                                VERIFY(dmu_buf_hold(os, bigobj, off,
                                    FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
                        }
-                       if (i != 5) {
+                       if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
                                dmu_assign_arcbuf(bonus_db, off,
                                    bigbuf_arcbufs[j], tx);
                        } else {
index 25e2f035253d6da049baccaf5ff19913597d031c..903f0b4131677a4b98dab1729925958ffd3ca9aa 100644 (file)
@@ -81,10 +81,29 @@ typedef enum arc_flags
        ARC_FLAG_FREED_IN_READ          = 1 << 10,      /* freed during read */
        ARC_FLAG_BUF_AVAILABLE          = 1 << 11,      /* block not in use */
        ARC_FLAG_INDIRECT               = 1 << 12,      /* indirect block */
-       ARC_FLAG_FREE_IN_PROGRESS       = 1 << 13,      /*  about to be freed */
-       ARC_FLAG_L2_WRITING             = 1 << 14,      /* write in progress */
-       ARC_FLAG_L2_EVICTED             = 1 << 15,      /* evicted during I/O */
-       ARC_FLAG_L2_WRITE_HEAD          = 1 << 16,      /* head of write list */
+       ARC_FLAG_L2_WRITING             = 1 << 13,      /* write in progress */
+       ARC_FLAG_L2_EVICTED             = 1 << 14,      /* evicted during I/O */
+       ARC_FLAG_L2_WRITE_HEAD          = 1 << 15,      /* head of write list */
+       /* indicates that the buffer contains metadata (otherwise, data) */
+       ARC_FLAG_BUFC_METADATA          = 1 << 16,
+
+       /* Flags specifying whether optional hdr struct fields are defined */
+       ARC_FLAG_HAS_L1HDR              = 1 << 17,
+       ARC_FLAG_HAS_L2HDR              = 1 << 18,
+
+       /*
+        * The arc buffer's compression mode is stored in the top 7 bits of the
+        * flags field, so these dummy flags are included so that MDB can
+        * interpret the enum properly.
+        */
+       ARC_FLAG_COMPRESS_0             = 1 << 24,
+       ARC_FLAG_COMPRESS_1             = 1 << 25,
+       ARC_FLAG_COMPRESS_2             = 1 << 26,
+       ARC_FLAG_COMPRESS_3             = 1 << 27,
+       ARC_FLAG_COMPRESS_4             = 1 << 28,
+       ARC_FLAG_COMPRESS_5             = 1 << 29,
+       ARC_FLAG_COMPRESS_6             = 1 << 30
+
 } arc_flags_t;
 
 struct arc_buf {
index 1f8351a6784b7d17ba60d6363e61652d130a61f7..556cc258330d0702a418bd1f129e50aadccf6538 100644 (file)
@@ -74,8 +74,6 @@ typedef struct arc_state {
        arc_state_type_t arcs_state;
 } arc_state_t;
 
-typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
-
 typedef struct arc_callback arc_callback_t;
 
 struct arc_callback {
@@ -96,27 +94,45 @@ struct arc_write_callback {
        arc_buf_t       *awcb_buf;
 };
 
-struct arc_buf_hdr {
-       /* protected by hash lock */
-       dva_t                   b_dva;
-       uint64_t                b_birth;
-       uint64_t                b_cksum0;
-
+/*
+ * ARC buffers are separated into multiple structs as a memory saving measure:
+ *   - Common fields struct, always defined, and embedded within it:
+ *       - L2-only fields, always allocated but undefined when not in L2ARC
+ *       - L1-only fields, only allocated when in L1ARC
+ *
+ *           Buffer in L1                     Buffer only in L2
+ *    +------------------------+          +------------------------+
+ *    | arc_buf_hdr_t          |          | arc_buf_hdr_t          |
+ *    |                        |          |                        |
+ *    |                        |          |                        |
+ *    |                        |          |                        |
+ *    +------------------------+          +------------------------+
+ *    | l2arc_buf_hdr_t        |          | l2arc_buf_hdr_t        |
+ *    | (undefined if L1-only) |          |                        |
+ *    +------------------------+          +------------------------+
+ *    | l1arc_buf_hdr_t        |
+ *    |                        |
+ *    |                        |
+ *    |                        |
+ *    |                        |
+ *    +------------------------+
+ *
+ * Because it's possible for the L2ARC to become extremely large, we can wind
+ * up eating a lot of memory in L2ARC buffer headers, so the size of a header
+ * is minimized by only allocating the fields necessary for an L1-cached buffer
+ * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
+ * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
+ * words in pointers. arc_hdr_realloc() is used to switch a header between
+ * these two allocation states.
+ */
+typedef struct l1arc_buf_hdr {
        kmutex_t                b_freeze_lock;
-       zio_cksum_t             *b_freeze_cksum;
 
-       arc_buf_hdr_t           *b_hash_next;
        arc_buf_t               *b_buf;
-       arc_flags_t             b_flags;
        uint32_t                b_datacnt;
-
-       arc_callback_t          *b_acb;
+       /* for waiting on writes to complete */
        kcondvar_t              b_cv;
 
-       /* immutable */
-       arc_buf_contents_t      b_type;
-       uint64_t                b_size;
-       uint64_t                b_spa;
 
        /* protected by arc state mutex */
        arc_state_t             *b_state;
@@ -133,9 +149,10 @@ struct arc_buf_hdr {
        /* self protecting */
        refcount_t              b_refcnt;
 
-       l2arc_buf_hdr_t         *b_l2hdr;
-       list_node_t             b_l2node;
-};
+       arc_callback_t          *b_acb;
+       /* temporary buffer holder for in-flight compressed data */
+       void                    *b_tmp_cdata;
+} l1arc_buf_hdr_t;
 
 typedef struct l2arc_dev {
        vdev_t                  *l2ad_vdev;     /* vdev */
@@ -146,15 +163,51 @@ typedef struct l2arc_dev {
        uint64_t                l2ad_evict;     /* last addr eviction reached */
        boolean_t               l2ad_first;     /* first sweep through */
        boolean_t               l2ad_writing;   /* currently writing */
-       list_t                  *l2ad_buflist;  /* buffer list */
+       kmutex_t                l2ad_mtx;       /* lock for buffer list */
+       list_t                  l2ad_buflist;   /* buffer list */
        list_node_t             l2ad_node;      /* device list node */
 } l2arc_dev_t;
 
+typedef struct l2arc_buf_hdr {
+       /* protected by arc_buf_hdr mutex */
+       l2arc_dev_t             *b_dev;         /* L2ARC device */
+       uint64_t                b_daddr;        /* disk address, offset byte */
+       /* real alloc'd buffer size depending on b_compress applied */
+       uint32_t                b_hits;
+       int32_t                 b_asize;
+
+       list_node_t             b_l2node;
+} l2arc_buf_hdr_t;
+
 typedef struct l2arc_write_callback {
        l2arc_dev_t     *l2wcb_dev;             /* device info */
        arc_buf_hdr_t   *l2wcb_head;            /* head of write buflist */
 } l2arc_write_callback_t;
 
+struct arc_buf_hdr {
+       /* protected by hash lock */
+       dva_t                   b_dva;
+       uint64_t                b_birth;
+       /*
+        * Even though this checksum is only set/verified when a buffer is in
+        * the L1 cache, it needs to be in the set of common fields because it
+        * must be preserved from the time before a buffer is written out to
+        * L2ARC until after it is read back in.
+        */
+       zio_cksum_t             *b_freeze_cksum;
+
+       arc_buf_hdr_t           *b_hash_next;
+       arc_flags_t             b_flags;
+
+       /* immutable */
+       int32_t                 b_size;
+       uint64_t                b_spa;
+
+       /* L2ARC fields. Undefined when not in L2ARC. */
+       l2arc_buf_hdr_t         b_l2hdr;
+       /* L1ARC fields. Undefined when in l2arc_only state */
+       l1arc_buf_hdr_t         b_l1hdr;
+};
 #ifdef __cplusplus
 }
 #endif
index 8b885eff73a76bafd2bf4e362c67815351e6fb7e..b9df228eae338ffd0f1990c70fcbd5517ea4420c 100644 (file)
@@ -45,7 +45,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
        TP_STRUCT__entry(
            __array(uint64_t,           hdr_dva_word, 2)
            __field(uint64_t,           hdr_birth)
-           __field(uint64_t,           hdr_cksum0)
            __field(uint32_t,           hdr_flags)
            __field(uint32_t,           hdr_datacnt)
            __field(arc_buf_contents_t, hdr_type)
@@ -64,27 +63,25 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
            __entry->hdr_dva_word[0]    = ab->b_dva.dva_word[0];
            __entry->hdr_dva_word[1]    = ab->b_dva.dva_word[1];
            __entry->hdr_birth          = ab->b_birth;
-           __entry->hdr_cksum0         = ab->b_cksum0;
            __entry->hdr_flags          = ab->b_flags;
-           __entry->hdr_datacnt        = ab->b_datacnt;
-           __entry->hdr_type           = ab->b_type;
+           __entry->hdr_datacnt        = ab->b_l1hdr.b_datacnt;
            __entry->hdr_size           = ab->b_size;
            __entry->hdr_spa            = ab->b_spa;
-           __entry->hdr_state_type     = ab->b_state->arcs_state;
-           __entry->hdr_access         = ab->b_arc_access;
-           __entry->hdr_mru_hits       = ab->b_mru_hits;
-           __entry->hdr_mru_ghost_hits = ab->b_mru_ghost_hits;
-           __entry->hdr_mfu_hits       = ab->b_mfu_hits;
-           __entry->hdr_mfu_ghost_hits = ab->b_mfu_ghost_hits;
-           __entry->hdr_l2_hits        = ab->b_l2_hits;
-           __entry->hdr_refcount       = ab->b_refcnt.rc_count;
+           __entry->hdr_state_type     = ab->b_l1hdr.b_state->arcs_state;
+           __entry->hdr_access         = ab->b_l1hdr.b_arc_access;
+           __entry->hdr_mru_hits       = ab->b_l1hdr.b_mru_hits;
+           __entry->hdr_mru_ghost_hits = ab->b_l1hdr.b_mru_ghost_hits;
+           __entry->hdr_mfu_hits       = ab->b_l1hdr.b_mfu_hits;
+           __entry->hdr_mfu_ghost_hits = ab->b_l1hdr.b_mfu_ghost_hits;
+           __entry->hdr_l2_hits        = ab->b_l1hdr.b_l2_hits;
+           __entry->hdr_refcount       = ab->b_l1hdr.b_refcnt.rc_count;
        ),
-       TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
+       TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
            "flags 0x%x datacnt %u type %u size %llu spa %llu "
            "state_type %u access %lu mru_hits %u mru_ghost_hits %u "
            "mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
            __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
-           __entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
+           __entry->hdr_birth, __entry->hdr_flags,
            __entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
            __entry->hdr_spa, __entry->hdr_state_type,
            __entry->hdr_access, __entry->hdr_mru_hits,
@@ -261,7 +258,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
        TP_STRUCT__entry(
            __array(uint64_t,           hdr_dva_word, 2)
            __field(uint64_t,           hdr_birth)
-           __field(uint64_t,           hdr_cksum0)
            __field(uint32_t,           hdr_flags)
            __field(uint32_t,           hdr_datacnt)
            __field(arc_buf_contents_t, hdr_type)
@@ -292,20 +288,18 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
            __entry->hdr_dva_word[0]    = hdr->b_dva.dva_word[0];
            __entry->hdr_dva_word[1]    = hdr->b_dva.dva_word[1];
            __entry->hdr_birth          = hdr->b_birth;
-           __entry->hdr_cksum0         = hdr->b_cksum0;
            __entry->hdr_flags          = hdr->b_flags;
-           __entry->hdr_datacnt        = hdr->b_datacnt;
-           __entry->hdr_type           = hdr->b_type;
+           __entry->hdr_datacnt        = hdr->b_l1hdr.b_datacnt;
            __entry->hdr_size           = hdr->b_size;
            __entry->hdr_spa            = hdr->b_spa;
-           __entry->hdr_state_type     = hdr->b_state->arcs_state;
-           __entry->hdr_access         = hdr->b_arc_access;
-           __entry->hdr_mru_hits       = hdr->b_mru_hits;
-           __entry->hdr_mru_ghost_hits = hdr->b_mru_ghost_hits;
-           __entry->hdr_mfu_hits       = hdr->b_mfu_hits;
-           __entry->hdr_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
-           __entry->hdr_l2_hits        = hdr->b_l2_hits;
-           __entry->hdr_refcount       = hdr->b_refcnt.rc_count;
+           __entry->hdr_state_type     = hdr->b_l1hdr.b_state->arcs_state;
+           __entry->hdr_access         = hdr->b_l1hdr.b_arc_access;
+           __entry->hdr_mru_hits       = hdr->b_l1hdr.b_mru_hits;
+           __entry->hdr_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
+           __entry->hdr_mfu_hits       = hdr->b_l1hdr.b_mfu_hits;
+           __entry->hdr_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
+           __entry->hdr_l2_hits        = hdr->b_l1hdr.b_l2_hits;
+           __entry->hdr_refcount       = hdr->b_l1hdr.b_refcnt.rc_count;
 
            __entry->bp_dva0[0]         = bp->blk_dva[0].dva_word[0];
            __entry->bp_dva0[1]         = bp->blk_dva[0].dva_word[1];
@@ -325,8 +319,8 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
            __entry->zb_level           = zb->zb_level;
            __entry->zb_blkid           = zb->zb_blkid;
        ),
-       TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
-           "flags 0x%x datacnt %u type %u size %llu spa %llu state_type %u "
+       TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
+           "flags 0x%x datacnt %u size %llu spa %llu state_type %u "
            "access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
            "mfu_ghost_hits %u l2_hits %u refcount %lli } "
            "bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
@@ -334,8 +328,8 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
            "lsize %llu } zb { objset %llu object %llu level %lli "
            "blkid %llu }",
            __entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
-           __entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
-           __entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
+           __entry->hdr_birth, __entry->hdr_flags,
+           __entry->hdr_datacnt, __entry->hdr_size,
            __entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
            __entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
            __entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
index 7b34e68258145eb0c6ce7582dd2cf5a5dbf8313f..e69889ab58106102597db5f24632b41d02b15c95 100644 (file)
  * Note that the majority of the performance stats are manipulated
  * with atomic operations.
  *
- * The L2ARC uses the l2arc_buflist_mtx global mutex for the following:
+ * The L2ARC uses the l2ad_mtx on each vdev for the following:
  *
  *     - L2ARC buflist creation
  *     - L2ARC buflist eviction
@@ -308,6 +308,7 @@ typedef struct arc_stats {
        kstat_named_t arcstat_l2_writes_hdr_miss;
        kstat_named_t arcstat_l2_evict_lock_retry;
        kstat_named_t arcstat_l2_evict_reading;
+       kstat_named_t arcstat_l2_evict_l1cached;
        kstat_named_t arcstat_l2_free_on_write;
        kstat_named_t arcstat_l2_abort_lowmem;
        kstat_named_t arcstat_l2_cksum_bad;
@@ -396,6 +397,7 @@ static arc_stats_t arc_stats = {
        { "l2_writes_hdr_miss",         KSTAT_DATA_UINT64 },
        { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
        { "l2_evict_reading",           KSTAT_DATA_UINT64 },
+       { "l2_evict_l1cached",          KSTAT_DATA_UINT64 },
        { "l2_free_on_write",           KSTAT_DATA_UINT64 },
        { "l2_abort_lowmem",            KSTAT_DATA_UINT64 },
        { "l2_cksum_bad",               KSTAT_DATA_UINT64 },
@@ -506,22 +508,38 @@ static arc_buf_hdr_t arc_eviction_hdr;
 #define        HDR_PREFETCH(hdr)       ((hdr)->b_flags & ARC_FLAG_PREFETCH)
 #define        HDR_FREED_IN_READ(hdr)  ((hdr)->b_flags & ARC_FLAG_FREED_IN_READ)
 #define        HDR_BUF_AVAILABLE(hdr)  ((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE)
-#define        HDR_FREE_IN_PROGRESS(hdr)       \
-       ((hdr)->b_flags & ARC_FLAG_FREE_IN_PROGRESS)
+
 #define        HDR_L2CACHE(hdr)        ((hdr)->b_flags & ARC_FLAG_L2CACHE)
+#define        HDR_L2COMPRESS(hdr)     ((hdr)->b_flags & ARC_FLAG_L2COMPRESS)
 #define        HDR_L2_READING(hdr)     \
-       ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS &&    \
-           (hdr)->b_l2hdr != NULL)
+           (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) &&      \
+           ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
 #define        HDR_L2_WRITING(hdr)     ((hdr)->b_flags & ARC_FLAG_L2_WRITING)
 #define        HDR_L2_EVICTED(hdr)     ((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
 #define        HDR_L2_WRITE_HEAD(hdr)  ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
 
+#define        HDR_ISTYPE_METADATA(hdr)        \
+           ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
+#define        HDR_ISTYPE_DATA(hdr)    (!HDR_ISTYPE_METADATA(hdr))
+
+#define        HDR_HAS_L1HDR(hdr)      ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
+#define        HDR_HAS_L2HDR(hdr)      ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
+
+/* For storing compression mode in b_flags */
+#define        HDR_COMPRESS_OFFSET     24
+#define        HDR_COMPRESS_NBITS      7
+
+#define        HDR_GET_COMPRESS(hdr)   ((enum zio_compress)BF32_GET(hdr->b_flags, \
+           HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS))
+#define        HDR_SET_COMPRESS(hdr, cmp) BF32_SET(hdr->b_flags, \
+           HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS, (cmp))
+
 /*
  * Other sizes
  */
 
-#define        HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
-#define        L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t))
+#define        HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
+#define        HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
 
 /*
  * Hash table routines
@@ -591,7 +609,6 @@ static list_t L2ARC_dev_list;                       /* device list */
 static list_t *l2arc_dev_list;                 /* device list pointer */
 static kmutex_t l2arc_dev_mtx;                 /* device list mutex */
 static l2arc_dev_t *l2arc_dev_last;            /* last device used */
-static kmutex_t l2arc_buflist_mtx;             /* mutex for all buflists */
 static list_t L2ARC_free_on_write;             /* free after write buf list */
 static list_t *l2arc_free_on_write;            /* free after write list ptr */
 static kmutex_t l2arc_free_on_write_mtx;       /* mutex for list */
@@ -606,19 +623,6 @@ typedef struct l2arc_read_callback {
        enum zio_compress       l2rcb_compress;         /* applied compress */
 } l2arc_read_callback_t;
 
-struct l2arc_buf_hdr {
-       /* protected by arc_buf_hdr  mutex */
-       l2arc_dev_t             *b_dev;         /* L2ARC device */
-       uint64_t                b_daddr;        /* disk address, offset byte */
-       /* compression applied to buffer data */
-       enum zio_compress       b_compress;
-       /* real alloc'd buffer size depending on b_compress applied */
-       uint32_t                b_hits;
-       uint64_t                b_asize;
-       /* temporary buffer holder for in-flight compressed data */
-       void                    *b_tmp_cdata;
-};
-
 typedef struct l2arc_data_free {
        /* protected by l2arc_free_on_write_mtx */
        void            *l2df_data;
@@ -637,12 +641,13 @@ static int arc_evict_needed(arc_buf_contents_t);
 static void arc_evict_ghost(arc_state_t *, uint64_t, int64_t);
 static void arc_buf_watch(arc_buf_t *);
 
+static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
+static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
+
 static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
 static void l2arc_read_done(zio_t *);
-static void l2arc_hdr_stat_add(void);
-static void l2arc_hdr_stat_remove(void);
 
-static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t *);
+static boolean_t l2arc_compress_buf(arc_buf_hdr_t *);
 static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress);
 static void l2arc_release_cdata_buf(arc_buf_hdr_t *);
 
@@ -665,8 +670,7 @@ buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
 
 #define        BUF_EMPTY(buf)                                          \
        ((buf)->b_dva.dva_word[0] == 0 &&                       \
-       (buf)->b_dva.dva_word[1] == 0 &&                        \
-       (buf)->b_cksum0 == 0)
+       (buf)->b_dva.dva_word[1] == 0)
 
 #define        BUF_EQUAL(spa, dva, birth, buf)                         \
        ((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) &&     \
@@ -679,7 +683,6 @@ buf_discard_identity(arc_buf_hdr_t *hdr)
        hdr->b_dva.dva_word[0] = 0;
        hdr->b_dva.dva_word[1] = 0;
        hdr->b_birth = 0;
-       hdr->b_cksum0 = 0;
 }
 
 static arc_buf_hdr_t *
@@ -709,6 +712,7 @@ buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp)
  * equal to elem in the hash table, then the already existing element
  * will be returned and the new element will not be inserted.
  * Otherwise returns NULL.
+ * If lockp == NULL, the caller is assumed to already hold the hash lock.
  */
 static arc_buf_hdr_t *
 buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
@@ -721,8 +725,14 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
        ASSERT(!DVA_IS_EMPTY(&hdr->b_dva));
        ASSERT(hdr->b_birth != 0);
        ASSERT(!HDR_IN_HASH_TABLE(hdr));
-       *lockp = hash_lock;
-       mutex_enter(hash_lock);
+
+       if (lockp != NULL) {
+               *lockp = hash_lock;
+               mutex_enter(hash_lock);
+       } else {
+               ASSERT(MUTEX_HELD(hash_lock));
+       }
+
        for (fhdr = buf_hash_table.ht_table[idx], i = 0; fhdr != NULL;
            fhdr = fhdr->b_hash_next, i++) {
                if (BUF_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr))
@@ -777,7 +787,8 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
 /*
  * Global data structures and functions for the buf kmem cache.
  */
-static kmem_cache_t *hdr_cache;
+static kmem_cache_t *hdr_full_cache;
+static kmem_cache_t *hdr_l2only_cache;
 static kmem_cache_t *buf_cache;
 
 static void
@@ -798,7 +809,8 @@ buf_fini(void)
 #endif
        for (i = 0; i < BUF_LOCKS; i++)
                mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
-       kmem_cache_destroy(hdr_cache);
+       kmem_cache_destroy(hdr_full_cache);
+       kmem_cache_destroy(hdr_l2only_cache);
        kmem_cache_destroy(buf_cache);
 }
 
@@ -808,17 +820,29 @@ buf_fini(void)
  */
 /* ARGSUSED */
 static int
-hdr_cons(void *vbuf, void *unused, int kmflag)
+hdr_full_cons(void *vbuf, void *unused, int kmflag)
+{
+       arc_buf_hdr_t *hdr = vbuf;
+
+       bzero(hdr, HDR_FULL_SIZE);
+       cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL);
+       refcount_create(&hdr->b_l1hdr.b_refcnt);
+       mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
+       list_link_init(&hdr->b_l1hdr.b_arc_node);
+       list_link_init(&hdr->b_l2hdr.b_l2node);
+       arc_space_consume(HDR_FULL_SIZE, ARC_SPACE_HDRS);
+
+       return (0);
+}
+
+/* ARGSUSED */
+static int
+hdr_l2only_cons(void *vbuf, void *unused, int kmflag)
 {
        arc_buf_hdr_t *hdr = vbuf;
 
-       bzero(hdr, sizeof (arc_buf_hdr_t));
-       refcount_create(&hdr->b_refcnt);
-       cv_init(&hdr->b_cv, NULL, CV_DEFAULT, NULL);
-       mutex_init(&hdr->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
-       list_link_init(&hdr->b_arc_node);
-       list_link_init(&hdr->b_l2node);
-       arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
+       bzero(hdr, HDR_L2ONLY_SIZE);
+       arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
 
        return (0);
 }
@@ -842,15 +866,25 @@ buf_cons(void *vbuf, void *unused, int kmflag)
  */
 /* ARGSUSED */
 static void
-hdr_dest(void *vbuf, void *unused)
+hdr_full_dest(void *vbuf, void *unused)
 {
        arc_buf_hdr_t *hdr = vbuf;
 
        ASSERT(BUF_EMPTY(hdr));
-       refcount_destroy(&hdr->b_refcnt);
-       cv_destroy(&hdr->b_cv);
-       mutex_destroy(&hdr->b_freeze_lock);
-       arc_space_return(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
+       cv_destroy(&hdr->b_l1hdr.b_cv);
+       refcount_destroy(&hdr->b_l1hdr.b_refcnt);
+       mutex_destroy(&hdr->b_l1hdr.b_freeze_lock);
+       arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS);
+}
+
+/* ARGSUSED */
+static void
+hdr_l2only_dest(void *vbuf, void *unused)
+{
+       ASSERTV(arc_buf_hdr_t *hdr = vbuf);
+
+       ASSERT(BUF_EMPTY(hdr));
+       arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
 }
 
 /* ARGSUSED */
@@ -897,8 +931,11 @@ retry:
                goto retry;
        }
 
-       hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
-           0, hdr_cons, hdr_dest, NULL, NULL, NULL, 0);
+       hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE,
+           0, hdr_full_cons, hdr_full_dest, NULL, NULL, NULL, 0);
+       hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only",
+           HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, NULL,
+           NULL, NULL, 0);
        buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
            0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
 
@@ -912,6 +949,81 @@ retry:
        }
 }
 
+/*
+ * Transition between the two allocation states for the arc_buf_hdr struct.
+ * The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without
+ * (hdr_l2only_cache) the fields necessary for the L1 cache - the smaller
+ * version is used when a cache buffer is only in the L2ARC in order to reduce
+ * memory usage.
+ */
+static arc_buf_hdr_t *
+arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
+{
+       arc_buf_hdr_t *nhdr;
+       l2arc_dev_t *dev;
+
+       ASSERT(HDR_HAS_L2HDR(hdr));
+       ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
+           (old == hdr_l2only_cache && new == hdr_full_cache));
+
+       dev = hdr->b_l2hdr.b_dev;
+       nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
+
+       ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+       buf_hash_remove(hdr);
+
+       bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
+       if (new == hdr_full_cache) {
+               nhdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+               /*
+                * arc_access and arc_change_state need to be aware that a
+                * header has just come out of L2ARC, so we set its state to
+                * l2c_only even though it's about to change.
+                */
+               nhdr->b_l1hdr.b_state = arc_l2c_only;
+       } else {
+               ASSERT(hdr->b_l1hdr.b_buf == NULL);
+               ASSERT0(hdr->b_l1hdr.b_datacnt);
+               ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
+               /*
+                * We might be removing the L1hdr of a buffer which was just
+                * written out to L2ARC. If such a buffer is compressed then we
+                * need to free its b_tmp_cdata before destroying the header.
+                */
+               if (hdr->b_l1hdr.b_tmp_cdata != NULL &&
+                   HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF)
+                       l2arc_release_cdata_buf(hdr);
+               nhdr->b_flags &= ~ARC_FLAG_HAS_L1HDR;
+       }
+       /*
+        * The header has been reallocated so we need to re-insert it into any
+        * lists it was on.
+        */
+       (void) buf_hash_insert(nhdr, NULL);
+
+       ASSERT(list_link_active(&hdr->b_l2hdr.b_l2node));
+
+       mutex_enter(&dev->l2ad_mtx);
+
+       /*
+        * We must place the realloc'ed header back into the list at
+        * the same spot. Otherwise, if it's placed earlier in the list,
+        * l2arc_write_buffers() could find it during the function's
+        * write phase, and try to write it out to the l2arc.
+        */
+       list_insert_after(&dev->l2ad_buflist, hdr, nhdr);
+       list_remove(&dev->l2ad_buflist, hdr);
+
+       mutex_exit(&dev->l2ad_mtx);
+
+       buf_discard_identity(hdr);
+       hdr->b_freeze_cksum = NULL;
+       kmem_cache_free(old, hdr);
+
+       return (nhdr);
+}
+
+
 #define        ARC_MINTIME     (hz>>4) /* 62 ms */
 
 static void
@@ -922,16 +1034,15 @@ arc_cksum_verify(arc_buf_t *buf)
        if (!(zfs_flags & ZFS_DEBUG_MODIFY))
                return;
 
-       mutex_enter(&buf->b_hdr->b_freeze_lock);
-       if (buf->b_hdr->b_freeze_cksum == NULL ||
-           (buf->b_hdr->b_flags & ARC_FLAG_IO_ERROR)) {
-               mutex_exit(&buf->b_hdr->b_freeze_lock);
+       mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+       if (buf->b_hdr->b_freeze_cksum == NULL || HDR_IO_ERROR(buf->b_hdr)) {
+               mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
                return;
        }
        fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
        if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))
                panic("buffer modified while frozen!");
-       mutex_exit(&buf->b_hdr->b_freeze_lock);
+       mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 }
 
 static int
@@ -940,10 +1051,10 @@ arc_cksum_equal(arc_buf_t *buf)
        zio_cksum_t zc;
        int equal;
 
-       mutex_enter(&buf->b_hdr->b_freeze_lock);
+       mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
        fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
        equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc);
-       mutex_exit(&buf->b_hdr->b_freeze_lock);
+       mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 
        return (equal);
 }
@@ -954,16 +1065,16 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
        if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY))
                return;
 
-       mutex_enter(&buf->b_hdr->b_freeze_lock);
+       mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
        if (buf->b_hdr->b_freeze_cksum != NULL) {
-               mutex_exit(&buf->b_hdr->b_freeze_lock);
+               mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
                return;
        }
        buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
            KM_SLEEP);
        fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
            buf->b_hdr->b_freeze_cksum);
-       mutex_exit(&buf->b_hdr->b_freeze_lock);
+       mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
        arc_buf_watch(buf);
 }
 
@@ -997,24 +1108,50 @@ arc_buf_watch(arc_buf_t *buf)
 #endif
 }
 
+static arc_buf_contents_t
+arc_buf_type(arc_buf_hdr_t *hdr)
+{
+       if (HDR_ISTYPE_METADATA(hdr)) {
+               return (ARC_BUFC_METADATA);
+       } else {
+               return (ARC_BUFC_DATA);
+       }
+}
+
+static uint32_t
+arc_bufc_to_flags(arc_buf_contents_t type)
+{
+       switch (type) {
+       case ARC_BUFC_DATA:
+               /* metadata field is 0 if buffer contains normal data */
+               return (0);
+       case ARC_BUFC_METADATA:
+               return (ARC_FLAG_BUFC_METADATA);
+       default:
+               break;
+       }
+       panic("undefined ARC buffer type!");
+       return ((uint32_t)-1);
+}
+
 void
 arc_buf_thaw(arc_buf_t *buf)
 {
        if (zfs_flags & ZFS_DEBUG_MODIFY) {
-               if (buf->b_hdr->b_state != arc_anon)
+               if (buf->b_hdr->b_l1hdr.b_state != arc_anon)
                        panic("modifying non-anon buffer!");
-               if (buf->b_hdr->b_flags & ARC_FLAG_IO_IN_PROGRESS)
+               if (HDR_IO_IN_PROGRESS(buf->b_hdr))
                        panic("modifying buffer while i/o in progress!");
                arc_cksum_verify(buf);
        }
 
-       mutex_enter(&buf->b_hdr->b_freeze_lock);
+       mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
        if (buf->b_hdr->b_freeze_cksum != NULL) {
                kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
                buf->b_hdr->b_freeze_cksum = NULL;
        }
 
-       mutex_exit(&buf->b_hdr->b_freeze_lock);
+       mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
 
        arc_buf_unwatch(buf);
 }
@@ -1031,7 +1168,7 @@ arc_buf_freeze(arc_buf_t *buf)
        mutex_enter(hash_lock);
 
        ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
-           buf->b_hdr->b_state == arc_anon);
+           buf->b_hdr->b_l1hdr.b_state == arc_anon);
        arc_cksum_compute(buf, B_FALSE);
        mutex_exit(hash_lock);
 
@@ -1040,30 +1177,37 @@ arc_buf_freeze(arc_buf_t *buf)
 static void
 add_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
 {
+       arc_state_t *state;
+
+       ASSERT(HDR_HAS_L1HDR(hdr));
        ASSERT(MUTEX_HELD(hash_lock));
 
-       if ((refcount_add(&hdr->b_refcnt, tag) == 1) &&
-           (hdr->b_state != arc_anon)) {
-               uint64_t delta = hdr->b_size * hdr->b_datacnt;
-               list_t *list = &hdr->b_state->arcs_list[hdr->b_type];
-               uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type];
-
-               ASSERT(!MUTEX_HELD(&hdr->b_state->arcs_mtx));
-               mutex_enter(&hdr->b_state->arcs_mtx);
-               ASSERT(list_link_active(&hdr->b_arc_node));
-               list_remove(list, hdr);
-               if (GHOST_STATE(hdr->b_state)) {
-                       ASSERT0(hdr->b_datacnt);
-                       ASSERT3P(hdr->b_buf, ==, NULL);
-                       delta = hdr->b_size;
+       state = hdr->b_l1hdr.b_state;
+
+       if ((refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) &&
+           (state != arc_anon)) {
+               /* We don't use the L2-only state list. */
+               if (state != arc_l2c_only) {
+                       uint64_t delta = hdr->b_size * hdr->b_l1hdr.b_datacnt;
+                       list_t *list = &state->arcs_list[arc_buf_type(hdr)];
+                       uint64_t *size = &state->arcs_lsize[arc_buf_type(hdr)];
+
+                       ASSERT(!MUTEX_HELD(&state->arcs_mtx));
+                       mutex_enter(&state->arcs_mtx);
+                       ASSERT(list_link_active(&hdr->b_l1hdr.b_arc_node));
+                       list_remove(list, hdr);
+                       if (GHOST_STATE(state)) {
+                               ASSERT0(hdr->b_l1hdr.b_datacnt);
+                               ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+                               delta = hdr->b_size;
+                       }
+                       ASSERT(delta > 0);
+                       ASSERT3U(*size, >=, delta);
+                       atomic_add_64(size, -delta);
+                       mutex_exit(&state->arcs_mtx);
                }
-               ASSERT(delta > 0);
-               ASSERT3U(*size, >=, delta);
-               atomic_add_64(size, -delta);
-               mutex_exit(&hdr->b_state->arcs_mtx);
                /* remove the prefetch flag if we get a reference */
-               if (hdr->b_flags & ARC_FLAG_PREFETCH)
-                       hdr->b_flags &= ~ARC_FLAG_PREFETCH;
+               hdr->b_flags &= ~ARC_FLAG_PREFETCH;
        }
 }
 
@@ -1071,21 +1215,27 @@ static int
 remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
 {
        int cnt;
-       arc_state_t *state = hdr->b_state;
+       arc_state_t *state = hdr->b_l1hdr.b_state;
 
+       ASSERT(HDR_HAS_L1HDR(hdr));
        ASSERT(state == arc_anon || MUTEX_HELD(hash_lock));
        ASSERT(!GHOST_STATE(state));
 
-       if (((cnt = refcount_remove(&hdr->b_refcnt, tag)) == 0) &&
+       /*
+        * arc_l2c_only counts as a ghost state so we don't need to explicitly
+        * check to prevent usage of the arc_l2c_only list.
+        */
+       if (((cnt = refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
            (state != arc_anon)) {
-               uint64_t *size = &state->arcs_lsize[hdr->b_type];
+               uint64_t *size = &state->arcs_lsize[arc_buf_type(hdr)];
 
                ASSERT(!MUTEX_HELD(&state->arcs_mtx));
                mutex_enter(&state->arcs_mtx);
-               ASSERT(!list_link_active(&hdr->b_arc_node));
-               list_insert_head(&state->arcs_list[hdr->b_type], hdr);
-               ASSERT(hdr->b_datacnt > 0);
-               atomic_add_64(size, hdr->b_size * hdr->b_datacnt);
+               ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
+               list_insert_head(&state->arcs_list[arc_buf_type(hdr)], hdr);
+               ASSERT(hdr->b_l1hdr.b_datacnt > 0);
+               atomic_add_64(size, hdr->b_size *
+                   hdr->b_l1hdr.b_datacnt);
                mutex_exit(&state->arcs_mtx);
        }
        return (cnt);
@@ -1102,31 +1252,45 @@ void
 arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
 {
        arc_buf_hdr_t *hdr = ab->b_hdr;
-       arc_state_t *state = hdr->b_state;
+       l1arc_buf_hdr_t *l1hdr = NULL;
+       l2arc_buf_hdr_t *l2hdr = NULL;
+       arc_state_t *state = NULL;
+
+       if (HDR_HAS_L1HDR(hdr)) {
+               l1hdr = &hdr->b_l1hdr;
+               state = l1hdr->b_state;
+       }
+       if (HDR_HAS_L2HDR(hdr))
+               l2hdr = &hdr->b_l2hdr;
 
        memset(abi, 0, sizeof (arc_buf_info_t));
        abi->abi_flags = hdr->b_flags;
-       abi->abi_datacnt = hdr->b_datacnt;
+
+       if (l1hdr) {
+               abi->abi_datacnt = l1hdr->b_datacnt;
+               abi->abi_access = l1hdr->b_arc_access;
+               abi->abi_mru_hits = l1hdr->b_mru_hits;
+               abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits;
+               abi->abi_mfu_hits = l1hdr->b_mfu_hits;
+               abi->abi_mfu_ghost_hits = l1hdr->b_mfu_ghost_hits;
+               abi->abi_holds = refcount_count(&l1hdr->b_refcnt);
+       }
+
+       if (l2hdr) {
+               abi->abi_l2arc_dattr = l2hdr->b_daddr;
+               abi->abi_l2arc_asize = l2hdr->b_asize;
+               abi->abi_l2arc_compress = HDR_GET_COMPRESS(hdr);
+               abi->abi_l2arc_hits = l2hdr->b_hits;
+       }
+
        abi->abi_state_type = state ? state->arcs_state : ARC_STATE_ANON;
-       abi->abi_state_contents = hdr->b_type;
+       abi->abi_state_contents = arc_buf_type(hdr);
        abi->abi_state_index = -1;
        abi->abi_size = hdr->b_size;
-       abi->abi_access = hdr->b_arc_access;
-       abi->abi_mru_hits = hdr->b_mru_hits;
-       abi->abi_mru_ghost_hits = hdr->b_mru_ghost_hits;
-       abi->abi_mfu_hits = hdr->b_mfu_hits;
-       abi->abi_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
-       abi->abi_holds = refcount_count(&hdr->b_refcnt);
-
-       if (hdr->b_l2hdr) {
-               abi->abi_l2arc_dattr = hdr->b_l2hdr->b_daddr;
-               abi->abi_l2arc_asize = hdr->b_l2hdr->b_asize;
-               abi->abi_l2arc_compress = hdr->b_l2hdr->b_compress;
-               abi->abi_l2arc_hits = hdr->b_l2hdr->b_hits;
-       }
 
-       if (state && state_index && list_link_active(&hdr->b_arc_node)) {
-               list_t *list = &state->arcs_list[hdr->b_type];
+       if (l1hdr && state && state_index &&
+           list_link_active(&l1hdr->b_arc_node)) {
+               list_t *list = &state->arcs_list[arc_buf_type(hdr)];
                arc_buf_hdr_t *h;
 
                mutex_enter(&state->arcs_mtx);
@@ -1147,40 +1311,60 @@ static void
 arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
     kmutex_t *hash_lock)
 {
-       arc_state_t *old_state = hdr->b_state;
-       int64_t refcnt = refcount_count(&hdr->b_refcnt);
+       arc_state_t *old_state;
+       int64_t refcnt;
+       uint32_t datacnt;
        uint64_t from_delta, to_delta;
+       arc_buf_contents_t buftype = arc_buf_type(hdr);
+
+       /*
+        * We almost always have an L1 hdr here, since we call arc_hdr_realloc()
+        * in arc_read() when bringing a buffer out of the L2ARC.  However, the
+        * L1 hdr doesn't always exist when we change state to arc_anon before
+        * destroying a header, in which case reallocating to add the L1 hdr is
+        * pointless.
+        */
+       if (HDR_HAS_L1HDR(hdr)) {
+               old_state = hdr->b_l1hdr.b_state;
+               refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt);
+               datacnt = hdr->b_l1hdr.b_datacnt;
+       } else {
+               old_state = arc_l2c_only;
+               refcnt = 0;
+               datacnt = 0;
+       }
 
        ASSERT(MUTEX_HELD(hash_lock));
        ASSERT3P(new_state, !=, old_state);
-       ASSERT(refcnt == 0 || hdr->b_datacnt > 0);
-       ASSERT(hdr->b_datacnt == 0 || !GHOST_STATE(new_state));
-       ASSERT(hdr->b_datacnt <= 1 || old_state != arc_anon);
+       ASSERT(refcnt == 0 || datacnt > 0);
+       ASSERT(!GHOST_STATE(new_state) || datacnt == 0);
+       ASSERT(old_state != arc_anon || datacnt <= 1);
 
-       from_delta = to_delta = hdr->b_datacnt * hdr->b_size;
+       from_delta = to_delta = datacnt * hdr->b_size;
 
        /*
         * If this buffer is evictable, transfer it from the
         * old state list to the new state list.
         */
        if (refcnt == 0) {
-               if (old_state != arc_anon) {
+               if (old_state != arc_anon && old_state != arc_l2c_only) {
                        int use_mutex = !MUTEX_HELD(&old_state->arcs_mtx);
-                       uint64_t *size = &old_state->arcs_lsize[hdr->b_type];
+                       uint64_t *size = &old_state->arcs_lsize[buftype];
 
                        if (use_mutex)
                                mutex_enter(&old_state->arcs_mtx);
 
-                       ASSERT(list_link_active(&hdr->b_arc_node));
-                       list_remove(&old_state->arcs_list[hdr->b_type], hdr);
+                       ASSERT(HDR_HAS_L1HDR(hdr));
+                       ASSERT(list_link_active(&hdr->b_l1hdr.b_arc_node));
+                       list_remove(&old_state->arcs_list[buftype], hdr);
 
                        /*
                         * If prefetching out of the ghost cache,
                         * we will have a non-zero datacnt.
                         */
-                       if (GHOST_STATE(old_state) && hdr->b_datacnt == 0) {
+                       if (GHOST_STATE(old_state) && datacnt == 0) {
                                /* ghost elements have a ghost size */
-                               ASSERT(hdr->b_buf == NULL);
+                               ASSERT(hdr->b_l1hdr.b_buf == NULL);
                                from_delta = hdr->b_size;
                        }
                        ASSERT3U(*size, >=, from_delta);
@@ -1189,20 +1373,26 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
                        if (use_mutex)
                                mutex_exit(&old_state->arcs_mtx);
                }
-               if (new_state != arc_anon) {
+               if (new_state != arc_anon && new_state != arc_l2c_only) {
                        int use_mutex = !MUTEX_HELD(&new_state->arcs_mtx);
-                       uint64_t *size = &new_state->arcs_lsize[hdr->b_type];
+                       uint64_t *size = &new_state->arcs_lsize[buftype];
 
+                       /*
+                        * An L1 header always exists here, since if we're
+                        * moving to some L1-cached state (i.e. not l2c_only or
+                        * anonymous), we realloc the header to add an L1hdr
+                        * beforehand.
+                        */
+                       ASSERT(HDR_HAS_L1HDR(hdr));
                        if (use_mutex)
                                mutex_enter(&new_state->arcs_mtx);
 
-                       list_insert_head(&new_state->arcs_list[hdr->b_type],
-                           hdr);
+                       list_insert_head(&new_state->arcs_list[buftype], hdr);
 
                        /* ghost elements have a ghost size */
                        if (GHOST_STATE(new_state)) {
-                               ASSERT(hdr->b_datacnt == 0);
-                               ASSERT(hdr->b_buf == NULL);
+                               ASSERT0(datacnt);
+                               ASSERT(hdr->b_l1hdr.b_buf == NULL);
                                to_delta = hdr->b_size;
                        }
                        atomic_add_64(size, to_delta);
@@ -1216,20 +1406,22 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
        if (new_state == arc_anon && HDR_IN_HASH_TABLE(hdr))
                buf_hash_remove(hdr);
 
-       /* adjust state sizes */
-       if (to_delta)
+       /* adjust state sizes (ignore arc_l2c_only) */
+       if (to_delta && new_state != arc_l2c_only)
                atomic_add_64(&new_state->arcs_size, to_delta);
-       if (from_delta) {
+       if (from_delta && old_state != arc_l2c_only) {
                ASSERT3U(old_state->arcs_size, >=, from_delta);
                atomic_add_64(&old_state->arcs_size, -from_delta);
        }
-       hdr->b_state = new_state;
+       if (HDR_HAS_L1HDR(hdr))
+               hdr->b_l1hdr.b_state = new_state;
 
-       /* adjust l2arc hdr stats */
-       if (new_state == arc_l2c_only)
-               l2arc_hdr_stat_add();
-       else if (old_state == arc_l2c_only)
-               l2arc_hdr_stat_remove();
+       /*
+        * L2 headers should never be on the L2 state list since they don't
+        * have L1 headers allocated.
+        */
+       ASSERT(list_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
+           list_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
 }
 
 void
@@ -1307,30 +1499,36 @@ arc_buf_alloc(spa_t *spa, uint64_t size, void *tag, arc_buf_contents_t type)
        arc_buf_t *buf;
 
        VERIFY3U(size, <=, spa_maxblocksize(spa));
-       hdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
+       hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
        ASSERT(BUF_EMPTY(hdr));
+       ASSERT3P(hdr->b_freeze_cksum, ==, NULL);
        hdr->b_size = size;
-       hdr->b_type = type;
        hdr->b_spa = spa_load_guid(spa);
-       hdr->b_state = arc_anon;
-       hdr->b_arc_access = 0;
-       hdr->b_mru_hits = 0;
-       hdr->b_mru_ghost_hits = 0;
-       hdr->b_mfu_hits = 0;
-       hdr->b_mfu_ghost_hits = 0;
-       hdr->b_l2_hits = 0;
+       hdr->b_l1hdr.b_mru_hits = 0;
+       hdr->b_l1hdr.b_mru_ghost_hits = 0;
+       hdr->b_l1hdr.b_mfu_hits = 0;
+       hdr->b_l1hdr.b_mfu_ghost_hits = 0;
+       hdr->b_l1hdr.b_l2_hits = 0;
+
        buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
        buf->b_hdr = hdr;
        buf->b_data = NULL;
        buf->b_efunc = NULL;
        buf->b_private = NULL;
        buf->b_next = NULL;
-       hdr->b_buf = buf;
+
+       hdr->b_flags = arc_bufc_to_flags(type);
+       hdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+
+       hdr->b_l1hdr.b_buf = buf;
+       hdr->b_l1hdr.b_state = arc_anon;
+       hdr->b_l1hdr.b_arc_access = 0;
+       hdr->b_l1hdr.b_datacnt = 1;
+
        arc_get_data_buf(buf);
-       hdr->b_datacnt = 1;
-       hdr->b_flags = 0;
-       ASSERT(refcount_is_zero(&hdr->b_refcnt));
-       (void) refcount_add(&hdr->b_refcnt, tag);
+
+       ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+       (void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
 
        return (buf);
 }
@@ -1363,8 +1561,9 @@ arc_return_buf(arc_buf_t *buf, void *tag)
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
        ASSERT(buf->b_data != NULL);
-       (void) refcount_add(&hdr->b_refcnt, tag);
-       (void) refcount_remove(&hdr->b_refcnt, arc_onloan_tag);
+       ASSERT(HDR_HAS_L1HDR(hdr));
+       (void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
+       (void) refcount_remove(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
 
        atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
 }
@@ -1373,12 +1572,12 @@ arc_return_buf(arc_buf_t *buf, void *tag)
 void
 arc_loan_inuse_buf(arc_buf_t *buf, void *tag)
 {
-       arc_buf_hdr_t *hdr;
+       arc_buf_hdr_t *hdr = buf->b_hdr;
 
        ASSERT(buf->b_data != NULL);
-       hdr = buf->b_hdr;
-       (void) refcount_add(&hdr->b_refcnt, arc_onloan_tag);
-       (void) refcount_remove(&hdr->b_refcnt, tag);
+       ASSERT(HDR_HAS_L1HDR(hdr));
+       (void) refcount_add(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
+       (void) refcount_remove(&hdr->b_l1hdr.b_refcnt, tag);
        buf->b_efunc = NULL;
        buf->b_private = NULL;
 
@@ -1392,15 +1591,16 @@ arc_buf_clone(arc_buf_t *from)
        arc_buf_hdr_t *hdr = from->b_hdr;
        uint64_t size = hdr->b_size;
 
-       ASSERT(hdr->b_state != arc_anon);
+       ASSERT(HDR_HAS_L1HDR(hdr));
+       ASSERT(hdr->b_l1hdr.b_state != arc_anon);
 
        buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
        buf->b_hdr = hdr;
        buf->b_data = NULL;
        buf->b_efunc = NULL;
        buf->b_private = NULL;
-       buf->b_next = hdr->b_buf;
-       hdr->b_buf = buf;
+       buf->b_next = hdr->b_l1hdr.b_buf;
+       hdr->b_l1hdr.b_buf = buf;
        arc_get_data_buf(buf);
        bcopy(from->b_data, buf->b_data, size);
 
@@ -1410,11 +1610,11 @@ arc_buf_clone(arc_buf_t *from)
         * then track the size and number of duplicates.  These stats will be
         * updated as duplicate buffers are created and destroyed.
         */
-       if (hdr->b_type == ARC_BUFC_DATA) {
+       if (HDR_ISTYPE_DATA(hdr)) {
                ARCSTAT_BUMP(arcstat_duplicate_buffers);
                ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
        }
-       hdr->b_datacnt += 1;
+       hdr->b_l1hdr.b_datacnt += 1;
        return (buf);
 }
 
@@ -1437,17 +1637,20 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
        hash_lock = HDR_LOCK(buf->b_hdr);
        mutex_enter(hash_lock);
        hdr = buf->b_hdr;
+       ASSERT(HDR_HAS_L1HDR(hdr));
        ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
        mutex_exit(&buf->b_evict_lock);
 
-       ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
+       ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+           hdr->b_l1hdr.b_state == arc_mfu);
+
        add_reference(hdr, hash_lock, tag);
        DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
        arc_access(hdr, hash_lock);
        mutex_exit(hash_lock);
        ARCSTAT_BUMP(arcstat_hits);
-       ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH),
-           demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
+       ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
+           demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
            data, metadata, hits);
 }
 
@@ -1485,10 +1688,10 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
        arc_buf_t **bufp;
 
        /* free up data associated with the buf */
-       if (buf->b_data) {
-               arc_state_t *state = buf->b_hdr->b_state;
+       if (buf->b_data != NULL) {
+               arc_state_t *state = buf->b_hdr->b_l1hdr.b_state;
                uint64_t size = buf->b_hdr->b_size;
-               arc_buf_contents_t type = buf->b_hdr->b_type;
+               arc_buf_contents_t type = arc_buf_type(buf->b_hdr);
 
                arc_cksum_verify(buf);
                arc_buf_unwatch(buf);
@@ -1503,11 +1706,12 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
                                arc_space_return(size, ARC_SPACE_DATA);
                        }
                }
-               if (list_link_active(&buf->b_hdr->b_arc_node)) {
+               if (list_link_active(&buf->b_hdr->b_l1hdr.b_arc_node)) {
                        uint64_t *cnt = &state->arcs_lsize[type];
 
-                       ASSERT(refcount_is_zero(&buf->b_hdr->b_refcnt));
-                       ASSERT(state != arc_anon);
+                       ASSERT(refcount_is_zero(
+                           &buf->b_hdr->b_l1hdr.b_refcnt));
+                       ASSERT(state != arc_anon && state != arc_l2c_only);
 
                        ASSERT3U(*cnt, >=, size);
                        atomic_add_64(cnt, -size);
@@ -1520,13 +1724,13 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
                 * If we're destroying a duplicate buffer make sure
                 * that the appropriate statistics are updated.
                 */
-               if (buf->b_hdr->b_datacnt > 1 &&
-                   buf->b_hdr->b_type == ARC_BUFC_DATA) {
+               if (buf->b_hdr->b_l1hdr.b_datacnt > 1 &&
+                   HDR_ISTYPE_DATA(buf->b_hdr)) {
                        ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
                        ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
                }
-               ASSERT(buf->b_hdr->b_datacnt > 0);
-               buf->b_hdr->b_datacnt -= 1;
+               ASSERT(buf->b_hdr->b_l1hdr.b_datacnt > 0);
+               buf->b_hdr->b_l1hdr.b_datacnt -= 1;
        }
 
        /* only remove the buf if requested */
@@ -1534,7 +1738,8 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
                return;
 
        /* remove the buf from the hdr list */
-       for (bufp = &buf->b_hdr->b_buf; *bufp != buf; bufp = &(*bufp)->b_next)
+       for (bufp = &buf->b_hdr->b_l1hdr.b_buf; *bufp != buf;
+           bufp = &(*bufp)->b_next)
                continue;
        *bufp = buf->b_next;
        buf->b_next = NULL;
@@ -1549,84 +1754,82 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
 static void
 arc_hdr_destroy(arc_buf_hdr_t *hdr)
 {
-       l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
-
-       ASSERT(refcount_is_zero(&hdr->b_refcnt));
-       ASSERT3P(hdr->b_state, ==, arc_anon);
+       if (HDR_HAS_L1HDR(hdr)) {
+               ASSERT(hdr->b_l1hdr.b_buf == NULL ||
+                   hdr->b_l1hdr.b_datacnt > 0);
+               ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+               ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+       }
        ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+       ASSERT(!HDR_IN_HASH_TABLE(hdr));
+
+       if (HDR_HAS_L2HDR(hdr)) {
+               l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
+               boolean_t buflist_held = MUTEX_HELD(&l2hdr->b_dev->l2ad_mtx);
 
-       if (l2hdr != NULL) {
-               boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx);
-               /*
-                * To prevent arc_free() and l2arc_evict() from
-                * attempting to free the same buffer at the same time,
-                * a FREE_IN_PROGRESS flag is given to arc_free() to
-                * give it priority.  l2arc_evict() can't destroy this
-                * header while we are waiting on l2arc_buflist_mtx.
-                *
-                * The hdr may be removed from l2ad_buflist before we
-                * grab l2arc_buflist_mtx, so b_l2hdr is rechecked.
-                */
                if (!buflist_held) {
-                       mutex_enter(&l2arc_buflist_mtx);
-                       l2hdr = hdr->b_l2hdr;
+                       mutex_enter(&l2hdr->b_dev->l2ad_mtx);
+                       l2hdr = &hdr->b_l2hdr;
                }
 
-               if (l2hdr != NULL) {
-                       list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
-                       ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
-                       ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
-                       vdev_space_update(l2hdr->b_dev->l2ad_vdev,
-                           -l2hdr->b_asize, 0, 0);
-                       kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
-                       arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
-                       if (hdr->b_state == arc_l2c_only)
-                               l2arc_hdr_stat_remove();
-                       hdr->b_l2hdr = NULL;
-               }
+               list_remove(&l2hdr->b_dev->l2ad_buflist, hdr);
+
+               arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
+               ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+               ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
 
                if (!buflist_held)
-                       mutex_exit(&l2arc_buflist_mtx);
+                       mutex_exit(&l2hdr->b_dev->l2ad_mtx);
+
+               hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
        }
 
-       if (!BUF_EMPTY(hdr)) {
-               ASSERT(!HDR_IN_HASH_TABLE(hdr));
+       if (!BUF_EMPTY(hdr))
                buf_discard_identity(hdr);
-       }
-       while (hdr->b_buf) {
-               arc_buf_t *buf = hdr->b_buf;
-
-               if (buf->b_efunc) {
-                       mutex_enter(&arc_eviction_mtx);
-                       mutex_enter(&buf->b_evict_lock);
-                       ASSERT(buf->b_hdr != NULL);
-                       arc_buf_destroy(hdr->b_buf, FALSE, FALSE);
-                       hdr->b_buf = buf->b_next;
-                       buf->b_hdr = &arc_eviction_hdr;
-                       buf->b_next = arc_eviction_list;
-                       arc_eviction_list = buf;
-                       mutex_exit(&buf->b_evict_lock);
-                       mutex_exit(&arc_eviction_mtx);
-               } else {
-                       arc_buf_destroy(hdr->b_buf, FALSE, TRUE);
-               }
-       }
+
        if (hdr->b_freeze_cksum != NULL) {
                kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
                hdr->b_freeze_cksum = NULL;
        }
 
-       ASSERT(!list_link_active(&hdr->b_arc_node));
+       if (HDR_HAS_L1HDR(hdr)) {
+               while (hdr->b_l1hdr.b_buf) {
+                       arc_buf_t *buf = hdr->b_l1hdr.b_buf;
+
+                       if (buf->b_efunc != NULL) {
+                               mutex_enter(&arc_eviction_mtx);
+                               mutex_enter(&buf->b_evict_lock);
+                               ASSERT(buf->b_hdr != NULL);
+                               arc_buf_destroy(hdr->b_l1hdr.b_buf, FALSE,
+                                   FALSE);
+                               hdr->b_l1hdr.b_buf = buf->b_next;
+                               buf->b_hdr = &arc_eviction_hdr;
+                               buf->b_next = arc_eviction_list;
+                               arc_eviction_list = buf;
+                               mutex_exit(&buf->b_evict_lock);
+                               mutex_exit(&arc_eviction_mtx);
+                       } else {
+                               arc_buf_destroy(hdr->b_l1hdr.b_buf, FALSE,
+                                   TRUE);
+                       }
+               }
+       }
+
        ASSERT3P(hdr->b_hash_next, ==, NULL);
-       ASSERT3P(hdr->b_acb, ==, NULL);
-       kmem_cache_free(hdr_cache, hdr);
+       if (HDR_HAS_L1HDR(hdr)) {
+               ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
+               ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+               kmem_cache_free(hdr_full_cache, hdr);
+       } else {
+               kmem_cache_free(hdr_l2only_cache, hdr);
+       }
 }
 
 void
 arc_buf_free(arc_buf_t *buf, void *tag)
 {
        arc_buf_hdr_t *hdr = buf->b_hdr;
-       int hashed = hdr->b_state != arc_anon;
+       int hashed = hdr->b_l1hdr.b_state != arc_anon;
 
        ASSERT(buf->b_efunc == NULL);
        ASSERT(buf->b_data != NULL);
@@ -1639,10 +1842,10 @@ arc_buf_free(arc_buf_t *buf, void *tag)
                ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
 
                (void) remove_reference(hdr, hash_lock, tag);
-               if (hdr->b_datacnt > 1) {
+               if (hdr->b_l1hdr.b_datacnt > 1) {
                        arc_buf_destroy(buf, FALSE, TRUE);
                } else {
-                       ASSERT(buf == hdr->b_buf);
+                       ASSERT(buf == hdr->b_l1hdr.b_buf);
                        ASSERT(buf->b_efunc == NULL);
                        hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
                }
@@ -1656,7 +1859,7 @@ arc_buf_free(arc_buf_t *buf, void *tag)
                 */
                mutex_enter(&arc_eviction_mtx);
                (void) remove_reference(hdr, NULL, tag);
-               ASSERT(refcount_is_zero(&hdr->b_refcnt));
+               ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
                destroy_hdr = !HDR_IO_IN_PROGRESS(hdr);
                mutex_exit(&arc_eviction_mtx);
                if (destroy_hdr)
@@ -1676,8 +1879,8 @@ arc_buf_remove_ref(arc_buf_t *buf, void* tag)
        kmutex_t *hash_lock = NULL;
        boolean_t no_callback = (buf->b_efunc == NULL);
 
-       if (hdr->b_state == arc_anon) {
-               ASSERT(hdr->b_datacnt == 1);
+       if (hdr->b_l1hdr.b_state == arc_anon) {
+               ASSERT(hdr->b_l1hdr.b_datacnt == 1);
                arc_buf_free(buf, tag);
                return (no_callback);
        }
@@ -1685,21 +1888,22 @@ arc_buf_remove_ref(arc_buf_t *buf, void* tag)
        hash_lock = HDR_LOCK(hdr);
        mutex_enter(hash_lock);
        hdr = buf->b_hdr;
+       ASSERT(hdr->b_l1hdr.b_datacnt > 0);
        ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
-       ASSERT(hdr->b_state != arc_anon);
+       ASSERT(hdr->b_l1hdr.b_state != arc_anon);
        ASSERT(buf->b_data != NULL);
 
        (void) remove_reference(hdr, hash_lock, tag);
-       if (hdr->b_datacnt > 1) {
+       if (hdr->b_l1hdr.b_datacnt > 1) {
                if (no_callback)
                        arc_buf_destroy(buf, FALSE, TRUE);
        } else if (no_callback) {
-               ASSERT(hdr->b_buf == buf && buf->b_next == NULL);
+               ASSERT(hdr->b_l1hdr.b_buf == buf && buf->b_next == NULL);
                ASSERT(buf->b_efunc == NULL);
                hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
        }
-       ASSERT(no_callback || hdr->b_datacnt > 1 ||
-           refcount_is_zero(&hdr->b_refcnt));
+       ASSERT(no_callback || hdr->b_l1hdr.b_datacnt > 1 ||
+           refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
        mutex_exit(hash_lock);
        return (no_callback);
 }
@@ -1745,7 +1949,7 @@ arc_buf_eviction_needed(arc_buf_t *buf)
                return (B_TRUE);
        }
 
-       if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
+       if (hdr->b_l1hdr.b_datacnt > 1 && HDR_ISTYPE_DATA(hdr))
                evict_needed = B_TRUE;
 
        mutex_exit(&buf->b_evict_lock);
@@ -1784,16 +1988,27 @@ arc_evict(arc_state_t *state, uint64_t spa, int64_t bytes, boolean_t recycle,
        evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost;
 
 top:
-       mutex_enter(&state->arcs_mtx);
+       /*
+        * The ghost list lock must be acquired first in order to prevent
+        * a 3 party deadlock:
+        *
+        *  - arc_evict_ghost acquires arc_*_ghost->arcs_mtx, followed by
+        *    l2ad_mtx in arc_hdr_realloc
+        *  - l2arc_write_buffers acquires l2ad_mtx, followed by arc_*->arcs_mtx
+        *  - arc_evict acquires arc_*->arcs_mtx, followed by
+        *    arc_*_ghost->arcs_mtx and forms a deadlock cycle.
+        *
+        * This situation is avoided by acquiring the ghost list lock first.
+        */
        mutex_enter(&evicted_state->arcs_mtx);
+       mutex_enter(&state->arcs_mtx);
 
        for (hdr = list_tail(list); hdr; hdr = hdr_prev) {
                hdr_prev = list_prev(list, hdr);
                /* prefetch buffers have a minimum lifespan */
                if (HDR_IO_IN_PROGRESS(hdr) ||
-                   (spa && hdr->b_spa != spa) ||
-                   (hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT) &&
-                   ddi_get_lbolt() - hdr->b_arc_access <
+                   ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) &&
+                   ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access <
                    zfs_arc_min_prefetch_lifespan)) {
                        skipped++;
                        continue;
@@ -1818,11 +2033,11 @@ top:
                 */
                if (!recycle && count++ > arc_evict_iterations) {
                        list_insert_after(list, hdr, &marker);
-                       mutex_exit(&evicted_state->arcs_mtx);
                        mutex_exit(&state->arcs_mtx);
+                       mutex_exit(&evicted_state->arcs_mtx);
                        kpreempt(KPREEMPT_SYNC);
-                       mutex_enter(&state->arcs_mtx);
                        mutex_enter(&evicted_state->arcs_mtx);
+                       mutex_enter(&state->arcs_mtx);
                        hdr_prev = list_prev(list, &marker);
                        list_remove(list, &marker);
                        count = 0;
@@ -1832,28 +2047,29 @@ top:
                hash_lock = HDR_LOCK(hdr);
                have_lock = MUTEX_HELD(hash_lock);
                if (have_lock || mutex_tryenter(hash_lock)) {
-                       ASSERT0(refcount_count(&hdr->b_refcnt));
-                       ASSERT(hdr->b_datacnt > 0);
-                       while (hdr->b_buf) {
-                               arc_buf_t *buf = hdr->b_buf;
+                       ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
+                       ASSERT3U(hdr->b_l1hdr.b_datacnt, >, 0);
+                       while (hdr->b_l1hdr.b_buf) {
+                               arc_buf_t *buf = hdr->b_l1hdr.b_buf;
                                if (!mutex_tryenter(&buf->b_evict_lock)) {
                                        missed += 1;
                                        break;
                                }
-                               if (buf->b_data) {
+                               if (buf->b_data != NULL) {
                                        bytes_evicted += hdr->b_size;
-                                       if (recycle && hdr->b_type == type &&
+                                       if (recycle &&
+                                           arc_buf_type(hdr) == type &&
                                            hdr->b_size == bytes &&
                                            !HDR_L2_WRITING(hdr)) {
                                                stolen = buf->b_data;
                                                recycle = FALSE;
                                        }
                                }
-                               if (buf->b_efunc) {
+                               if (buf->b_efunc != NULL) {
                                        mutex_enter(&arc_eviction_mtx);
                                        arc_buf_destroy(buf,
                                            buf->b_data == stolen, FALSE);
-                                       hdr->b_buf = buf->b_next;
+                                       hdr->b_l1hdr.b_buf = buf->b_next;
                                        buf->b_hdr = &arc_eviction_hdr;
                                        buf->b_next = arc_eviction_list;
                                        arc_eviction_list = buf;
@@ -1866,7 +2082,7 @@ top:
                                }
                        }
 
-                       if (hdr->b_l2hdr) {
+                       if (HDR_HAS_L2HDR(hdr)) {
                                ARCSTAT_INCR(arcstat_evict_l2_cached,
                                    hdr->b_size);
                        } else {
@@ -1880,7 +2096,7 @@ top:
                                }
                        }
 
-                       if (hdr->b_datacnt == 0) {
+                       if (hdr->b_l1hdr.b_datacnt == 0) {
                                arc_change_state(evicted_state, hdr, hash_lock);
                                ASSERT(HDR_IN_HASH_TABLE(hdr));
                                hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE;
@@ -1896,8 +2112,8 @@ top:
                }
        }
 
-       mutex_exit(&evicted_state->arcs_mtx);
        mutex_exit(&state->arcs_mtx);
+       mutex_exit(&evicted_state->arcs_mtx);
 
        if (list == &state->arcs_list[ARC_BUFC_DATA] &&
            (bytes < 0 || bytes_evicted < bytes)) {
@@ -1948,7 +2164,7 @@ top:
        mutex_enter(&state->arcs_mtx);
        for (hdr = list_tail(list); hdr; hdr = hdr_prev) {
                hdr_prev = list_prev(list, hdr);
-               if (hdr->b_type > ARC_BUFC_NUMTYPES)
+               if (arc_buf_type(hdr) >= ARC_BUFC_NUMTYPES)
                        panic("invalid hdr=%p", (void *)hdr);
                if (spa && hdr->b_spa != spa)
                        continue;
@@ -1980,16 +2196,23 @@ top:
                }
                if (mutex_tryenter(hash_lock)) {
                        ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-                       ASSERT(hdr->b_buf == NULL);
+                       ASSERT(!HDR_HAS_L1HDR(hdr) ||
+                           hdr->b_l1hdr.b_buf == NULL);
                        ARCSTAT_BUMP(arcstat_deleted);
                        bytes_deleted += hdr->b_size;
 
-                       if (hdr->b_l2hdr != NULL) {
+                       if (HDR_HAS_L2HDR(hdr)) {
                                /*
                                 * This buffer is cached on the 2nd Level ARC;
                                 * don't destroy the header.
                                 */
                                arc_change_state(arc_l2c_only, hdr, hash_lock);
+                               /*
+                                * dropping from L1+L2 cached to L2-only,
+                                * realloc to remove the L1 header.
+                                */
+                               hdr = arc_hdr_realloc(hdr, hdr_full_cache,
+                                   hdr_l2only_cache);
                                mutex_exit(hash_lock);
                        } else {
                                arc_change_state(arc_anon, hdr, hash_lock);
@@ -2253,27 +2476,27 @@ arc_flush(spa_t *spa)
 {
        uint64_t guid = 0;
 
-       if (spa)
+       if (spa != NULL)
                guid = spa_load_guid(spa);
 
        while (list_head(&arc_mru->arcs_list[ARC_BUFC_DATA])) {
                (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA);
-               if (spa)
+               if (spa != NULL)
                        break;
        }
        while (list_head(&arc_mru->arcs_list[ARC_BUFC_METADATA])) {
                (void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_METADATA);
-               if (spa)
+               if (spa != NULL)
                        break;
        }
        while (list_head(&arc_mfu->arcs_list[ARC_BUFC_DATA])) {
                (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_DATA);
-               if (spa)
+               if (spa != NULL)
                        break;
        }
        while (list_head(&arc_mfu->arcs_list[ARC_BUFC_METADATA])) {
                (void) arc_evict(arc_mfu, guid, -1, FALSE, ARC_BUFC_METADATA);
-               if (spa)
+               if (spa != NULL)
                        break;
        }
 
@@ -2345,8 +2568,8 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat, uint64_t bytes)
                }
        }
 
-       kmem_cache_reap_now(buf_cache);
-       kmem_cache_reap_now(hdr_cache);
+       kmem_cache_reap_now(hdr_full_cache);
+       kmem_cache_reap_now(hdr_l2only_cache);
 }
 
 /*
@@ -2689,9 +2912,9 @@ arc_evict_needed(arc_buf_contents_t type)
 static void
 arc_get_data_buf(arc_buf_t *buf)
 {
-       arc_state_t             *state = buf->b_hdr->b_state;
+       arc_state_t             *state = buf->b_hdr->b_l1hdr.b_state;
        uint64_t                size = buf->b_hdr->b_size;
-       arc_buf_contents_t      type = buf->b_hdr->b_type;
+       arc_buf_contents_t      type = arc_buf_type(buf->b_hdr);
        arc_buf_contents_t      evict = ARC_BUFC_DATA;
        boolean_t               recycle = TRUE;
 
@@ -2718,8 +2941,7 @@ arc_get_data_buf(arc_buf_t *buf)
         * will end up on the mru list; so steal space from there.
         */
        if (state == arc_mfu_ghost)
-               state = buf->b_hdr->b_flags & ARC_FLAG_PREFETCH ?
-                   arc_mru : arc_mfu;
+               state = HDR_PREFETCH(buf->b_hdr) ? arc_mru : arc_mfu;
        else if (state == arc_mru_ghost)
                state = arc_mru;
 
@@ -2782,20 +3004,21 @@ out:
         * Update the state size.  Note that ghost states have a
         * "ghost size" and so don't need to be updated.
         */
-       if (!GHOST_STATE(buf->b_hdr->b_state)) {
+       if (!GHOST_STATE(buf->b_hdr->b_l1hdr.b_state)) {
                arc_buf_hdr_t *hdr = buf->b_hdr;
 
-               atomic_add_64(&hdr->b_state->arcs_size, size);
-               if (list_link_active(&hdr->b_arc_node)) {
-                       ASSERT(refcount_is_zero(&hdr->b_refcnt));
-                       atomic_add_64(&hdr->b_state->arcs_lsize[type], size);
+               atomic_add_64(&hdr->b_l1hdr.b_state->arcs_size, size);
+               if (list_link_active(&hdr->b_l1hdr.b_arc_node)) {
+                       ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+                       atomic_add_64(&hdr->b_l1hdr.b_state->arcs_lsize[type],
+                           size);
                }
                /*
                 * If we are growing the cache, and we are adding anonymous
                 * data, and we have outgrown arc_p, update arc_p
                 */
                if (!zfs_arc_p_aggressive_disable &&
-                   arc_size < arc_c && hdr->b_state == arc_anon &&
+                   arc_size < arc_c && hdr->b_l1hdr.b_state == arc_anon &&
                    arc_anon->arcs_size + arc_mru->arcs_size > arc_p)
                        arc_p = MIN(arc_c, arc_p + size);
        }
@@ -2811,20 +3034,21 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
        clock_t now;
 
        ASSERT(MUTEX_HELD(hash_lock));
+       ASSERT(HDR_HAS_L1HDR(hdr));
 
-       if (hdr->b_state == arc_anon) {
+       if (hdr->b_l1hdr.b_state == arc_anon) {
                /*
                 * This buffer is not in the cache, and does not
                 * appear in our "ghost" list.  Add the new buffer
                 * to the MRU state.
                 */
 
-               ASSERT(hdr->b_arc_access == 0);
-               hdr->b_arc_access = ddi_get_lbolt();
+               ASSERT0(hdr->b_l1hdr.b_arc_access);
+               hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
                DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
                arc_change_state(arc_mru, hdr, hash_lock);
 
-       } else if (hdr->b_state == arc_mru) {
+       } else if (hdr->b_l1hdr.b_state == arc_mru) {
                now = ddi_get_lbolt();
 
                /*
@@ -2835,15 +3059,16 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                 * - move the buffer to the head of the list if this is
                 *   another prefetch (to make it less likely to be evicted).
                 */
-               if ((hdr->b_flags & ARC_FLAG_PREFETCH) != 0) {
-                       if (refcount_count(&hdr->b_refcnt) == 0) {
-                               ASSERT(list_link_active(&hdr->b_arc_node));
+               if (HDR_PREFETCH(hdr)) {
+                       if (refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
+                               ASSERT(list_link_active(
+                                   &hdr->b_l1hdr.b_arc_node));
                        } else {
                                hdr->b_flags &= ~ARC_FLAG_PREFETCH;
-                               atomic_inc_32(&hdr->b_mru_hits);
+                               atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
                                ARCSTAT_BUMP(arcstat_mru_hits);
                        }
-                       hdr->b_arc_access = now;
+                       hdr->b_l1hdr.b_arc_access = now;
                        return;
                }
 
@@ -2852,19 +3077,20 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                 * but it is still in the cache. Move it to the MFU
                 * state.
                 */
-               if (ddi_time_after(now, hdr->b_arc_access + ARC_MINTIME)) {
+               if (ddi_time_after(now, hdr->b_l1hdr.b_arc_access +
+                   ARC_MINTIME)) {
                        /*
                         * More than 125ms have passed since we
                         * instantiated this buffer.  Move it to the
                         * most frequently used state.
                         */
-                       hdr->b_arc_access = now;
+                       hdr->b_l1hdr.b_arc_access = now;
                        DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
                        arc_change_state(arc_mfu, hdr, hash_lock);
                }
-               atomic_inc_32(&hdr->b_mru_hits);
+               atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
                ARCSTAT_BUMP(arcstat_mru_hits);
-       } else if (hdr->b_state == arc_mru_ghost) {
+       } else if (hdr->b_l1hdr.b_state == arc_mru_ghost) {
                arc_state_t     *new_state;
                /*
                 * This buffer has been "accessed" recently, but
@@ -2872,9 +3098,9 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                 * MFU state.
                 */
 
-               if (hdr->b_flags & ARC_FLAG_PREFETCH) {
+               if (HDR_PREFETCH(hdr)) {
                        new_state = arc_mru;
-                       if (refcount_count(&hdr->b_refcnt) > 0)
+                       if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0)
                                hdr->b_flags &= ~ARC_FLAG_PREFETCH;
                        DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
                } else {
@@ -2882,12 +3108,12 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                        DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
                }
 
-               hdr->b_arc_access = ddi_get_lbolt();
+               hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
                arc_change_state(new_state, hdr, hash_lock);
 
-               atomic_inc_32(&hdr->b_mru_ghost_hits);
+               atomic_inc_32(&hdr->b_l1hdr.b_mru_ghost_hits);
                ARCSTAT_BUMP(arcstat_mru_ghost_hits);
-       } else if (hdr->b_state == arc_mfu) {
+       } else if (hdr->b_l1hdr.b_state == arc_mfu) {
                /*
                 * This buffer has been accessed more than once and is
                 * still in the cache.  Keep it in the MFU state.
@@ -2897,14 +3123,14 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                 * If it was a prefetch, we will explicitly move it to
                 * the head of the list now.
                 */
-               if ((hdr->b_flags & ARC_FLAG_PREFETCH) != 0) {
-                       ASSERT(refcount_count(&hdr->b_refcnt) == 0);
-                       ASSERT(list_link_active(&hdr->b_arc_node));
+               if ((HDR_PREFETCH(hdr)) != 0) {
+                       ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+                       ASSERT(list_link_active(&hdr->b_l1hdr.b_arc_node));
                }
-               atomic_inc_32(&hdr->b_mfu_hits);
+               atomic_inc_32(&hdr->b_l1hdr.b_mfu_hits);
                ARCSTAT_BUMP(arcstat_mfu_hits);
-               hdr->b_arc_access = ddi_get_lbolt();
-       } else if (hdr->b_state == arc_mfu_ghost) {
+               hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
+       } else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) {
                arc_state_t     *new_state = arc_mfu;
                /*
                 * This buffer has been accessed more than once but has
@@ -2912,31 +3138,32 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
                 * MFU state.
                 */
 
-               if (hdr->b_flags & ARC_FLAG_PREFETCH) {
+               if (HDR_PREFETCH(hdr)) {
                        /*
                         * This is a prefetch access...
                         * move this block back to the MRU state.
                         */
-                       ASSERT0(refcount_count(&hdr->b_refcnt));
+                       ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
                        new_state = arc_mru;
                }
 
-               hdr->b_arc_access = ddi_get_lbolt();
+               hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
                DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
                arc_change_state(new_state, hdr, hash_lock);
 
-               atomic_inc_32(&hdr->b_mfu_ghost_hits);
+               atomic_inc_32(&hdr->b_l1hdr.b_mfu_ghost_hits);
                ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
-       } else if (hdr->b_state == arc_l2c_only) {
+       } else if (hdr->b_l1hdr.b_state == arc_l2c_only) {
                /*
                 * This buffer is on the 2nd Level ARC.
                 */
 
-               hdr->b_arc_access = ddi_get_lbolt();
+               hdr->b_l1hdr.b_arc_access = ddi_get_lbolt();
                DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr);
                arc_change_state(arc_mfu, hdr, hash_lock);
        } else {
-               cmn_err(CE_PANIC, "invalid arc state 0x%p", hdr->b_state);
+               cmn_err(CE_PANIC, "invalid arc state 0x%p",
+                   hdr->b_l1hdr.b_state);
        }
 }
 
@@ -3005,11 +3232,11 @@ arc_read_done(zio_t *zio)
        }
 
        hdr->b_flags &= ~ARC_FLAG_L2_EVICTED;
-       if (l2arc_noprefetch && (hdr->b_flags & ARC_FLAG_PREFETCH))
+       if (l2arc_noprefetch && HDR_PREFETCH(hdr))
                hdr->b_flags &= ~ARC_FLAG_L2CACHE;
 
        /* byteswap if necessary */
-       callback_list = hdr->b_acb;
+       callback_list = hdr->b_l1hdr.b_acb;
        ASSERT(callback_list != NULL);
        if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
                dmu_object_byteswap_t bswap =
@@ -3023,7 +3250,8 @@ arc_read_done(zio_t *zio)
        arc_cksum_compute(buf, B_FALSE);
        arc_buf_watch(buf);
 
-       if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) {
+       if (hash_lock && zio->io_error == 0 &&
+           hdr->b_l1hdr.b_state == arc_anon) {
                /*
                 * Only call arc_access on anonymous buffers.  This is because
                 * if we've issued an I/O for an evicted buffer, we've already
@@ -3045,24 +3273,25 @@ arc_read_done(zio_t *zio)
                        abuf = NULL;
                }
        }
-       hdr->b_acb = NULL;
+       hdr->b_l1hdr.b_acb = NULL;
        hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
        ASSERT(!HDR_BUF_AVAILABLE(hdr));
        if (abuf == buf) {
                ASSERT(buf->b_efunc == NULL);
-               ASSERT(hdr->b_datacnt == 1);
+               ASSERT(hdr->b_l1hdr.b_datacnt == 1);
                hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
        }
 
-       ASSERT(refcount_is_zero(&hdr->b_refcnt) || callback_list != NULL);
+       ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
+           callback_list != NULL);
 
        if (zio->io_error != 0) {
                hdr->b_flags |= ARC_FLAG_IO_ERROR;
-               if (hdr->b_state != arc_anon)
+               if (hdr->b_l1hdr.b_state != arc_anon)
                        arc_change_state(arc_anon, hdr, hash_lock);
                if (HDR_IN_HASH_TABLE(hdr))
                        buf_hash_remove(hdr);
-               freeable = refcount_is_zero(&hdr->b_refcnt);
+               freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt);
        }
 
        /*
@@ -3070,9 +3299,9 @@ arc_read_done(zio_t *zio)
         * that the hdr (and hence the cv) might be freed before we get to
         * the cv_broadcast().
         */
-       cv_broadcast(&hdr->b_cv);
+       cv_broadcast(&hdr->b_l1hdr.b_cv);
 
-       if (hash_lock) {
+       if (hash_lock != NULL) {
                mutex_exit(hash_lock);
        } else {
                /*
@@ -3081,8 +3310,8 @@ arc_read_done(zio_t *zio)
                 * moved to the anonymous state (so that it won't show up
                 * in the cache).
                 */
-               ASSERT3P(hdr->b_state, ==, arc_anon);
-               freeable = refcount_is_zero(&hdr->b_refcnt);
+               ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+               freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt);
        }
 
        /* execute each callback and free its structure */
@@ -3145,14 +3374,14 @@ top:
                hdr = buf_hash_find(guid, bp, &hash_lock);
        }
 
-       if (hdr != NULL && hdr->b_datacnt > 0) {
+       if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_datacnt > 0) {
 
                *arc_flags |= ARC_FLAG_CACHED;
 
                if (HDR_IO_IN_PROGRESS(hdr)) {
 
                        if (*arc_flags & ARC_FLAG_WAIT) {
-                               cv_wait(&hdr->b_cv, hash_lock);
+                               cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
                                mutex_exit(hash_lock);
                                goto top;
                        }
@@ -3170,8 +3399,8 @@ top:
                                            spa, NULL, NULL, NULL, zio_flags);
 
                                ASSERT(acb->acb_done != NULL);
-                               acb->acb_next = hdr->b_acb;
-                               hdr->b_acb = acb;
+                               acb->acb_next = hdr->b_l1hdr.b_acb;
+                               hdr->b_l1hdr.b_acb = acb;
                                add_reference(hdr, hash_lock, private);
                                mutex_exit(hash_lock);
                                goto out;
@@ -3180,7 +3409,8 @@ top:
                        goto out;
                }
 
-               ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
+               ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+                   hdr->b_l1hdr.b_state == arc_mfu);
 
                if (done) {
                        add_reference(hdr, hash_lock, private);
@@ -3189,7 +3419,7 @@ top:
                         * copy of the data so that we will be guaranteed
                         * that arc_release() will always succeed.
                         */
-                       buf = hdr->b_buf;
+                       buf = hdr->b_l1hdr.b_buf;
                        ASSERT(buf);
                        ASSERT(buf->b_data);
                        if (HDR_BUF_AVAILABLE(hdr)) {
@@ -3200,7 +3430,7 @@ top:
                        }
 
                } else if (*arc_flags & ARC_FLAG_PREFETCH &&
-                   refcount_count(&hdr->b_refcnt) == 0) {
+                   refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
                        hdr->b_flags |= ARC_FLAG_PREFETCH;
                }
                DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
@@ -3211,8 +3441,8 @@ top:
                        hdr->b_flags |= ARC_FLAG_L2COMPRESS;
                mutex_exit(hash_lock);
                ARCSTAT_BUMP(arcstat_hits);
-               ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH),
-                   demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
+               ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
+                   demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
                    data, metadata, hits);
 
                if (done)
@@ -3224,7 +3454,7 @@ top:
                uint64_t addr = 0;
                boolean_t devw = B_FALSE;
                enum zio_compress b_compress = ZIO_COMPRESS_OFF;
-               uint64_t b_asize = 0;
+               int32_t b_asize = 0;
 
                /*
                 * Gracefully handle a damaged logical block size as a
@@ -3251,7 +3481,6 @@ top:
                        if (!BP_IS_EMBEDDED(bp)) {
                                hdr->b_dva = *BP_IDENTITY(bp);
                                hdr->b_birth = BP_PHYSICAL_BIRTH(bp);
-                               hdr->b_cksum0 = bp->blk_cksum.zc_word[0];
                                exists = buf_hash_insert(hdr, &hash_lock);
                        }
                        if (exists != NULL) {
@@ -3275,11 +3504,20 @@ top:
                        if (BP_GET_LEVEL(bp) > 0)
                                hdr->b_flags |= ARC_FLAG_INDIRECT;
                } else {
-                       /* this block is in the ghost cache */
-                       ASSERT(GHOST_STATE(hdr->b_state));
+                       /*
+                        * This block is in the ghost cache. If it was L2-only
+                        * (and thus didn't have an L1 hdr), we realloc the
+                        * header to add an L1 hdr.
+                        */
+                       if (!HDR_HAS_L1HDR(hdr)) {
+                               hdr = arc_hdr_realloc(hdr, hdr_l2only_cache,
+                                   hdr_full_cache);
+                       }
+
+                       ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state));
                        ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-                       ASSERT0(refcount_count(&hdr->b_refcnt));
-                       ASSERT(hdr->b_buf == NULL);
+                       ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+                       ASSERT(hdr->b_l1hdr.b_buf == NULL);
 
                        /* if this is a prefetch, we don't have a reference */
                        if (*arc_flags & ARC_FLAG_PREFETCH)
@@ -3296,29 +3534,29 @@ top:
                        buf->b_efunc = NULL;
                        buf->b_private = NULL;
                        buf->b_next = NULL;
-                       hdr->b_buf = buf;
-                       ASSERT(hdr->b_datacnt == 0);
-                       hdr->b_datacnt = 1;
+                       hdr->b_l1hdr.b_buf = buf;
+                       ASSERT0(hdr->b_l1hdr.b_datacnt);
+                       hdr->b_l1hdr.b_datacnt = 1;
                        arc_get_data_buf(buf);
                        arc_access(hdr, hash_lock);
                }
 
-               ASSERT(!GHOST_STATE(hdr->b_state));
+               ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
 
                acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
                acb->acb_done = done;
                acb->acb_private = private;
 
-               ASSERT(hdr->b_acb == NULL);
-               hdr->b_acb = acb;
+               ASSERT(hdr->b_l1hdr.b_acb == NULL);
+               hdr->b_l1hdr.b_acb = acb;
                hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
 
-               if (hdr->b_l2hdr != NULL &&
-                   (vd = hdr->b_l2hdr->b_dev->l2ad_vdev) != NULL) {
-                       devw = hdr->b_l2hdr->b_dev->l2ad_writing;
-                       addr = hdr->b_l2hdr->b_daddr;
-                       b_compress = hdr->b_l2hdr->b_compress;
-                       b_asize = hdr->b_l2hdr->b_asize;
+               if (HDR_HAS_L2HDR(hdr) &&
+                   (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
+                       devw = hdr->b_l2hdr.b_dev->l2ad_writing;
+                       addr = hdr->b_l2hdr.b_daddr;
+                       b_compress = HDR_GET_COMPRESS(hdr);
+                       b_asize = hdr->b_l2hdr.b_asize;
                        /*
                         * Lock out device removal.
                         */
@@ -3338,8 +3576,8 @@ top:
                DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
                    uint64_t, size, zbookmark_phys_t *, zb);
                ARCSTAT_BUMP(arcstat_misses);
-               ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_FLAG_PREFETCH),
-                   demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
+               ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
+                   demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
                    data, metadata, misses);
 
                if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
@@ -3352,14 +3590,14 @@ top:
                         *    also have invalidated the vdev.
                         * 5. This isn't prefetch and l2arc_noprefetch is set.
                         */
-                       if (hdr->b_l2hdr != NULL &&
+                       if (HDR_HAS_L2HDR(hdr) &&
                            !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
                            !(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
                                l2arc_read_callback_t *cb;
 
                                DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
                                ARCSTAT_BUMP(arcstat_l2_hits);
-                               atomic_inc_32(&hdr->b_l2hdr->b_hits);
+                               atomic_inc_32(&hdr->b_l2hdr.b_hits);
 
                                cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
                                    KM_SLEEP);
@@ -3481,8 +3719,9 @@ void
 arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
 {
        ASSERT(buf->b_hdr != NULL);
-       ASSERT(buf->b_hdr->b_state != arc_anon);
-       ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt) || func == NULL);
+       ASSERT(buf->b_hdr->b_l1hdr.b_state != arc_anon);
+       ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt) ||
+           func == NULL);
        ASSERT(buf->b_efunc == NULL);
        ASSERT(!HDR_BUF_AVAILABLE(buf->b_hdr));
 
@@ -3506,7 +3745,7 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
        if (hdr == NULL)
                return;
        if (HDR_BUF_AVAILABLE(hdr)) {
-               arc_buf_t *buf = hdr->b_buf;
+               arc_buf_t *buf = hdr->b_l1hdr.b_buf;
                add_reference(hdr, hash_lock, FTAG);
                hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE;
                mutex_exit(hash_lock);
@@ -3564,17 +3803,19 @@ arc_clear_callback(arc_buf_t *buf)
        hdr = buf->b_hdr;
        ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
 
-       ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt);
-       ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
+       ASSERT3U(refcount_count(&hdr->b_l1hdr.b_refcnt), <,
+           hdr->b_l1hdr.b_datacnt);
+       ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+           hdr->b_l1hdr.b_state == arc_mfu);
 
        buf->b_efunc = NULL;
        buf->b_private = NULL;
 
-       if (hdr->b_datacnt > 1) {
+       if (hdr->b_l1hdr.b_datacnt > 1) {
                mutex_exit(&buf->b_evict_lock);
                arc_buf_destroy(buf, FALSE, TRUE);
        } else {
-               ASSERT(buf == hdr->b_buf);
+               ASSERT(buf == hdr->b_l1hdr.b_buf);
                hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
                mutex_exit(&buf->b_evict_lock);
        }
@@ -3593,10 +3834,9 @@ arc_clear_callback(arc_buf_t *buf)
 void
 arc_release(arc_buf_t *buf, void *tag)
 {
-       arc_buf_hdr_t *hdr;
-       kmutex_t *hash_lock = NULL;
-       l2arc_buf_hdr_t *l2hdr;
-       uint64_t buf_size = 0;
+       kmutex_t *hash_lock;
+       arc_state_t *state;
+       arc_buf_hdr_t *hdr = buf->b_hdr;
 
        /*
         * It would be nice to assert that if it's DMU metadata (level >
@@ -3605,56 +3845,89 @@ arc_release(arc_buf_t *buf, void *tag)
         */
 
        mutex_enter(&buf->b_evict_lock);
-       hdr = buf->b_hdr;
 
-       /* this buffer is not on any list */
-       ASSERT(refcount_count(&hdr->b_refcnt) > 0);
+       /*
+        * We don't grab the hash lock prior to this check, because if
+        * the buffer's header is in the arc_anon state, it won't be
+        * linked into the hash table.
+        */
+       if (hdr->b_l1hdr.b_state == arc_anon) {
+               mutex_exit(&buf->b_evict_lock);
+               ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+               ASSERT(!HDR_IN_HASH_TABLE(hdr));
+               ASSERT(!HDR_HAS_L2HDR(hdr));
+               ASSERT(BUF_EMPTY(hdr));
 
-       if (hdr->b_state == arc_anon) {
-               /* this buffer is already released */
-               ASSERT(buf->b_efunc == NULL);
-       } else {
-               hash_lock = HDR_LOCK(hdr);
-               mutex_enter(hash_lock);
-               hdr = buf->b_hdr;
-               ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+               ASSERT3U(hdr->b_l1hdr.b_datacnt, ==, 1);
+               ASSERT3S(refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
+               ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
+
+               ASSERT3P(buf->b_efunc, ==, NULL);
+               ASSERT3P(buf->b_private, ==, NULL);
+
+               hdr->b_l1hdr.b_arc_access = 0;
+               arc_buf_thaw(buf);
+
+               return;
        }
 
-       l2hdr = hdr->b_l2hdr;
-       if (l2hdr) {
-               mutex_enter(&l2arc_buflist_mtx);
-               hdr->b_l2hdr = NULL;
-               list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
+       hash_lock = HDR_LOCK(hdr);
+       mutex_enter(hash_lock);
+
+       /*
+        * This assignment is only valid as long as the hash_lock is
+        * held, we must be careful not to reference state or the
+        * b_state field after dropping the lock.
+        */
+       state = hdr->b_l1hdr.b_state;
+       ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
+       ASSERT3P(state, !=, arc_anon);
+
+       /* this buffer is not on any list */
+       ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) > 0);
+
+       if (HDR_HAS_L2HDR(hdr)) {
+               ARCSTAT_INCR(arcstat_l2_asize, -hdr->b_l2hdr.b_asize);
+               ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+
+               mutex_enter(&hdr->b_l2hdr.b_dev->l2ad_mtx);
+               list_remove(&hdr->b_l2hdr.b_dev->l2ad_buflist, hdr);
+               mutex_exit(&hdr->b_l2hdr.b_dev->l2ad_mtx);
+
+               hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
        }
-       buf_size = hdr->b_size;
 
        /*
         * Do we have more than one buf?
         */
-       if (hdr->b_datacnt > 1) {
+       if (hdr->b_l1hdr.b_datacnt > 1) {
                arc_buf_hdr_t *nhdr;
                arc_buf_t **bufp;
                uint64_t blksz = hdr->b_size;
                uint64_t spa = hdr->b_spa;
-               arc_buf_contents_t type = hdr->b_type;
+               arc_buf_contents_t type = arc_buf_type(hdr);
                uint32_t flags = hdr->b_flags;
 
-               ASSERT(hdr->b_buf != buf || buf->b_next != NULL);
+               ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
                /*
                 * Pull the data off of this hdr and attach it to
                 * a new anonymous hdr.
                 */
                (void) remove_reference(hdr, hash_lock, tag);
-               bufp = &hdr->b_buf;
+               bufp = &hdr->b_l1hdr.b_buf;
                while (*bufp != buf)
                        bufp = &(*bufp)->b_next;
                *bufp = buf->b_next;
                buf->b_next = NULL;
 
-               ASSERT3U(hdr->b_state->arcs_size, >=, hdr->b_size);
-               atomic_add_64(&hdr->b_state->arcs_size, -hdr->b_size);
-               if (refcount_is_zero(&hdr->b_refcnt)) {
-                       uint64_t *size = &hdr->b_state->arcs_lsize[hdr->b_type];
+               ASSERT3P(state, !=, arc_l2c_only);
+               ASSERT3U(state->arcs_size, >=, hdr->b_size);
+               atomic_add_64(&state->arcs_size, -hdr->b_size);
+               if (refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
+                       uint64_t *size;
+
+                       ASSERT3P(state, !=, arc_l2c_only);
+                       size = &state->arcs_lsize[type];
                        ASSERT3U(*size, >=, hdr->b_size);
                        atomic_add_64(size, -hdr->b_size);
                }
@@ -3663,68 +3936,60 @@ arc_release(arc_buf_t *buf, void *tag)
                 * We're releasing a duplicate user data buffer, update
                 * our statistics accordingly.
                 */
-               if (hdr->b_type == ARC_BUFC_DATA) {
+               if (HDR_ISTYPE_DATA(hdr)) {
                        ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
                        ARCSTAT_INCR(arcstat_duplicate_buffers_size,
                            -hdr->b_size);
                }
-               hdr->b_datacnt -= 1;
+               hdr->b_l1hdr.b_datacnt -= 1;
                arc_cksum_verify(buf);
                arc_buf_unwatch(buf);
 
                mutex_exit(hash_lock);
 
-               nhdr = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
+               nhdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
                nhdr->b_size = blksz;
                nhdr->b_spa = spa;
-               nhdr->b_type = type;
-               nhdr->b_buf = buf;
-               nhdr->b_state = arc_anon;
-               nhdr->b_arc_access = 0;
-               nhdr->b_mru_hits = 0;
-               nhdr->b_mru_ghost_hits = 0;
-               nhdr->b_mfu_hits = 0;
-               nhdr->b_mfu_ghost_hits = 0;
-               nhdr->b_l2_hits = 0;
+
+               nhdr->b_l1hdr.b_mru_hits = 0;
+               nhdr->b_l1hdr.b_mru_ghost_hits = 0;
+               nhdr->b_l1hdr.b_mfu_hits = 0;
+               nhdr->b_l1hdr.b_mfu_ghost_hits = 0;
+               nhdr->b_l1hdr.b_l2_hits = 0;
                nhdr->b_flags = flags & ARC_FLAG_L2_WRITING;
-               nhdr->b_l2hdr = NULL;
-               nhdr->b_datacnt = 1;
+               nhdr->b_flags |= arc_bufc_to_flags(type);
+               nhdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+
+               nhdr->b_l1hdr.b_buf = buf;
+               nhdr->b_l1hdr.b_datacnt = 1;
+               nhdr->b_l1hdr.b_state = arc_anon;
+               nhdr->b_l1hdr.b_arc_access = 0;
                nhdr->b_freeze_cksum = NULL;
-               (void) refcount_add(&nhdr->b_refcnt, tag);
+
+               (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
                buf->b_hdr = nhdr;
                mutex_exit(&buf->b_evict_lock);
                atomic_add_64(&arc_anon->arcs_size, blksz);
        } else {
                mutex_exit(&buf->b_evict_lock);
-               ASSERT(refcount_count(&hdr->b_refcnt) == 1);
-               ASSERT(!list_link_active(&hdr->b_arc_node));
+               ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1);
+               /* protected by hash lock */
+               ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
                ASSERT(!HDR_IO_IN_PROGRESS(hdr));
-               if (hdr->b_state != arc_anon)
-                       arc_change_state(arc_anon, hdr, hash_lock);
-               hdr->b_arc_access = 0;
-               hdr->b_mru_hits = 0;
-               hdr->b_mru_ghost_hits = 0;
-               hdr->b_mfu_hits = 0;
-               hdr->b_mfu_ghost_hits = 0;
-               hdr->b_l2_hits = 0;
-               if (hash_lock)
-                       mutex_exit(hash_lock);
+               hdr->b_l1hdr.b_mru_hits = 0;
+               hdr->b_l1hdr.b_mru_ghost_hits = 0;
+               hdr->b_l1hdr.b_mfu_hits = 0;
+               hdr->b_l1hdr.b_mfu_ghost_hits = 0;
+               hdr->b_l1hdr.b_l2_hits = 0;
+               arc_change_state(arc_anon, hdr, hash_lock);
+               hdr->b_l1hdr.b_arc_access = 0;
+               mutex_exit(hash_lock);
 
                buf_discard_identity(hdr);
                arc_buf_thaw(buf);
        }
        buf->b_efunc = NULL;
        buf->b_private = NULL;
-
-       if (l2hdr) {
-               ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
-               vdev_space_update(l2hdr->b_dev->l2ad_vdev,
-                   -l2hdr->b_asize, 0, 0);
-               kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
-               arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
-               ARCSTAT_INCR(arcstat_l2_size, -buf_size);
-               mutex_exit(&l2arc_buflist_mtx);
-       }
 }
 
 int
@@ -3733,7 +3998,8 @@ arc_released(arc_buf_t *buf)
        int released;
 
        mutex_enter(&buf->b_evict_lock);
-       released = (buf->b_data != NULL && buf->b_hdr->b_state == arc_anon);
+       released = (buf->b_data != NULL &&
+           buf->b_hdr->b_l1hdr.b_state == arc_anon);
        mutex_exit(&buf->b_evict_lock);
        return (released);
 }
@@ -3745,7 +4011,7 @@ arc_referenced(arc_buf_t *buf)
        int referenced;
 
        mutex_enter(&buf->b_evict_lock);
-       referenced = (refcount_count(&buf->b_hdr->b_refcnt));
+       referenced = (refcount_count(&buf->b_hdr->b_l1hdr.b_refcnt));
        mutex_exit(&buf->b_evict_lock);
        return (referenced);
 }
@@ -3758,7 +4024,9 @@ arc_write_ready(zio_t *zio)
        arc_buf_t *buf = callback->awcb_buf;
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
-       ASSERT(!refcount_is_zero(&buf->b_hdr->b_refcnt));
+       ASSERT(HDR_HAS_L1HDR(hdr));
+       ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt));
+       ASSERT(hdr->b_l1hdr.b_datacnt > 0);
        callback->awcb_ready(zio, buf, callback->awcb_private);
 
        /*
@@ -3768,12 +4036,12 @@ arc_write_ready(zio_t *zio)
         * accounting for any re-write attempt.
         */
        if (HDR_IO_IN_PROGRESS(hdr)) {
-               mutex_enter(&hdr->b_freeze_lock);
+               mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
                if (hdr->b_freeze_cksum != NULL) {
                        kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
                        hdr->b_freeze_cksum = NULL;
                }
-               mutex_exit(&hdr->b_freeze_lock);
+               mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
        }
        arc_cksum_compute(buf, B_FALSE);
        hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
@@ -3798,7 +4066,7 @@ arc_write_done(zio_t *zio)
        arc_buf_t *buf = callback->awcb_buf;
        arc_buf_hdr_t *hdr = buf->b_hdr;
 
-       ASSERT(hdr->b_acb == NULL);
+       ASSERT(hdr->b_l1hdr.b_acb == NULL);
 
        if (zio->io_error == 0) {
                if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) {
@@ -3806,7 +4074,6 @@ arc_write_done(zio_t *zio)
                } else {
                        hdr->b_dva = *BP_IDENTITY(zio->io_bp);
                        hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
-                       hdr->b_cksum0 = zio->io_bp->blk_cksum.zc_word[0];
                }
        } else {
                ASSERT(BUF_EMPTY(hdr));
@@ -3827,7 +4094,7 @@ arc_write_done(zio_t *zio)
                arc_cksum_verify(buf);
 
                exists = buf_hash_insert(hdr, &hash_lock);
-               if (exists) {
+               if (exists != NULL) {
                        /*
                         * This can only happen if we overwrite for
                         * sync-to-convergence, because we remove
@@ -3837,7 +4104,8 @@ arc_write_done(zio_t *zio)
                                if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp))
                                        panic("bad overwrite, hdr=%p exists=%p",
                                            (void *)hdr, (void *)exists);
-                               ASSERT(refcount_is_zero(&exists->b_refcnt));
+                               ASSERT(refcount_is_zero(
+                                   &exists->b_l1hdr.b_refcnt));
                                arc_change_state(arc_anon, exists, hash_lock);
                                mutex_exit(hash_lock);
                                arc_hdr_destroy(exists);
@@ -3851,22 +4119,22 @@ arc_write_done(zio_t *zio)
                                            (void *)hdr, (void *)exists);
                        } else {
                                /* Dedup */
-                               ASSERT(hdr->b_datacnt == 1);
-                               ASSERT(hdr->b_state == arc_anon);
+                               ASSERT(hdr->b_l1hdr.b_datacnt == 1);
+                               ASSERT(hdr->b_l1hdr.b_state == arc_anon);
                                ASSERT(BP_GET_DEDUP(zio->io_bp));
                                ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
                        }
                }
                hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
                /* if it's not anon, we are doing a scrub */
-               if (!exists && hdr->b_state == arc_anon)
+               if (exists == NULL && hdr->b_l1hdr.b_state == arc_anon)
                        arc_access(hdr, hash_lock);
                mutex_exit(hash_lock);
        } else {
                hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
        }
 
-       ASSERT(!refcount_is_zero(&hdr->b_refcnt));
+       ASSERT(!refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
        callback->awcb_done(zio, buf, callback->awcb_private);
 
        kmem_free(callback, sizeof (arc_write_callback_t));
@@ -3886,8 +4154,9 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
        ASSERT(ready != NULL);
        ASSERT(done != NULL);
        ASSERT(!HDR_IO_ERROR(hdr));
-       ASSERT((hdr->b_flags & ARC_FLAG_IO_IN_PROGRESS) == 0);
-       ASSERT(hdr->b_acb == NULL);
+       ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+       ASSERT(hdr->b_l1hdr.b_acb == NULL);
+       ASSERT(hdr->b_l1hdr.b_datacnt > 0);
        if (l2arc)
                hdr->b_flags |= ARC_FLAG_L2CACHE;
        if (l2arc_compress)
@@ -4102,25 +4371,35 @@ arc_init(void)
        mutex_init(&arc_l2c_only->arcs_mtx, NULL, MUTEX_DEFAULT, NULL);
 
        list_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
        list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
-           sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+           sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node));
 
        arc_anon->arcs_state = ARC_STATE_ANON;
        arc_mru->arcs_state = ARC_STATE_MRU;
@@ -4231,7 +4510,7 @@ arc_fini(void)
 
        buf_fini();
 
-       ASSERT(arc_loaned_bytes == 0);
+       ASSERT0(arc_loaned_bytes);
 }
 
 /*
@@ -4390,7 +4669,7 @@ l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr)
         * 3. has an I/O in progress (it may be an incomplete read).
         * 4. is flagged not eligible (zfs property).
         */
-       if (hdr->b_spa != spa_guid || hdr->b_l2hdr != NULL ||
+       if (hdr->b_spa != spa_guid || HDR_HAS_L2HDR(hdr) ||
            HDR_IO_IN_PROGRESS(hdr) || !HDR_L2CACHE(hdr))
                return (B_FALSE);
 
@@ -4443,20 +4722,6 @@ l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote)
        return (next);
 }
 
-static void
-l2arc_hdr_stat_add(void)
-{
-       ARCSTAT_INCR(arcstat_l2_hdr_size, HDR_SIZE);
-       ARCSTAT_INCR(arcstat_hdr_size, -HDR_SIZE);
-}
-
-static void
-l2arc_hdr_stat_remove(void)
-{
-       ARCSTAT_INCR(arcstat_l2_hdr_size, -HDR_SIZE);
-       ARCSTAT_INCR(arcstat_hdr_size, HDR_SIZE);
-}
-
 /*
  * Cycle through L2ARC devices.  This is how L2ARC load balances.
  * If a device is returned, this also returns holding the spa config lock.
@@ -4553,7 +4818,6 @@ l2arc_write_done(zio_t *zio)
        l2arc_dev_t *dev;
        list_t *buflist;
        arc_buf_hdr_t *head, *hdr, *hdr_prev;
-       l2arc_buf_hdr_t *abl2;
        kmutex_t *hash_lock;
        int64_t bytes_dropped = 0;
 
@@ -4563,7 +4827,7 @@ l2arc_write_done(zio_t *zio)
        ASSERT(dev != NULL);
        head = cb->l2wcb_head;
        ASSERT(head != NULL);
-       buflist = dev->l2ad_buflist;
+       buflist = &dev->l2ad_buflist;
        ASSERT(buflist != NULL);
        DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
            l2arc_write_callback_t *, cb);
@@ -4571,42 +4835,43 @@ l2arc_write_done(zio_t *zio)
        if (zio->io_error != 0)
                ARCSTAT_BUMP(arcstat_l2_writes_error);
 
-       mutex_enter(&l2arc_buflist_mtx);
+       mutex_enter(&dev->l2ad_mtx);
 
        /*
         * All writes completed, or an error was hit.
         */
        for (hdr = list_prev(buflist, head); hdr; hdr = hdr_prev) {
                hdr_prev = list_prev(buflist, hdr);
-               abl2 = hdr->b_l2hdr;
-
-               /*
-                * Release the temporary compressed buffer as soon as possible.
-                */
-               if (abl2->b_compress != ZIO_COMPRESS_OFF)
-                       l2arc_release_cdata_buf(hdr);
 
                hash_lock = HDR_LOCK(hdr);
                if (!mutex_tryenter(hash_lock)) {
                        /*
                         * This buffer misses out.  It may be in a stage
-                        * of eviction.  Its ARC_L2_WRITING flag will be
+                        * of eviction.  Its ARC_FLAG_L2_WRITING flag will be
                         * left set, denying reads to this buffer.
                         */
                        ARCSTAT_BUMP(arcstat_l2_writes_hdr_miss);
                        continue;
                }
 
+               /*
+                * It's possible that this buffer got evicted from the L1 cache
+                * before we grabbed the vdev + hash locks, in which case
+                * arc_hdr_realloc freed b_tmp_cdata for us if it was allocated.
+                * Only free the buffer if we still have an L1 hdr.
+                */
+               if (HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_tmp_cdata != NULL &&
+                   HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF)
+                       l2arc_release_cdata_buf(hdr);
+
                if (zio->io_error != 0) {
                        /*
                         * Error - drop L2ARC entry.
                         */
                        list_remove(buflist, hdr);
-                       ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
-                       bytes_dropped += abl2->b_asize;
-                       hdr->b_l2hdr = NULL;
-                       kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
-                       arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
+                       hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
+
+                       ARCSTAT_INCR(arcstat_l2_asize, -hdr->b_l2hdr.b_asize);
                        ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
                }
 
@@ -4620,8 +4885,9 @@ l2arc_write_done(zio_t *zio)
 
        atomic_inc_64(&l2arc_writes_done);
        list_remove(buflist, head);
-       kmem_cache_free(hdr_cache, head);
-       mutex_exit(&l2arc_buflist_mtx);
+       ASSERT(!HDR_HAS_L1HDR(head));
+       kmem_cache_free(hdr_l2only_cache, head);
+       mutex_exit(&dev->l2ad_mtx);
 
        vdev_space_update(dev->l2ad_vdev, -bytes_dropped, 0, 0);
 
@@ -4759,16 +5025,12 @@ static void
 l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
 {
        list_t *buflist;
-       l2arc_buf_hdr_t *abl2;
        arc_buf_hdr_t *hdr, *hdr_prev;
        kmutex_t *hash_lock;
        uint64_t taddr;
        int64_t bytes_evicted = 0;
 
-       buflist = dev->l2ad_buflist;
-
-       if (buflist == NULL)
-               return;
+       buflist = &dev->l2ad_buflist;
 
        if (!all && dev->l2ad_first) {
                /*
@@ -4791,7 +5053,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
            uint64_t, taddr, boolean_t, all);
 
 top:
-       mutex_enter(&l2arc_buflist_mtx);
+       mutex_enter(&dev->l2ad_mtx);
        for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) {
                hdr_prev = list_prev(buflist, hdr);
 
@@ -4801,7 +5063,7 @@ top:
                         * Missed the hash lock.  Retry.
                         */
                        ARCSTAT_BUMP(arcstat_l2_evict_lock_retry);
-                       mutex_exit(&l2arc_buflist_mtx);
+                       mutex_exit(&dev->l2ad_mtx);
                        mutex_enter(hash_lock);
                        mutex_exit(hash_lock);
                        goto top;
@@ -4817,9 +5079,9 @@ top:
                        continue;
                }
 
-               if (!all && hdr->b_l2hdr != NULL &&
-                   (hdr->b_l2hdr->b_daddr > taddr ||
-                   hdr->b_l2hdr->b_daddr < dev->l2ad_hand)) {
+               if (!all && HDR_HAS_L2HDR(hdr) &&
+                   (hdr->b_l2hdr.b_daddr > taddr ||
+                   hdr->b_l2hdr.b_daddr < dev->l2ad_hand)) {
                        /*
                         * We've evicted to the target address,
                         * or the end of the device.
@@ -4828,15 +5090,8 @@ top:
                        break;
                }
 
-               if (HDR_FREE_IN_PROGRESS(hdr)) {
-                       /*
-                        * Already on the path to destruction.
-                        */
-                       mutex_exit(hash_lock);
-                       continue;
-               }
-
-               if (hdr->b_state == arc_l2c_only) {
+               ASSERT(HDR_HAS_L2HDR(hdr));
+               if (!HDR_HAS_L1HDR(hdr)) {
                        ASSERT(!HDR_L2_READING(hdr));
                        /*
                         * This doesn't exist in the ARC.  Destroy.
@@ -4846,6 +5101,8 @@ top:
                        arc_change_state(arc_anon, hdr, hash_lock);
                        arc_hdr_destroy(hdr);
                } else {
+                       ASSERT(hdr->b_l1hdr.b_state != arc_l2c_only);
+                       ARCSTAT_BUMP(arcstat_l2_evict_l1cached);
                        /*
                         * Invalidate issued or about to be issued
                         * reads, since we may be about to write
@@ -4859,26 +5116,18 @@ top:
                        /*
                         * Tell ARC this no longer exists in L2ARC.
                         */
-                       if (hdr->b_l2hdr != NULL) {
-                               abl2 = hdr->b_l2hdr;
-                               ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
-                               bytes_evicted += abl2->b_asize;
-                               hdr->b_l2hdr = NULL;
-                               kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
-                               arc_space_return(L2HDR_SIZE, ARC_SPACE_L2HDRS);
-                               ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
-                       }
+                       ARCSTAT_INCR(arcstat_l2_asize, -hdr->b_l2hdr.b_asize);
+                       ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+                       hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
                        list_remove(buflist, hdr);
 
-                       /*
-                        * This may have been leftover after a
-                        * failed write.
-                        */
+                       /* This may have been leftover after a failed write. */
                        hdr->b_flags &= ~ARC_FLAG_L2_WRITING;
                }
                mutex_exit(hash_lock);
        }
-       mutex_exit(&l2arc_buflist_mtx);
+       mutex_exit(&dev->l2ad_mtx);
 
        vdev_space_update(dev->l2ad_vdev, -bytes_evicted, 0, 0);
        dev->l2ad_evict = taddr;
@@ -4920,8 +5169,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
        pio = NULL;
        write_sz = write_asize = write_psize = 0;
        full = B_FALSE;
-       head = kmem_cache_alloc(hdr_cache, KM_PUSHPAGE);
+       head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
        head->b_flags |= ARC_FLAG_L2_WRITE_HEAD;
+       head->b_flags |= ARC_FLAG_HAS_L2HDR;
 
        /*
         * We will want to try to compress buffers that are at least 2x the
@@ -4932,7 +5182,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
        /*
         * Copy buffers for L2ARC writing.
         */
-       mutex_enter(&l2arc_buflist_mtx);
+       mutex_enter(&dev->l2ad_mtx);
        for (try = 0; try <= 3; try++) {
                uint64_t passed_sz = 0;
 
@@ -4954,7 +5204,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
                        headroom = (headroom * l2arc_headroom_boost) / 100;
 
                for (; hdr; hdr = hdr_prev) {
-                       l2arc_buf_hdr_t *l2hdr;
                        kmutex_t *hash_lock;
                        uint64_t buf_sz;
 
@@ -4997,7 +5246,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
                                 * l2arc_write_done() can find where the
                                 * write buffers begin without searching.
                                 */
-                               list_insert_head(dev->l2ad_buflist, head);
+                               list_insert_head(&dev->l2ad_buflist, head);
 
                                cb = kmem_alloc(sizeof (l2arc_write_callback_t),
                                    KM_SLEEP);
@@ -5010,37 +5259,33 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
                        /*
                         * Create and add a new L2ARC header.
                         */
-                       l2hdr = kmem_zalloc(sizeof (l2arc_buf_hdr_t),
-                           KM_PUSHPAGE);
-                       l2hdr->b_dev = dev;
-                       arc_space_consume(L2HDR_SIZE, ARC_SPACE_L2HDRS);
-
+                       hdr->b_l2hdr.b_dev = dev;
+                       arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
                        hdr->b_flags |= ARC_FLAG_L2_WRITING;
-
                        /*
                         * Temporarily stash the data buffer in b_tmp_cdata.
                         * The subsequent write step will pick it up from
-                        * there. This is because can't access hdr->b_buf
+                        * there. This is because we can't access b_l1hdr.b_buf
                         * without holding the hash_lock, which we in turn
                         * can't access without holding the ARC list locks
                         * (which we want to avoid during compression/writing)
                         */
-                       l2hdr->b_compress = ZIO_COMPRESS_OFF;
-                       l2hdr->b_asize = hdr->b_size;
-                       l2hdr->b_tmp_cdata = hdr->b_buf->b_data;
-                       l2hdr->b_hits = 0;
+                       HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
+                       hdr->b_l2hdr.b_asize = hdr->b_size;
+                       hdr->b_l2hdr.b_hits = 0;
+                       hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
 
                        buf_sz = hdr->b_size;
-                       hdr->b_l2hdr = l2hdr;
+                       hdr->b_flags |= ARC_FLAG_HAS_L2HDR;
 
-                       list_insert_head(dev->l2ad_buflist, hdr);
+                       list_insert_head(&dev->l2ad_buflist, hdr);
 
                        /*
                         * Compute and store the buffer cksum before
                         * writing.  On debug the cksum is verified first.
                         */
-                       arc_cksum_verify(hdr->b_buf);
-                       arc_cksum_compute(hdr->b_buf, B_TRUE);
+                       arc_cksum_verify(hdr->b_l1hdr.b_buf);
+                       arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE);
 
                        mutex_exit(hash_lock);
 
@@ -5056,8 +5301,9 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
        /* No buffers selected for writing? */
        if (pio == NULL) {
                ASSERT0(write_sz);
-               mutex_exit(&l2arc_buflist_mtx);
-               kmem_cache_free(hdr_cache, head);
+               mutex_exit(&dev->l2ad_mtx);
+               ASSERT(!HDR_HAS_L1HDR(head));
+               kmem_cache_free(hdr_l2only_cache, head);
                return (0);
        }
 
@@ -5066,24 +5312,22 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
         * and work backwards, retracing the course of the buffer selector
         * loop above.
         */
-       for (hdr = list_prev(dev->l2ad_buflist, head); hdr;
-           hdr = list_prev(dev->l2ad_buflist, hdr)) {
-               l2arc_buf_hdr_t *l2hdr;
+       for (hdr = list_prev(&dev->l2ad_buflist, head); hdr;
+           hdr = list_prev(&dev->l2ad_buflist, hdr)) {
                uint64_t buf_sz;
 
                /*
                 * We shouldn't need to lock the buffer here, since we flagged
                 * it as ARC_FLAG_L2_WRITING in the previous step, but we must
                 * take care to only access its L2 cache parameters. In
-                * particular, hdr->b_buf may be invalid by now due to
+                * particular, hdr->b_l1hdr.b_buf may be invalid by now due to
                 * ARC eviction.
                 */
-               l2hdr = hdr->b_l2hdr;
-               l2hdr->b_daddr = dev->l2ad_hand;
+               hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
 
-               if (!l2arc_nocompress && (hdr->b_flags & ARC_FLAG_L2COMPRESS) &&
-                   l2hdr->b_asize >= buf_compress_minsz) {
-                       if (l2arc_compress_buf(l2hdr)) {
+               if ((!l2arc_nocompress && HDR_L2COMPRESS(hdr)) &&
+                   hdr->b_l2hdr.b_asize >= buf_compress_minsz) {
+                       if (l2arc_compress_buf(hdr)) {
                                /*
                                 * If compression succeeded, enable headroom
                                 * boost on the next scan cycle.
@@ -5096,8 +5340,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
                 * Pick up the buffer data we had previously stashed away
                 * (and now potentially also compressed).
                 */
-               buf_data = l2hdr->b_tmp_cdata;
-               buf_sz = l2hdr->b_asize;
+               buf_data = hdr->b_l1hdr.b_tmp_cdata;
+               buf_sz = hdr->b_l2hdr.b_asize;
 
                /* Compression may have squashed the buffer to zero length. */
                if (buf_sz != 0) {
@@ -5122,7 +5366,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
                }
        }
 
-       mutex_exit(&l2arc_buflist_mtx);
+       mutex_exit(&dev->l2ad_mtx);
 
        ASSERT3U(write_asize, <=, target_sz);
        ARCSTAT_BUMP(arcstat_l2_writes_sent);
@@ -5150,7 +5394,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
 
 /*
  * Compresses an L2ARC buffer.
- * The data to be compressed must be prefilled in l2hdr->b_tmp_cdata and its
+ * The data to be compressed must be prefilled in l1hdr.b_tmp_cdata and its
  * size in l2hdr->b_asize. This routine tries to compress the data and
  * depending on the compression result there are three possible outcomes:
  * *) The buffer was incompressible. The original l2hdr contents were left
@@ -5168,17 +5412,24 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
  * buffer was incompressible).
  */
 static boolean_t
-l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
+l2arc_compress_buf(arc_buf_hdr_t *hdr)
 {
        void *cdata;
        size_t csize, len, rounded;
+       l2arc_buf_hdr_t *l2hdr;
 
-       ASSERT(l2hdr->b_compress == ZIO_COMPRESS_OFF);
-       ASSERT(l2hdr->b_tmp_cdata != NULL);
+       ASSERT(HDR_HAS_L2HDR(hdr));
+
+       l2hdr = &hdr->b_l2hdr;
+
+       ASSERT(HDR_HAS_L1HDR(hdr));
+       ASSERT(HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF);
+       ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
 
        len = l2hdr->b_asize;
        cdata = zio_data_buf_alloc(len);
-       csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
+       ASSERT3P(cdata, !=, NULL);
+       csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata,
            cdata, l2hdr->b_asize);
 
        rounded = P2ROUNDUP(csize, (size_t)SPA_MINBLOCKSIZE);
@@ -5190,9 +5441,9 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
        if (csize == 0) {
                /* zero block, indicate that there's nothing to write */
                zio_data_buf_free(cdata, len);
-               l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
+               HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_EMPTY);
                l2hdr->b_asize = 0;
-               l2hdr->b_tmp_cdata = NULL;
+               hdr->b_l1hdr.b_tmp_cdata = NULL;
                ARCSTAT_BUMP(arcstat_l2_compress_zeros);
                return (B_TRUE);
        } else if (csize > 0 && csize < len) {
@@ -5200,9 +5451,9 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
                 * Compression succeeded, we'll keep the cdata around for
                 * writing and release it afterwards.
                 */
-               l2hdr->b_compress = ZIO_COMPRESS_LZ4;
+               HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_LZ4);
                l2hdr->b_asize = csize;
-               l2hdr->b_tmp_cdata = cdata;
+               hdr->b_l1hdr.b_tmp_cdata = cdata;
                ARCSTAT_BUMP(arcstat_l2_compress_successes);
                return (B_TRUE);
        } else {
@@ -5250,9 +5501,9 @@ l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, enum zio_compress c)
                 * need to fill its io_data after we're done restoring the
                 * buffer's contents.
                 */
-               ASSERT(hdr->b_buf != NULL);
-               bzero(hdr->b_buf->b_data, hdr->b_size);
-               zio->io_data = zio->io_orig_data = hdr->b_buf->b_data;
+               ASSERT(hdr->b_l1hdr.b_buf != NULL);
+               bzero(hdr->b_l1hdr.b_buf->b_data, hdr->b_size);
+               zio->io_data = zio->io_orig_data = hdr->b_l1hdr.b_buf->b_data;
        } else {
                ASSERT(zio->io_data != NULL);
                /*
@@ -5287,17 +5538,17 @@ l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, enum zio_compress c)
 static void
 l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
 {
-       l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
-
-       if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {
+       ASSERT(HDR_HAS_L1HDR(hdr));
+       if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_EMPTY) {
                /*
                 * If the data was compressed, then we've allocated a
                 * temporary buffer for it, so now we need to release it.
                 */
-               ASSERT(l2hdr->b_tmp_cdata != NULL);
-               zio_data_buf_free(l2hdr->b_tmp_cdata, hdr->b_size);
+               ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
+               zio_data_buf_free(hdr->b_l1hdr.b_tmp_cdata,
+                   hdr->b_size);
        }
-       l2hdr->b_tmp_cdata = NULL;
+       hdr->b_l1hdr.b_tmp_cdata = NULL;
 }
 
 /*
@@ -5442,13 +5693,13 @@ l2arc_add_vdev(spa_t *spa, vdev_t *vd)
        adddev->l2ad_writing = B_FALSE;
        list_link_init(&adddev->l2ad_node);
 
+       mutex_init(&adddev->l2ad_mtx, NULL, MUTEX_DEFAULT, NULL);
        /*
         * This is a list of all ARC buffers that are still valid on the
         * device.
         */
-       adddev->l2ad_buflist = kmem_zalloc(sizeof (list_t), KM_SLEEP);
-       list_create(adddev->l2ad_buflist, sizeof (arc_buf_hdr_t),
-           offsetof(arc_buf_hdr_t, b_l2node));
+       list_create(&adddev->l2ad_buflist, sizeof (arc_buf_hdr_t),
+           offsetof(arc_buf_hdr_t, b_l2hdr.b_l2node));
 
        vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand);
 
@@ -5494,8 +5745,8 @@ l2arc_remove_vdev(vdev_t *vd)
         * Clear all buflists and ARC references.  L2ARC device flush.
         */
        l2arc_evict(remdev, 0, B_TRUE);
-       list_destroy(remdev->l2ad_buflist);
-       kmem_free(remdev->l2ad_buflist, sizeof (list_t));
+       list_destroy(&remdev->l2ad_buflist);
+       mutex_destroy(&remdev->l2ad_mtx);
        kmem_free(remdev, sizeof (l2arc_dev_t));
 }
 
@@ -5510,7 +5761,6 @@ l2arc_init(void)
        mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);
        mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
-       mutex_init(&l2arc_buflist_mtx, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL);
 
        l2arc_dev_list = &L2ARC_dev_list;
@@ -5535,7 +5785,6 @@ l2arc_fini(void)
        mutex_destroy(&l2arc_feed_thr_lock);
        cv_destroy(&l2arc_feed_thr_cv);
        mutex_destroy(&l2arc_dev_mtx);
-       mutex_destroy(&l2arc_buflist_mtx);
        mutex_destroy(&l2arc_free_on_write_mtx);
 
        list_destroy(l2arc_dev_list);