]> granicus.if.org Git - zfs/commitdiff
SHA256 QAT acceleration
author Tom Caputi <tcaputi@datto.com>
Thu, 15 Mar 2018 17:53:58 +0000 (13:53 -0400)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 15 Mar 2018 17:53:58 +0000 (10:53 -0700)
This patch enables acceleration of SHA256 checksums using Intel
Quick Assist Technology. This patch also fixes up and refactors
some of the code from QAT encryption to make the behavior
consistent.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chengfeix Zhu <chengfeix.zhu@intel.com>
Signed-off-by: Weigang Li <weigang.li@intel.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7295

module/zfs/qat.c
module/zfs/qat.h
module/zfs/qat_compress.c
module/zfs/qat_crypt.c
module/zfs/sha256.c

index 4dc34f1e626c921efa8115014f3fbff8ce631d90..0a4f9c72a3dfc77837f67fcc5a2629193830c20d 100644 (file)
@@ -38,10 +38,12 @@ qat_stats_t qat_stats = {
        { "decrypt_total_in_bytes",             KSTAT_DATA_UINT64 },
        { "decrypt_total_out_bytes",            KSTAT_DATA_UINT64 },
        { "crypt_fails",                        KSTAT_DATA_UINT64 },
+       { "cksum_requests",                     KSTAT_DATA_UINT64 },
+       { "cksum_total_in_bytes",               KSTAT_DATA_UINT64 },
+       { "cksum_fails",                        KSTAT_DATA_UINT64 },
 };
 
 static kstat_t *qat_ksp = NULL;
-int zfs_qat_disable = 0;
 
 CpaStatus
 qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
index 44f9cb532f6f68eb4309c74f749e3e3c0c8c8c1a..4866dfe150692e1937c87d70ae6a0acd84e20c51 100644 (file)
@@ -122,6 +122,22 @@ typedef struct qat_stats {
         * so the functionality of ZFS is not impacted.
         */
        kstat_named_t crypt_fails;
+
+       /*
+        * Number of jobs submitted to qat checksum engine.
+        */
+       kstat_named_t cksum_requests;
+       /*
+        * Total bytes sent to qat checksum engine.
+        */
+       kstat_named_t cksum_total_in_bytes;
+       /*
+        * Number of fails in the qat checksum engine.
+        * Note: when qat fail happens, it doesn't mean a critical hardware
+        * issue. The checksum job will be transferred to the software
+        * implementation, so the functionality of ZFS is not impacted.
+        */
+       kstat_named_t cksum_fails;
 } qat_stats_t;
 
 #define        QAT_STAT_INCR(stat, val) \
@@ -130,7 +146,6 @@ typedef struct qat_stats {
        QAT_STAT_INCR(stat, 1)
 
 extern qat_stats_t qat_stats;
-extern int zfs_qat_disable;
 
 /* inlined for performance */
 static inline struct page *
@@ -158,19 +173,24 @@ extern void qat_fini(void);
 
 extern boolean_t qat_dc_use_accel(size_t s_len);
 extern boolean_t qat_crypt_use_accel(size_t s_len);
+extern boolean_t qat_checksum_use_accel(size_t s_len);
 extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len,
     char *dst, int dst_len, size_t *c_len);
 extern int qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
     uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
     crypto_key_t *key, uint64_t crypt, uint32_t enc_len);
+extern int qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size,
+    zio_cksum_t *zcp);
 #else
 #define        CPA_STATUS_SUCCESS                                      0
 #define        qat_init()
 #define        qat_fini()
 #define        qat_dc_use_accel(s_len)                                 0
 #define        qat_crypt_use_accel(s_len)                              0
+#define        qat_checksum_use_accel(s_len)                           0
 #define        qat_compress(dir, s, sl, d, dl, cl)                     0
 #define        qat_crypt(dir, s, d, a, al, i, db, k, c, el)            0
+#define        qat_checksum(c, buf, s, z)                              0
 #endif
 
 #endif /* _SYS_QAT_H */
index 3d756b53d245886f9743ea7ef9d5792c74169d26..2116c97902e79571a5ad52caccf0967b19ba9e66 100644 (file)
@@ -47,11 +47,12 @@ static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
 static Cpa16U num_inst = 0;
 static Cpa32U inst_num = 0;
 static boolean_t qat_dc_init_done = B_FALSE;
+int zfs_qat_compress_disable = 0;
 
 boolean_t
 qat_dc_use_accel(size_t s_len)
 {
-       return (!zfs_qat_disable &&
+       return (!zfs_qat_compress_disable &&
            qat_dc_init_done &&
            s_len >= QAT_MIN_BUF_SIZE &&
            s_len <= QAT_MAX_BUF_SIZE);
@@ -471,4 +472,7 @@ fail:
        return (ret);
 }
 
+module_param(zfs_qat_compress_disable, int, 0644);
+MODULE_PARM_DESC(zfs_qat_compress_disable, "Disable QAT compression");
+
 #endif
index d850d9ce8f26138c87d0f10052b907e89992fe42..3e2a76ab2dd92ac883316c24183e296f6602d9c9 100644 (file)
  * CDDL HEADER END
  */
 
+/*
+ * This file represents the QAT implementation of checksums and encryption.
+ * Internally, QAT shares the same cryptographic instances for both of these
+ * operations, so the code has been combined here. QAT data compression uses
+ * compression instances, so that code is separated into qat_compress.c
+ */
+
 #if defined(_KERNEL) && defined(HAVE_QAT)
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -27,6 +34,7 @@
 #include <sys/zfs_context.h>
 #include <sys/zio_crypt.h>
 #include "lac/cpa_cy_im.h"
+#include "lac/cpa_cy_common.h"
 #include "qat.h"
 
 /*
 
 #define        MAX_PAGE_NUM                    1024
 
-static boolean_t qat_crypt_init_done = B_FALSE;
 static Cpa16U inst_num = 0;
 static Cpa16U num_inst = 0;
 static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES];
+static boolean_t qat_crypt_init_done = B_FALSE;
+int zfs_qat_encrypt_disable = 0;
+int zfs_qat_checksum_disable = 0;
 
 typedef struct cy_callback {
        CpaBoolean verify_result;
@@ -65,7 +75,16 @@ symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation,
 boolean_t
 qat_crypt_use_accel(size_t s_len)
 {
-       return (!zfs_qat_disable &&
+       return (!zfs_qat_encrypt_disable &&
+           qat_crypt_init_done &&
+           s_len >= QAT_MIN_BUF_SIZE &&
+           s_len <= QAT_MAX_BUF_SIZE);
+}
+
+boolean_t
+qat_checksum_use_accel(size_t s_len)
+{
+       return (!zfs_qat_checksum_disable &&
            qat_crypt_init_done &&
            s_len >= QAT_MIN_BUF_SIZE &&
            s_len <= QAT_MAX_BUF_SIZE);
@@ -131,7 +150,7 @@ qat_crypt_fini(void)
 }
 
 static CpaStatus
-init_cy_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
+qat_init_crypt_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
     CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key,
     Cpa64U crypt, Cpa32U aad_len)
 {
@@ -192,7 +211,52 @@ init_cy_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
 }
 
 static CpaStatus
-init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
+qat_init_checksum_session_ctx(CpaInstanceHandle inst_handle,
+    CpaCySymSessionCtx **cy_session_ctx, Cpa64U cksum)
+{
+       CpaStatus status = CPA_STATUS_SUCCESS;
+       Cpa32U ctx_size;
+       Cpa32U hash_algorithm;
+       CpaCySymSessionSetupData sd = { 0 };
+
+       /*
+        * Allocate and initialize a plain-mode hash session on inst_handle
+        * for the ZIO checksum type cksum. The digest length is fixed to
+        * sizeof (zio_cksum_t) and the digest is neither appended to the
+        * data nor verified by the session.
+        *
+        * On success *cy_session_ctx points to the session memory allocated
+        * here and CPA_STATUS_SUCCESS is returned. On failure the status of
+        * the failing step is returned; if the session memory had already
+        * been allocated it is freed again, but *cy_session_ctx is not
+        * reset, so the caller must not use it after a failure.
+        *
+        * ZFS's SHA512 checksum is actually SHA512/256, which uses
+        * a different IV from standard SHA512. QAT does not support
+        * SHA512/256, so we can only support SHA256.
+        */
+       if (cksum == ZIO_CHECKSUM_SHA256)
+               hash_algorithm = CPA_CY_SYM_HASH_SHA256;
+       else
+               return (CPA_STATUS_FAIL);
+
+       sd.sessionPriority = CPA_CY_PRIORITY_NORMAL;
+       sd.symOperation = CPA_CY_SYM_OP_HASH;
+       sd.hashSetupData.hashAlgorithm = hash_algorithm;
+       sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_PLAIN;
+       sd.hashSetupData.digestResultLenInBytes = sizeof (zio_cksum_t);
+       sd.digestIsAppended = CPA_FALSE;
+       sd.verifyDigest = CPA_FALSE;
+
+       status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
+       if (status != CPA_STATUS_SUCCESS)
+               return (status);
+
+       status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
+       if (status != CPA_STATUS_SUCCESS)
+               return (status);
+
+       status = cpaCySymInitSession(inst_handle, symcallback, &sd,
+           *cy_session_ctx);
+       if (status != CPA_STATUS_SUCCESS) {
+               QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
+               return (status);
+       }
+
+       return (CPA_STATUS_SUCCESS);
+}
+
+static CpaStatus
+qat_init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
     CpaBufferList *src, CpaBufferList *dst)
 {
        CpaStatus status = CPA_STATUS_SUCCESS;
@@ -233,7 +297,7 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
        CpaStatus status = CPA_STATUS_SUCCESS;
        Cpa16U i;
        CpaInstanceHandle cy_inst_handle;
-       Cpa16U nr_bufs;
+       Cpa16U nr_bufs = (enc_len + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
        Cpa32U bytes_left = 0;
        Cpa8S *in = NULL;
        Cpa8S *out = NULL;
@@ -249,6 +313,8 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
        struct page *in_pages[MAX_PAGE_NUM];
        struct page *out_pages[MAX_PAGE_NUM];
        Cpa32S page_num = 0;
+       Cpa32U in_page_off = 0;
+       Cpa32U out_page_off = 0;
 
        if (dir == QAT_ENCRYPT) {
                QAT_STAT_BUMP(encrypt_requests);
@@ -261,15 +327,17 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
        i = atomic_inc_32_nv(&inst_num) % num_inst;
        cy_inst_handle = cy_inst_handles[i];
 
-       status = init_cy_session_ctx(dir, cy_inst_handle, &cy_session_ctx, key,
-           crypt, aad_len);
-       if (status != CPA_STATUS_SUCCESS)
+       status = qat_init_crypt_session_ctx(dir, cy_inst_handle,
+           &cy_session_ctx, key, crypt, aad_len);
+       if (status != CPA_STATUS_SUCCESS) {
+               /* don't count CCM as a failure since it's not supported */
+               if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_GCM)
+                       QAT_STAT_BUMP(crypt_fails);
                return (status);
+       }
 
-       nr_bufs = enc_len / PAGE_CACHE_SIZE +
-           (enc_len % PAGE_CACHE_SIZE == 0 ? 0 : 1);
-       status = init_cy_buffer_lists(cy_inst_handle, nr_bufs, &src_buffer_list,
-           &dst_buffer_list);
+       status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
+           &src_buffer_list, &dst_buffer_list);
        if (status != CPA_STATUS_SUCCESS)
                goto fail;
 
@@ -288,14 +356,16 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
        flat_src_buf = flat_src_buf_array;
        flat_dst_buf = flat_dst_buf_array;
        while (bytes_left > 0) {
+               in_page_off = ((long)in & ~PAGE_MASK);
+               out_page_off = ((long)out & ~PAGE_MASK);
                in_pages[page_num] = qat_mem_to_page(in);
                out_pages[page_num] = qat_mem_to_page(out);
-               flat_src_buf->pData = kmap(in_pages[page_num]);
-               flat_dst_buf->pData = kmap(out_pages[page_num]);
-               flat_src_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
-                   (long)bytes_left);
-               flat_dst_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
-                   (long)bytes_left);
+               flat_src_buf->pData = kmap(in_pages[page_num]) + in_page_off;
+               flat_dst_buf->pData = kmap(out_pages[page_num]) + out_page_off;
+               flat_src_buf->dataLenInBytes =
+                   min((long)PAGE_CACHE_SIZE - in_page_off, (long)bytes_left);
+               flat_dst_buf->dataLenInBytes =
+                   min((long)PAGE_CACHE_SIZE - out_page_off, (long)bytes_left);
                in += flat_src_buf->dataLenInBytes;
                out += flat_dst_buf->dataLenInBytes;
                bytes_left -= flat_src_buf->dataLenInBytes;
@@ -345,12 +415,10 @@ qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
                QAT_STAT_INCR(decrypt_total_out_bytes, enc_len);
 
 fail:
-       /* don't count CCM as a failure since it's not supported */
-       if (status != CPA_STATUS_SUCCESS &&
-           zio_crypt_table[crypt].ci_crypt_type != ZC_TYPE_CCM)
+       if (status != CPA_STATUS_SUCCESS)
                QAT_STAT_BUMP(crypt_fails);
 
-       for (i = 0; i < page_num; i ++) {
+       for (i = 0; i < page_num; i++) {
                kunmap(in_pages[i]);
                kunmap(out_pages[i]);
        }
@@ -365,7 +433,108 @@ fail:
        return (status);
 }
 
-module_param(zfs_qat_disable, int, 0644);
-MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT acceleration");
+/*
+ * Compute the checksum of a buffer on a QAT cryptographic instance.
+ *
+ *   cksum - ZIO_CHECKSUM_* identifier, handed on to
+ *           qat_init_checksum_session_ctx()
+ *   buf   - start of the data to hash; it need not be page aligned
+ *   size  - number of bytes to hash
+ *   zcp   - receives sizeof (zio_cksum_t) digest bytes, on success only
+ *
+ * Returns CPA_STATUS_SUCCESS when the digest was produced, any other
+ * CpaStatus value otherwise. Failures are counted in the cksum_fails
+ * kstat, except for a session setup failure with a checksum type other
+ * than SHA256 or SHA512.
+ */
+int
+qat_checksum(uint64_t cksum, uint8_t *buf, uint64_t size, zio_cksum_t *zcp)
+{
+       CpaStatus status;
+       Cpa16U i;
+       CpaInstanceHandle cy_inst_handle;
+       Cpa16U nr_bufs;
+       uint64_t nr_pages;
+       Cpa32U bytes_left = 0;
+       Cpa8S *data = NULL;
+       CpaCySymSessionCtx *cy_session_ctx = NULL;
+       cy_callback_t cb;
+       Cpa8U digest_buffer[sizeof (zio_cksum_t)];
+       CpaCySymOpData op_data = { 0 };
+       CpaBufferList src_buffer_list = { 0 };
+       CpaFlatBuffer *flat_src_buf_array = NULL;
+       CpaFlatBuffer *flat_src_buf = NULL;
+       struct page *in_pages[MAX_PAGE_NUM];
+       Cpa32S page_num = 0;
+       Cpa32U page_off = 0;
+
+       QAT_STAT_BUMP(cksum_requests);
+       QAT_STAT_INCR(cksum_total_in_bytes, size);
+
+       /*
+        * The mapping loop below emits one flat buffer per page touched,
+        * so the count has to include the offset of buf within its first
+        * page: an unaligned buffer spans one page more than size alone
+        * suggests. Refuse anything that would not fit in in_pages[].
+        * (Callers are expected to pass a nonzero size.)
+        */
+       page_off = ((long)buf & ~PAGE_MASK);
+       nr_pages = (page_off + size + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;
+       if (nr_pages > MAX_PAGE_NUM) {
+               QAT_STAT_BUMP(cksum_fails);
+               return (CPA_STATUS_FAIL);
+       }
+       nr_bufs = (Cpa16U)nr_pages;
+
+       /* pick the next crypto instance round-robin */
+       i = atomic_inc_32_nv(&inst_num) % num_inst;
+       cy_inst_handle = cy_inst_handles[i];
+
+       status = qat_init_checksum_session_ctx(cy_inst_handle,
+           &cy_session_ctx, cksum);
+       if (status != CPA_STATUS_SUCCESS) {
+               /* don't count unsupported checksums as a failure */
+               if (cksum == ZIO_CHECKSUM_SHA256 ||
+                   cksum == ZIO_CHECKSUM_SHA512)
+                       QAT_STAT_BUMP(cksum_fails);
+               return (status);
+       }
+
+       /* hashing is done in place: source and destination list coincide */
+       status = qat_init_cy_buffer_lists(cy_inst_handle, nr_bufs,
+           &src_buffer_list, &src_buffer_list);
+       if (status != CPA_STATUS_SUCCESS)
+               goto fail;
+
+       status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
+           nr_bufs * sizeof (CpaFlatBuffer));
+       if (status != CPA_STATUS_SUCCESS)
+               goto fail;
+
+       /* map the buffer page by page, one flat buffer per page */
+       bytes_left = size;
+       data = (Cpa8S *)buf;
+       flat_src_buf = flat_src_buf_array;
+       while (bytes_left > 0) {
+               page_off = ((long)data & ~PAGE_MASK);
+               in_pages[page_num] = qat_mem_to_page(data);
+               flat_src_buf->pData = kmap(in_pages[page_num]) + page_off;
+               flat_src_buf->dataLenInBytes =
+                   min((long)PAGE_CACHE_SIZE - page_off, (long)bytes_left);
+               data += flat_src_buf->dataLenInBytes;
+               bytes_left -= flat_src_buf->dataLenInBytes;
+               flat_src_buf++;
+               page_num++;
+       }
+       src_buffer_list.pBuffers = flat_src_buf_array;
+
+       op_data.sessionCtx = cy_session_ctx;
+       op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
+       op_data.hashStartSrcOffsetInBytes = 0;
+       op_data.messageLenToHashInBytes = size;
+       op_data.pDigestResult = digest_buffer;
+
+       cb.verify_result = CPA_FALSE;
+       init_completion(&cb.complete);
+       status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
+           &src_buffer_list, &src_buffer_list, NULL);
+       if (status != CPA_STATUS_SUCCESS)
+               goto fail;
+
+       /*
+        * The wait returns 0 on timeout and a negative value when it is
+        * interrupted by a signal; only a positive value means the request
+        * completed. Both other cases must fail, otherwise an unwritten
+        * digest_buffer would be handed back as a valid checksum.
+        *
+        * NOTE(review): the timeout argument is in jiffies - confirm that
+        * QAT_TIMEOUT_MS is defined accordingly. Also confirm that the
+        * request cannot still complete (and touch cb / digest_buffer on
+        * this stack) after we bail out here.
+        */
+       if (wait_for_completion_interruptible_timeout(&cb.complete,
+           QAT_TIMEOUT_MS) <= 0) {
+               status = CPA_STATUS_FAIL;
+               goto fail;
+       }
+
+       bcopy(digest_buffer, zcp, sizeof (zio_cksum_t));
+
+fail:
+       if (status != CPA_STATUS_SUCCESS)
+               QAT_STAT_BUMP(cksum_fails);
+
+       /* only the pages mapped so far are unmapped */
+       for (i = 0; i < page_num; i++)
+               kunmap(in_pages[i]);
+
+       cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
+       QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
+       QAT_PHYS_CONTIG_FREE(cy_session_ctx);
+       QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
+
+       return (status);
+}
+
+module_param(zfs_qat_encrypt_disable, int, 0644);
+MODULE_PARM_DESC(zfs_qat_encrypt_disable, "Disable QAT encryption");
+
+module_param(zfs_qat_checksum_disable, int, 0644);
+MODULE_PARM_DESC(zfs_qat_checksum_disable, "Disable QAT checksumming");
 
 #endif
index 23a97aa3de179dea0eb8180f9cbfea8292efcce7..2adadf56f94b802c2abb0c958e8d2ef128c12750 100644 (file)
@@ -30,6 +30,7 @@
 #include <sys/zio.h>
 #include <sys/sha2.h>
 #include <sys/abd.h>
+#include "qat.h"
 
 static int
 sha_incremental(void *buf, size_t size, void *arg)
@@ -44,13 +45,25 @@ void
 abd_checksum_SHA256(abd_t *abd, uint64_t size,
     const void *ctx_template, zio_cksum_t *zcp)
 {
+       int ret;
        SHA2_CTX ctx;
        zio_cksum_t tmp;
 
+       if (qat_checksum_use_accel(size)) {
+               uint8_t *buf = abd_borrow_buf_copy(abd, size);
+               ret = qat_checksum(ZIO_CHECKSUM_SHA256, buf, size, &tmp);
+               abd_return_buf(abd, buf, size);
+               if (ret == CPA_STATUS_SUCCESS)
+                       goto bswap;
+
+               /* If the hardware implementation fails fall back to software */
+       }
+
        SHA2Init(SHA256, &ctx);
        (void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx);
        SHA2Final(&tmp, &ctx);
 
+bswap:
        /*
         * A prior implementation of this function had a
         * private SHA256 implementation always wrote things out in