From cf63739191b6cac629d053930a4aea592bca3819 Mon Sep 17 00:00:00 2001
From: Tom Caputi <tcaputi@datto.com>
Date: Fri, 9 Mar 2018 16:37:15 -0500
Subject: [PATCH] QAT support for AES-GCM

This patch adds support for acceleration of AES-GCM encryption
with Intel Quick Assist Technology.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chengfeix Zhu <chengfeix.zhu@intel.com>
Signed-off-by: Weigang Li <weigang.li@intel.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7282
---
 man/man5/zfs-module-parameters.5 |   6 +-
 module/zfs/Makefile.in           |   2 +
 module/zfs/gzip.c                |   6 +-
 module/zfs/qat.c                 | 102 +++++++++
 module/zfs/qat.h                 | 176 +++++++++++++++
 module/zfs/qat_compress.c        | 223 +++++--------------
 module/zfs/qat_compress.h        |  48 ----
 module/zfs/qat_crypt.c           | 371 +++++++++++++++++++++++++++++++
 module/zfs/spa_misc.c            |   2 +-
 module/zfs/zio_crypt.c           |  54 ++++-
 10 files changed, 758 insertions(+), 232 deletions(-)
 create mode 100644 module/zfs/qat.c
 create mode 100644 module/zfs/qat.h
 delete mode 100644 module/zfs/qat_compress.h
 create mode 100644 module/zfs/qat_crypt.c

diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5
index b7d32e069..a4b6b4f3a 100644
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -2352,9 +2352,9 @@ Default value: \fB1\fR.
 \fBzfs_qat_disable\fR (int)
 .ad
 .RS 12n
-This tunable disables qat hardware acceleration for gzip compression.
-It is available only if qat acceleration is compiled in and qat driver
-is present.
+This tunable disables qat hardware acceleration for gzip compression and
+AES-GCM encryption. It is available only if qat acceleration is compiled in
+and the qat driver is present.
 .sp
 Use \fB1\fR for yes and \fB0\fR for no (default).
 .RE
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
index 084c1ac23..fe0d5b523 100644
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -132,7 +132,9 @@ $(MODULE)-objs += zrlock.o
 $(MODULE)-objs += zvol.o
 $(MODULE)-objs += dsl_destroy.o
 $(MODULE)-objs += dsl_userhold.o
+$(MODULE)-objs += qat.o
 $(MODULE)-objs += qat_compress.o
+$(MODULE)-objs += qat_crypt.o
 
 # Suppress incorrect warnings from versions of objtool which are not
 # aware of x86 EVEX prefix instructions used for AVX512.
diff --git a/module/zfs/gzip.c b/module/zfs/gzip.c
index 6c8fdd308..6e4db718c 100644
--- a/module/zfs/gzip.c
+++ b/module/zfs/gzip.c
@@ -28,7 +28,7 @@
 
 #include <sys/debug.h>
 #include <sys/types.h>
-#include "qat_compress.h"
+#include "qat.h"
 
 #ifdef _KERNEL
 
@@ -58,7 +58,7 @@ gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
 	ASSERT(d_len <= s_len);
 
 	/* check if hardware accelerator can be used */
-	if (qat_use_accel(s_len)) {
+	if (qat_dc_use_accel(s_len)) {
 		if (qat_compress(QAT_COMPRESS, s_start,
 		    s_len, d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS)
 			return ((size_t)dstlen);
@@ -85,7 +85,7 @@ gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
 	ASSERT(d_len >= s_len);
 
 	/* check if hardware accelerator can be used */
-	if (qat_use_accel(d_len)) {
+	if (qat_dc_use_accel(d_len)) {
 		if (qat_compress(QAT_DECOMPRESS, s_start, s_len,
 		    d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS)
 			return (0);
diff --git a/module/zfs/qat.c b/module/zfs/qat.c
new file mode 100644
index 000000000..4dc34f1e6
--- /dev/null
+++ b/module/zfs/qat.c
@@ -0,0 +1,102 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <sys/zfs_context.h>
+#include "qat.h"
+
+qat_stats_t qat_stats = {
+	{ "comp_requests",			KSTAT_DATA_UINT64 },
+	{ "comp_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "comp_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "decomp_requests",			KSTAT_DATA_UINT64 },
+	{ "decomp_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "decomp_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "dc_fails",				KSTAT_DATA_UINT64 },
+	{ "encrypt_requests",			KSTAT_DATA_UINT64 },
+	{ "encrypt_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "encrypt_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "decrypt_requests",			KSTAT_DATA_UINT64 },
+	{ "decrypt_total_in_bytes",		KSTAT_DATA_UINT64 },
+	{ "decrypt_total_out_bytes",		KSTAT_DATA_UINT64 },
+	{ "crypt_fails",			KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *qat_ksp = NULL;
+int zfs_qat_disable = 0;
+
+CpaStatus
+qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
+{
+	*pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL);
+	if (*pp_mem_addr == NULL)
+		return (CPA_STATUS_RESOURCE);
+	return (CPA_STATUS_SUCCESS);
+}
+
+void
+qat_mem_free_contig(void **pp_mem_addr)
+{
+	if (*pp_mem_addr != NULL) {
+		kfree(*pp_mem_addr);
+		*pp_mem_addr = NULL;
+	}
+}
+
+int
+qat_init(void)
+{
+	int ret;
+
+	ret = qat_dc_init();
+	if (ret != 0)
+		return (ret);
+
+	ret = qat_crypt_init();
+	if (ret != 0) {
+		qat_dc_fini();
+		return (ret);
+	}
+
+	qat_ksp = kstat_create("zfs", 0, "qat", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+	if (qat_ksp != NULL) {
+		qat_ksp->ks_data = &qat_stats;
+		kstat_install(qat_ksp);
+	}
+
+	return (0);
+}
+
+void
+qat_fini(void)
+{
+	if (qat_ksp != NULL) {
+		kstat_delete(qat_ksp);
+		qat_ksp = NULL;
+	}
+
+	qat_crypt_fini();
+	qat_dc_fini();
+}
+
+#endif
diff --git a/module/zfs/qat.h b/module/zfs/qat.h
new file mode 100644
index 000000000..44f9cb532
--- /dev/null
+++ b/module/zfs/qat.h
@@ -0,0 +1,176 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef	_SYS_QAT_H
+#define	_SYS_QAT_H
+
+typedef enum qat_compress_dir {
+	QAT_DECOMPRESS = 0,
+	QAT_COMPRESS = 1,
+} qat_compress_dir_t;
+
+typedef enum qat_encrypt_dir {
+	QAT_DECRYPT = 0,
+	QAT_ENCRYPT = 1,
+} qat_encrypt_dir_t;
+
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <sys/zio.h>
+#include <sys/crypto/api.h>
+#include "cpa.h"
+#include "dc/cpa_dc.h"
+#include "lac/cpa_cy_sym.h"
+
+/*
+ * Timeout - no response from hardware after 0.5 seconds
+ */
+#define	QAT_TIMEOUT_MS		500
+
+/*
+ * The minimal and maximal buffer size, which are not restricted
+ * in the QAT hardware, but with the input buffer size between 4KB
+ * and 128KB, the hardware can provide the optimal performance.
+ */
+#define	QAT_MIN_BUF_SIZE	(4*1024)
+#define	QAT_MAX_BUF_SIZE	(128*1024)
+
+/*
+ * Used for qat kstat.
+ */
+typedef struct qat_stats {
+	/*
+	 * Number of jobs submitted to qat compression engine.
+	 */
+	kstat_named_t comp_requests;
+	/*
+	 * Total bytes sent to qat compression engine.
+	 */
+	kstat_named_t comp_total_in_bytes;
+	/*
+	 * Total bytes output from qat compression engine.
+	 */
+	kstat_named_t comp_total_out_bytes;
+	/*
+	 * Number of jobs submitted to qat de-compression engine.
+	 */
+	kstat_named_t decomp_requests;
+	/*
+	 * Total bytes sent to qat de-compression engine.
+	 */
+	kstat_named_t decomp_total_in_bytes;
+	/*
+	 * Total bytes output from qat de-compression engine.
+	 */
+	kstat_named_t decomp_total_out_bytes;
+	/*
+	 * Number of fails in the qat compression / decompression engine.
+	 * Note: when qat fail happens, it doesn't mean a critical hardware
+	 * issue. Sometimes it is because the output buffer is not big enough.
+	 * The compression job will be transferred to gzip software
+	 * implementation, so the functionality of ZFS is not impacted.
+	 */
+	kstat_named_t dc_fails;
+
+	/*
+	 * Number of jobs submitted to qat encryption engine.
+	 */
+	kstat_named_t encrypt_requests;
+	/*
+	 * Total bytes sent to qat encryption engine.
+	 */
+	kstat_named_t encrypt_total_in_bytes;
+	/*
+	 * Total bytes output from qat encryption engine.
+	 */
+	kstat_named_t encrypt_total_out_bytes;
+	/*
+	 * Number of jobs submitted to qat decryption engine.
+	 */
+	kstat_named_t decrypt_requests;
+	/*
+	 * Total bytes sent to qat decryption engine.
+	 */
+	kstat_named_t decrypt_total_in_bytes;
+	/*
+	 * Total bytes output from qat decryption engine.
+	 */
+	kstat_named_t decrypt_total_out_bytes;
+	/*
+	 * Number of fails in the qat encryption / decryption engine.
+	 * Note: when qat fail happens, it doesn't mean a critical hardware
+	 * issue. Sometimes it is because the output buffer is not big enough.
+	 * The encryption job will be transferred to the software
+	 * implementation, so the functionality of ZFS is not impacted.
+	 */
+	kstat_named_t crypt_fails;
+} qat_stats_t;
+
+#define	QAT_STAT_INCR(stat, val) \
+	atomic_add_64(&qat_stats.stat.value.ui64, (val))
+#define	QAT_STAT_BUMP(stat) \
+	QAT_STAT_INCR(stat, 1)
+
+extern qat_stats_t qat_stats;
+extern int zfs_qat_disable;
+
+/* inlined for performance */
+static inline struct page *
+qat_mem_to_page(void *addr)
+{
+	if (!is_vmalloc_addr(addr))
+		return (virt_to_page(addr));
+
+	return (vmalloc_to_page(addr));
+}
+
+CpaStatus qat_mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes);
+void qat_mem_free_contig(void **pp_mem_addr);
+#define	QAT_PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes)	\
+	qat_mem_alloc_contig((void *)(pp_mem_addr), (size_bytes))
+#define	QAT_PHYS_CONTIG_FREE(p_mem_addr)	\
+	qat_mem_free_contig((void *)&(p_mem_addr))
+
+extern int qat_dc_init(void);
+extern void qat_dc_fini(void);
+extern int qat_crypt_init(void);
+extern void qat_crypt_fini(void);
+extern int qat_init(void);
+extern void qat_fini(void);
+
+extern boolean_t qat_dc_use_accel(size_t s_len);
+extern boolean_t qat_crypt_use_accel(size_t s_len);
+extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len,
+    char *dst, int dst_len, size_t *c_len);
+extern int qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
+    uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
+    crypto_key_t *key, uint64_t crypt, uint32_t enc_len);
+#else
+#define	CPA_STATUS_SUCCESS					0
+#define	qat_init()
+#define	qat_fini()
+#define	qat_dc_use_accel(s_len)					0
+#define	qat_crypt_use_accel(s_len)				0
+#define	qat_compress(dir, s, sl, d, dl, cl)			0
+#define	qat_crypt(dir, s, d, a, al, i, db, k, c, el)		0
+#endif
+
+#endif /* _SYS_QAT_H */
diff --git a/module/zfs/qat_compress.c b/module/zfs/qat_compress.c
index 62655f56d..3d756b53d 100644
--- a/module/zfs/qat_compress.c
+++ b/module/zfs/qat_compress.c
@@ -25,12 +25,7 @@
 #include <linux/pagemap.h>
 #include <linux/completion.h>
 #include <sys/zfs_context.h>
-#include "qat_compress.h"
-
-/*
- * Timeout - no response from hardware after 0.5 seconds
- */
-#define	TIMEOUT_MS		500
+#include "qat.h"
 
 /*
  * Max instances in QAT device, each instance is a channel to submit
@@ -38,7 +33,7 @@
  * and session arrays, the actual number of instances are defined in
  * the QAT driver's configure file.
  */
-#define	MAX_INSTANCES		48
+#define	QAT_DC_MAX_INSTANCES	48
 
 /*
  * ZLIB head and foot size
@@ -46,89 +41,20 @@
 #define	ZLIB_HEAD_SZ		2
 #define	ZLIB_FOOT_SZ		4
 
-/*
- * The minimal and maximal buffer size, which are not restricted
- * in the QAT hardware, but with the input buffer size between 4KB
- * and 128KB, the hardware can provide the optimal performance.
- */
-#define	QAT_MIN_BUF_SIZE	(4*1024)
-#define	QAT_MAX_BUF_SIZE	(128*1024)
-
-/*
- * Used for qat kstat.
- */
-typedef struct qat_stats {
-	/*
-	 * Number of jobs submitted to qat compression engine.
-	 */
-	kstat_named_t comp_requests;
-	/*
-	 * Total bytes sent to qat compression engine.
-	 */
-	kstat_named_t comp_total_in_bytes;
-	/*
-	 * Total bytes output from qat compression engine.
-	 */
-	kstat_named_t comp_total_out_bytes;
-	/*
-	 * Number of jobs submitted to qat de-compression engine.
-	 */
-	kstat_named_t decomp_requests;
-	/*
-	 * Total bytes sent to qat de-compression engine.
-	 */
-	kstat_named_t decomp_total_in_bytes;
-	/*
-	 * Total bytes output from qat de-compression engine.
-	 */
-	kstat_named_t decomp_total_out_bytes;
-	/*
-	 * Number of fails in qat engine.
-	 * Note: when qat fail happens, it doesn't mean a critical hardware
-	 * issue, sometimes it is because the output buffer is not big enough,
-	 * and the compression job will be transfered to gzip software again,
-	 * so the functionality of ZFS is not impacted.
-	 */
-	kstat_named_t dc_fails;
-} qat_stats_t;
-
-qat_stats_t qat_stats = {
-	{ "comp_reqests",			KSTAT_DATA_UINT64 },
-	{ "comp_total_in_bytes",		KSTAT_DATA_UINT64 },
-	{ "comp_total_out_bytes",		KSTAT_DATA_UINT64 },
-	{ "decomp_reqests",			KSTAT_DATA_UINT64 },
-	{ "decomp_total_in_bytes",		KSTAT_DATA_UINT64 },
-	{ "decomp_total_out_bytes",		KSTAT_DATA_UINT64 },
-	{ "dc_fails",				KSTAT_DATA_UINT64 },
-};
-
-static kstat_t *qat_ksp;
-static CpaInstanceHandle dc_inst_handles[MAX_INSTANCES];
-static CpaDcSessionHandle session_handles[MAX_INSTANCES];
-static CpaBufferList **buffer_array[MAX_INSTANCES];
+static CpaInstanceHandle dc_inst_handles[QAT_DC_MAX_INSTANCES];
+static CpaDcSessionHandle session_handles[QAT_DC_MAX_INSTANCES];
+static CpaBufferList **buffer_array[QAT_DC_MAX_INSTANCES];
 static Cpa16U num_inst = 0;
 static Cpa32U inst_num = 0;
-static boolean_t qat_init_done = B_FALSE;
-int zfs_qat_disable = 0;
-
-#define	QAT_STAT_INCR(stat, val) \
-	atomic_add_64(&qat_stats.stat.value.ui64, (val));
-#define	QAT_STAT_BUMP(stat) \
-	QAT_STAT_INCR(stat, 1);
+static boolean_t qat_dc_init_done = B_FALSE;
 
-#define	PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes)	\
-	mem_alloc_contig((void *)(pp_mem_addr), (size_bytes))
-
-#define	PHYS_CONTIG_FREE(p_mem_addr)	\
-	mem_free_contig((void *)&(p_mem_addr))
-
-static inline struct page *
-mem_to_page(void *addr)
+boolean_t
+qat_dc_use_accel(size_t s_len)
 {
-	if (!is_vmalloc_addr(addr))
-		return (virt_to_page(addr));
-
-	return (vmalloc_to_page(addr));
+	return (!zfs_qat_disable &&
+	    qat_dc_init_done &&
+	    s_len >= QAT_MIN_BUF_SIZE &&
+	    s_len <= QAT_MAX_BUF_SIZE);
 }
 
 static void
@@ -138,26 +64,8 @@ qat_dc_callback(void *p_callback, CpaStatus status)
 		complete((struct completion *)p_callback);
 }
 
-static inline CpaStatus
-mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
-{
-	*pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL);
-	if (*pp_mem_addr == NULL)
-		return (CPA_STATUS_RESOURCE);
-	return (CPA_STATUS_SUCCESS);
-}
-
-static inline void
-mem_free_contig(void **pp_mem_addr)
-{
-	if (*pp_mem_addr != NULL) {
-		kfree(*pp_mem_addr);
-		*pp_mem_addr = NULL;
-	}
-}
-
 static void
-qat_clean(void)
+qat_dc_clean(void)
 {
 	Cpa16U buff_num = 0;
 	Cpa16U num_inter_buff_lists = 0;
@@ -165,7 +73,7 @@ qat_clean(void)
 
 	for (i = 0; i < num_inst; i++) {
 		cpaDcStopInstance(dc_inst_handles[i]);
-		PHYS_CONTIG_FREE(session_handles[i]);
+		QAT_PHYS_CONTIG_FREE(session_handles[i]);
 		/* free intermediate buffers  */
 		if (buffer_array[i] != NULL) {
 			cpaDcGetNumIntermediateBuffers(
@@ -175,24 +83,24 @@ qat_clean(void)
 				CpaBufferList *buffer_inter =
 				    buffer_array[i][buff_num];
 				if (buffer_inter->pBuffers) {
-					PHYS_CONTIG_FREE(
+					QAT_PHYS_CONTIG_FREE(
 					    buffer_inter->pBuffers->pData);
-					PHYS_CONTIG_FREE(
+					QAT_PHYS_CONTIG_FREE(
 					    buffer_inter->pBuffers);
 				}
-				PHYS_CONTIG_FREE(
+				QAT_PHYS_CONTIG_FREE(
 				    buffer_inter->pPrivateMetaData);
-				PHYS_CONTIG_FREE(buffer_inter);
+				QAT_PHYS_CONTIG_FREE(buffer_inter);
 			}
 		}
 	}
 
 	num_inst = 0;
-	qat_init_done = B_FALSE;
+	qat_dc_init_done = B_FALSE;
 }
 
 int
-qat_init(void)
+qat_dc_init(void)
 {
 	CpaStatus status = CPA_STATUS_SUCCESS;
 	Cpa32U sess_size = 0;
@@ -204,11 +112,15 @@ qat_init(void)
 	Cpa16U i;
 
 	status = cpaDcGetNumInstances(&num_inst);
-	if (status != CPA_STATUS_SUCCESS || num_inst == 0)
+	if (status != CPA_STATUS_SUCCESS)
 		return (-1);
 
-	if (num_inst > MAX_INSTANCES)
-		num_inst = MAX_INSTANCES;
+	/* if the user has configured no QAT compression units just return */
+	if (num_inst == 0)
+		return (0);
+
+	if (num_inst > QAT_DC_MAX_INSTANCES)
+		num_inst = QAT_DC_MAX_INSTANCES;
 
 	status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
 	if (status != CPA_STATUS_SUCCESS)
@@ -226,25 +138,25 @@ qat_init(void)
 			    dc_inst_handles[i], &num_inter_buff_lists);
 
 		if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0)
-			status = PHYS_CONTIG_ALLOC(&buffer_array[i],
+			status = QAT_PHYS_CONTIG_ALLOC(&buffer_array[i],
 			    num_inter_buff_lists *
 			    sizeof (CpaBufferList *));
 
 		for (buff_num = 0; buff_num < num_inter_buff_lists;
 		    buff_num++) {
 			if (status == CPA_STATUS_SUCCESS)
-				status = PHYS_CONTIG_ALLOC(
+				status = QAT_PHYS_CONTIG_ALLOC(
 				    &buffer_array[i][buff_num],
 				    sizeof (CpaBufferList));
 
 			if (status == CPA_STATUS_SUCCESS)
-				status = PHYS_CONTIG_ALLOC(
+				status = QAT_PHYS_CONTIG_ALLOC(
 				    &buffer_array[i][buff_num]->
 				    pPrivateMetaData,
 				    buff_meta_size);
 
 			if (status == CPA_STATUS_SUCCESS)
-				status = PHYS_CONTIG_ALLOC(
+				status = QAT_PHYS_CONTIG_ALLOC(
 				    &buffer_array[i][buff_num]->pBuffers,
 				    sizeof (CpaFlatBuffer));
 
@@ -255,7 +167,7 @@ qat_init(void)
 				 *  output buffer, which is 2x max buffer
 				 *  size here.
 				 */
-				status = PHYS_CONTIG_ALLOC(
+				status = QAT_PHYS_CONTIG_ALLOC(
 				    &buffer_array[i][buff_num]->pBuffers->
 				    pData, 2 * QAT_MAX_BUF_SIZE);
 				if (status != CPA_STATUS_SUCCESS)
@@ -284,7 +196,7 @@ qat_init(void)
 		if (status != CPA_STATUS_SUCCESS)
 			goto fail;
 
-		PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
+		QAT_PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
 		if (session_handles[i] == NULL)
 			goto fail;
 
@@ -295,39 +207,20 @@ qat_init(void)
 			goto fail;
 	}
 
-	qat_ksp = kstat_create("zfs", 0, "qat", "misc",
-	    KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t),
-	    KSTAT_FLAG_VIRTUAL);
-	if (qat_ksp != NULL) {
-		qat_ksp->ks_data = &qat_stats;
-		kstat_install(qat_ksp);
-	}
-
-	qat_init_done = B_TRUE;
+	qat_dc_init_done = B_TRUE;
 	return (0);
 fail:
-	qat_clean();
+	qat_dc_clean();
 	return (-1);
 }
 
 void
-qat_fini(void)
+qat_dc_fini(void)
 {
-	qat_clean();
+	if (!qat_dc_init_done)
+		return;
 
-	if (qat_ksp != NULL) {
-		kstat_delete(qat_ksp);
-		qat_ksp = NULL;
-	}
-}
-
-boolean_t
-qat_use_accel(size_t s_len)
-{
-	return (!zfs_qat_disable &&
-	    qat_init_done &&
-	    s_len >= QAT_MIN_BUF_SIZE &&
-	    s_len <= QAT_MAX_BUF_SIZE);
+	qat_dc_clean();
 }
 
 int
@@ -364,11 +257,11 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 	Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
 	    (num_dst_buf * sizeof (CpaFlatBuffer));
 
-	if (PHYS_CONTIG_ALLOC(&in_pages,
+	if (QAT_PHYS_CONTIG_ALLOC(&in_pages,
 	    num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
 		goto fail;
 
-	if (PHYS_CONTIG_ALLOC(&out_pages,
+	if (QAT_PHYS_CONTIG_ALLOC(&out_pages,
 	    num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
 		goto fail;
 
@@ -378,18 +271,18 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 
 	cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
 	    &buffer_meta_size);
-	if (PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) !=
+	if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) !=
 	    CPA_STATUS_SUCCESS)
 		goto fail;
 
 	cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf,
 	    &buffer_meta_size);
-	if (PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) !=
+	if (QAT_PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) !=
 	    CPA_STATUS_SUCCESS)
 		goto fail;
 
 	/* build source buffer list */
-	if (PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) !=
+	if (QAT_PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) !=
 	    CPA_STATUS_SUCCESS)
 		goto fail;
 
@@ -398,7 +291,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 	buf_list_src->pBuffers = flat_buf_src; /* always point to first one */
 
 	/* build destination buffer list */
-	if (PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) !=
+	if (QAT_PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) !=
 	    CPA_STATUS_SUCCESS)
 		goto fail;
 
@@ -412,7 +305,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 	data = src;
 	page_num = 0;
 	while (bytes_left > 0) {
-		in_page = mem_to_page(data);
+		in_page = qat_mem_to_page(data);
 		in_pages[page_num] = in_page;
 		flat_buf_src->pData = kmap(in_page);
 		flat_buf_src->dataLenInBytes =
@@ -431,7 +324,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 	data = dst;
 	page_num = 0;
 	while (bytes_left > 0) {
-		out_page = mem_to_page(data);
+		out_page = qat_mem_to_page(data);
 		flat_buf_dst->pData = kmap(out_page);
 		out_pages[page_num] = out_page;
 		flat_buf_dst->dataLenInBytes =
@@ -465,7 +358,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 
 		/* we now wait until the completion of the operation. */
 		if (!wait_for_completion_interruptible_timeout(&complete,
-		    TIMEOUT_MS)) {
+		    QAT_TIMEOUT_MS)) {
 			status = CPA_STATUS_FAIL;
 			goto fail;
 		}
@@ -508,7 +401,8 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 
 		ret = 0;
 
-	} else if (dir == QAT_DECOMPRESS) {
+	} else {
+		ASSERT3U(dir, ==, QAT_DECOMPRESS);
 		QAT_STAT_BUMP(decomp_requests);
 		QAT_STAT_INCR(decomp_total_in_bytes, src_len);
 
@@ -529,7 +423,7 @@ qat_compress(qat_compress_dir_t dir, char *src, int src_len,
 
 		/* we now wait until the completion of the operation. */
 		if (!wait_for_completion_interruptible_timeout(&complete,
-		    TIMEOUT_MS)) {
+		    QAT_TIMEOUT_MS)) {
 			status = CPA_STATUS_FAIL;
 			goto fail;
 		}
@@ -557,7 +451,7 @@ fail:
 		    page_num++) {
 			kunmap(in_pages[page_num]);
 		}
-		PHYS_CONTIG_FREE(in_pages);
+		QAT_PHYS_CONTIG_FREE(in_pages);
 	}
 
 	if (out_pages) {
@@ -566,18 +460,15 @@ fail:
 		    page_num++) {
 			kunmap(out_pages[page_num]);
 		}
-		PHYS_CONTIG_FREE(out_pages);
+		QAT_PHYS_CONTIG_FREE(out_pages);
 	}
 
-	PHYS_CONTIG_FREE(buffer_meta_src);
-	PHYS_CONTIG_FREE(buffer_meta_dst);
-	PHYS_CONTIG_FREE(buf_list_src);
-	PHYS_CONTIG_FREE(buf_list_dst);
+	QAT_PHYS_CONTIG_FREE(buffer_meta_src);
+	QAT_PHYS_CONTIG_FREE(buffer_meta_dst);
+	QAT_PHYS_CONTIG_FREE(buf_list_src);
+	QAT_PHYS_CONTIG_FREE(buf_list_dst);
 
 	return (ret);
 }
 
-module_param(zfs_qat_disable, int, 0644);
-MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT compression");
-
 #endif
diff --git a/module/zfs/qat_compress.h b/module/zfs/qat_compress.h
deleted file mode 100644
index ff074646f..000000000
--- a/module/zfs/qat_compress.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-#ifndef	_SYS_QAT_COMPRESS_H
-#define	_SYS_QAT_COMPRESS_H
-
-#if defined(_KERNEL) && defined(HAVE_QAT)
-#include <sys/zio.h>
-#include "cpa.h"
-#include "dc/cpa_dc.h"
-
-typedef enum qat_compress_dir {
-	QAT_COMPRESS = 0,
-	QAT_DECOMPRESS = 1,
-} qat_compress_dir_t;
-
-extern int qat_init(void);
-extern void qat_fini(void);
-extern boolean_t qat_use_accel(size_t s_len);
-extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len,
-    char *dst, int dst_len, size_t *c_len);
-#else
-#define	CPA_STATUS_SUCCESS	0
-#define	qat_init()
-#define	qat_fini()
-#define	qat_use_accel(s_len)	0
-#define	qat_compress(dir, s, sl, d, dl, cl)	0
-#endif
-
-#endif /* _SYS_QAT_COMPRESS_H */
diff --git a/module/zfs/qat_crypt.c b/module/zfs/qat_crypt.c
new file mode 100644
index 000000000..d850d9ce8
--- /dev/null
+++ b/module/zfs/qat_crypt.c
@@ -0,0 +1,371 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/completion.h>
+#include <sys/zfs_context.h>
+#include <sys/zio_crypt.h>
+#include "lac/cpa_cy_im.h"
+#include "qat.h"
+
+/*
+ * Max instances in a QAT device. Each instance is a channel used to
+ * submit jobs to the QAT hardware. This value is only used to size the
+ * pre-allocated instance and session arrays; the actual number of
+ * instances is defined in the QAT driver's configuration file.
+ */
+#define	QAT_CRYPT_MAX_INSTANCES		48
+
+#define	MAX_PAGE_NUM			1024
+
+static boolean_t qat_crypt_init_done = B_FALSE;
+static Cpa32U inst_num = 0;	/* 32-bit: bumped by atomic_inc_32_nv() */
+static Cpa16U num_inst = 0;
+static CpaInstanceHandle cy_inst_handles[QAT_CRYPT_MAX_INSTANCES];
+
+typedef struct cy_callback {
+	CpaBoolean verify_result;
+	struct completion complete;
+} cy_callback_t;
+
+static void
+symcallback(void *p_callback, CpaStatus status, const CpaCySymOp operation,
+    void *op_data, CpaBufferList *buf_list_dst, CpaBoolean verify)
+{
+	cy_callback_t *cb = p_callback;
+
+	if (cb != NULL) {
+		/* indicate that the function has been called */
+		cb->verify_result = verify;
+		complete(&cb->complete);
+	}
+}
+
+boolean_t
+qat_crypt_use_accel(size_t s_len)
+{
+	return (!zfs_qat_disable &&
+	    qat_crypt_init_done &&
+	    s_len >= QAT_MIN_BUF_SIZE &&
+	    s_len <= QAT_MAX_BUF_SIZE);
+}
+
+static void
+qat_crypt_clean(void)
+{
+	for (Cpa32U i = 0; i < num_inst; i++)
+		cpaCyStopInstance(cy_inst_handles[i]);
+
+	num_inst = 0;	/* a repeated call then stops nothing */
+	qat_crypt_init_done = B_FALSE;	/* qat_crypt_use_accel() now false */
+}
+
+int
+qat_crypt_init(void)
+{
+	Cpa32U i;
+	CpaStatus status = CPA_STATUS_FAIL;
+
+	status = cpaCyGetNumInstances(&num_inst);
+	if (status != CPA_STATUS_SUCCESS)
+		return (-1);
+
+	/* if the user has configured no QAT encryption units just return */
+	if (num_inst == 0)
+		return (0);
+
+	if (num_inst > QAT_CRYPT_MAX_INSTANCES)
+		num_inst = QAT_CRYPT_MAX_INSTANCES;
+
+	status = cpaCyGetInstances(num_inst, &cy_inst_handles[0]);
+	if (status != CPA_STATUS_SUCCESS)
+		return (-1);
+
+	for (i = 0; i < num_inst; i++) {
+		status = cpaCySetAddressTranslation(cy_inst_handles[i],
+		    (void *)virt_to_phys);
+		if (status != CPA_STATUS_SUCCESS)
+			goto error;
+
+		status = cpaCyStartInstance(cy_inst_handles[i]);
+		if (status != CPA_STATUS_SUCCESS)
+			goto error;
+	}
+
+	qat_crypt_init_done = B_TRUE;
+	return (0);
+
+error:
+	qat_crypt_clean();
+	return (-1);
+}
+
+void
+qat_crypt_fini(void)
+{
+	if (!qat_crypt_init_done)
+		return;
+
+	qat_crypt_clean();
+}
+
+static CpaStatus
+init_cy_session_ctx(qat_encrypt_dir_t dir, CpaInstanceHandle inst_handle,
+    CpaCySymSessionCtx **cy_session_ctx, crypto_key_t *key,
+    Cpa64U crypt, Cpa32U aad_len)
+{
+	CpaCySymSessionSetupData sd = { 0 };
+	CpaStatus status;
+	Cpa32U ctx_size;
+
+	/* CCM is rejected here; every other type is set up as AES-GCM. */
+	if (zio_crypt_table[crypt].ci_crypt_type == ZC_TYPE_CCM)
+		return (CPA_STATUS_FAIL);
+
+	/* cipher half of the chained operation */
+	sd.cipherSetupData.cipherAlgorithm = CPA_CY_SYM_CIPHER_AES_GCM;
+	sd.cipherSetupData.pCipherKey = key->ck_data;
+	sd.cipherSetupData.cipherKeyLenInBytes = key->ck_length / 8;
+
+	/* authentication half; the digest is ZIO_DATA_MAC_LEN bytes */
+	sd.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_GCM;
+	sd.hashSetupData.hashMode = CPA_CY_SYM_HASH_MODE_AUTH;
+	sd.hashSetupData.digestResultLenInBytes = ZIO_DATA_MAC_LEN;
+	sd.hashSetupData.authModeSetupData.aadLenInBytes = aad_len;
+
+	sd.sessionPriority = CPA_CY_PRIORITY_NORMAL;
+	sd.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING;
+	sd.digestIsAppended = CPA_FALSE;
+	sd.verifyDigest = CPA_FALSE;
+
+	if (dir == QAT_ENCRYPT) {
+		sd.cipherSetupData.cipherDirection =
+		    CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT;
+		sd.algChainOrder =
+		    CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER;
+	} else {
+		ASSERT3U(dir, ==, QAT_DECRYPT);
+		sd.cipherSetupData.cipherDirection =
+		    CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT;
+		sd.algChainOrder =
+		    CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH;
+	}
+
+	/* query the context size needed for this session setup */
+	status = cpaCySymSessionCtxGetSize(inst_handle, &sd, &ctx_size);
+	if (status != CPA_STATUS_SUCCESS)
+		return (status);
+
+	status = QAT_PHYS_CONTIG_ALLOC(cy_session_ctx, ctx_size);
+	if (status != CPA_STATUS_SUCCESS)
+		return (status);
+
+	status = cpaCySymInitSession(inst_handle, symcallback, &sd,
+	    *cy_session_ctx);
+	if (status != CPA_STATUS_SUCCESS) {
+		/* free (and NULL) the context when session init fails */
+		QAT_PHYS_CONTIG_FREE(*cy_session_ctx);
+	}
+
+	return (status);
+}
+
+static CpaStatus
+init_cy_buffer_lists(CpaInstanceHandle inst_handle, uint32_t nr_bufs,
+    CpaBufferList *src, CpaBufferList *dst)
+{
+	boolean_t in_place = (src == dst);
+	Cpa32U meta_size = 0;
+	CpaStatus status;
+
+	/* both lists hold nr_bufs buffers, so one metadata size fits both */
+	status = cpaCyBufferListGetMetaSize(inst_handle, nr_bufs, &meta_size);
+	if (status != CPA_STATUS_SUCCESS)
+		return (status);
+
+	src->numBuffers = nr_bufs;
+	status = QAT_PHYS_CONTIG_ALLOC(&src->pPrivateMetaData, meta_size);
+
+	/* a distinct destination list needs its own private metadata */
+	if (status == CPA_STATUS_SUCCESS && !in_place) {
+		dst->numBuffers = nr_bufs;
+		status = QAT_PHYS_CONTIG_ALLOC(&dst->pPrivateMetaData,
+		    meta_size);
+	}
+
+	if (status == CPA_STATUS_SUCCESS)
+		return (CPA_STATUS_SUCCESS);
+
+	/* allocation failed: release the metadata of both lists */
+	QAT_PHYS_CONTIG_FREE(src->pPrivateMetaData);
+	if (!in_place)
+		QAT_PHYS_CONTIG_FREE(dst->pPrivateMetaData);
+
+	return (status);
+}
+
+/*
+ * Encrypt or decrypt (per dir) enc_len bytes from src_buf into dst_buf on a
+ * QAT crypto instance, using key, iv_buf and the optional aad_buf/aad_len.
+ * digest_buf is handed to the hardware as the digest (MAC) buffer. Returns
+ * CPA_STATUS_SUCCESS, or another CpaStatus if the request could not be done.
+ */
+int
+qat_crypt(qat_encrypt_dir_t dir, uint8_t *src_buf, uint8_t *dst_buf,
+    uint8_t *aad_buf, uint32_t aad_len, uint8_t *iv_buf, uint8_t *digest_buf,
+    crypto_key_t *key, uint64_t crypt, uint32_t enc_len)
+{
+	CpaStatus status = CPA_STATUS_SUCCESS;
+	Cpa16U i;
+	CpaInstanceHandle cy_inst_handle;
+	Cpa16U nr_bufs;
+	Cpa32U bytes_left = 0;
+	Cpa8S *in = NULL;
+	Cpa8S *out = NULL;
+	CpaCySymSessionCtx *cy_session_ctx = NULL;
+	cy_callback_t cb;
+	CpaCySymOpData op_data = { 0 };
+	CpaBufferList src_buffer_list = { 0 };
+	CpaBufferList dst_buffer_list = { 0 };
+	CpaFlatBuffer *flat_src_buf_array = NULL;
+	CpaFlatBuffer *flat_src_buf = NULL;
+	CpaFlatBuffer *flat_dst_buf_array = NULL;
+	CpaFlatBuffer *flat_dst_buf = NULL;
+	struct page *in_pages[MAX_PAGE_NUM];
+	struct page *out_pages[MAX_PAGE_NUM];
+	Cpa32S page_num = 0;
+
+	if (dir == QAT_ENCRYPT) {
+		QAT_STAT_BUMP(encrypt_requests);
+		QAT_STAT_INCR(encrypt_total_in_bytes, enc_len);
+	} else {
+		QAT_STAT_BUMP(decrypt_requests);
+		QAT_STAT_INCR(decrypt_total_in_bytes, enc_len);
+	}
+
+	i = atomic_inc_32_nv(&inst_num) % num_inst;
+	cy_inst_handle = cy_inst_handles[i];
+
+	status = init_cy_session_ctx(dir, cy_inst_handle, &cy_session_ctx, key,
+	    crypt, aad_len);
+	if (status != CPA_STATUS_SUCCESS) {
+		/* don't count CCM as a failure since it's not supported */
+		if (zio_crypt_table[crypt].ci_crypt_type != ZC_TYPE_CCM)
+			QAT_STAT_BUMP(crypt_fails);
+		return (status);
+	}
+
+	nr_bufs = enc_len / PAGE_CACHE_SIZE +
+	    (enc_len % PAGE_CACHE_SIZE == 0 ? 0 : 1);
+	status = init_cy_buffer_lists(cy_inst_handle, nr_bufs, &src_buffer_list,
+	    &dst_buffer_list);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	status = QAT_PHYS_CONTIG_ALLOC(&flat_src_buf_array,
+	    nr_bufs * sizeof (CpaFlatBuffer));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+	status = QAT_PHYS_CONTIG_ALLOC(&flat_dst_buf_array,
+	    nr_bufs * sizeof (CpaFlatBuffer));
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	bytes_left = enc_len;
+	in = src_buf;
+	out = dst_buf;
+	flat_src_buf = flat_src_buf_array;
+	flat_dst_buf = flat_dst_buf_array;
+	while (bytes_left > 0) {
+		in_pages[page_num] = qat_mem_to_page(in);
+		out_pages[page_num] = qat_mem_to_page(out);
+		flat_src_buf->pData = kmap(in_pages[page_num]);
+		flat_dst_buf->pData = kmap(out_pages[page_num]);
+		flat_src_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
+		    (long)bytes_left);
+		flat_dst_buf->dataLenInBytes = min((long)PAGE_CACHE_SIZE,
+		    (long)bytes_left);
+		in += flat_src_buf->dataLenInBytes;
+		out += flat_dst_buf->dataLenInBytes;
+		bytes_left -= flat_src_buf->dataLenInBytes;
+		flat_src_buf++;
+		flat_dst_buf++;
+		page_num++;
+	}
+	src_buffer_list.pBuffers = flat_src_buf_array;
+	dst_buffer_list.pBuffers = flat_dst_buf_array;
+
+	op_data.sessionCtx = cy_session_ctx;
+	op_data.packetType = CPA_CY_SYM_PACKET_TYPE_FULL;
+	op_data.pIv = iv_buf;
+	op_data.ivLenInBytes = ZIO_DATA_IV_LEN;
+	op_data.messageLenToCipherInBytes = enc_len;
+	op_data.pAdditionalAuthData = aad_buf;
+	op_data.pDigestResult = digest_buf;
+
+	cb.verify_result = CPA_FALSE;
+	init_completion(&cb.complete);
+	status = cpaCySymPerformOp(cy_inst_handle, &cb, &op_data,
+	    &src_buffer_list, &dst_buffer_list, NULL);
+	if (status != CPA_STATUS_SUCCESS)
+		goto fail;
+
+	/* cb lives on this stack: never return before the callback runs */
+	wait_for_completion(&cb.complete);
+
+	if (cb.verify_result == CPA_FALSE) {
+		status = CPA_STATUS_FAIL;
+		goto fail;
+	}
+
+	if (dir == QAT_ENCRYPT)
+		QAT_STAT_INCR(encrypt_total_out_bytes, enc_len);
+	else
+		QAT_STAT_INCR(decrypt_total_out_bytes, enc_len);
+
+fail:
+	/* don't count CCM as a failure since it's not supported */
+	if (status != CPA_STATUS_SUCCESS &&
+	    zio_crypt_table[crypt].ci_crypt_type != ZC_TYPE_CCM)
+		QAT_STAT_BUMP(crypt_fails);
+
+	for (i = 0; i < page_num; i++) {
+		kunmap(in_pages[i]);
+		kunmap(out_pages[i]);
+	}
+
+	cpaCySymRemoveSession(cy_inst_handle, cy_session_ctx);
+	QAT_PHYS_CONTIG_FREE(src_buffer_list.pPrivateMetaData);
+	QAT_PHYS_CONTIG_FREE(dst_buffer_list.pPrivateMetaData);
+	QAT_PHYS_CONTIG_FREE(cy_session_ctx);
+	QAT_PHYS_CONTIG_FREE(flat_src_buf_array);
+	QAT_PHYS_CONTIG_FREE(flat_dst_buf_array);
+
+	return (status);
+}
+
+module_param(zfs_qat_disable, int, 0644);	/* root-writable via sysfs */
+MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT acceleration");
+
+#endif
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
index e742af255..c67bacbbb 100644
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -56,7 +56,7 @@
 #include <sys/kstat.h>
 #include "zfs_prop.h"
 #include <sys/zfeature.h>
-#include "qat_compress.h"
+#include "qat.h"
 
 /*
  * SPA locking
diff --git a/module/zfs/zio_crypt.c b/module/zfs/zio_crypt.c
index d0b39a3f2..741d64ad5 100644
--- a/module/zfs/zio_crypt.c
+++ b/module/zfs/zio_crypt.c
@@ -26,6 +26,7 @@
 #include <sys/zil.h>
 #include <sys/sha2.h>
 #include <sys/hkdf.h>
+#include "qat.h"
 
 /*
  * This file is responsible for handling all of the details of generating
@@ -1875,16 +1876,6 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
 	crypto_ctx_template_t tmpl;
 	uint8_t *authbuf = NULL;
 
-	bzero(&puio, sizeof (uio_t));
-	bzero(&cuio, sizeof (uio_t));
-
-	/* create uios for encryption */
-	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
-	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
-	    &authbuf, &auth_len, no_crypt);
-	if (ret != 0)
-		return (ret);
-
 	/*
 	 * If the needed key is the current one, just use it. Otherwise we
 	 * need to generate a temporary one from the given salt + master key.
@@ -1914,7 +1905,48 @@ zio_do_crypt_data(boolean_t encrypt, zio_crypt_key_t *key, uint8_t *salt,
 		tmpl = NULL;
 	}
 
-	/* perform the encryption / decryption */
+	/*
+	 * Attempt to use QAT acceleration if we can. We currently don't
+	 * do this for metadnode and ZIL blocks, since they have a much
+	 * more involved buffer layout and the qat_crypt() function only
+	 * works in-place.
+	 */
+	if (qat_crypt_use_accel(datalen) &&
+	    ot != DMU_OT_INTENT_LOG && ot != DMU_OT_DNODE) {
+		uint8_t *srcbuf, *dstbuf;
+
+		if (encrypt) {
+			srcbuf = plainbuf;
+			dstbuf = cipherbuf;
+		} else {
+			srcbuf = cipherbuf;
+			dstbuf = plainbuf;
+		}
+
+		ret = qat_crypt((encrypt) ? QAT_ENCRYPT : QAT_DECRYPT, srcbuf,
+		    dstbuf, NULL, 0, iv, mac, ckey, key->zk_crypt, datalen);
+		if (ret == CPA_STATUS_SUCCESS) {
+			if (locked) {
+				rw_exit(&key->zk_salt_lock);
+				locked = B_FALSE;
+			}
+
+			return (0);
+		}
+		/* If the hardware implementation fails fall back to software */
+	}
+
+	bzero(&puio, sizeof (uio_t));
+	bzero(&cuio, sizeof (uio_t));
+
+	/* create uios for encryption */
+	ret = zio_crypt_init_uios(encrypt, key->zk_version, ot, plainbuf,
+	    cipherbuf, datalen, byteswap, mac, &puio, &cuio, &enc_len,
+	    &authbuf, &auth_len, no_crypt);
+	if (ret != 0)
+		goto error;
+
+	/* perform the encryption / decryption in software */
 	ret = zio_do_crypt_uio(encrypt, key->zk_crypt, ckey, tmpl, iv, enc_len,
 	    &puio, &cuio, authbuf, auth_len);
 	if (ret != 0)
-- 
2.40.0