/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ #if defined(_KERNEL) && defined(HAVE_QAT) #include #include #include #include #include #include "qat_compress.h" /* * Timeout - no response from hardware after 0.5 seconds */ #define TIMEOUT_MS 500 /* * Max instances in QAT device, each instance is a channel to submit * jobs to QAT hardware */ #define MAX_INSTANCES 6 /* * ZLIB head and foot size */ #define ZLIB_HEAD_SZ 2 #define ZLIB_FOOT_SZ 4 /* * The minimal and maximal buffer size, which are not restricted * in the QAT hardware, but with the input buffer size between 4KB * and 128KB, the hardware can provide the optimal performance. */ #define QAT_MIN_BUF_SIZE (4*1024) #define QAT_MAX_BUF_SIZE (128*1024) /* * Used for qat kstat. */ typedef struct qat_stats { /* * Number of jobs submitted to qat compression engine. */ kstat_named_t comp_requests; /* * Total bytes sent to qat compression engine. */ kstat_named_t comp_total_in_bytes; /* * Total bytes output from qat compression engine. */ kstat_named_t comp_total_out_bytes; /* * Number of jobs submitted to qat de-compression engine. */ kstat_named_t decomp_requests; /* * Total bytes sent to qat de-compression engine. */ kstat_named_t decomp_total_in_bytes; /* * Total bytes output from qat de-compression engine. */ kstat_named_t decomp_total_out_bytes; /* * Number of fails in qat engine. * Note: when qat fail happens, it doesn't mean a critical hardware * issue, sometimes it is because the output buffer is not big enough, * and the compression job will be transfered to gzip software again, * so the functionality of ZFS is not impacted. */ kstat_named_t dc_fails; } qat_stats_t; qat_stats_t qat_stats = { { "comp_reqests", KSTAT_DATA_UINT64 }, { "comp_total_in_bytes", KSTAT_DATA_UINT64 }, { "comp_total_out_bytes", KSTAT_DATA_UINT64 }, { "decomp_reqests", KSTAT_DATA_UINT64 }, { "decomp_total_in_bytes", KSTAT_DATA_UINT64 }, { "decomp_total_out_bytes", KSTAT_DATA_UINT64 }, { "dc_fails", KSTAT_DATA_UINT64 }, }; static kstat_t *qat_ksp; static CpaInstanceHandle dc_inst_handles[MAX_INSTANCES]; static CpaDcSessionHandle session_handles[MAX_INSTANCES]; static CpaBufferList **buffer_array[MAX_INSTANCES]; static Cpa16U num_inst = 0; static Cpa16U inst_num = 0; static boolean_t qat_init_done = B_FALSE; int zfs_qat_disable = 0; #define QAT_STAT_INCR(stat, val) \ atomic_add_64(&qat_stats.stat.value.ui64, (val)); #define QAT_STAT_BUMP(stat) \ QAT_STAT_INCR(stat, 1); #define PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes) \ mem_alloc_contig((void *)(pp_mem_addr), (size_bytes)) #define PHYS_CONTIG_FREE(p_mem_addr) \ mem_free_contig((void *)&(p_mem_addr)) static inline struct page * mem_to_page(void *addr) { if (!is_vmalloc_addr(addr)) return (virt_to_page(addr)); return (vmalloc_to_page(addr)); } static void qat_dc_callback(void *p_callback, CpaStatus status) { if (p_callback != NULL) complete((struct completion *)p_callback); } static inline CpaStatus mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes) { *pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL); if (*pp_mem_addr == NULL) return (CPA_STATUS_RESOURCE); return (CPA_STATUS_SUCCESS); } static inline void mem_free_contig(void **pp_mem_addr) { if (*pp_mem_addr != NULL) { kfree(*pp_mem_addr); *pp_mem_addr = NULL; } } static void qat_clean(void) { Cpa16U buff_num = 0; Cpa16U num_inter_buff_lists = 0; Cpa16U i = 0; for (i = 0; i < num_inst; i++) { cpaDcStopInstance(dc_inst_handles[i]); PHYS_CONTIG_FREE(session_handles[i]); /* free intermediate buffers */ if (buffer_array[i] != NULL) { cpaDcGetNumIntermediateBuffers( dc_inst_handles[i], &num_inter_buff_lists); for (buff_num = 0; buff_num < num_inter_buff_lists; buff_num++) { CpaBufferList *buffer_inter = buffer_array[i][buff_num]; if (buffer_inter->pBuffers) { PHYS_CONTIG_FREE( buffer_inter->pBuffers->pData); PHYS_CONTIG_FREE( buffer_inter->pBuffers); } PHYS_CONTIG_FREE( buffer_inter->pPrivateMetaData); PHYS_CONTIG_FREE(buffer_inter); } } } num_inst = 0; qat_init_done = B_FALSE; } int qat_init(void) { CpaStatus status = CPA_STATUS_SUCCESS; Cpa32U sess_size = 0; Cpa32U ctx_size = 0; Cpa16U num_inter_buff_lists = 0; Cpa16U buff_num = 0; Cpa32U buff_meta_size = 0; CpaDcSessionSetupData sd = {0}; Cpa16U i; status = cpaDcGetNumInstances(&num_inst); if (status != CPA_STATUS_SUCCESS || num_inst == 0) return (-1); if (num_inst > MAX_INSTANCES) num_inst = MAX_INSTANCES; status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]); if (status != CPA_STATUS_SUCCESS) return (-1); for (i = 0; i < num_inst; i++) { cpaDcSetAddressTranslation(dc_inst_handles[i], (void*)virt_to_phys); status = cpaDcBufferListGetMetaSize(dc_inst_handles[i], 1, &buff_meta_size); if (status == CPA_STATUS_SUCCESS) status = cpaDcGetNumIntermediateBuffers( dc_inst_handles[i], &num_inter_buff_lists); if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0) status = PHYS_CONTIG_ALLOC(&buffer_array[i], num_inter_buff_lists * sizeof (CpaBufferList *)); for (buff_num = 0; buff_num < num_inter_buff_lists; buff_num++) { if (status == CPA_STATUS_SUCCESS) status = PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num], sizeof (CpaBufferList)); if (status == CPA_STATUS_SUCCESS) status = PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num]-> pPrivateMetaData, buff_meta_size); if (status == CPA_STATUS_SUCCESS) status = PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num]->pBuffers, sizeof (CpaFlatBuffer)); if (status == CPA_STATUS_SUCCESS) { /* * implementation requires an intermediate * buffer approximately twice the size of * output buffer, which is 2x max buffer * size here. */ status = PHYS_CONTIG_ALLOC( &buffer_array[i][buff_num]->pBuffers-> pData, 2 * QAT_MAX_BUF_SIZE); if (status != CPA_STATUS_SUCCESS) goto fail; buffer_array[i][buff_num]->numBuffers = 1; buffer_array[i][buff_num]->pBuffers-> dataLenInBytes = 2 * QAT_MAX_BUF_SIZE; } } status = cpaDcStartInstance(dc_inst_handles[i], num_inter_buff_lists, buffer_array[i]); if (status != CPA_STATUS_SUCCESS) goto fail; sd.compLevel = CPA_DC_L1; sd.compType = CPA_DC_DEFLATE; sd.huffType = CPA_DC_HT_FULL_DYNAMIC; sd.sessDirection = CPA_DC_DIR_COMBINED; sd.sessState = CPA_DC_STATELESS; sd.deflateWindowSize = 7; sd.checksum = CPA_DC_ADLER32; status = cpaDcGetSessionSize(dc_inst_handles[i], &sd, &sess_size, &ctx_size); if (status != CPA_STATUS_SUCCESS) goto fail; PHYS_CONTIG_ALLOC(&session_handles[i], sess_size); if (session_handles[i] == NULL) goto fail; status = cpaDcInitSession(dc_inst_handles[i], session_handles[i], &sd, NULL, qat_dc_callback); if (status != CPA_STATUS_SUCCESS) goto fail; } qat_ksp = kstat_create("zfs", 0, "qat", "misc", KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (qat_ksp != NULL) { qat_ksp->ks_data = &qat_stats; kstat_install(qat_ksp); } qat_init_done = B_TRUE; return (0); fail: qat_clean(); return (-1); } void qat_fini(void) { qat_clean(); if (qat_ksp != NULL) { kstat_delete(qat_ksp); qat_ksp = NULL; } } boolean_t qat_use_accel(size_t s_len) { return (!zfs_qat_disable && qat_init_done && s_len >= QAT_MIN_BUF_SIZE && s_len <= QAT_MAX_BUF_SIZE); } int qat_compress(qat_compress_dir_t dir, char *src, int src_len, char *dst, int dst_len, size_t *c_len) { CpaInstanceHandle dc_inst_handle; CpaDcSessionHandle session_handle; CpaBufferList *buf_list_src = NULL; CpaBufferList *buf_list_dst = NULL; CpaFlatBuffer *flat_buf_src = NULL; CpaFlatBuffer *flat_buf_dst = NULL; Cpa8U *buffer_meta_src = NULL; Cpa8U *buffer_meta_dst = NULL; Cpa32U buffer_meta_size = 0; CpaDcRqResults dc_results; CpaStatus status = CPA_STATUS_SUCCESS; Cpa32U hdr_sz = 0; Cpa32U compressed_sz; Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 1; Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 1; Cpa32U bytes_left; char *data; struct page *in_page, *out_page; struct page **in_pages = NULL; struct page **out_pages = NULL; struct completion complete; size_t ret = -1; Cpa16U page_num = 0; Cpa16U i; Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) + (num_src_buf * sizeof (CpaFlatBuffer)); Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) + (num_dst_buf * sizeof (CpaFlatBuffer)); if (!is_vmalloc_addr(src) || !is_vmalloc_addr(src + src_len - 1) || !is_vmalloc_addr(dst) || !is_vmalloc_addr(dst + dst_len - 1)) return (-1); if (PHYS_CONTIG_ALLOC(&in_pages, num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) goto fail; if (PHYS_CONTIG_ALLOC(&out_pages, num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS) goto fail; i = atomic_inc_32_nv(&inst_num) % num_inst; dc_inst_handle = dc_inst_handles[i]; session_handle = session_handles[i]; cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf, &buffer_meta_size); if (PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) != CPA_STATUS_SUCCESS) goto fail; cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf, &buffer_meta_size); if (PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) != CPA_STATUS_SUCCESS) goto fail; /* build source buffer list */ if (PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) != CPA_STATUS_SUCCESS) goto fail; flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1); buf_list_src->pBuffers = flat_buf_src; /* always point to first one */ /* build destination buffer list */ if (PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) != CPA_STATUS_SUCCESS) goto fail; flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */ buf_list_src->numBuffers = 0; buf_list_src->pPrivateMetaData = buffer_meta_src; bytes_left = src_len; data = src; page_num = 0; while (bytes_left > 0) { in_page = mem_to_page(data); in_pages[page_num] = in_page; flat_buf_src->pData = kmap(in_page); flat_buf_src->dataLenInBytes = min((long)bytes_left, (long)PAGE_SIZE); bytes_left -= flat_buf_src->dataLenInBytes; data += flat_buf_src->dataLenInBytes; flat_buf_src++; buf_list_src->numBuffers++; page_num++; } buf_list_dst->numBuffers = 0; buf_list_dst->pPrivateMetaData = buffer_meta_dst; bytes_left = dst_len; data = dst; page_num = 0; while (bytes_left > 0) { out_page = mem_to_page(data); flat_buf_dst->pData = kmap(out_page); out_pages[page_num] = out_page; flat_buf_dst->dataLenInBytes = min((long)bytes_left, (long)PAGE_SIZE); bytes_left -= flat_buf_dst->dataLenInBytes; data += flat_buf_dst->dataLenInBytes; flat_buf_dst++; buf_list_dst->numBuffers++; page_num++; } init_completion(&complete); if (dir == QAT_COMPRESS) { QAT_STAT_BUMP(comp_requests); QAT_STAT_INCR(comp_total_in_bytes, src_len); cpaDcGenerateHeader(session_handle, buf_list_dst->pBuffers, &hdr_sz); buf_list_dst->pBuffers->pData += hdr_sz; buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz; status = cpaDcCompressData( dc_inst_handle, session_handle, buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL, &complete); if (status != CPA_STATUS_SUCCESS) { goto fail; } /* we now wait until the completion of the operation. */ if (!wait_for_completion_interruptible_timeout(&complete, TIMEOUT_MS)) { status = CPA_STATUS_FAIL; goto fail; } if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; goto fail; } compressed_sz = dc_results.produced; if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) { goto fail; } flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1); /* move to the last page */ flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT; /* no space for gzip foot in the last page */ if (((compressed_sz + hdr_sz) % PAGE_SIZE) + ZLIB_FOOT_SZ > PAGE_SIZE) goto fail; flat_buf_dst->pData += (compressed_sz + hdr_sz) % PAGE_SIZE; flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ; dc_results.produced = 0; status = cpaDcGenerateFooter(session_handle, flat_buf_dst, &dc_results); if (status != CPA_STATUS_SUCCESS) { goto fail; } *c_len = compressed_sz + dc_results.produced + hdr_sz; if (*c_len < PAGE_SIZE) *c_len = 8 * PAGE_SIZE; QAT_STAT_INCR(comp_total_out_bytes, *c_len); ret = 0; } else if (dir == QAT_DECOMPRESS) { QAT_STAT_BUMP(decomp_requests); QAT_STAT_INCR(decomp_total_in_bytes, src_len); buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ; buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ; status = cpaDcDecompressData(dc_inst_handle, session_handle, buf_list_src, buf_list_dst, &dc_results, CPA_DC_FLUSH_FINAL, &complete); if (CPA_STATUS_SUCCESS != status) { status = CPA_STATUS_FAIL; goto fail; } /* we now wait until the completion of the operation. */ if (!wait_for_completion_interruptible_timeout(&complete, TIMEOUT_MS)) { status = CPA_STATUS_FAIL; goto fail; } if (dc_results.status != CPA_STATUS_SUCCESS) { status = CPA_STATUS_FAIL; goto fail; } *c_len = dc_results.produced; QAT_STAT_INCR(decomp_total_out_bytes, *c_len); ret = 0; } fail: if (status != CPA_STATUS_SUCCESS) { QAT_STAT_BUMP(dc_fails); } if (in_pages) { for (page_num = 0; page_num < buf_list_src->numBuffers; page_num++) { kunmap(in_pages[page_num]); } PHYS_CONTIG_FREE(in_pages); } if (out_pages) { for (page_num = 0; page_num < buf_list_dst->numBuffers; page_num++) { kunmap(out_pages[page_num]); } PHYS_CONTIG_FREE(out_pages); } PHYS_CONTIG_FREE(buffer_meta_src); PHYS_CONTIG_FREE(buffer_meta_dst); PHYS_CONTIG_FREE(buf_list_src); PHYS_CONTIG_FREE(buf_list_dst); return (ret); } module_param(zfs_qat_disable, int, 0644); MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT compression"); #endif