]> granicus.if.org Git - zfs/blob - module/zfs/qat_compress.c
Reimplement vdev_random_leaf and rename it
[zfs] / module / zfs / qat_compress.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 #if defined(_KERNEL) && defined(HAVE_QAT)
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/completion.h>
#include <linux/jiffies.h>
#include <sys/zfs_context.h>
#include "qat_compress.h"
29
/*
 * Timeout - no response from hardware after 0.5 seconds.
 * qat_compress() uses this as the limit when waiting for a submitted
 * request to complete.
 */
#define TIMEOUT_MS              500

/*
 * Max instances in QAT device, each instance is a channel to submit
 * jobs to QAT hardware.  This sizes the per-instance tables in this file,
 * and qat_init() clamps the discovered instance count to it.
 */
#define MAX_INSTANCES           6

/*
 * ZLIB head and foot size, in bytes.  The head is skipped in the source
 * buffer before decompression; the foot is the space reserved after the
 * compressed stream for the generated footer.
 */
#define ZLIB_HEAD_SZ            2
#define ZLIB_FOOT_SZ            4

/*
 * The minimal and maximal buffer size, which are not restricted
 * in the QAT hardware, but with the input buffer size between 4KB
 * and 128KB, the hardware can provide the optimal performance.
 * qat_use_accel() only accepts source lengths inside this range, and
 * qat_init() sizes the intermediate buffers as 2 * QAT_MAX_BUF_SIZE.
 */
#define QAT_MIN_BUF_SIZE        (4*1024)
#define QAT_MAX_BUF_SIZE        (128*1024)
54
55 /*
56  * Used for qat kstat.
57  */
/*
 * Counters exported through the "qat" kstat.
 *
 * The member order matters: the qat_stats instance is filled in with a
 * positional initializer, so each entry there must line up with the
 * member at the same position here.
 */
typedef struct qat_stats {
        /*
         * Number of jobs submitted to qat compression engine.
         */
        kstat_named_t comp_requests;
        /*
         * Total bytes sent to qat compression engine.
         */
        kstat_named_t comp_total_in_bytes;
        /*
         * Total bytes output from qat compression engine.
         */
        kstat_named_t comp_total_out_bytes;
        /*
         * Number of jobs submitted to qat de-compression engine.
         */
        kstat_named_t decomp_requests;
        /*
         * Total bytes sent to qat de-compression engine.
         */
        kstat_named_t decomp_total_in_bytes;
        /*
         * Total bytes output from qat de-compression engine.
         */
        kstat_named_t decomp_total_out_bytes;
        /*
         * Number of failures in the qat engine.
         * Note: a qat failure does not necessarily mean a critical hardware
         * issue; sometimes it is because the output buffer is not big
         * enough, and the compression job will be transferred to gzip
         * software again, so the functionality of ZFS is not impacted.
         */
        kstat_named_t dc_fails;
} qat_stats_t;
92
93 qat_stats_t qat_stats = {
94         { "comp_reqests",                       KSTAT_DATA_UINT64 },
95         { "comp_total_in_bytes",                KSTAT_DATA_UINT64 },
96         { "comp_total_out_bytes",               KSTAT_DATA_UINT64 },
97         { "decomp_reqests",                     KSTAT_DATA_UINT64 },
98         { "decomp_total_in_bytes",              KSTAT_DATA_UINT64 },
99         { "decomp_total_out_bytes",             KSTAT_DATA_UINT64 },
100         { "dc_fails",                           KSTAT_DATA_UINT64 },
101 };
102
103 static kstat_t *qat_ksp;
104 static CpaInstanceHandle dc_inst_handles[MAX_INSTANCES];
105 static CpaDcSessionHandle session_handles[MAX_INSTANCES];
106 static CpaBufferList **buffer_array[MAX_INSTANCES];
107 static Cpa32U num_inst = 0;
108 static Cpa32U inst_num = 0;
109 static boolean_t qat_init_done = B_FALSE;
110 int zfs_qat_disable = 0;
111
/*
 * Atomically add `val` to the named counter in qat_stats
 * (QAT_STAT_INCR), or add one to it (QAT_STAT_BUMP).
 *
 * Both expand to a single expression with no trailing semicolon; the
 * caller supplies it.  That keeps them safe in an un-braced if/else,
 * where a semicolon baked into the macro would end the statement early
 * and orphan the else.
 */
#define QAT_STAT_INCR(stat, val) \
        atomic_add_64(&qat_stats.stat.value.ui64, (val))
#define QAT_STAT_BUMP(stat) \
        QAT_STAT_INCR(stat, 1)
116
/*
 * Type-erasing wrappers around mem_alloc_contig()/mem_free_contig().
 *
 * PHYS_CONTIG_ALLOC takes the ADDRESS of a pointer variable and a size;
 * PHYS_CONTIG_FREE takes the pointer variable itself (an lvalue) and
 * passes its address on, so the callee can reset it.
 */
#define PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes)      \
        mem_alloc_contig((void **)(pp_mem_addr), (size_bytes))

#define PHYS_CONTIG_FREE(p_mem_addr)    \
        mem_free_contig((void **)&(p_mem_addr))
122
/*
 * Return the struct page behind a kernel virtual address, using the
 * vmalloc lookup for vmalloc addresses and virt_to_page() otherwise.
 */
static inline struct page *
mem_to_page(void *addr)
{
        if (is_vmalloc_addr(addr))
                return (vmalloc_to_page(addr));

        return (virt_to_page(addr));
}
131
132 static void
133 qat_dc_callback(void *p_callback, CpaStatus status)
134 {
135         if (p_callback != NULL)
136                 complete((struct completion *)p_callback);
137 }
138
139 static inline CpaStatus
140 mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
141 {
142         *pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL);
143         if (*pp_mem_addr == NULL)
144                 return (CPA_STATUS_RESOURCE);
145         return (CPA_STATUS_SUCCESS);
146 }
147
148 static inline void
149 mem_free_contig(void **pp_mem_addr)
150 {
151         if (*pp_mem_addr != NULL) {
152                 kfree(*pp_mem_addr);
153                 *pp_mem_addr = NULL;
154         }
155 }
156
157 static void
158 qat_clean(void)
159 {
160         Cpa16U buff_num = 0;
161         Cpa16U num_inter_buff_lists = 0;
162         Cpa16U i = 0;
163
164         for (i = 0; i < num_inst; i++) {
165                 cpaDcStopInstance(dc_inst_handles[i]);
166                 PHYS_CONTIG_FREE(session_handles[i]);
167                 /* free intermediate buffers  */
168                 if (buffer_array[i] != NULL) {
169                         cpaDcGetNumIntermediateBuffers(
170                             dc_inst_handles[i], &num_inter_buff_lists);
171                         for (buff_num = 0; buff_num < num_inter_buff_lists;
172                             buff_num++) {
173                                 CpaBufferList *buffer_inter =
174                                     buffer_array[i][buff_num];
175                                 if (buffer_inter->pBuffers) {
176                                         PHYS_CONTIG_FREE(
177                                             buffer_inter->pBuffers->pData);
178                                         PHYS_CONTIG_FREE(
179                                             buffer_inter->pBuffers);
180                                 }
181                                 PHYS_CONTIG_FREE(
182                                     buffer_inter->pPrivateMetaData);
183                                 PHYS_CONTIG_FREE(buffer_inter);
184                         }
185                 }
186         }
187
188         num_inst = 0;
189         qat_init_done = B_FALSE;
190 }
191
192 int
193 qat_init(void)
194 {
195         CpaStatus status = CPA_STATUS_SUCCESS;
196         Cpa32U sess_size = 0;
197         Cpa32U ctx_size = 0;
198         Cpa16U num_inter_buff_lists = 0;
199         Cpa16U buff_num = 0;
200         Cpa32U buff_meta_size = 0;
201         CpaDcSessionSetupData sd = {0};
202         Cpa16U i;
203
204         status = cpaDcGetNumInstances(&num_inst);
205         if (status != CPA_STATUS_SUCCESS || num_inst == 0)
206                 return (-1);
207
208         if (num_inst > MAX_INSTANCES)
209                 num_inst = MAX_INSTANCES;
210
211         status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
212         if (status != CPA_STATUS_SUCCESS)
213                 return (-1);
214
215         for (i = 0; i < num_inst; i++) {
216                 cpaDcSetAddressTranslation(dc_inst_handles[i],
217                     (void*)virt_to_phys);
218
219                 status = cpaDcBufferListGetMetaSize(dc_inst_handles[i],
220                     1, &buff_meta_size);
221
222                 if (status == CPA_STATUS_SUCCESS)
223                         status = cpaDcGetNumIntermediateBuffers(
224                             dc_inst_handles[i], &num_inter_buff_lists);
225
226                 if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0)
227                         status = PHYS_CONTIG_ALLOC(&buffer_array[i],
228                             num_inter_buff_lists *
229                             sizeof (CpaBufferList *));
230
231                 for (buff_num = 0; buff_num < num_inter_buff_lists;
232                     buff_num++) {
233                         if (status == CPA_STATUS_SUCCESS)
234                                 status = PHYS_CONTIG_ALLOC(
235                                     &buffer_array[i][buff_num],
236                                     sizeof (CpaBufferList));
237
238                         if (status == CPA_STATUS_SUCCESS)
239                                 status = PHYS_CONTIG_ALLOC(
240                                     &buffer_array[i][buff_num]->
241                                     pPrivateMetaData,
242                                     buff_meta_size);
243
244                         if (status == CPA_STATUS_SUCCESS)
245                                 status = PHYS_CONTIG_ALLOC(
246                                     &buffer_array[i][buff_num]->pBuffers,
247                                     sizeof (CpaFlatBuffer));
248
249                         if (status == CPA_STATUS_SUCCESS) {
250                                 /*
251                                  *  implementation requires an intermediate
252                                  *  buffer approximately twice the size of
253                                  *  output buffer, which is 2x max buffer
254                                  *  size here.
255                                  */
256                                 status = PHYS_CONTIG_ALLOC(
257                                     &buffer_array[i][buff_num]->pBuffers->
258                                     pData, 2 * QAT_MAX_BUF_SIZE);
259                                 if (status != CPA_STATUS_SUCCESS)
260                                         goto fail;
261
262                                 buffer_array[i][buff_num]->numBuffers = 1;
263                                 buffer_array[i][buff_num]->pBuffers->
264                                     dataLenInBytes = 2 * QAT_MAX_BUF_SIZE;
265                         }
266                 }
267
268                 status = cpaDcStartInstance(dc_inst_handles[i],
269                     num_inter_buff_lists, buffer_array[i]);
270                 if (status != CPA_STATUS_SUCCESS)
271                         goto fail;
272
273                 sd.compLevel = CPA_DC_L1;
274                 sd.compType = CPA_DC_DEFLATE;
275                 sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
276                 sd.sessDirection = CPA_DC_DIR_COMBINED;
277                 sd.sessState = CPA_DC_STATELESS;
278                 sd.deflateWindowSize = 7;
279                 sd.checksum = CPA_DC_ADLER32;
280                 status = cpaDcGetSessionSize(dc_inst_handles[i],
281                     &sd, &sess_size, &ctx_size);
282                 if (status != CPA_STATUS_SUCCESS)
283                         goto fail;
284
285                 PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
286                 if (session_handles[i] == NULL)
287                         goto fail;
288
289                 status = cpaDcInitSession(dc_inst_handles[i],
290                     session_handles[i],
291                     &sd, NULL, qat_dc_callback);
292                 if (status != CPA_STATUS_SUCCESS)
293                         goto fail;
294         }
295
296         qat_ksp = kstat_create("zfs", 0, "qat", "misc",
297             KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t),
298             KSTAT_FLAG_VIRTUAL);
299         if (qat_ksp != NULL) {
300                 qat_ksp->ks_data = &qat_stats;
301                 kstat_install(qat_ksp);
302         }
303
304         qat_init_done = B_TRUE;
305         return (0);
306 fail:
307         qat_clean();
308         return (-1);
309 }
310
311 void
312 qat_fini(void)
313 {
314         qat_clean();
315
316         if (qat_ksp != NULL) {
317                 kstat_delete(qat_ksp);
318                 qat_ksp = NULL;
319         }
320 }
321
322 boolean_t
323 qat_use_accel(size_t s_len)
324 {
325         return (!zfs_qat_disable &&
326             qat_init_done &&
327             s_len >= QAT_MIN_BUF_SIZE &&
328             s_len <= QAT_MAX_BUF_SIZE);
329 }
330
331 int
332 qat_compress(qat_compress_dir_t dir, char *src, int src_len,
333     char *dst, int dst_len, size_t *c_len)
334 {
335         CpaInstanceHandle dc_inst_handle;
336         CpaDcSessionHandle session_handle;
337         CpaBufferList *buf_list_src = NULL;
338         CpaBufferList *buf_list_dst = NULL;
339         CpaFlatBuffer *flat_buf_src = NULL;
340         CpaFlatBuffer *flat_buf_dst = NULL;
341         Cpa8U *buffer_meta_src = NULL;
342         Cpa8U *buffer_meta_dst = NULL;
343         Cpa32U buffer_meta_size = 0;
344         CpaDcRqResults dc_results;
345         CpaStatus status = CPA_STATUS_SUCCESS;
346         Cpa32U hdr_sz = 0;
347         Cpa32U compressed_sz;
348         Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 1;
349         Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 1;
350         Cpa32U bytes_left;
351         char *data;
352         struct page *in_page, *out_page;
353         struct page **in_pages = NULL;
354         struct page **out_pages = NULL;
355         struct completion complete;
356         size_t ret = -1;
357         Cpa16U page_num = 0;
358         Cpa16U i;
359
360         Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) +
361             (num_src_buf * sizeof (CpaFlatBuffer));
362         Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
363             (num_dst_buf * sizeof (CpaFlatBuffer));
364
365         if (!is_vmalloc_addr(src) || !is_vmalloc_addr(src + src_len - 1) ||
366             !is_vmalloc_addr(dst) || !is_vmalloc_addr(dst + dst_len - 1))
367                 return (-1);
368
369         if (PHYS_CONTIG_ALLOC(&in_pages,
370             num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
371                 goto fail;
372
373         if (PHYS_CONTIG_ALLOC(&out_pages,
374             num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
375                 goto fail;
376
377         i = atomic_inc_32_nv(&inst_num) % num_inst;
378         dc_inst_handle = dc_inst_handles[i];
379         session_handle = session_handles[i];
380
381         cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
382             &buffer_meta_size);
383         if (PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) !=
384             CPA_STATUS_SUCCESS)
385                 goto fail;
386
387         cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf,
388             &buffer_meta_size);
389         if (PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) !=
390             CPA_STATUS_SUCCESS)
391                 goto fail;
392
393         /* build source buffer list */
394         if (PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) !=
395             CPA_STATUS_SUCCESS)
396                 goto fail;
397
398         flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1);
399
400         buf_list_src->pBuffers = flat_buf_src; /* always point to first one */
401
402         /* build destination buffer list */
403         if (PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) !=
404             CPA_STATUS_SUCCESS)
405                 goto fail;
406
407         flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
408
409         buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */
410
411         buf_list_src->numBuffers = 0;
412         buf_list_src->pPrivateMetaData = buffer_meta_src;
413         bytes_left = src_len;
414         data = src;
415         page_num = 0;
416         while (bytes_left > 0) {
417                 in_page = mem_to_page(data);
418                 in_pages[page_num] = in_page;
419                 flat_buf_src->pData = kmap(in_page);
420                 flat_buf_src->dataLenInBytes =
421                     min((long)bytes_left, (long)PAGE_SIZE);
422
423                 bytes_left -= flat_buf_src->dataLenInBytes;
424                 data += flat_buf_src->dataLenInBytes;
425                 flat_buf_src++;
426                 buf_list_src->numBuffers++;
427                 page_num++;
428         }
429
430         buf_list_dst->numBuffers = 0;
431         buf_list_dst->pPrivateMetaData = buffer_meta_dst;
432         bytes_left = dst_len;
433         data = dst;
434         page_num = 0;
435         while (bytes_left > 0) {
436                 out_page = mem_to_page(data);
437                 flat_buf_dst->pData = kmap(out_page);
438                 out_pages[page_num] = out_page;
439                 flat_buf_dst->dataLenInBytes =
440                     min((long)bytes_left, (long)PAGE_SIZE);
441
442                 bytes_left -= flat_buf_dst->dataLenInBytes;
443                 data += flat_buf_dst->dataLenInBytes;
444                 flat_buf_dst++;
445                 buf_list_dst->numBuffers++;
446                 page_num++;
447         }
448
449         init_completion(&complete);
450
451         if (dir == QAT_COMPRESS) {
452                 QAT_STAT_BUMP(comp_requests);
453                 QAT_STAT_INCR(comp_total_in_bytes, src_len);
454
455                 cpaDcGenerateHeader(session_handle,
456                     buf_list_dst->pBuffers, &hdr_sz);
457                 buf_list_dst->pBuffers->pData += hdr_sz;
458                 buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz;
459                 status = cpaDcCompressData(
460                     dc_inst_handle, session_handle,
461                     buf_list_src, buf_list_dst,
462                     &dc_results, CPA_DC_FLUSH_FINAL,
463                     &complete);
464                 if (status != CPA_STATUS_SUCCESS) {
465                         goto fail;
466                 }
467
468                 /* we now wait until the completion of the operation. */
469                 if (!wait_for_completion_interruptible_timeout(&complete,
470                     TIMEOUT_MS)) {
471                         status = CPA_STATUS_FAIL;
472                         goto fail;
473                 }
474
475                 if (dc_results.status != CPA_STATUS_SUCCESS) {
476                         status = CPA_STATUS_FAIL;
477                         goto fail;
478                 }
479
480                 compressed_sz = dc_results.produced;
481                 if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) {
482                         goto fail;
483                 }
484
485                 flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
486                 /* move to the last page */
487                 flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT;
488
489                 /* no space for gzip foot in the last page */
490                 if (((compressed_sz + hdr_sz) % PAGE_SIZE)
491                     + ZLIB_FOOT_SZ > PAGE_SIZE)
492                         goto fail;
493
494                 flat_buf_dst->pData += (compressed_sz + hdr_sz) % PAGE_SIZE;
495                 flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ;
496
497                 dc_results.produced = 0;
498                 status = cpaDcGenerateFooter(session_handle,
499                     flat_buf_dst, &dc_results);
500                 if (status != CPA_STATUS_SUCCESS) {
501                         goto fail;
502                 }
503
504                 *c_len = compressed_sz + dc_results.produced + hdr_sz;
505
506                 if (*c_len < PAGE_SIZE)
507                         *c_len = 8 * PAGE_SIZE;
508
509                 QAT_STAT_INCR(comp_total_out_bytes, *c_len);
510
511                 ret = 0;
512
513         } else if (dir == QAT_DECOMPRESS) {
514                 QAT_STAT_BUMP(decomp_requests);
515                 QAT_STAT_INCR(decomp_total_in_bytes, src_len);
516
517                 buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ;
518                 buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ;
519                 status = cpaDcDecompressData(dc_inst_handle,
520                     session_handle,
521                     buf_list_src,
522                     buf_list_dst,
523                     &dc_results,
524                     CPA_DC_FLUSH_FINAL,
525                     &complete);
526
527                 if (CPA_STATUS_SUCCESS != status) {
528                         status = CPA_STATUS_FAIL;
529                         goto fail;
530                 }
531
532                 /* we now wait until the completion of the operation. */
533                 if (!wait_for_completion_interruptible_timeout(&complete,
534                     TIMEOUT_MS)) {
535                         status = CPA_STATUS_FAIL;
536                         goto fail;
537                 }
538
539                 if (dc_results.status != CPA_STATUS_SUCCESS) {
540                         status = CPA_STATUS_FAIL;
541                         goto fail;
542                 }
543
544                 *c_len = dc_results.produced;
545
546                 QAT_STAT_INCR(decomp_total_out_bytes, *c_len);
547
548                 ret = 0;
549         }
550
551 fail:
552         if (status != CPA_STATUS_SUCCESS) {
553                 QAT_STAT_BUMP(dc_fails);
554         }
555
556         if (in_pages) {
557                 for (page_num = 0;
558                     page_num < buf_list_src->numBuffers;
559                     page_num++) {
560                         kunmap(in_pages[page_num]);
561                 }
562                 PHYS_CONTIG_FREE(in_pages);
563         }
564
565         if (out_pages) {
566                 for (page_num = 0;
567                     page_num < buf_list_dst->numBuffers;
568                     page_num++) {
569                         kunmap(out_pages[page_num]);
570                 }
571                 PHYS_CONTIG_FREE(out_pages);
572         }
573
574         PHYS_CONTIG_FREE(buffer_meta_src);
575         PHYS_CONTIG_FREE(buffer_meta_dst);
576         PHYS_CONTIG_FREE(buf_list_src);
577         PHYS_CONTIG_FREE(buf_list_dst);
578
579         return (ret);
580 }
581
/*
 * Expose zfs_qat_disable as an int module parameter with mode 0644.
 * qat_use_accel() returns false whenever it is non-zero.
 */
module_param(zfs_qat_disable, int, 0644);
MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT compression");
584
585 #endif