4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
/*
 * KPREEMPT_DISABLE / KPREEMPT_ENABLE bracket the use of the PCLMULQDQ
 * multiply routine. When both _KERNEL and __amd64 are defined they
 * expand to kfpu_begin() / kfpu_end().
 */
25 #if defined(_KERNEL) && defined(__amd64)
26 #include <linux/simd_x86.h>
28 #define KPREEMPT_DISABLE kfpu_begin()
29 #define KPREEMPT_ENABLE kfpu_end()
/*
 * Alternative definitions that expand to nothing.
 * NOTE(review): the #else / #endif lines separating the two pairs are not
 * visible in this view -- confirm against the full file.
 */
32 #define KPREEMPT_DISABLE
33 #define KPREEMPT_ENABLE
36 #include <sys/zfs_context.h>
37 #include <modes/modes.h>
38 #include <sys/crypto/common.h>
39 #include <sys/crypto/impl.h>
40 #include <sys/byteorder.h>
/*
 * Multiply routine defined outside this file; gcm_mul() calls it when
 * intel_pclmulqdq_instruction_present() returns non-zero.
 */
44 extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
/* Forward declaration; the definition appears later in this file. */
45 static int intel_pclmulqdq_instruction_present(void);
56 * Perform a carry-less multiplication (that is, use XOR instead of the
57 * multiply operator) on *x_in and *y and place the result in *res.
59 * Byte swap the input (*x_in and *y) and the output (*res).
61 * Note: x_in, y, and res all point to 16-byte numbers (an array of two
/*
 * gcm_mul(): 128-bit multiply used by the GHASH macro.
 *
 * x_in, y - the two operands, each read as two uint64_t words
 * res     - receives the two-word product (res[0], res[1])
 *
 * Two paths are visible: a call to gcm_mul_pclmulqdq() when the CPU check
 * succeeds, and a bit-by-bit software loop.
 * NOTE(review): several lines of the software loop (declarations of v, x,
 * i, j and the statements that use R) are not visible in this view.
 */
65 gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
/* Hardware path: hand all three pointers to the PCLMULQDQ routine. */
68 if (intel_pclmulqdq_instruction_present()) {
70 gcm_mul_pclmulqdq(x_in, y, res);
/* Software path: R is a constant with only its top byte (0xe1) set. */
75 static const uint64_t R = 0xe100000000000000ULL;
/* z is the 128-bit accumulator that is written to res at the end. */
76 struct aes_block z = {0, 0};
/* Visit both 64-bit words of the input ... */
84 for (j = 0; j < 2; j++) {
/* ... and all 64 bits of each word, testing the top bit then shifting left. */
86 for (i = 0; i < 64; i++, x <<= 1) {
87 if (x & 0x8000000000000000ULL) {
/*
 * Shift the 128-bit value v right by one bit: the lowest bit of v.a
 * becomes the highest bit of v.b. The same statement appears twice
 * (presumably once in each arm of a branch that is not visible here).
 */
92 v.b = (v.a << 63)|(v.b >> 1);
95 v.b = (v.a << 63)|(v.b >> 1);
/* Store the accumulator, converting each word with htonll(). */
100 res[0] = htonll(z.a);
101 res[1] = htonll(z.b);
/*
 * GHASH(c, d, t): fold one block into the running hash of context c.
 *
 *   c - pointer to a context with gcm_ghash and gcm_H members
 *   d - block to fold in; passed as the first argument of xor_block()
 *       together with c->gcm_ghash as the second
 *   t - where gcm_mul() stores the product of c->gcm_ghash and c->gcm_H
 *
 * The macro calls a function named xor_block, so that name must be in
 * scope wherever GHASH is used (in this file it is a function-pointer
 * parameter of the calling functions).
 *
 * The two calls are wrapped in do { } while (0) so that the macro always
 * expands to exactly one statement. Without the wrapper, a use such as
 * "if (cond) GHASH(...);" would guard only the xor_block() call and run
 * gcm_mul() unconditionally. Every use must be followed by a semicolon.
 */
#define	GHASH(c, d, t) \
	do { \
		xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
		gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
		    (uint64_t *)(void *)(t)); \
	} while (0)
113 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
114 * is done in another function.
/*
 * gcm_mode_encrypt_contiguous_blocks(): encrypt `length` bytes at `data`
 * one block at a time. For each block the counter in ctx->gcm_cb is
 * incremented and encrypted into ctx->gcm_tmp, the input block is XORed
 * in, the result is copied out, and the block is folded into
 * ctx->gcm_ghash. Input that does not fill a whole block is saved in
 * ctx->gcm_remainder for a later call.
 *
 * Returns CRYPTO_SUCCESS, or CRYPTO_DATA_LEN_RANGE when a pending partial
 * block cannot be completed from the supplied input.
 *
 * NOTE(review): a number of lines (local declarations, loop head, some
 * branch conditions) are not visible in this view.
 */
117 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
118 crypto_data_t *out, size_t block_size,
119 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
120 void (*copy_block)(uint8_t *, uint8_t *),
121 void (*xor_block)(uint8_t *, uint8_t *))
/* Input bytes not yet consumed. */
123 size_t remainder = length;
125 uint8_t *datap = (uint8_t *)data;
132 size_t out_data_1_len;
/*
 * 0xffffffff passed through ntohll() so that the mask can be applied
 * directly to the stored word ctx->gcm_cb[1].
 */
134 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
/* Saved remainder plus new input still does not make one full block. */
136 if (length + ctx->gcm_remainder_len < block_size) {
137 /* accumulate bytes here and return */
139 (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
141 ctx->gcm_remainder_len += length;
142 ctx->gcm_copy_to = datap;
143 return (CRYPTO_SUCCESS);
146 lastp = (uint8_t *)ctx->gcm_cb;
/* Initialise the output cursor (iov_or_mp, offset) from `out`. */
148 crypto_init_ptrs(out, &iov_or_mp, &offset);
151 /* Unprocessed data from last call. */
152 if (ctx->gcm_remainder_len > 0) {
/* Bytes still required to complete the partially filled block. */
153 need = block_size - ctx->gcm_remainder_len;
155 if (need > remainder)
156 return (CRYPTO_DATA_LEN_RANGE);
/* Top up the remainder buffer and process the block from there. */
158 bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
159 [ctx->gcm_remainder_len], need);
161 blockp = (uint8_t *)ctx->gcm_remainder;
167 * Increment counter. Counter bits are confined
168 * to the bottom 32 bits of the counter block.
/*
 * Extract the masked counter, add one in host order, convert back, mask
 * again, and merge with the untouched bits of gcm_cb[1].
 */
170 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
171 counter = htonll(counter + 1);
172 counter &= counter_mask;
173 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
/*
 * Encrypt the counter block into gcm_tmp, then XOR the input block into
 * gcm_tmp; gcm_tmp is what gets copied out and hashed below.
 */
175 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
176 (uint8_t *)ctx->gcm_tmp);
177 xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
179 lastp = (uint8_t *)ctx->gcm_tmp;
181 ctx->gcm_processed_data_len += block_size;
/*
 * Copy-back in two pieces: the first gcm_remainder_len bytes go to the
 * location saved in gcm_copy_to, the rest to datap.
 * NOTE(review): the condition selecting this path over the
 * crypto_get_ptrs() path below is not visible in this view.
 */
184 if (ctx->gcm_remainder_len > 0) {
185 bcopy(blockp, ctx->gcm_copy_to,
186 ctx->gcm_remainder_len);
187 bcopy(blockp + ctx->gcm_remainder_len, datap,
/* Obtain up to two destination pointers covering block_size bytes. */
191 crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
192 &out_data_1_len, &out_data_2, block_size);
194 /* copy block to where it belongs */
195 if (out_data_1_len == block_size) {
196 copy_block(lastp, out_data_1);
/* The block spans two destinations: first part, then the tail. */
198 bcopy(lastp, out_data_1, out_data_1_len);
199 if (out_data_2 != NULL) {
200 bcopy(lastp + out_data_1_len,
202 block_size - out_data_1_len);
206 out->cd_offset += block_size;
209 /* add ciphertext to the hash */
210 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
212 /* Update pointer to next block of data to be processed. */
213 if (ctx->gcm_remainder_len != 0) {
215 ctx->gcm_remainder_len = 0;
/* Recompute the unconsumed byte count from the pointer positions. */
220 remainder = (size_t)&data[length] - (size_t)datap;
222 /* Incomplete last block. */
223 if (remainder > 0 && remainder < block_size) {
/* Save the tail and remember where in the input it came from. */
224 bcopy(datap, ctx->gcm_remainder, remainder);
225 ctx->gcm_remainder_len = remainder;
226 ctx->gcm_copy_to = datap;
229 ctx->gcm_copy_to = NULL;
231 } while (remainder > 0);
233 return (CRYPTO_SUCCESS);
/*
 * gcm_encrypt_final(): finish an encryption. Any bytes left in
 * ctx->gcm_remainder are encrypted and hashed as a zero-padded block, the
 * length block is hashed, the result is combined with the encrypted
 * gcm_J0, and then the leftover ciphertext (if any) followed by
 * gcm_tag_len bytes of the hash are written to `out`.
 *
 * Returns CRYPTO_SUCCESS or CRYPTO_DATA_LEN_RANGE; the visible code also
 * tests the result of crypto_put_output_data() (the statements executed
 * on failure are not visible in this view).
 */
238 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
239 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
240 void (*copy_block)(uint8_t *, uint8_t *),
241 void (*xor_block)(uint8_t *, uint8_t *))
/* Same counter mask construction as in the block-encrypt routine. */
243 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
244 uint8_t *ghash, *macp = NULL;
/*
 * Size check against remainder + tag length.
 * NOTE(review): the left-hand operand of this comparison is not visible.
 */
248 (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
249 return (CRYPTO_DATA_LEN_RANGE);
252 ghash = (uint8_t *)ctx->gcm_ghash;
254 if (ctx->gcm_remainder_len > 0) {
256 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
259 * Here is where we deal with data that is not a
260 * multiple of the block size.
/* Advance the counter by one, as for every other block. */
266 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
267 counter = htonll(counter + 1);
268 counter &= counter_mask;
269 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
/* Encrypt the counter block into gcm_tmp. */
271 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
272 (uint8_t *)ctx->gcm_tmp);
/* Zero the unused tail of the remainder buffer so it forms a padded block. */
274 macp = (uint8_t *)ctx->gcm_remainder;
275 bzero(macp + ctx->gcm_remainder_len,
276 block_size - ctx->gcm_remainder_len);
278 /* XOR with counter block */
/* Only the gcm_remainder_len data bytes are processed (loop body not visible). */
279 for (i = 0; i < ctx->gcm_remainder_len; i++) {
283 /* add ciphertext to the hash */
284 GHASH(ctx, macp, ghash);
286 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
/* Record the processed length in bits, then hash the length block. */
289 ctx->gcm_len_a_len_c[1] =
290 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
291 GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
/* Encrypt gcm_J0 in place and XOR it into the hash. */
292 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
293 (uint8_t *)ctx->gcm_J0);
294 xor_block((uint8_t *)ctx->gcm_J0, ghash);
/* Emit the leftover bytes first, if there were any ... */
296 if (ctx->gcm_remainder_len > 0) {
297 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
298 if (rv != CRYPTO_SUCCESS)
301 out->cd_offset += ctx->gcm_remainder_len;
302 ctx->gcm_remainder_len = 0;
/* ... then gcm_tag_len bytes of the hash. */
303 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
304 if (rv != CRYPTO_SUCCESS)
306 out->cd_offset += ctx->gcm_tag_len;
308 return (CRYPTO_SUCCESS);
312 * This will only deal with decrypting the last block of the input that
313 * might not be a multiple of block length.
/*
 * gcm_decrypt_incomplete_block(): handle a trailing partial block.
 *
 * ctx        - reads gcm_remainder / gcm_remainder_len; updates gcm_cb,
 *              gcm_tmp and gcm_ghash; writes into gcm_pt_buf
 * block_size - cipher block size in bytes
 * index      - offset in ctx->gcm_pt_buf where the output bytes go
 *
 * The partial block is zero-padded and hashed first; gcm_tmp is then
 * reused to hold the encrypted counter block for the XOR.
 */
316 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
317 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
318 void (*xor_block)(uint8_t *, uint8_t *))
320 uint8_t *datap, *outp, *counterp;
322 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
327 * Counter bits are confined to the bottom 32 bits
/* Add one to the masked counter field of gcm_cb[1]. */
329 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
330 counter = htonll(counter + 1);
331 counter &= counter_mask;
332 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
/* Input comes from the remainder buffer; output goes to gcm_pt_buf[index]. */
334 datap = (uint8_t *)ctx->gcm_remainder;
335 outp = &((ctx->gcm_pt_buf)[index]);
336 counterp = (uint8_t *)ctx->gcm_tmp;
338 /* authentication tag */
/* Build a zero-padded copy of the partial block in gcm_tmp. */
339 bzero((uint8_t *)ctx->gcm_tmp, block_size);
340 bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
342 /* add ciphertext to the hash */
343 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
345 /* decrypt remaining ciphertext */
/* counterp aliases gcm_tmp, so this overwrites the padded copy above. */
346 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
348 /* XOR with counter block */
349 for (i = 0; i < ctx->gcm_remainder_len; i++) {
350 outp[i] = datap[i] ^ counterp[i];
356 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
357 crypto_data_t *out, size_t block_size,
358 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
359 void (*copy_block)(uint8_t *, uint8_t *),
360 void (*xor_block)(uint8_t *, uint8_t *))
366 * Copy contiguous ciphertext input blocks to plaintext buffer.
367 * Ciphertext will be decrypted in the final.
370 new_len = ctx->gcm_pt_buf_len + length;
371 new = vmem_alloc(new_len, ctx->gcm_kmflag);
372 bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
373 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
375 return (CRYPTO_HOST_MEMORY);
377 ctx->gcm_pt_buf = new;
378 ctx->gcm_pt_buf_len = new_len;
379 bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
381 ctx->gcm_processed_data_len += length;
384 ctx->gcm_remainder_len = 0;
385 return (CRYPTO_SUCCESS);
389 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
390 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
391 void (*xor_block)(uint8_t *, uint8_t *))
399 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
400 int processed = 0, rv;
402 ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
404 pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
405 ghash = (uint8_t *)ctx->gcm_ghash;
406 blockp = ctx->gcm_pt_buf;
408 while (remainder > 0) {
409 /* Incomplete last block */
410 if (remainder < block_size) {
411 bcopy(blockp, ctx->gcm_remainder, remainder);
412 ctx->gcm_remainder_len = remainder;
414 * not expecting anymore ciphertext, just
415 * compute plaintext for the remaining input
417 gcm_decrypt_incomplete_block(ctx, block_size,
418 processed, encrypt_block, xor_block);
419 ctx->gcm_remainder_len = 0;
422 /* add ciphertext to the hash */
423 GHASH(ctx, blockp, ghash);
427 * Counter bits are confined to the bottom 32 bits
429 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
430 counter = htonll(counter + 1);
431 counter &= counter_mask;
432 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
434 cbp = (uint8_t *)ctx->gcm_tmp;
435 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
437 /* XOR with ciphertext */
438 xor_block(cbp, blockp);
440 processed += block_size;
441 blockp += block_size;
442 remainder -= block_size;
445 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
446 GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
447 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
448 (uint8_t *)ctx->gcm_J0);
449 xor_block((uint8_t *)ctx->gcm_J0, ghash);
451 /* compare the input authentication tag with what we calculated */
452 if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
453 /* They don't match */
454 return (CRYPTO_INVALID_MAC);
456 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
457 if (rv != CRYPTO_SUCCESS)
459 out->cd_offset += pt_len;
461 return (CRYPTO_SUCCESS);
/*
 * gcm_validate_args(): sanity-check caller-supplied GCM parameters.
 *
 * Reads gcm_param->ulTagBits and gcm_param->ulIvLen. Returns
 * CRYPTO_MECHANISM_PARAM_INVALID when the IV length is zero (and on a
 * tag-length check whose condition is not visible in this view),
 * otherwise CRYPTO_SUCCESS.
 */
465 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
470 * Check the length of the authentication tag (in bits).
472 tag_len = gcm_param->ulTagBits;
/* NOTE(review): the set of accepted tag_len values is not visible here. */
483 return (CRYPTO_MECHANISM_PARAM_INVALID);
/* An empty IV is rejected. */
486 if (gcm_param->ulIvLen == 0)
487 return (CRYPTO_MECHANISM_PARAM_INVALID);
489 return (CRYPTO_SUCCESS);
/*
 * gcm_format_initial_blocks(): derive the initial counter block from the
 * IV and store it in both ctx->gcm_cb and ctx->gcm_J0.
 *
 * Two paths are visible: one that copies cb into gcm_J0 directly, and one
 * that hashes the IV block by block (zero-padding a short final block),
 * hashes a length block into gcm_J0, and copies gcm_J0 back into cb.
 * NOTE(review): the condition choosing between the paths and the code
 * that fills cb on the first path are not visible in this view.
 */
493 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
494 gcm_ctx_t *ctx, size_t block_size,
495 void (*copy_block)(uint8_t *, uint8_t *),
496 void (*xor_block)(uint8_t *, uint8_t *))
/* IV bytes still to hash / already hashed. */
499 ulong_t remainder = iv_len;
500 ulong_t processed = 0;
501 uint8_t *datap, *ghash;
/* Local length block; only element [1] is assigned in the visible code. */
502 uint64_t len_a_len_c[2];
504 ghash = (uint8_t *)ctx->gcm_ghash;
505 cb = (uint8_t *)ctx->gcm_cb;
512 /* J0 will be used again in the final */
513 copy_block(cb, (uint8_t *)ctx->gcm_J0);
/* Short final IV chunk: zero cb and use it as a padded scratch block. */
517 if (remainder < block_size) {
518 bzero(cb, block_size);
519 bcopy(&(iv[processed]), cb, remainder);
520 datap = (uint8_t *)cb;
/* Full chunk: hash straight from the caller's IV buffer. */
523 datap = (uint8_t *)(&(iv[processed]));
524 processed += block_size;
525 remainder -= block_size;
527 GHASH(ctx, datap, ghash);
528 } while (remainder > 0);
/* IV length in bits goes into the length block; the product lands in gcm_J0. */
531 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
532 GHASH(ctx, len_a_len_c, ctx->gcm_J0);
534 /* J0 will be used again in the final */
535 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
540 * The following function is called at encrypt or decrypt init time
/*
 * gcm_init(): prepare a context for processing.
 *
 * Steps visible here: encrypt an all-zero block to obtain ctx->gcm_H,
 * format the initial counter blocks from the IV, clear gcm_tmp and
 * gcm_ghash, then hash the additional authenticated data block by block
 * (zero-padding a short final block). Returns CRYPTO_SUCCESS.
 */
544 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
545 unsigned char *auth_data, size_t auth_data_len, size_t block_size,
546 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
547 void (*copy_block)(uint8_t *, uint8_t *),
548 void (*xor_block)(uint8_t *, uint8_t *))
550 uint8_t *ghash, *datap, *authp;
551 size_t remainder, processed;
553 /* encrypt zero block to get subkey H */
554 bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
555 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
556 (uint8_t *)ctx->gcm_H);
558 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
559 copy_block, xor_block);
/* gcm_tmp is used as scratch for padding; the hash starts from zero. */
561 authp = (uint8_t *)ctx->gcm_tmp;
562 ghash = (uint8_t *)ctx->gcm_ghash;
563 bzero(authp, block_size);
564 bzero(ghash, block_size);
567 remainder = auth_data_len;
569 if (remainder < block_size) {
571 * There's not a block full of data, pad rest of
574 bzero(authp, block_size);
575 bcopy(&(auth_data[processed]), authp, remainder);
576 datap = (uint8_t *)authp;
/* Full block: hash directly from the caller's buffer. */
579 datap = (uint8_t *)(&(auth_data[processed]));
580 processed += block_size;
581 remainder -= block_size;
584 /* add auth data to the hash */
585 GHASH(ctx, datap, ghash);
587 } while (remainder > 0);
589 return (CRYPTO_SUCCESS);
/*
 * gcm_init_ctx(): initialise gcm_ctx from a CK_AES_GCM_PARAMS structure
 * passed as the opaque `param` pointer.
 *
 * The parameters are validated with gcm_validate_args(), the tag length
 * is converted from bits to bytes, the AAD length is recorded in bits,
 * GCM_MODE is set in gcm_flags, and gcm_init() is run with the IV and AAD
 * from the parameters. CRYPTO_MECHANISM_PARAM_INVALID is assigned to rv
 * on the failure paths visible here.
 * NOTE(review): the return statements and some branch conditions are not
 * visible in this view.
 */
593 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
594 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
595 void (*copy_block)(uint8_t *, uint8_t *),
596 void (*xor_block)(uint8_t *, uint8_t *))
599 CK_AES_GCM_PARAMS *gcm_param;
602 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
604 if ((rv = gcm_validate_args(gcm_param)) != 0) {
/* ulTagBits is in bits; shifting right by 3 stores the length in bytes. */
608 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
609 gcm_ctx->gcm_tag_len >>= 3;
610 gcm_ctx->gcm_processed_data_len = 0;
612 /* these values are in bits */
613 gcm_ctx->gcm_len_a_len_c[0]
614 = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
617 gcm_ctx->gcm_flags |= GCM_MODE;
619 rv = CRYPTO_MECHANISM_PARAM_INVALID;
/* Any non-zero result from gcm_init() is reported as an invalid parameter. */
623 if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
624 gcm_param->pAAD, gcm_param->ulAADLen, block_size,
625 encrypt_block, copy_block, xor_block) != 0) {
626 rv = CRYPTO_MECHANISM_PARAM_INVALID;
/*
 * gmac_init_ctx(): initialise gcm_ctx from a CK_AES_GMAC_PARAMS structure
 * passed as the opaque `param` pointer.
 *
 * Unlike gcm_init_ctx(), the tag length and IV length are not taken from
 * the parameters: the tag length is CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS)
 * and the IV length passed to gcm_init() is AES_GMAC_IV_LEN. GMAC_MODE is
 * set in gcm_flags.
 * NOTE(review): the return statements and some branch conditions are not
 * visible in this view.
 */
633 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
634 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
635 void (*copy_block)(uint8_t *, uint8_t *),
636 void (*xor_block)(uint8_t *, uint8_t *))
639 CK_AES_GMAC_PARAMS *gmac_param;
642 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
644 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
645 gcm_ctx->gcm_processed_data_len = 0;
647 /* these values are in bits */
648 gcm_ctx->gcm_len_a_len_c[0]
649 = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
652 gcm_ctx->gcm_flags |= GMAC_MODE;
654 rv = CRYPTO_MECHANISM_PARAM_INVALID;
/* Any non-zero result from gcm_init() is reported as an invalid parameter. */
658 if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
659 gmac_param->pAAD, gmac_param->ulAADLen, block_size,
660 encrypt_block, copy_block, xor_block) != 0) {
661 rv = CRYPTO_MECHANISM_PARAM_INVALID;
/*
 * gcm_alloc_ctx(): allocate a zero-filled gcm_ctx_t with kmem_zalloc()
 * using the caller's kmflag, and mark it GCM_MODE.
 * NOTE(review): the return type and return statements (including the one
 * taken when the allocation yields NULL) are not visible in this view.
 */
668 gcm_alloc_ctx(int kmflag)
672 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
/* Plain assignment: the freshly zeroed flags word becomes exactly GCM_MODE. */
675 gcm_ctx->gcm_flags = GCM_MODE;
/*
 * gmac_alloc_ctx(): allocate a zero-filled gcm_ctx_t with kmem_zalloc()
 * using the caller's kmflag, and mark it GMAC_MODE.
 * NOTE(review): the return type and return statements (including the one
 * taken when the allocation yields NULL) are not visible in this view.
 */
680 gmac_alloc_ctx(int kmflag)
684 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
/* Plain assignment: the freshly zeroed flags word becomes exactly GMAC_MODE. */
687 gcm_ctx->gcm_flags = GMAC_MODE;
/*
 * gcm_set_kmflag(): record the allocation flag in the context. The stored
 * value is later passed to vmem_alloc() when the decrypt path grows
 * ctx->gcm_pt_buf.
 */
692 gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
694 ctx->gcm_kmflag = kmflag;
/*
 * Bit 1 mask; intel_pclmulqdq_instruction_present() tests it against the
 * ecx value returned by its second feature query.
 */
700 #define INTEL_PCLMULQDQ_FLAG (1 << 1)
703 * Return 1 if executing on Intel with PCLMULQDQ instructions,
704 * otherwise 0 (i.e., Intel without PCLMULQDQ or AMD64).
705 * Cache the result, as the CPU can't change.
707 * Note: this version calls neither getisax() nor is_x86_featureset(); it
708 * reads the vendor string and feature bits directly with inline assembly.
/*
 * intel_pclmulqdq_instruction_present(): report whether the PCLMULQDQ
 * multiply path may be used.
 *
 * The answer is computed on the first call and kept in a function-local
 * static; later calls return the stored value. The visible code runs an
 * inline-asm query, requires the vendor registers to read "GenuineIntel",
 * runs a second query, and sets the result from INTEL_PCLMULQDQ_FLAG in
 * ecx.
 * NOTE(review): the asm mnemonic, the values assigned to func/subfunc and
 * the non-Intel branch are not visible in this view. The static is
 * updated without any locking visible here.
 */
711 intel_pclmulqdq_instruction_present(void)
/* -1 means "not determined yet". */
713 static int cached_result = -1;
714 unsigned eax, ebx, ecx, edx;
715 unsigned func, subfunc;
717 if (cached_result == -1) { /* first time */
718 /* check for an intel cpu */
/* func goes in via eax and subfunc via ecx; all four registers come back. */
722 __asm__ __volatile__(
724 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
725 : "a"(func), "c"(subfunc));
/* ebx, edx, ecx (in that order) must spell "GenuineIntel". */
727 if (memcmp((char *)(&ebx), "Genu", 4) == 0 &&
728 memcmp((char *)(&edx), "ineI", 4) == 0 &&
729 memcmp((char *)(&ecx), "ntel", 4) == 0) {
733 /* check for the PCLMULQDQ feature flag (INTEL_PCLMULQDQ_FLAG) in ecx */
734 __asm__ __volatile__(
736 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
737 : "a"(func), "c"(subfunc));
/* Normalise the masked bit to 0 or 1 before caching it. */
739 cached_result = !!(ecx & INTEL_PCLMULQDQ_FLAG);
745 return (cached_result);