1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/zfs_context.h>
26 #include <modes/modes.h>
27 #include <sys/crypto/common.h>
28 #include <sys/crypto/icp.h>
29 #include <sys/crypto/impl.h>
30 #include <sys/byteorder.h>
31 #include <sys/simd.h>
32 #include <modes/gcm_impl.h>
33 #ifdef CAN_USE_GCM_ASM
34 #include <aes/aes_impl.h>
35 #endif
36
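/*
 * GHASH(c, d, t, o): XOR the 16-byte block d into the running hash
 * c->gcm_ghash, then multiply the result by the hash subkey c->gcm_H in
 * GF(2^128) using implementation o, storing the product in t (normally
 * the hash itself).
 */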
37 #define GHASH(c, d, t, o) \
38 xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
39 (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
40 (uint64_t *)(void *)(t));
41
42 /* Select GCM implementation */
43 #define IMPL_FASTEST (UINT32_MAX)
44 #define IMPL_CYCLE (UINT32_MAX-1)
45 #ifdef CAN_USE_GCM_ASM
46 #define IMPL_AVX (UINT32_MAX-2)
47 #endif
48 #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
49 static uint32_t icp_gcm_impl = IMPL_FASTEST;
50 static uint32_t user_sel_impl = IMPL_FASTEST;
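/*
 * icp_gcm_impl is read on every operation and may be changed at runtime
 * through the icp_gcm_impl module parameter, so it is always read through
 * GCM_IMPL_READ() (a volatile access). user_sel_impl holds a selection made
 * before gcm_impl_init() has run; init applies it to icp_gcm_impl.
 */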
51
52 #ifdef CAN_USE_GCM_ASM
53 /* Does the architecture we run on support the MOVBE instruction? */
54 boolean_t gcm_avx_can_use_movbe = B_FALSE;
55 /*
56 * Whether to use the optimized openssl gcm and ghash implementations.
57 * Set to true if module parameter icp_gcm_impl == "avx".
58 */
59 static boolean_t gcm_use_avx = B_FALSE;
60 #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx)
61
62 extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);
63
64 static inline boolean_t gcm_avx_will_work(void);
65 static inline void gcm_set_avx(boolean_t);
66 static inline boolean_t gcm_toggle_avx(void);
67 static inline size_t gcm_simd_get_htab_size(boolean_t);
68
69 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
70 crypto_data_t *, size_t);
71
72 static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
73 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
74 static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
75 size_t, size_t);
76 #endif /* ifdef CAN_USE_GCM_ASM */
77
78 /*
79 * Encrypt multiple blocks of data in GCM mode. Decryption for GCM
80 * mode is done in another function.
81 */
82 int
83 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
84 crypto_data_t *out, size_t block_size,
85 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
86 void (*copy_block)(uint8_t *, uint8_t *),
87 void (*xor_block)(uint8_t *, uint8_t *))
88 {
89 #ifdef CAN_USE_GCM_ASM
90 if (ctx->gcm_use_avx == B_TRUE)
91 return (gcm_mode_encrypt_contiguous_blocks_avx(
92 ctx, data, length, out, block_size));
93 #endif
94
95 const gcm_impl_ops_t *gops;
96 size_t remainder = length;
97 size_t need = 0;
98 uint8_t *datap = (uint8_t *)data;
99 uint8_t *blockp;
100 uint8_t *lastp;
101 void *iov_or_mp;
102 offset_t offset;
103 uint8_t *out_data_1;
104 uint8_t *out_data_2;
105 size_t out_data_1_len;
106 uint64_t counter;
107 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
108
109 if (length + ctx->gcm_remainder_len < block_size) {
110 /* accumulate bytes here and return */
111 memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
112 datap,
113 length);
114 ctx->gcm_remainder_len += length;
115 if (ctx->gcm_copy_to == NULL) {
116 ctx->gcm_copy_to = datap;
117 }
118 return (CRYPTO_SUCCESS);
119 }
120
121 crypto_init_ptrs(out, &iov_or_mp, &offset);
122
123 gops = gcm_impl_get_ops();
124 do {
125 /* Unprocessed data from last call. */
126 if (ctx->gcm_remainder_len > 0) {
127 need = block_size - ctx->gcm_remainder_len;
128
129 if (need > remainder)
130 return (CRYPTO_DATA_LEN_RANGE);
131
132 memcpy(&((uint8_t *)ctx->gcm_remainder)
133 [ctx->gcm_remainder_len], datap, need);
134
135 blockp = (uint8_t *)ctx->gcm_remainder;
136 } else {
137 blockp = datap;
138 }
139
140 /*
141 * Increment counter. Counter bits are confined
142 * to the bottom 32 bits of the counter block.
143 */
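/*
 * The counter block is stored big-endian, so the ntohll()/htonll()
 * conversions make the 32-bit wrap-around increment work regardless of
 * host byte order.
 */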
144 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
145 counter = htonll(counter + 1);
146 counter &= counter_mask;
147 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
148
149 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
150 (uint8_t *)ctx->gcm_tmp);
151 xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
152
153 lastp = (uint8_t *)ctx->gcm_tmp;
154
155 ctx->gcm_processed_data_len += block_size;
156
157 crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
158 &out_data_1_len, &out_data_2, block_size);
159
160 /* copy block to where it belongs */
161 if (out_data_1_len == block_size) {
162 copy_block(lastp, out_data_1);
163 } else {
164 memcpy(out_data_1, lastp, out_data_1_len);
165 if (out_data_2 != NULL) {
166 memcpy(out_data_2,
167 lastp + out_data_1_len,
168 block_size - out_data_1_len);
169 }
170 }
171 /* update offset */
172 out->cd_offset += block_size;
173
174 /* add ciphertext to the hash */
175 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
176
177 /* Update pointer to next block of data to be processed. */
178 if (ctx->gcm_remainder_len != 0) {
179 datap += need;
180 ctx->gcm_remainder_len = 0;
181 } else {
182 datap += block_size;
183 }
184
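/* Bytes left to process: from the current position to the end of data. */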
185 remainder = (size_t)&data[length] - (size_t)datap;
186
187 /* Incomplete last block. */
188 if (remainder > 0 && remainder < block_size) {
189 memcpy(ctx->gcm_remainder, datap, remainder);
190 ctx->gcm_remainder_len = remainder;
191 ctx->gcm_copy_to = datap;
192 goto out;
193 }
194 ctx->gcm_copy_to = NULL;
195
196 } while (remainder > 0);
197 out:
198 return (CRYPTO_SUCCESS);
199 }
200
201 int
202 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
203 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
204 void (*copy_block)(uint8_t *, uint8_t *),
205 void (*xor_block)(uint8_t *, uint8_t *))
206 {
207 (void) copy_block;
208 #ifdef CAN_USE_GCM_ASM
209 if (ctx->gcm_use_avx == B_TRUE)
210 return (gcm_encrypt_final_avx(ctx, out, block_size));
211 #endif
212
213 const gcm_impl_ops_t *gops;
214 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
215 uint8_t *ghash, *macp = NULL;
216 int i, rv;
217
218 if (out->cd_length <
219 (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
220 return (CRYPTO_DATA_LEN_RANGE);
221 }
222
223 gops = gcm_impl_get_ops();
224 ghash = (uint8_t *)ctx->gcm_ghash;
225
226 if (ctx->gcm_remainder_len > 0) {
227 uint64_t counter;
228 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
229
230 /*
231 * Here is where we deal with data that is not a
232 * multiple of the block size.
233 */
234
235 /*
236 * Increment counter.
237 */
238 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
239 counter = htonll(counter + 1);
240 counter &= counter_mask;
241 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
242
243 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
244 (uint8_t *)ctx->gcm_tmp);
245
246 macp = (uint8_t *)ctx->gcm_remainder;
247 memset(macp + ctx->gcm_remainder_len, 0,
248 block_size - ctx->gcm_remainder_len);
249
250 /* XOR with counter block */
251 for (i = 0; i < ctx->gcm_remainder_len; i++) {
252 macp[i] ^= tmpp[i];
253 }
254
255 /* add ciphertext to the hash */
256 GHASH(ctx, macp, ghash, gops);
257
258 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
259 }
260
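/*
 * Compute the tag: hash in the bit lengths of AAD and ciphertext,
 * encrypt the saved J0 block in place and XOR it into the hash. The
 * first gcm_tag_len bytes of gcm_ghash are the authentication tag.
 */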
261 ctx->gcm_len_a_len_c[1] =
262 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
263 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
264 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
265 (uint8_t *)ctx->gcm_J0);
266 xor_block((uint8_t *)ctx->gcm_J0, ghash);
267
268 if (ctx->gcm_remainder_len > 0) {
269 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
270 if (rv != CRYPTO_SUCCESS)
271 return (rv);
272 }
273 out->cd_offset += ctx->gcm_remainder_len;
274 ctx->gcm_remainder_len = 0;
275 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
276 if (rv != CRYPTO_SUCCESS)
277 return (rv);
278 out->cd_offset += ctx->gcm_tag_len;
279
280 return (CRYPTO_SUCCESS);
281 }
282
283 /*
284 * This only deals with decrypting the last block of the input, which
285 * may be shorter than the block length.
286 */
287 static void
288 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
289 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
290 void (*xor_block)(uint8_t *, uint8_t *))
291 {
292 uint8_t *datap, *outp, *counterp;
293 uint64_t counter;
294 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
295 int i;
296
297 /*
298 * Increment counter.
299 * Counter bits are confined to the bottom 32 bits
300 */
301 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
302 counter = htonll(counter + 1);
303 counter &= counter_mask;
304 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
305
306 datap = (uint8_t *)ctx->gcm_remainder;
307 outp = &((ctx->gcm_pt_buf)[index]);
308 counterp = (uint8_t *)ctx->gcm_tmp;
309
310 /* zero pad the remaining ciphertext for the authentication hash */
311 memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
312 memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);
313
314 /* add ciphertext to the hash */
315 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
316
317 /* decrypt remaining ciphertext */
318 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
319
320 /* XOR with counter block */
321 for (i = 0; i < ctx->gcm_remainder_len; i++) {
322 outp[i] = datap[i] ^ counterp[i];
323 }
324 }
325
326 int
327 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
328 crypto_data_t *out, size_t block_size,
329 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
330 void (*copy_block)(uint8_t *, uint8_t *),
331 void (*xor_block)(uint8_t *, uint8_t *))
332 {
333 (void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
334 (void) xor_block;
335 size_t new_len;
336 uint8_t *new;
337
338 /*
339 * Copy contiguous ciphertext input blocks to plaintext buffer.
340 * Ciphertext will be decrypted in the final.
341 */
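/*
 * The tag trails the ciphertext, so where the ciphertext ends is only
 * known once all input has been seen; decryption and tag verification
 * are therefore deferred to gcm_decrypt_final().
 */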
342 if (length > 0) {
343 new_len = ctx->gcm_pt_buf_len + length;
344 new = vmem_alloc(new_len, KM_SLEEP);
345 if (new == NULL) {
346 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
347 ctx->gcm_pt_buf = NULL;
348 return (CRYPTO_HOST_MEMORY);
349 }
350
351 if (ctx->gcm_pt_buf != NULL) {
352 memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
353 vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
354 } else {
355 ASSERT0(ctx->gcm_pt_buf_len);
356 }
357
358 ctx->gcm_pt_buf = new;
359 ctx->gcm_pt_buf_len = new_len;
360 memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
361 length);
362 ctx->gcm_processed_data_len += length;
363 }
364
365 ctx->gcm_remainder_len = 0;
366 return (CRYPTO_SUCCESS);
367 }
368
369 int
370 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
371 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
372 void (*xor_block)(uint8_t *, uint8_t *))
373 {
374 #ifdef CAN_USE_GCM_ASM
375 if (ctx->gcm_use_avx == B_TRUE)
376 return (gcm_decrypt_final_avx(ctx, out, block_size));
377 #endif
378
379 const gcm_impl_ops_t *gops;
380 size_t pt_len;
381 size_t remainder;
382 uint8_t *ghash;
383 uint8_t *blockp;
384 uint8_t *cbp;
385 uint64_t counter;
386 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
387 int processed = 0, rv;
388
389 ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
390
391 gops = gcm_impl_get_ops();
392 pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
393 ghash = (uint8_t *)ctx->gcm_ghash;
394 blockp = ctx->gcm_pt_buf;
395 remainder = pt_len;
396 while (remainder > 0) {
397 /* Incomplete last block */
398 if (remainder < block_size) {
399 memcpy(ctx->gcm_remainder, blockp, remainder);
400 ctx->gcm_remainder_len = remainder;
401 /*
402 * Not expecting any more ciphertext; just
403 * compute plaintext for the remaining input.
404 */
405 gcm_decrypt_incomplete_block(ctx, block_size,
406 processed, encrypt_block, xor_block);
407 ctx->gcm_remainder_len = 0;
408 goto out;
409 }
410 /* add ciphertext to the hash */
411 GHASH(ctx, blockp, ghash, gops);
412
413 /*
414 * Increment counter.
415 * Counter bits are confined to the bottom 32 bits
416 */
417 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
418 counter = htonll(counter + 1);
419 counter &= counter_mask;
420 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
421
422 cbp = (uint8_t *)ctx->gcm_tmp;
423 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
424
425 /* XOR with ciphertext */
426 xor_block(cbp, blockp);
427
428 processed += block_size;
429 blockp += block_size;
430 remainder -= block_size;
431 }
432 out:
433 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
434 GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
435 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
436 (uint8_t *)ctx->gcm_J0);
437 xor_block((uint8_t *)ctx->gcm_J0, ghash);
438
439 /* compare the input authentication tag with what we calculated */
440 if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
441 /* They don't match */
442 return (CRYPTO_INVALID_MAC);
443 } else {
444 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
445 if (rv != CRYPTO_SUCCESS)
446 return (rv);
447 out->cd_offset += pt_len;
448 }
449 return (CRYPTO_SUCCESS);
450 }
451
452 static int
453 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
454 {
455 size_t tag_len;
456
457 /*
458 * Check the length of the authentication tag (in bits).
459 */
460 tag_len = gcm_param->ulTagBits;
461 switch (tag_len) {
462 case 32:
463 case 64:
464 case 96:
465 case 104:
466 case 112:
467 case 120:
468 case 128:
469 break;
470 default:
471 return (CRYPTO_MECHANISM_PARAM_INVALID);
472 }
473
474 if (gcm_param->ulIvLen == 0)
475 return (CRYPTO_MECHANISM_PARAM_INVALID);
476
477 return (CRYPTO_SUCCESS);
478 }
479
480 static void
481 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
482 gcm_ctx_t *ctx, size_t block_size,
483 void (*copy_block)(uint8_t *, uint8_t *),
484 void (*xor_block)(uint8_t *, uint8_t *))
485 {
486 const gcm_impl_ops_t *gops;
487 uint8_t *cb;
488 ulong_t remainder = iv_len;
489 ulong_t processed = 0;
490 uint8_t *datap, *ghash;
491 uint64_t len_a_len_c[2];
492
493 gops = gcm_impl_get_ops();
494 ghash = (uint8_t *)ctx->gcm_ghash;
495 cb = (uint8_t *)ctx->gcm_cb;
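/*
 * Derive the pre-counter block J0 (NIST SP 800-38D): for a 96-bit IV,
 * J0 = IV || 0^31 || 1; for any other IV length, J0 is the GHASH of the
 * zero-padded IV followed by a block encoding the IV length in bits.
 */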
496 if (iv_len == 12) {
497 memcpy(cb, iv, 12);
498 cb[12] = 0;
499 cb[13] = 0;
500 cb[14] = 0;
501 cb[15] = 1;
502 /* J0 will be used again in the final */
503 copy_block(cb, (uint8_t *)ctx->gcm_J0);
504 } else {
505 /* GHASH the IV */
506 do {
507 if (remainder < block_size) {
508 memset(cb, 0, block_size);
509 memcpy(cb, &(iv[processed]), remainder);
510 datap = (uint8_t *)cb;
511 remainder = 0;
512 } else {
513 datap = (uint8_t *)(&(iv[processed]));
514 processed += block_size;
515 remainder -= block_size;
516 }
517 GHASH(ctx, datap, ghash, gops);
518 } while (remainder > 0);
519
520 len_a_len_c[0] = 0;
521 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
522 GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
523
524 /* J0 will be used again in the final */
525 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
526 }
527 }
528
529 static int
530 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
531 unsigned char *auth_data, size_t auth_data_len, size_t block_size,
532 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
533 void (*copy_block)(uint8_t *, uint8_t *),
534 void (*xor_block)(uint8_t *, uint8_t *))
535 {
536 const gcm_impl_ops_t *gops;
537 uint8_t *ghash, *datap, *authp;
538 size_t remainder, processed;
539
540 /* encrypt zero block to get subkey H */
541 memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
542 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
543 (uint8_t *)ctx->gcm_H);
544
545 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
546 copy_block, xor_block);
547
548 gops = gcm_impl_get_ops();
549 authp = (uint8_t *)ctx->gcm_tmp;
550 ghash = (uint8_t *)ctx->gcm_ghash;
551 memset(authp, 0, block_size);
552 memset(ghash, 0, block_size);
553
554 processed = 0;
555 remainder = auth_data_len;
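/* GHASH the AAD one block at a time, zero padding a trailing partial block. */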
556 do {
557 if (remainder < block_size) {
558 /*
559 * There's less than a full block of data; pad the
560 * rest of the buffer with zeros.
561 */
562
563 if (auth_data != NULL) {
564 memset(authp, 0, block_size);
565 memcpy(authp, &(auth_data[processed]),
566 remainder);
567 } else {
568 ASSERT0(remainder);
569 }
570
571 datap = (uint8_t *)authp;
572 remainder = 0;
573 } else {
574 datap = (uint8_t *)(&(auth_data[processed]));
575 processed += block_size;
576 remainder -= block_size;
577 }
578
579 /* add auth data to the hash */
580 GHASH(ctx, datap, ghash, gops);
581
582 } while (remainder > 0);
583
584 return (CRYPTO_SUCCESS);
585 }
586
587 /*
588 * The following function is called at encrypt or decrypt init time
589 * for AES GCM mode.
590 *
591 * Init the GCM context struct. Handle the cycle and avx implementations here.
592 */
593 int
594 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
595 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
596 void (*copy_block)(uint8_t *, uint8_t *),
597 void (*xor_block)(uint8_t *, uint8_t *))
598 {
599 int rv;
600 CK_AES_GCM_PARAMS *gcm_param;
601
602 if (param != NULL) {
603 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
604
605 if ((rv = gcm_validate_args(gcm_param)) != 0) {
606 return (rv);
607 }
608
609 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
610 gcm_ctx->gcm_tag_len >>= 3;
611 gcm_ctx->gcm_processed_data_len = 0;
612
613 /* these values are in bits */
614 gcm_ctx->gcm_len_a_len_c[0]
615 = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
616
617 rv = CRYPTO_SUCCESS;
618 gcm_ctx->gcm_flags |= GCM_MODE;
619 } else {
620 return (CRYPTO_MECHANISM_PARAM_INVALID);
621 }
622
623 #ifdef CAN_USE_GCM_ASM
624 if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
625 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
626 } else {
627 /*
628 * Handle the "cycle" implementation by creating avx and
629 * non-avx contexts alternately.
630 */
631 gcm_ctx->gcm_use_avx = gcm_toggle_avx();
632 /*
633 * We don't handle byte swapped key schedules in the avx
634 * code path.
635 */
636 aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
637 if (ks->ops->needs_byteswap == B_TRUE) {
638 gcm_ctx->gcm_use_avx = B_FALSE;
639 }
640 /* Use the MOVBE and the BSWAP variants alternately. */
641 if (gcm_ctx->gcm_use_avx == B_TRUE &&
642 zfs_movbe_available() == B_TRUE) {
643 (void) atomic_toggle_boolean_nv(
644 (volatile boolean_t *)&gcm_avx_can_use_movbe);
645 }
646 }
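/*
 * The avx/non-avx decision is latched into the context here; changing
 * the module parameter later does not affect contexts already set up.
 */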
647 /* Allocate Htab memory as needed. */
648 if (gcm_ctx->gcm_use_avx == B_TRUE) {
649 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
650
651 if (htab_len == 0) {
652 return (CRYPTO_MECHANISM_PARAM_INVALID);
653 }
654 gcm_ctx->gcm_htab_len = htab_len;
655 gcm_ctx->gcm_Htable =
656 kmem_alloc(htab_len, KM_SLEEP);
657
658 if (gcm_ctx->gcm_Htable == NULL) {
659 return (CRYPTO_HOST_MEMORY);
660 }
661 }
662 /* Avx and non-avx context initialization differs from here on. */
663 if (gcm_ctx->gcm_use_avx == B_FALSE) {
664 #endif /* ifdef CAN_USE_GCM_ASM */
665 if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
666 gcm_param->pAAD, gcm_param->ulAADLen, block_size,
667 encrypt_block, copy_block, xor_block) != 0) {
668 rv = CRYPTO_MECHANISM_PARAM_INVALID;
669 }
670 #ifdef CAN_USE_GCM_ASM
671 } else {
672 if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
673 gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
674 rv = CRYPTO_MECHANISM_PARAM_INVALID;
675 }
676 }
677 #endif /* ifdef CAN_USE_GCM_ASM */
678
679 return (rv);
680 }
681
682 int
683 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
684 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
685 void (*copy_block)(uint8_t *, uint8_t *),
686 void (*xor_block)(uint8_t *, uint8_t *))
687 {
688 int rv;
689 CK_AES_GMAC_PARAMS *gmac_param;
690
691 if (param != NULL) {
692 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
693
694 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
695 gcm_ctx->gcm_processed_data_len = 0;
696
697 /* these values are in bits */
698 gcm_ctx->gcm_len_a_len_c[0]
699 = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
700
701 rv = CRYPTO_SUCCESS;
702 gcm_ctx->gcm_flags |= GMAC_MODE;
703 } else {
704 return (CRYPTO_MECHANISM_PARAM_INVALID);
705 }
706
707 #ifdef CAN_USE_GCM_ASM
708 /*
709 * Handle the "cycle" implementation by creating avx and non-avx
710 * contexts alternately.
711 */
712 if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
713 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
714 } else {
715 gcm_ctx->gcm_use_avx = gcm_toggle_avx();
716 }
717 /* We don't handle byte swapped key schedules in the avx code path. */
718 aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
719 if (ks->ops->needs_byteswap == B_TRUE) {
720 gcm_ctx->gcm_use_avx = B_FALSE;
721 }
722 /* Allocate Htab memory as needed. */
723 if (gcm_ctx->gcm_use_avx == B_TRUE) {
724 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
725
726 if (htab_len == 0) {
727 return (CRYPTO_MECHANISM_PARAM_INVALID);
728 }
729 gcm_ctx->gcm_htab_len = htab_len;
730 gcm_ctx->gcm_Htable =
731 kmem_alloc(htab_len, KM_SLEEP);
732
733 if (gcm_ctx->gcm_Htable == NULL) {
734 return (CRYPTO_HOST_MEMORY);
735 }
736 }
737
738 /* Avx and non-avx context initialization differs from here on. */
739 if (gcm_ctx->gcm_use_avx == B_FALSE) {
740 #endif /* ifdef CAN_USE_GCM_ASM */
741 if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
742 gmac_param->pAAD, gmac_param->ulAADLen, block_size,
743 encrypt_block, copy_block, xor_block) != 0) {
744 rv = CRYPTO_MECHANISM_PARAM_INVALID;
745 }
746 #ifdef CAN_USE_GCM_ASM
747 } else {
748 if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
749 gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
750 rv = CRYPTO_MECHANISM_PARAM_INVALID;
751 }
752 }
753 #endif /* ifdef CAN_USE_GCM_ASM */
754
755 return (rv);
756 }
757
758 void *
759 gcm_alloc_ctx(int kmflag)
760 {
761 gcm_ctx_t *gcm_ctx;
762
763 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
764 return (NULL);
765
766 gcm_ctx->gcm_flags = GCM_MODE;
767 return (gcm_ctx);
768 }
769
770 void *
771 gmac_alloc_ctx(int kmflag)
772 {
773 gcm_ctx_t *gcm_ctx;
774
775 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
776 return (NULL);
777
778 gcm_ctx->gcm_flags = GMAC_MODE;
779 return (gcm_ctx);
780 }
781
782 /* GCM implementation that contains the fastest methods */
783 static gcm_impl_ops_t gcm_fastest_impl = {
784 .name = "fastest"
785 };
786
787 /* All compiled in implementations */
788 static const gcm_impl_ops_t *gcm_all_impl[] = {
789 &gcm_generic_impl,
790 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
791 &gcm_pclmulqdq_impl,
792 #endif
793 };
794
795 /* Indicate that the implementation selection has been initialized */
796 static boolean_t gcm_impl_initialized = B_FALSE;
797
798 /* Hold all supported implementations */
799 static size_t gcm_supp_impl_cnt = 0;
800 static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
801
802 /*
803 * Returns the GCM operations for encrypt/decrypt/key setup. When a
804 * SIMD implementation is not allowed in the current context, fall
805 * back to the generic implementation.
806 */
807 const gcm_impl_ops_t *
808 gcm_impl_get_ops(void)
809 {
810 if (!kfpu_allowed())
811 return (&gcm_generic_impl);
812
813 const gcm_impl_ops_t *ops = NULL;
814 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
815
816 switch (impl) {
817 case IMPL_FASTEST:
818 ASSERT(gcm_impl_initialized);
819 ops = &gcm_fastest_impl;
820 break;
821 case IMPL_CYCLE:
822 /* Cycle through supported implementations */
823 ASSERT(gcm_impl_initialized);
824 ASSERT3U(gcm_supp_impl_cnt, >, 0);
825 static size_t cycle_impl_idx = 0;
826 size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
827 ops = gcm_supp_impl[idx];
828 break;
829 #ifdef CAN_USE_GCM_ASM
830 case IMPL_AVX:
831 /*
832 * Make sure that we return a valid implementation while
833 * switching to the avx implementation since there still
834 * may be unfinished non-avx contexts around.
835 */
836 ops = &gcm_generic_impl;
837 break;
838 #endif
839 default:
840 ASSERT3U(impl, <, gcm_supp_impl_cnt);
841 ASSERT3U(gcm_supp_impl_cnt, >, 0);
842 if (impl < ARRAY_SIZE(gcm_all_impl))
843 ops = gcm_supp_impl[impl];
844 break;
845 }
846
847 ASSERT3P(ops, !=, NULL);
848
849 return (ops);
850 }
851
852 /*
853 * Initialize all supported implementations.
854 */
855 void
856 gcm_impl_init(void)
857 {
858 gcm_impl_ops_t *curr_impl;
859 int i, c;
860
861 /* Move supported implementations into gcm_supp_impl */
862 for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
863 curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
864
865 if (curr_impl->is_supported())
866 gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
867 }
868 gcm_supp_impl_cnt = c;
869
870 /*
871 * Set the fastest implementation given the assumption that the
872 * hardware accelerated version is the fastest.
873 */
874 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
875 if (gcm_pclmulqdq_impl.is_supported()) {
876 memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
877 sizeof (gcm_fastest_impl));
878 } else
879 #endif
880 {
881 memcpy(&gcm_fastest_impl, &gcm_generic_impl,
882 sizeof (gcm_fastest_impl));
883 }
884
885 strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);
886
887 #ifdef CAN_USE_GCM_ASM
888 /*
889 * Use the avx implementation if it's available and the implementation
890 * hasn't changed from its default value of fastest on module load.
891 */
892 if (gcm_avx_will_work()) {
893 #ifdef HAVE_MOVBE
894 if (zfs_movbe_available() == B_TRUE) {
895 atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
896 }
897 #endif
898 if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
899 gcm_set_avx(B_TRUE);
900 }
901 }
902 #endif
903 /* Finish initialization */
904 atomic_swap_32(&icp_gcm_impl, user_sel_impl);
905 gcm_impl_initialized = B_TRUE;
906 }
907
908 static const struct {
909 const char *name;
910 uint32_t sel;
911 } gcm_impl_opts[] = {
912 { "cycle", IMPL_CYCLE },
913 { "fastest", IMPL_FASTEST },
914 #ifdef CAN_USE_GCM_ASM
915 { "avx", IMPL_AVX },
916 #endif
917 };
918
919 /*
920 * Set the desired gcm implementation.
921 *
922 * If we are called before init(), the user preference will be saved in
923 * user_sel_impl and applied in a later init() call. This occurs when the
924 * module parameter is specified on module load. Otherwise, icp_gcm_impl
925 * is updated directly.
926 *
927 * @val Name of the gcm implementation to use
928 * @return 0 on success, -EINVAL otherwise
929 */
930 int
931 gcm_impl_set(const char *val)
932 {
933 int err = -EINVAL;
934 char req_name[GCM_IMPL_NAME_MAX];
935 uint32_t impl = GCM_IMPL_READ(user_sel_impl);
936 size_t i;
937
938 /* sanitize input */
939 i = strnlen(val, GCM_IMPL_NAME_MAX);
940 if (i == 0 || i >= GCM_IMPL_NAME_MAX)
941 return (err);
942
943 strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
944 while (i > 0 && isspace(req_name[i-1]))
945 i--;
946 req_name[i] = '\0';
947
948 /* Check mandatory options */
949 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
950 #ifdef CAN_USE_GCM_ASM
951 /* Ignore avx implementation if it won't work. */
952 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
953 continue;
954 }
955 #endif
956 if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
957 impl = gcm_impl_opts[i].sel;
958 err = 0;
959 break;
960 }
961 }
962
963 /* check all supported impl if init() was already called */
964 if (err != 0 && gcm_impl_initialized) {
965 /* check all supported implementations */
966 for (i = 0; i < gcm_supp_impl_cnt; i++) {
967 if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
968 impl = i;
969 err = 0;
970 break;
971 }
972 }
973 }
974 #ifdef CAN_USE_GCM_ASM
975 /*
976 * Use the avx implementation if available and the requested one is
977 * avx or fastest.
978 */
979 if (gcm_avx_will_work() == B_TRUE &&
980 (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
981 gcm_set_avx(B_TRUE);
982 } else {
983 gcm_set_avx(B_FALSE);
984 }
985 #endif
986
987 if (err == 0) {
988 if (gcm_impl_initialized)
989 atomic_swap_32(&icp_gcm_impl, impl);
990 else
991 atomic_swap_32(&user_sel_impl, impl);
992 }
993
994 return (err);
995 }
996
997 #if defined(_KERNEL) && defined(__linux__)
998
999 static int
1000 icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
1001 {
1002 return (gcm_impl_set(val));
1003 }
1004
1005 static int
1006 icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
1007 {
1008 int i, cnt = 0;
1009 char *fmt;
1010 const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
1011
1012 ASSERT(gcm_impl_initialized);
1013
1014 /* list mandatory options */
1015 for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
1016 #ifdef CAN_USE_GCM_ASM
1017 /* Ignore avx implementation if it won't work. */
1018 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
1019 continue;
1020 }
1021 #endif
1022 fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
1023 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
1024 gcm_impl_opts[i].name);
1025 }
1026
1027 /* list all supported implementations */
1028 for (i = 0; i < gcm_supp_impl_cnt; i++) {
1029 fmt = (i == impl) ? "[%s] " : "%s ";
1030 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
1031 gcm_supp_impl[i]->name);
1032 }
1033
1034 return (cnt);
1035 }
1036
1037 module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
1038 NULL, 0644);
1039 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
1040 #endif /* defined(_KERNEL) && defined(__linux__) */
1041
1042 #ifdef CAN_USE_GCM_ASM
1043 #define GCM_BLOCK_LEN 16
1044 /*
1045 * The openssl asm routines are 6x aggregated and need that many bytes
1046 * at minimum.
1047 */
1048 #define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
1049 #define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
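/* That is 96 bytes for decrypt and 288 bytes for encrypt. */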
1050 /*
1051 * Ensure the chunk size is reasonable since we are allocating a buffer
1052 * of GCM_AVX_MAX_CHUNK_SIZE bytes and disabling preemption and interrupts.
1053 */
1054 #define GCM_AVX_MAX_CHUNK_SIZE \
1055 (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
1056
1057 /* Clear the FPU registers since they hold sensitive internal state. */
1058 #define clear_fpu_regs() clear_fpu_regs_avx()
1059 #define GHASH_AVX(ctx, in, len) \
1060 gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
1061 in, len)
1062
1063 #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
1064
1065 /* Get the chunk size module parameter. */
1066 #define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
1067
1068 /*
1069 * Module parameter: number of bytes to process at once while owning the FPU.
1070 * It is rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary
1071 * and ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
1072 */
1073 static uint32_t gcm_avx_chunk_size =
1074 ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
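/* With GCM_AVX_MIN_DECRYPT_BYTES == 96 this default evaluates to 32736. */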
1075
1076 extern void ASMABI clear_fpu_regs_avx(void);
1077 extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
1078 extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
1079 const uint32_t pt[4], uint32_t ct[4]);
1080
1081 extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
1082 extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
1083 const uint8_t *in, size_t len);
1084
1085 extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
1086 const void *, uint64_t *, uint64_t *);
1087
1088 extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
1089 const void *, uint64_t *, uint64_t *);
1090
1091 static inline boolean_t
1092 gcm_avx_will_work(void)
1093 {
1094 /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
1095 return (kfpu_allowed() &&
1096 zfs_avx_available() && zfs_aes_available() &&
1097 zfs_pclmulqdq_available());
1098 }
1099
1100 static inline void
1101 gcm_set_avx(boolean_t val)
1102 {
1103 if (gcm_avx_will_work() == B_TRUE) {
1104 atomic_swap_32(&gcm_use_avx, val);
1105 }
1106 }
1107
1108 static inline boolean_t
1109 gcm_toggle_avx(void)
1110 {
1111 if (gcm_avx_will_work() == B_TRUE) {
1112 return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
1113 } else {
1114 return (B_FALSE);
1115 }
1116 }
1117
1118 static inline size_t
1119 gcm_simd_get_htab_size(boolean_t simd_mode)
1120 {
1121 switch (simd_mode) {
1122 case B_TRUE:
1123 return (2 * 6 * 2 * sizeof (uint64_t));
1124
1125 default:
1126 return (0);
1127 }
1128 }
1129
1130 /*
1131 * Clear sensitive data in the context.
1132 *
1133 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
1134 * ctx->gcm_Htable contain the hash subkey which protects authentication.
1135 *
1136 * Although it is extremely unlikely that ctx->gcm_J0 and ctx->gcm_tmp could
1137 * be used for a known plaintext attack, they consist of the IV and the first
1138 * and last counter block respectively; whether they should be cleared is debatable.
1139 */
1140 static inline void
1141 gcm_clear_ctx(gcm_ctx_t *ctx)
1142 {
1143 memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder));
1144 memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
1145 memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0));
1146 memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp));
1147 }
1148
1149 /* Increment the GCM counter block by n. */
1150 static inline void
1151 gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
1152 {
1153 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
1154 uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);
1155
1156 counter = htonll(counter + n);
1157 counter &= counter_mask;
1158 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
1159 }
1160
1161 /*
1162 * Encrypt multiple blocks of data in GCM mode.
1163 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
1164 * if possible. While processing a chunk the FPU is "locked".
1165 */
1166 static int
1167 gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
1168 size_t length, crypto_data_t *out, size_t block_size)
1169 {
1170 size_t bleft = length;
1171 size_t need = 0;
1172 size_t done = 0;
1173 uint8_t *datap = (uint8_t *)data;
1174 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1175 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1176 uint64_t *ghash = ctx->gcm_ghash;
1177 uint64_t *cb = ctx->gcm_cb;
1178 uint8_t *ct_buf = NULL;
1179 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1180 int rv = CRYPTO_SUCCESS;
1181
1182 ASSERT(block_size == GCM_BLOCK_LEN);
1183 /*
1184 * If the last call left an incomplete block, try to fill
1185 * it first.
1186 */
1187 if (ctx->gcm_remainder_len > 0) {
1188 need = block_size - ctx->gcm_remainder_len;
1189 if (length < need) {
1190 /* Accumulate bytes here and return. */
1191 memcpy((uint8_t *)ctx->gcm_remainder +
1192 ctx->gcm_remainder_len, datap, length);
1193
1194 ctx->gcm_remainder_len += length;
1195 if (ctx->gcm_copy_to == NULL) {
1196 ctx->gcm_copy_to = datap;
1197 }
1198 return (CRYPTO_SUCCESS);
1199 } else {
1200 /* Complete incomplete block. */
1201 memcpy((uint8_t *)ctx->gcm_remainder +
1202 ctx->gcm_remainder_len, datap, need);
1203
1204 ctx->gcm_copy_to = NULL;
1205 }
1206 }
1207
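/*
 * The bulk asm routine encrypts into a single contiguous buffer; its
 * output is then copied into the (possibly non-contiguous) crypto_data_t.
 */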
1208 /* Allocate a buffer to encrypt to if there is enough input. */
1209 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1210 ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
1211 if (ct_buf == NULL) {
1212 return (CRYPTO_HOST_MEMORY);
1213 }
1214 }
1215
1216 /* If we completed an incomplete block, encrypt and write it out. */
1217 if (ctx->gcm_remainder_len > 0) {
1218 kfpu_begin();
1219 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1220 (const uint32_t *)cb, (uint32_t *)tmp);
1221
1222 gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
1223 GHASH_AVX(ctx, tmp, block_size);
1224 clear_fpu_regs();
1225 kfpu_end();
1226 rv = crypto_put_output_data(tmp, out, block_size);
1227 out->cd_offset += block_size;
1228 gcm_incr_counter_block(ctx);
1229 ctx->gcm_processed_data_len += block_size;
1230 bleft -= need;
1231 datap += need;
1232 ctx->gcm_remainder_len = 0;
1233 }
1234
1235 /* Do the bulk encryption in chunk_size blocks. */
1236 for (; bleft >= chunk_size; bleft -= chunk_size) {
1237 kfpu_begin();
1238 done = aesni_gcm_encrypt(
1239 datap, ct_buf, chunk_size, key, cb, ghash);
1240
1241 clear_fpu_regs();
1242 kfpu_end();
1243 if (done != chunk_size) {
1244 rv = CRYPTO_FAILED;
1245 goto out_nofpu;
1246 }
1247 rv = crypto_put_output_data(ct_buf, out, chunk_size);
1248 if (rv != CRYPTO_SUCCESS) {
1249 goto out_nofpu;
1250 }
1251 out->cd_offset += chunk_size;
1252 datap += chunk_size;
1253 ctx->gcm_processed_data_len += chunk_size;
1254 }
1255 /* Check if we are already done. */
1256 if (bleft == 0) {
1257 goto out_nofpu;
1258 }
1259 /* Bulk encrypt the remaining data. */
1260 kfpu_begin();
1261 if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1262 done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
1263 if (done == 0) {
1264 rv = CRYPTO_FAILED;
1265 goto out;
1266 }
1267 rv = crypto_put_output_data(ct_buf, out, done);
1268 if (rv != CRYPTO_SUCCESS) {
1269 goto out;
1270 }
1271 out->cd_offset += done;
1272 ctx->gcm_processed_data_len += done;
1273 datap += done;
1274 bleft -= done;
1275
1276 }
1277 /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
1278 while (bleft > 0) {
1279 if (bleft < block_size) {
1280 memcpy(ctx->gcm_remainder, datap, bleft);
1281 ctx->gcm_remainder_len = bleft;
1282 ctx->gcm_copy_to = datap;
1283 goto out;
1284 }
1285 /* Encrypt, hash and write out. */
1286 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1287 (const uint32_t *)cb, (uint32_t *)tmp);
1288
1289 gcm_xor_avx(datap, tmp);
1290 GHASH_AVX(ctx, tmp, block_size);
1291 rv = crypto_put_output_data(tmp, out, block_size);
1292 if (rv != CRYPTO_SUCCESS) {
1293 goto out;
1294 }
1295 out->cd_offset += block_size;
1296 gcm_incr_counter_block(ctx);
1297 ctx->gcm_processed_data_len += block_size;
1298 datap += block_size;
1299 bleft -= block_size;
1300 }
1301 out:
1302 clear_fpu_regs();
1303 kfpu_end();
1304 out_nofpu:
1305 if (ct_buf != NULL) {
1306 vmem_free(ct_buf, chunk_size);
1307 }
1308 return (rv);
1309 }
1310
1311 /*
1312 * Finalize the encryption: zero-fill, encrypt, hash and write out any
1313 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
1314 */
1315 static int
1316 gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1317 {
1318 uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
1319 uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
1320 uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
1321 size_t rem_len = ctx->gcm_remainder_len;
1322 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1323 int aes_rounds = ((aes_key_t *)keysched)->nr;
1324 int rv;
1325
1326 ASSERT(block_size == GCM_BLOCK_LEN);
1327
1328 if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
1329 return (CRYPTO_DATA_LEN_RANGE);
1330 }
1331
1332 kfpu_begin();
1333 /* Pad last incomplete block with zeros, encrypt and hash. */
1334 if (rem_len > 0) {
1335 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1336 const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1337
1338 aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
1339 memset(remainder + rem_len, 0, block_size - rem_len);
1340 for (int i = 0; i < rem_len; i++) {
1341 remainder[i] ^= tmp[i];
1342 }
1343 GHASH_AVX(ctx, remainder, block_size);
1344 ctx->gcm_processed_data_len += rem_len;
1345 /* No need to increment counter_block, it's the last block. */
1346 }
1347 /* Finish tag. */
1348 ctx->gcm_len_a_len_c[1] =
1349 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
1350 GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
1351 aes_encrypt_intel(keysched, aes_rounds, J0, J0);
1352
1353 gcm_xor_avx((uint8_t *)J0, ghash);
1354 clear_fpu_regs();
1355 kfpu_end();
1356
1357 /* Output remainder. */
1358 if (rem_len > 0) {
1359 rv = crypto_put_output_data(remainder, out, rem_len);
1360 if (rv != CRYPTO_SUCCESS)
1361 return (rv);
1362 }
1363 out->cd_offset += rem_len;
1364 ctx->gcm_remainder_len = 0;
1365 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
1366 if (rv != CRYPTO_SUCCESS)
1367 return (rv);
1368
1369 out->cd_offset += ctx->gcm_tag_len;
1370 /* Clear sensitive data in the context before returning. */
1371 gcm_clear_ctx(ctx);
1372 return (CRYPTO_SUCCESS);
1373 }
1374
1375 /*
1376 * Finalize decryption: so far we have only accumulated ciphertext, so
1377 * now we decrypt it here in place.
1378 */
1379 static int
1380 gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1381 {
1382 ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
1383 ASSERT3U(block_size, ==, 16);
1384
1385 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1386 size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
1387 uint8_t *datap = ctx->gcm_pt_buf;
1388 const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1389 uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1390 uint64_t *ghash = ctx->gcm_ghash;
1391 uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
1392 int rv = CRYPTO_SUCCESS;
1393 size_t bleft, done;
1394
1395 /*
1396 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
1397 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
1398 * GCM_AVX_MIN_DECRYPT_BYTES.
1399 */
1400 for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
1401 kfpu_begin();
1402 done = aesni_gcm_decrypt(datap, datap, chunk_size,
1403 (const void *)key, ctx->gcm_cb, ghash);
1404 clear_fpu_regs();
1405 kfpu_end();
1406 if (done != chunk_size) {
1407 return (CRYPTO_FAILED);
1408 }
1409 datap += done;
1410 }
1411 /* Decrypt remainder, which is less than chunk size, in one go. */
1412 kfpu_begin();
1413 if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
1414 done = aesni_gcm_decrypt(datap, datap, bleft,
1415 (const void *)key, ctx->gcm_cb, ghash);
1416 if (done == 0) {
1417 clear_fpu_regs();
1418 kfpu_end();
1419 return (CRYPTO_FAILED);
1420 }
1421 datap += done;
1422 bleft -= done;
1423 }
1424 ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
1425
1426 /*
1427 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
1428 * decrypt them block by block.
1429 */
1430 while (bleft > 0) {
1431 /* Incomplete last block. */
1432 if (bleft < block_size) {
1433 uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
1434
1435 memset(lastb, 0, block_size);
1436 memcpy(lastb, datap, bleft);
1437 /* Hash the zero-padded last ciphertext block, then decrypt it. */
1438 GHASH_AVX(ctx, lastb, block_size);
1439 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1440 for (size_t i = 0; i < bleft; i++) {
1441 datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
1442 }
1443 break;
1444 }
1445 /* Hash the ciphertext block, then decrypt it in place. */
1446 GHASH_AVX(ctx, datap, block_size);
1447 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1448 gcm_xor_avx((uint8_t *)tmp, datap);
1449 gcm_incr_counter_block(ctx);
1450
1451 datap += block_size;
1452 bleft -= block_size;
1453 }
1454 if (rv != CRYPTO_SUCCESS) {
1455 clear_fpu_regs();
1456 kfpu_end();
1457 return (rv);
1458 }
1459 /* Decryption done, finish the tag. */
1460 ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
1461 GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
1462 aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
1463 (uint32_t *)ctx->gcm_J0);
1464
1465 gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
1466
1467 /* We are done with the FPU, restore its state. */
1468 clear_fpu_regs();
1469 kfpu_end();
1470
1471 /* Compare the input authentication tag with what we calculated. */
1472 if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
1473 /* They don't match. */
1474 return (CRYPTO_INVALID_MAC);
1475 }
1476 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
1477 if (rv != CRYPTO_SUCCESS) {
1478 return (rv);
1479 }
1480 out->cd_offset += pt_len;
1481 gcm_clear_ctx(ctx);
1482 return (CRYPTO_SUCCESS);
1483 }
1484
1485 /*
1486 * Initialize the GCM params H, Htable and the counter block. Save the
1487 * initial counter block.
1488 */
1489 static int
1490 gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
1491 unsigned char *auth_data, size_t auth_data_len, size_t block_size)
1492 {
1493 uint8_t *cb = (uint8_t *)ctx->gcm_cb;
1494 uint64_t *H = ctx->gcm_H;
1495 const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1496 int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
1497 uint8_t *datap = auth_data;
1498 size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1499 size_t bleft;
1500
1501 ASSERT(block_size == GCM_BLOCK_LEN);
1502
1503 /* Init H (encrypt zero block) and create the initial counter block. */
1504 memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
1505 memset(H, 0, sizeof (ctx->gcm_H));
1506 kfpu_begin();
1507 aes_encrypt_intel(keysched, aes_rounds,
1508 (const uint32_t *)H, (uint32_t *)H);
1509
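/* Precompute the key-dependent Htable used by the avx ghash routines. */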
1510 gcm_init_htab_avx(ctx->gcm_Htable, H);
1511
1512 if (iv_len == 12) {
1513 memcpy(cb, iv, 12);
1514 cb[12] = 0;
1515 cb[13] = 0;
1516 cb[14] = 0;
1517 cb[15] = 1;
1518 /* We need the ICB later. */
1519 memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
1520 } else {
1521 /*
1522 * Most consumers use 12-byte IVs, so it's OK to use the
1523 * original routines for other IV sizes; just avoid nesting
1524 * kfpu_begin calls.
1525 */
1526 clear_fpu_regs();
1527 kfpu_end();
1528 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
1529 aes_copy_block, aes_xor_block);
1530 kfpu_begin();
1531 }
1532
1533 /* OpenSSL post-increments the counter; adjust for that. */
1534 gcm_incr_counter_block(ctx);
1535
1536 /* Ghash AAD in chunk_size blocks. */
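/*
 * The FPU is dropped and re-acquired between chunks so that preemption
 * is not disabled for longer than one chunk takes to process.
 */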
1537 for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
1538 GHASH_AVX(ctx, datap, chunk_size);
1539 datap += chunk_size;
1540 clear_fpu_regs();
1541 kfpu_end();
1542 kfpu_begin();
1543 }
1544 /* Ghash the remainder and handle possible incomplete GCM block. */
1545 if (bleft > 0) {
1546 size_t incomp = bleft % block_size;
1547
1548 bleft -= incomp;
1549 if (bleft > 0) {
1550 GHASH_AVX(ctx, datap, bleft);
1551 datap += bleft;
1552 }
1553 if (incomp > 0) {
1554 /* Zero pad and hash incomplete last block. */
1555 uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
1556
1557 memset(authp, 0, block_size);
1558 memcpy(authp, datap, incomp);
1559 GHASH_AVX(ctx, authp, block_size);
1560 }
1561 }
1562 clear_fpu_regs();
1563 kfpu_end();
1564 return (CRYPTO_SUCCESS);
1565 }
1566
1567 #if defined(_KERNEL)
1568 static int
1569 icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
1570 {
1571 unsigned long val;
1572 char val_rounded[16];
1573 int error = 0;
1574
1575 error = kstrtoul(buf, 0, &val);
1576 if (error)
1577 return (error);
1578
1579 val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
1580
1581 if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
1582 return (-EINVAL);
1583
1584 snprintf(val_rounded, 16, "%u", (uint32_t)val);
1585 error = param_set_uint(val_rounded, kp);
1586 return (error);
1587 }
1588
1589 module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
1590 param_get_uint, &gcm_avx_chunk_size, 0644);
1591
1592 MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
1593 "How many bytes to process while owning the FPU");
1594
1595 #endif /* defined(_KERNEL) */
1596 #endif /* ifdef CAN_USE_GCM_ASM */