FreeBSD/Linux Kernel Cross Reference
sys/contrib/openzfs/module/icp/algs/modes/gcm.c


    1 /*
    2  * CDDL HEADER START
    3  *
    4  * The contents of this file are subject to the terms of the
    5  * Common Development and Distribution License (the "License").
    6  * You may not use this file except in compliance with the License.
    7  *
    8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
    9  * or https://opensource.org/licenses/CDDL-1.0.
   10  * See the License for the specific language governing permissions
   11  * and limitations under the License.
   12  *
   13  * When distributing Covered Code, include this CDDL HEADER in each
   14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
   15  * If applicable, add the following below this CDDL HEADER, with the
   16  * fields enclosed by brackets "[]" replaced with your own identifying
   17  * information: Portions Copyright [yyyy] [name of copyright owner]
   18  *
   19  * CDDL HEADER END
   20  */
   21 /*
   22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
   23  */
   24 
   25 #include <sys/zfs_context.h>
   26 #include <modes/modes.h>
   27 #include <sys/crypto/common.h>
   28 #include <sys/crypto/icp.h>
   29 #include <sys/crypto/impl.h>
   30 #include <sys/byteorder.h>
   31 #include <sys/simd.h>
   32 #include <modes/gcm_impl.h>
   33 #ifdef CAN_USE_GCM_ASM
   34 #include <aes/aes_impl.h>
   35 #endif
   36 
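       /*
        * GHASH(c, d, t, o): absorb one block (d) into the running hash of
        * context (c) by XOR-ing it into gcm_ghash and multiplying the result
        * by the hash subkey H in GF(2^128), i.e. Y = (Y ^ X) * H, using the
        * implementation selected by (o); the product is stored through (t).
        */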
   37 #define GHASH(c, d, t, o) \
   38         xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
   39         (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
   40         (uint64_t *)(void *)(t));
   41 
   42 /* Select GCM implementation */
   43 #define IMPL_FASTEST    (UINT32_MAX)
   44 #define IMPL_CYCLE      (UINT32_MAX-1)
   45 #ifdef CAN_USE_GCM_ASM
   46 #define IMPL_AVX        (UINT32_MAX-2)
   47 #endif
   48 #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
   49 static uint32_t icp_gcm_impl = IMPL_FASTEST;
   50 static uint32_t user_sel_impl = IMPL_FASTEST;
   51 
   52 #ifdef CAN_USE_GCM_ASM
   53 /* Does the architecture we run on support the MOVBE instruction? */
   54 boolean_t gcm_avx_can_use_movbe = B_FALSE;
   55 /*
   56  * Whether to use the optimized openssl gcm and ghash implementations.
   57  * Set to true if module parameter icp_gcm_impl == "avx".
   58  */
   59 static boolean_t gcm_use_avx = B_FALSE;
   60 #define GCM_IMPL_USE_AVX        (*(volatile boolean_t *)&gcm_use_avx)
   61 
   62 extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);
   63 
   64 static inline boolean_t gcm_avx_will_work(void);
   65 static inline void gcm_set_avx(boolean_t);
   66 static inline boolean_t gcm_toggle_avx(void);
   67 static inline size_t gcm_simd_get_htab_size(boolean_t);
   68 
   69 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
   70     crypto_data_t *, size_t);
   71 
   72 static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
   73 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
   74 static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
   75     size_t, size_t);
   76 #endif /* ifdef CAN_USE_GCM_ASM */
   77 
   78 /*
   79  * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
   80  * is done in another function.
   81  */
   82 int
   83 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
   84     crypto_data_t *out, size_t block_size,
   85     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
   86     void (*copy_block)(uint8_t *, uint8_t *),
   87     void (*xor_block)(uint8_t *, uint8_t *))
   88 {
   89 #ifdef CAN_USE_GCM_ASM
   90         if (ctx->gcm_use_avx == B_TRUE)
   91                 return (gcm_mode_encrypt_contiguous_blocks_avx(
   92                     ctx, data, length, out, block_size));
   93 #endif
   94 
   95         const gcm_impl_ops_t *gops;
   96         size_t remainder = length;
   97         size_t need = 0;
   98         uint8_t *datap = (uint8_t *)data;
   99         uint8_t *blockp;
  100         uint8_t *lastp;
  101         void *iov_or_mp;
  102         offset_t offset;
  103         uint8_t *out_data_1;
  104         uint8_t *out_data_2;
  105         size_t out_data_1_len;
  106         uint64_t counter;
  107         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
  108 
  109         if (length + ctx->gcm_remainder_len < block_size) {
  110                 /* accumulate bytes here and return */
  111                 memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
  112                     datap,
  113                     length);
  114                 ctx->gcm_remainder_len += length;
  115                 if (ctx->gcm_copy_to == NULL) {
  116                         ctx->gcm_copy_to = datap;
  117                 }
  118                 return (CRYPTO_SUCCESS);
  119         }
  120 
  121         crypto_init_ptrs(out, &iov_or_mp, &offset);
  122 
  123         gops = gcm_impl_get_ops();
  124         do {
  125                 /* Unprocessed data from last call. */
  126                 if (ctx->gcm_remainder_len > 0) {
  127                         need = block_size - ctx->gcm_remainder_len;
  128 
  129                         if (need > remainder)
  130                                 return (CRYPTO_DATA_LEN_RANGE);
  131 
  132                         memcpy(&((uint8_t *)ctx->gcm_remainder)
  133                             [ctx->gcm_remainder_len], datap, need);
  134 
  135                         blockp = (uint8_t *)ctx->gcm_remainder;
  136                 } else {
  137                         blockp = datap;
  138                 }
  139 
  140                 /*
  141                  * Increment counter. Counter bits are confined
  142                  * to the bottom 32 bits of the counter block.
  143                  */
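                       /*
                        * The counter block is stored big-endian: convert the
                        * masked 32-bit counter to host order, add one, convert
                        * back, and merge it into gcm_cb[1] (wrapping mod 2^32).
                        */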
  144                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
  145                 counter = htonll(counter + 1);
  146                 counter &= counter_mask;
  147                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
  148 
  149                 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
  150                     (uint8_t *)ctx->gcm_tmp);
  151                 xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
  152 
  153                 lastp = (uint8_t *)ctx->gcm_tmp;
  154 
  155                 ctx->gcm_processed_data_len += block_size;
  156 
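                       /*
                        * Locate where this block goes in the output; it may
                        * span two buffers (out_data_1/out_data_2).
                        */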
  157                 crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
  158                     &out_data_1_len, &out_data_2, block_size);
  159 
  160                 /* copy block to where it belongs */
  161                 if (out_data_1_len == block_size) {
  162                         copy_block(lastp, out_data_1);
  163                 } else {
  164                         memcpy(out_data_1, lastp, out_data_1_len);
  165                         if (out_data_2 != NULL) {
  166                                 memcpy(out_data_2,
  167                                     lastp + out_data_1_len,
  168                                     block_size - out_data_1_len);
  169                         }
  170                 }
  171                 /* update offset */
  172                 out->cd_offset += block_size;
  173 
  174                 /* add ciphertext to the hash */
  175                 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
  176 
  177                 /* Update pointer to next block of data to be processed. */
  178                 if (ctx->gcm_remainder_len != 0) {
  179                         datap += need;
  180                         ctx->gcm_remainder_len = 0;
  181                 } else {
  182                         datap += block_size;
  183                 }
  184 
  185                 remainder = (size_t)&data[length] - (size_t)datap;
  186 
  187                 /* Incomplete last block. */
  188                 if (remainder > 0 && remainder < block_size) {
  189                         memcpy(ctx->gcm_remainder, datap, remainder);
  190                         ctx->gcm_remainder_len = remainder;
  191                         ctx->gcm_copy_to = datap;
  192                         goto out;
  193                 }
  194                 ctx->gcm_copy_to = NULL;
  195 
  196         } while (remainder > 0);
  197 out:
  198         return (CRYPTO_SUCCESS);
  199 }
  200 
  201 int
  202 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
  203     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
  204     void (*copy_block)(uint8_t *, uint8_t *),
  205     void (*xor_block)(uint8_t *, uint8_t *))
  206 {
  207         (void) copy_block;
  208 #ifdef CAN_USE_GCM_ASM
  209         if (ctx->gcm_use_avx == B_TRUE)
  210                 return (gcm_encrypt_final_avx(ctx, out, block_size));
  211 #endif
  212 
  213         const gcm_impl_ops_t *gops;
  214         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
  215         uint8_t *ghash, *macp = NULL;
  216         int i, rv;
  217 
  218         if (out->cd_length <
  219             (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
  220                 return (CRYPTO_DATA_LEN_RANGE);
  221         }
  222 
  223         gops = gcm_impl_get_ops();
  224         ghash = (uint8_t *)ctx->gcm_ghash;
  225 
  226         if (ctx->gcm_remainder_len > 0) {
  227                 uint64_t counter;
  228                 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
  229 
  230                 /*
  231                  * Here is where we deal with data that is not a
  232                  * multiple of the block size.
  233                  */
  234 
  235                 /*
  236                  * Increment counter.
  237                  */
  238                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
  239                 counter = htonll(counter + 1);
  240                 counter &= counter_mask;
  241                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
  242 
  243                 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
  244                     (uint8_t *)ctx->gcm_tmp);
  245 
  246                 macp = (uint8_t *)ctx->gcm_remainder;
  247                 memset(macp + ctx->gcm_remainder_len, 0,
  248                     block_size - ctx->gcm_remainder_len);
  249 
  250                 /* XOR with counter block */
  251                 for (i = 0; i < ctx->gcm_remainder_len; i++) {
  252                         macp[i] ^= tmpp[i];
  253                 }
  254 
  255                 /* add ciphertext to the hash */
  256                 GHASH(ctx, macp, ghash, gops);
  257 
  258                 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
  259         }
  260 
  261         ctx->gcm_len_a_len_c[1] =
  262             htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
  263         GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
  264         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
  265             (uint8_t *)ctx->gcm_J0);
  266         xor_block((uint8_t *)ctx->gcm_J0, ghash);
  267 
  268         if (ctx->gcm_remainder_len > 0) {
  269                 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
  270                 if (rv != CRYPTO_SUCCESS)
  271                         return (rv);
  272         }
  273         out->cd_offset += ctx->gcm_remainder_len;
  274         ctx->gcm_remainder_len = 0;
  275         rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
  276         if (rv != CRYPTO_SUCCESS)
  277                 return (rv);
  278         out->cd_offset += ctx->gcm_tag_len;
  279 
  280         return (CRYPTO_SUCCESS);
  281 }
  282 
  283 /*
   284  * This only deals with decrypting the last block of the input, which
   285  * might be shorter than the block size.
  286  */
  287 static void
  288 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
  289     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
  290     void (*xor_block)(uint8_t *, uint8_t *))
  291 {
  292         uint8_t *datap, *outp, *counterp;
  293         uint64_t counter;
  294         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
  295         int i;
  296 
  297         /*
  298          * Increment counter.
  299          * Counter bits are confined to the bottom 32 bits
  300          */
  301         counter = ntohll(ctx->gcm_cb[1] & counter_mask);
  302         counter = htonll(counter + 1);
  303         counter &= counter_mask;
  304         ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
  305 
  306         datap = (uint8_t *)ctx->gcm_remainder;
  307         outp = &((ctx->gcm_pt_buf)[index]);
  308         counterp = (uint8_t *)ctx->gcm_tmp;
  309 
   310         /* Zero-pad the partial block before adding it to the hash. */
  311         memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
  312         memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);
  313 
  314         /* add ciphertext to the hash */
  315         GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
  316 
  317         /* decrypt remaining ciphertext */
  318         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
  319 
  320         /* XOR with counter block */
  321         for (i = 0; i < ctx->gcm_remainder_len; i++) {
  322                 outp[i] = datap[i] ^ counterp[i];
  323         }
  324 }
  325 
  326 int
  327 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
  328     crypto_data_t *out, size_t block_size,
  329     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
  330     void (*copy_block)(uint8_t *, uint8_t *),
  331     void (*xor_block)(uint8_t *, uint8_t *))
  332 {
  333         (void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
  334             (void) xor_block;
  335         size_t new_len;
  336         uint8_t *new;
  337 
  338         /*
   339          * Copy contiguous ciphertext input blocks to the plaintext buffer.
   340          * The ciphertext will be decrypted in gcm_decrypt_final().
  341          */
  342         if (length > 0) {
  343                 new_len = ctx->gcm_pt_buf_len + length;
  344                 new = vmem_alloc(new_len, KM_SLEEP);
  345                 if (new == NULL) {
  346                         vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
  347                         ctx->gcm_pt_buf = NULL;
  348                         return (CRYPTO_HOST_MEMORY);
  349                 }
  350 
  351                 if (ctx->gcm_pt_buf != NULL) {
  352                         memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
  353                         vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
  354                 } else {
  355                         ASSERT0(ctx->gcm_pt_buf_len);
  356                 }
  357 
  358                 ctx->gcm_pt_buf = new;
  359                 ctx->gcm_pt_buf_len = new_len;
  360                 memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
  361                     length);
  362                 ctx->gcm_processed_data_len += length;
  363         }
  364 
  365         ctx->gcm_remainder_len = 0;
  366         return (CRYPTO_SUCCESS);
  367 }
  368 
  369 int
  370 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
  371     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
  372     void (*xor_block)(uint8_t *, uint8_t *))
  373 {
  374 #ifdef CAN_USE_GCM_ASM
  375         if (ctx->gcm_use_avx == B_TRUE)
  376                 return (gcm_decrypt_final_avx(ctx, out, block_size));
  377 #endif
  378 
  379         const gcm_impl_ops_t *gops;
  380         size_t pt_len;
  381         size_t remainder;
  382         uint8_t *ghash;
  383         uint8_t *blockp;
  384         uint8_t *cbp;
  385         uint64_t counter;
  386         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
  387         int processed = 0, rv;
  388 
  389         ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
  390 
  391         gops = gcm_impl_get_ops();
  392         pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
  393         ghash = (uint8_t *)ctx->gcm_ghash;
  394         blockp = ctx->gcm_pt_buf;
  395         remainder = pt_len;
  396         while (remainder > 0) {
  397                 /* Incomplete last block */
  398                 if (remainder < block_size) {
  399                         memcpy(ctx->gcm_remainder, blockp, remainder);
  400                         ctx->gcm_remainder_len = remainder;
  401                         /*
   402                          * not expecting any more ciphertext, just
  403                          * compute plaintext for the remaining input
  404                          */
  405                         gcm_decrypt_incomplete_block(ctx, block_size,
  406                             processed, encrypt_block, xor_block);
  407                         ctx->gcm_remainder_len = 0;
  408                         goto out;
  409                 }
  410                 /* add ciphertext to the hash */
  411                 GHASH(ctx, blockp, ghash, gops);
  412 
  413                 /*
  414                  * Increment counter.
  415                  * Counter bits are confined to the bottom 32 bits
  416                  */
  417                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
  418                 counter = htonll(counter + 1);
  419                 counter &= counter_mask;
  420                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
  421 
  422                 cbp = (uint8_t *)ctx->gcm_tmp;
  423                 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
  424 
  425                 /* XOR with ciphertext */
  426                 xor_block(cbp, blockp);
  427 
  428                 processed += block_size;
  429                 blockp += block_size;
  430                 remainder -= block_size;
  431         }
  432 out:
  433         ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
  434         GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
  435         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
  436             (uint8_t *)ctx->gcm_J0);
  437         xor_block((uint8_t *)ctx->gcm_J0, ghash);
  438 
  439         /* compare the input authentication tag with what we calculated */
  440         if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
  441                 /* They don't match */
  442                 return (CRYPTO_INVALID_MAC);
  443         } else {
  444                 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
  445                 if (rv != CRYPTO_SUCCESS)
  446                         return (rv);
  447                 out->cd_offset += pt_len;
  448         }
  449         return (CRYPTO_SUCCESS);
  450 }
  451 
  452 static int
  453 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
  454 {
  455         size_t tag_len;
  456 
  457         /*
  458          * Check the length of the authentication tag (in bits).
  459          */
  460         tag_len = gcm_param->ulTagBits;
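               /* Only the tag lengths permitted by NIST SP 800-38D are accepted. */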
  461         switch (tag_len) {
  462         case 32:
  463         case 64:
  464         case 96:
  465         case 104:
  466         case 112:
  467         case 120:
  468         case 128:
  469                 break;
  470         default:
  471                 return (CRYPTO_MECHANISM_PARAM_INVALID);
  472         }
  473 
  474         if (gcm_param->ulIvLen == 0)
  475                 return (CRYPTO_MECHANISM_PARAM_INVALID);
  476 
  477         return (CRYPTO_SUCCESS);
  478 }
  479 
  480 static void
  481 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
  482     gcm_ctx_t *ctx, size_t block_size,
  483     void (*copy_block)(uint8_t *, uint8_t *),
  484     void (*xor_block)(uint8_t *, uint8_t *))
  485 {
  486         const gcm_impl_ops_t *gops;
  487         uint8_t *cb;
  488         ulong_t remainder = iv_len;
  489         ulong_t processed = 0;
  490         uint8_t *datap, *ghash;
  491         uint64_t len_a_len_c[2];
  492 
  493         gops = gcm_impl_get_ops();
  494         ghash = (uint8_t *)ctx->gcm_ghash;
  495         cb = (uint8_t *)ctx->gcm_cb;
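               /*
                * Per the GCM spec, a 96-bit IV is used directly as the initial
                * counter block: J0 = IV || 0^31 || 1. Any other IV length is
                * run through GHASH together with its length to derive J0.
                */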
  496         if (iv_len == 12) {
  497                 memcpy(cb, iv, 12);
  498                 cb[12] = 0;
  499                 cb[13] = 0;
  500                 cb[14] = 0;
  501                 cb[15] = 1;
  502                 /* J0 will be used again in the final */
  503                 copy_block(cb, (uint8_t *)ctx->gcm_J0);
  504         } else {
  505                 /* GHASH the IV */
  506                 do {
  507                         if (remainder < block_size) {
  508                                 memset(cb, 0, block_size);
  509                                 memcpy(cb, &(iv[processed]), remainder);
  510                                 datap = (uint8_t *)cb;
  511                                 remainder = 0;
  512                         } else {
  513                                 datap = (uint8_t *)(&(iv[processed]));
  514                                 processed += block_size;
  515                                 remainder -= block_size;
  516                         }
  517                         GHASH(ctx, datap, ghash, gops);
  518                 } while (remainder > 0);
  519 
  520                 len_a_len_c[0] = 0;
  521                 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
  522                 GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
  523 
  524                 /* J0 will be used again in the final */
  525                 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
  526         }
  527 }
  528 
  529 static int
  530 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
  531     unsigned char *auth_data, size_t auth_data_len, size_t block_size,
  532     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
  533     void (*copy_block)(uint8_t *, uint8_t *),
  534     void (*xor_block)(uint8_t *, uint8_t *))
  535 {
  536         const gcm_impl_ops_t *gops;
  537         uint8_t *ghash, *datap, *authp;
  538         size_t remainder, processed;
  539 
  540         /* encrypt zero block to get subkey H */
  541         memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
  542         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
  543             (uint8_t *)ctx->gcm_H);
  544 
  545         gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
  546             copy_block, xor_block);
  547 
  548         gops = gcm_impl_get_ops();
  549         authp = (uint8_t *)ctx->gcm_tmp;
  550         ghash = (uint8_t *)ctx->gcm_ghash;
  551         memset(authp, 0, block_size);
  552         memset(ghash, 0, block_size);
  553 
  554         processed = 0;
  555         remainder = auth_data_len;
  556         do {
  557                 if (remainder < block_size) {
  558                         /*
   559                          * There is less than a full block of data; pad
   560                          * the rest of the buffer with zeros.
  561                          */
  562 
  563                         if (auth_data != NULL) {
  564                                 memset(authp, 0, block_size);
  565                                 memcpy(authp, &(auth_data[processed]),
  566                                     remainder);
  567                         } else {
  568                                 ASSERT0(remainder);
  569                         }
  570 
  571                         datap = (uint8_t *)authp;
  572                         remainder = 0;
  573                 } else {
  574                         datap = (uint8_t *)(&(auth_data[processed]));
  575                         processed += block_size;
  576                         remainder -= block_size;
  577                 }
  578 
  579                 /* add auth data to the hash */
  580                 GHASH(ctx, datap, ghash, gops);
  581 
  582         } while (remainder > 0);
  583 
  584         return (CRYPTO_SUCCESS);
  585 }
  586 
  587 /*
  588  * The following function is called at encrypt or decrypt init time
  589  * for AES GCM mode.
  590  *
  591  * Init the GCM context struct. Handle the cycle and avx implementations here.
  592  */
  593 int
  594 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
  595     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
  596     void (*copy_block)(uint8_t *, uint8_t *),
  597     void (*xor_block)(uint8_t *, uint8_t *))
  598 {
  599         int rv;
  600         CK_AES_GCM_PARAMS *gcm_param;
  601 
  602         if (param != NULL) {
  603                 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
  604 
  605                 if ((rv = gcm_validate_args(gcm_param)) != 0) {
  606                         return (rv);
  607                 }
  608 
  609                 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
  610                 gcm_ctx->gcm_tag_len >>= 3;
  611                 gcm_ctx->gcm_processed_data_len = 0;
  612 
  613                 /* these values are in bits */
  614                 gcm_ctx->gcm_len_a_len_c[0]
  615                     = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
  616 
  617                 rv = CRYPTO_SUCCESS;
  618                 gcm_ctx->gcm_flags |= GCM_MODE;
  619         } else {
  620                 return (CRYPTO_MECHANISM_PARAM_INVALID);
  621         }
  622 
  623 #ifdef CAN_USE_GCM_ASM
  624         if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
  625                 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
  626         } else {
  627                 /*
  628                  * Handle the "cycle" implementation by creating avx and
  629                  * non-avx contexts alternately.
  630                  */
  631                 gcm_ctx->gcm_use_avx = gcm_toggle_avx();
  632                 /*
  633                  * We don't handle byte swapped key schedules in the avx
  634                  * code path.
  635                  */
  636                 aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
  637                 if (ks->ops->needs_byteswap == B_TRUE) {
  638                         gcm_ctx->gcm_use_avx = B_FALSE;
  639                 }
  640                 /* Use the MOVBE and the BSWAP variants alternately. */
  641                 if (gcm_ctx->gcm_use_avx == B_TRUE &&
  642                     zfs_movbe_available() == B_TRUE) {
  643                         (void) atomic_toggle_boolean_nv(
  644                             (volatile boolean_t *)&gcm_avx_can_use_movbe);
  645                 }
  646         }
  647         /* Allocate Htab memory as needed. */
  648         if (gcm_ctx->gcm_use_avx == B_TRUE) {
  649                 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
  650 
  651                 if (htab_len == 0) {
  652                         return (CRYPTO_MECHANISM_PARAM_INVALID);
  653                 }
  654                 gcm_ctx->gcm_htab_len = htab_len;
  655                 gcm_ctx->gcm_Htable =
  656                     kmem_alloc(htab_len, KM_SLEEP);
  657 
  658                 if (gcm_ctx->gcm_Htable == NULL) {
  659                         return (CRYPTO_HOST_MEMORY);
  660                 }
  661         }
  662         /* Avx and non avx context initialization differs from here on. */
  663         if (gcm_ctx->gcm_use_avx == B_FALSE) {
  664 #endif /* ifdef CAN_USE_GCM_ASM */
  665                 if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
  666                     gcm_param->pAAD, gcm_param->ulAADLen, block_size,
  667                     encrypt_block, copy_block, xor_block) != 0) {
  668                         rv = CRYPTO_MECHANISM_PARAM_INVALID;
  669                 }
  670 #ifdef CAN_USE_GCM_ASM
  671         } else {
  672                 if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
  673                     gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
  674                         rv = CRYPTO_MECHANISM_PARAM_INVALID;
  675                 }
  676         }
  677 #endif /* ifdef CAN_USE_GCM_ASM */
  678 
  679         return (rv);
  680 }
  681 
  682 int
  683 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
  684     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
  685     void (*copy_block)(uint8_t *, uint8_t *),
  686     void (*xor_block)(uint8_t *, uint8_t *))
  687 {
  688         int rv;
  689         CK_AES_GMAC_PARAMS *gmac_param;
  690 
  691         if (param != NULL) {
  692                 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
  693 
  694                 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
  695                 gcm_ctx->gcm_processed_data_len = 0;
  696 
  697                 /* these values are in bits */
  698                 gcm_ctx->gcm_len_a_len_c[0]
  699                     = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
  700 
  701                 rv = CRYPTO_SUCCESS;
  702                 gcm_ctx->gcm_flags |= GMAC_MODE;
  703         } else {
  704                 return (CRYPTO_MECHANISM_PARAM_INVALID);
  705         }
  706 
  707 #ifdef CAN_USE_GCM_ASM
  708         /*
  709          * Handle the "cycle" implementation by creating avx and non avx
  710          * contexts alternately.
  711          */
  712         if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
  713                 gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
  714         } else {
  715                 gcm_ctx->gcm_use_avx = gcm_toggle_avx();
  716         }
  717         /* We don't handle byte swapped key schedules in the avx code path. */
  718         aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
  719         if (ks->ops->needs_byteswap == B_TRUE) {
  720                 gcm_ctx->gcm_use_avx = B_FALSE;
  721         }
  722         /* Allocate Htab memory as needed. */
  723         if (gcm_ctx->gcm_use_avx == B_TRUE) {
  724                 size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
  725 
  726                 if (htab_len == 0) {
  727                         return (CRYPTO_MECHANISM_PARAM_INVALID);
  728                 }
  729                 gcm_ctx->gcm_htab_len = htab_len;
  730                 gcm_ctx->gcm_Htable =
  731                     kmem_alloc(htab_len, KM_SLEEP);
  732 
  733                 if (gcm_ctx->gcm_Htable == NULL) {
  734                         return (CRYPTO_HOST_MEMORY);
  735                 }
  736         }
  737 
  738         /* Avx and non avx context initialization differs from here on. */
  739         if (gcm_ctx->gcm_use_avx == B_FALSE) {
  740 #endif  /* ifdef CAN_USE_GCM_ASM */
  741                 if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
  742                     gmac_param->pAAD, gmac_param->ulAADLen, block_size,
  743                     encrypt_block, copy_block, xor_block) != 0) {
  744                         rv = CRYPTO_MECHANISM_PARAM_INVALID;
  745                 }
  746 #ifdef CAN_USE_GCM_ASM
  747         } else {
  748                 if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
  749                     gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
  750                         rv = CRYPTO_MECHANISM_PARAM_INVALID;
  751                 }
  752         }
  753 #endif /* ifdef CAN_USE_GCM_ASM */
  754 
  755         return (rv);
  756 }
  757 
  758 void *
  759 gcm_alloc_ctx(int kmflag)
  760 {
  761         gcm_ctx_t *gcm_ctx;
  762 
  763         if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
  764                 return (NULL);
  765 
  766         gcm_ctx->gcm_flags = GCM_MODE;
  767         return (gcm_ctx);
  768 }
  769 
  770 void *
  771 gmac_alloc_ctx(int kmflag)
  772 {
  773         gcm_ctx_t *gcm_ctx;
  774 
  775         if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
  776                 return (NULL);
  777 
  778         gcm_ctx->gcm_flags = GMAC_MODE;
  779         return (gcm_ctx);
  780 }
  781 
  782 /* GCM implementation that contains the fastest methods */
  783 static gcm_impl_ops_t gcm_fastest_impl = {
  784         .name = "fastest"
  785 };
  786 
  787 /* All compiled in implementations */
  788 static const gcm_impl_ops_t *gcm_all_impl[] = {
  789         &gcm_generic_impl,
  790 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
  791         &gcm_pclmulqdq_impl,
  792 #endif
  793 };
  794 
  795 /* Indicate that benchmark has been completed */
  796 static boolean_t gcm_impl_initialized = B_FALSE;
  797 
  798 /* Hold all supported implementations */
  799 static size_t gcm_supp_impl_cnt = 0;
  800 static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
  801 
  802 /*
  803  * Returns the GCM operations for encrypt/decrypt/key setup.  When a
   804  * SIMD implementation is not allowed in the current context, fall back
   805  * to the generic implementation.
  806  */
  807 const gcm_impl_ops_t *
  808 gcm_impl_get_ops(void)
  809 {
  810         if (!kfpu_allowed())
  811                 return (&gcm_generic_impl);
  812 
  813         const gcm_impl_ops_t *ops = NULL;
  814         const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
  815 
  816         switch (impl) {
  817         case IMPL_FASTEST:
  818                 ASSERT(gcm_impl_initialized);
  819                 ops = &gcm_fastest_impl;
  820                 break;
  821         case IMPL_CYCLE:
  822                 /* Cycle through supported implementations */
  823                 ASSERT(gcm_impl_initialized);
  824                 ASSERT3U(gcm_supp_impl_cnt, >, 0);
  825                 static size_t cycle_impl_idx = 0;
  826                 size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
  827                 ops = gcm_supp_impl[idx];
  828                 break;
  829 #ifdef CAN_USE_GCM_ASM
  830         case IMPL_AVX:
  831                 /*
  832                  * Make sure that we return a valid implementation while
  833                  * switching to the avx implementation since there still
  834                  * may be unfinished non-avx contexts around.
  835                  */
  836                 ops = &gcm_generic_impl;
  837                 break;
  838 #endif
  839         default:
  840                 ASSERT3U(impl, <, gcm_supp_impl_cnt);
  841                 ASSERT3U(gcm_supp_impl_cnt, >, 0);
  842                 if (impl < ARRAY_SIZE(gcm_all_impl))
  843                         ops = gcm_supp_impl[impl];
  844                 break;
  845         }
  846 
  847         ASSERT3P(ops, !=, NULL);
  848 
  849         return (ops);
  850 }
  851 
  852 /*
  853  * Initialize all supported implementations.
  854  */
  855 void
  856 gcm_impl_init(void)
  857 {
  858         gcm_impl_ops_t *curr_impl;
  859         int i, c;
  860 
   861         /* Move supported implementations into gcm_supp_impl[] */
  862         for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
  863                 curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
  864 
  865                 if (curr_impl->is_supported())
  866                         gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
  867         }
  868         gcm_supp_impl_cnt = c;
  869 
  870         /*
  871          * Set the fastest implementation given the assumption that the
  872          * hardware accelerated version is the fastest.
  873          */
  874 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
  875         if (gcm_pclmulqdq_impl.is_supported()) {
  876                 memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
  877                     sizeof (gcm_fastest_impl));
  878         } else
  879 #endif
  880         {
  881                 memcpy(&gcm_fastest_impl, &gcm_generic_impl,
  882                     sizeof (gcm_fastest_impl));
  883         }
  884 
  885         strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);
  886 
  887 #ifdef CAN_USE_GCM_ASM
  888         /*
  889          * Use the avx implementation if it's available and the implementation
  890          * hasn't changed from its default value of fastest on module load.
  891          */
  892         if (gcm_avx_will_work()) {
  893 #ifdef HAVE_MOVBE
  894                 if (zfs_movbe_available() == B_TRUE) {
  895                         atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
  896                 }
  897 #endif
  898                 if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
  899                         gcm_set_avx(B_TRUE);
  900                 }
  901         }
  902 #endif
  903         /* Finish initialization */
  904         atomic_swap_32(&icp_gcm_impl, user_sel_impl);
  905         gcm_impl_initialized = B_TRUE;
  906 }
  907 
  908 static const struct {
  909         const char *name;
  910         uint32_t sel;
  911 } gcm_impl_opts[] = {
  912                 { "cycle",      IMPL_CYCLE },
  913                 { "fastest",    IMPL_FASTEST },
  914 #ifdef CAN_USE_GCM_ASM
  915                 { "avx",        IMPL_AVX },
  916 #endif
  917 };
  918 
  919 /*
  920  * Function sets desired gcm implementation.
  921  *
   922  * If we are called before init(), the user preference will be saved in
   923  * user_sel_impl and applied in a later init() call. This occurs when the
   924  * module parameter is specified on module load. Otherwise, directly update
  925  * icp_gcm_impl.
  926  *
  927  * @val         Name of gcm implementation to use
  928  * @param       Unused.
  929  */
  930 int
  931 gcm_impl_set(const char *val)
  932 {
  933         int err = -EINVAL;
  934         char req_name[GCM_IMPL_NAME_MAX];
  935         uint32_t impl = GCM_IMPL_READ(user_sel_impl);
  936         size_t i;
  937 
  938         /* sanitize input */
  939         i = strnlen(val, GCM_IMPL_NAME_MAX);
  940         if (i == 0 || i >= GCM_IMPL_NAME_MAX)
  941                 return (err);
  942 
  943         strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
  944         while (i > 0 && isspace(req_name[i-1]))
  945                 i--;
  946         req_name[i] = '\0';
  947 
  948         /* Check mandatory options */
  949         for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
  950 #ifdef CAN_USE_GCM_ASM
  951                 /* Ignore avx implementation if it won't work. */
  952                 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
  953                         continue;
  954                 }
  955 #endif
  956                 if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
  957                         impl = gcm_impl_opts[i].sel;
  958                         err = 0;
  959                         break;
  960                 }
  961         }
  962 
  963         /* check all supported impl if init() was already called */
  964         if (err != 0 && gcm_impl_initialized) {
  965                 /* check all supported implementations */
  966                 for (i = 0; i < gcm_supp_impl_cnt; i++) {
  967                         if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
  968                                 impl = i;
  969                                 err = 0;
  970                                 break;
  971                         }
  972                 }
  973         }
  974 #ifdef CAN_USE_GCM_ASM
  975         /*
  976          * Use the avx implementation if available and the requested one is
  977          * avx or fastest.
  978          */
  979         if (gcm_avx_will_work() == B_TRUE &&
  980             (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
  981                 gcm_set_avx(B_TRUE);
  982         } else {
  983                 gcm_set_avx(B_FALSE);
  984         }
  985 #endif
  986 
  987         if (err == 0) {
  988                 if (gcm_impl_initialized)
  989                         atomic_swap_32(&icp_gcm_impl, impl);
  990                 else
  991                         atomic_swap_32(&user_sel_impl, impl);
  992         }
  993 
  994         return (err);
  995 }
  996 
  997 #if defined(_KERNEL) && defined(__linux__)
  998 
  999 static int
 1000 icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
 1001 {
 1002         return (gcm_impl_set(val));
 1003 }
 1004 
 1005 static int
 1006 icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
 1007 {
 1008         int i, cnt = 0;
 1009         char *fmt;
 1010         const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
 1011 
 1012         ASSERT(gcm_impl_initialized);
 1013 
 1014         /* list mandatory options */
 1015         for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
 1016 #ifdef CAN_USE_GCM_ASM
 1017                 /* Ignore avx implementation if it won't work. */
 1018                 if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
 1019                         continue;
 1020                 }
 1021 #endif
 1022                 fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
 1023                 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
 1024                     gcm_impl_opts[i].name);
 1025         }
 1026 
 1027         /* list all supported implementations */
 1028         for (i = 0; i < gcm_supp_impl_cnt; i++) {
 1029                 fmt = (i == impl) ? "[%s] " : "%s ";
 1030                 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
 1031                     gcm_supp_impl[i]->name);
 1032         }
 1033 
 1034         return (cnt);
 1035 }
 1036 
 1037 module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
 1038     NULL, 0644);
 1039 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
  1040 #endif /* defined(_KERNEL) && defined(__linux__) */
 1041 
 1042 #ifdef CAN_USE_GCM_ASM
 1043 #define GCM_BLOCK_LEN 16
 1044 /*
 1045  * The openssl asm routines are 6x aggregated and need that many bytes
 1046  * at minimum.
 1047  */
 1048 #define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
 1049 #define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
 1050 /*
  1051  * Ensure the chunk size is reasonable, since we are allocating a buffer of
  1052  * GCM_AVX_MAX_CHUNK_SIZE bytes and disabling preemption and interrupts.
 1053  */
 1054 #define GCM_AVX_MAX_CHUNK_SIZE \
 1055         (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
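       /* That is 128 KiB rounded down to a multiple of 96 bytes: 131040. */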
 1056 
 1057 /* Clear the FPU registers since they hold sensitive internal state. */
 1058 #define clear_fpu_regs() clear_fpu_regs_avx()
 1059 #define GHASH_AVX(ctx, in, len) \
 1060     gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
 1061     in, len)
 1062 
 1063 #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
 1064 
 1065 /* Get the chunk size module parameter. */
 1066 #define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
 1067 
 1068 /*
 1069  * Module parameter: number of bytes to process at once while owning the FPU.
  1070  * It is rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary
  1071  * and guaranteed to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 1072  */
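       /* With the default of 32 KiB this works out to 341 * 96 = 32736 bytes. */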
 1073 static uint32_t gcm_avx_chunk_size =
 1074         ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
 1075 
 1076 extern void ASMABI clear_fpu_regs_avx(void);
 1077 extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
 1078 extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
 1079     const uint32_t pt[4], uint32_t ct[4]);
 1080 
 1081 extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
 1082 extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
 1083     const uint8_t *in, size_t len);
 1084 
 1085 extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
 1086     const void *, uint64_t *, uint64_t *);
 1087 
 1088 extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
 1089     const void *, uint64_t *, uint64_t *);
 1090 
 1091 static inline boolean_t
 1092 gcm_avx_will_work(void)
 1093 {
 1094         /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
 1095         return (kfpu_allowed() &&
 1096             zfs_avx_available() && zfs_aes_available() &&
 1097             zfs_pclmulqdq_available());
 1098 }
 1099 
 1100 static inline void
 1101 gcm_set_avx(boolean_t val)
 1102 {
 1103         if (gcm_avx_will_work() == B_TRUE) {
 1104                 atomic_swap_32(&gcm_use_avx, val);
 1105         }
 1106 }
 1107 
 1108 static inline boolean_t
 1109 gcm_toggle_avx(void)
 1110 {
 1111         if (gcm_avx_will_work() == B_TRUE) {
 1112                 return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
 1113         } else {
 1114                 return (B_FALSE);
 1115         }
 1116 }
 1117 
 1118 static inline size_t
 1119 gcm_simd_get_htab_size(boolean_t simd_mode)
 1120 {
 1121         switch (simd_mode) {
 1122         case B_TRUE:
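                       /*
                        * The 6x aggregated AVX GHASH routines use a table of
                        * twelve 16-byte entries derived from the hash subkey H
                        * (192 bytes).
                        */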
 1123                 return (2 * 6 * 2 * sizeof (uint64_t));
 1124 
 1125         default:
 1126                 return (0);
 1127         }
 1128 }
 1129 
 1130 /*
 1131  * Clear sensitive data in the context.
 1132  *
 1133  * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
 1134  * ctx->gcm_Htable contain the hash sub key which protects authentication.
 1135  *
 1136  * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
  1137  * a known plaintext attack: they consist of the IV and the first and last
  1138  * counter block, respectively. Whether they should be cleared is debatable.
 1139  */
 1140 static inline void
 1141 gcm_clear_ctx(gcm_ctx_t *ctx)
 1142 {
 1143         memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder));
 1144         memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
 1145         memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0));
 1146         memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp));
 1147 }
 1148 
 1149 /* Increment the GCM counter block by n. */
 1150 static inline void
 1151 gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
 1152 {
 1153         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 1154         uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 1155 
 1156         counter = htonll(counter + n);
 1157         counter &= counter_mask;
 1158         ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 1159 }
 1160 
 1161 /*
 1162  * Encrypt multiple blocks of data in GCM mode.
 1163  * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
 1164  * if possible. While processing a chunk the FPU is "locked".
 1165  */
 1166 static int
 1167 gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
 1168     size_t length, crypto_data_t *out, size_t block_size)
 1169 {
 1170         size_t bleft = length;
 1171         size_t need = 0;
 1172         size_t done = 0;
 1173         uint8_t *datap = (uint8_t *)data;
 1174         size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
 1175         const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
 1176         uint64_t *ghash = ctx->gcm_ghash;
 1177         uint64_t *cb = ctx->gcm_cb;
 1178         uint8_t *ct_buf = NULL;
 1179         uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
 1180         int rv = CRYPTO_SUCCESS;
 1181 
 1182         ASSERT(block_size == GCM_BLOCK_LEN);
 1183         /*
 1184          * If the last call left an incomplete block, try to fill
 1185          * it first.
 1186          */
 1187         if (ctx->gcm_remainder_len > 0) {
 1188                 need = block_size - ctx->gcm_remainder_len;
 1189                 if (length < need) {
 1190                         /* Accumulate bytes here and return. */
 1191                         memcpy((uint8_t *)ctx->gcm_remainder +
 1192                             ctx->gcm_remainder_len, datap, length);
 1193 
 1194                         ctx->gcm_remainder_len += length;
 1195                         if (ctx->gcm_copy_to == NULL) {
 1196                                 ctx->gcm_copy_to = datap;
 1197                         }
 1198                         return (CRYPTO_SUCCESS);
 1199                 } else {
 1200                         /* Complete incomplete block. */
 1201                         memcpy((uint8_t *)ctx->gcm_remainder +
 1202                             ctx->gcm_remainder_len, datap, need);
 1203 
 1204                         ctx->gcm_copy_to = NULL;
 1205                 }
 1206         }
 1207 
 1208         /* Allocate a buffer to encrypt to if there is enough input. */
 1209         if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
 1210                 ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
 1211                 if (ct_buf == NULL) {
 1212                         return (CRYPTO_HOST_MEMORY);
 1213                 }
 1214         }
 1215 
 1216         /* If we completed an incomplete block, encrypt and write it out. */
 1217         if (ctx->gcm_remainder_len > 0) {
 1218                 kfpu_begin();
 1219                 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
 1220                     (const uint32_t *)cb, (uint32_t *)tmp);
 1221 
 1222                 gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
 1223                 GHASH_AVX(ctx, tmp, block_size);
 1224                 clear_fpu_regs();
 1225                 kfpu_end();
 1226                 rv = crypto_put_output_data(tmp, out, block_size);
 1227                 out->cd_offset += block_size;
 1228                 gcm_incr_counter_block(ctx);
 1229                 ctx->gcm_processed_data_len += block_size;
 1230                 bleft -= need;
 1231                 datap += need;
 1232                 ctx->gcm_remainder_len = 0;
 1233         }
 1234 
 1235         /* Do the bulk encryption in chunk_size blocks. */
 1236         for (; bleft >= chunk_size; bleft -= chunk_size) {
 1237                 kfpu_begin();
 1238                 done = aesni_gcm_encrypt(
 1239                     datap, ct_buf, chunk_size, key, cb, ghash);
 1240 
 1241                 clear_fpu_regs();
 1242                 kfpu_end();
 1243                 if (done != chunk_size) {
 1244                         rv = CRYPTO_FAILED;
 1245                         goto out_nofpu;
 1246                 }
 1247                 rv = crypto_put_output_data(ct_buf, out, chunk_size);
 1248                 if (rv != CRYPTO_SUCCESS) {
 1249                         goto out_nofpu;
 1250                 }
 1251                 out->cd_offset += chunk_size;
 1252                 datap += chunk_size;
 1253                 ctx->gcm_processed_data_len += chunk_size;
 1254         }
 1255         /* Check if we are already done. */
 1256         if (bleft == 0) {
 1257                 goto out_nofpu;
 1258         }
 1259         /* Bulk encrypt the remaining data. */
 1260         kfpu_begin();
 1261         if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
 1262                 done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
 1263                 if (done == 0) {
 1264                         rv = CRYPTO_FAILED;
 1265                         goto out;
 1266                 }
 1267                 rv = crypto_put_output_data(ct_buf, out, done);
 1268                 if (rv != CRYPTO_SUCCESS) {
 1269                         goto out;
 1270                 }
 1271                 out->cd_offset += done;
 1272                 ctx->gcm_processed_data_len += done;
 1273                 datap += done;
 1274                 bleft -= done;
 1275 
 1276         }
 1277         /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
 1278         while (bleft > 0) {
 1279                 if (bleft < block_size) {
 1280                         memcpy(ctx->gcm_remainder, datap, bleft);
 1281                         ctx->gcm_remainder_len = bleft;
 1282                         ctx->gcm_copy_to = datap;
 1283                         goto out;
 1284                 }
 1285                 /* Encrypt, hash and write out. */
 1286                 aes_encrypt_intel(key->encr_ks.ks32, key->nr,
 1287                     (const uint32_t *)cb, (uint32_t *)tmp);
 1288 
 1289                 gcm_xor_avx(datap, tmp);
 1290                 GHASH_AVX(ctx, tmp, block_size);
 1291                 rv = crypto_put_output_data(tmp, out, block_size);
 1292                 if (rv != CRYPTO_SUCCESS) {
 1293                         goto out;
 1294                 }
 1295                 out->cd_offset += block_size;
 1296                 gcm_incr_counter_block(ctx);
 1297                 ctx->gcm_processed_data_len += block_size;
 1298                 datap += block_size;
 1299                 bleft -= block_size;
 1300         }
 1301 out:
 1302         clear_fpu_regs();
 1303         kfpu_end();
 1304 out_nofpu:
 1305         if (ct_buf != NULL) {
 1306                 vmem_free(ct_buf, chunk_size);
 1307         }
 1308         return (rv);
 1309 }
 1310 
 1311 /*
  1312  * Finalize the encryption: Zero-fill, encrypt, hash and write out any
  1313  * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
 1314  */
 1315 static int
 1316 gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
 1317 {
 1318         uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
 1319         uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
 1320         uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
 1321         size_t rem_len = ctx->gcm_remainder_len;
 1322         const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
 1323         int aes_rounds = ((aes_key_t *)keysched)->nr;
 1324         int rv;
 1325 
 1326         ASSERT(block_size == GCM_BLOCK_LEN);
 1327 
 1328         if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
 1329                 return (CRYPTO_DATA_LEN_RANGE);
 1330         }
 1331 
 1332         kfpu_begin();
 1333         /* Pad last incomplete block with zeros, encrypt and hash. */
 1334         if (rem_len > 0) {
 1335                 uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
 1336                 const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
 1337 
 1338                 aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
 1339                 memset(remainder + rem_len, 0, block_size - rem_len);
 1340                 for (int i = 0; i < rem_len; i++) {
 1341                         remainder[i] ^= tmp[i];
 1342                 }
 1343                 GHASH_AVX(ctx, remainder, block_size);
 1344                 ctx->gcm_processed_data_len += rem_len;
 1345                 /* No need to increment counter_block, it's the last block. */
 1346         }
 1347         /* Finish tag. */
 1348         ctx->gcm_len_a_len_c[1] =
 1349             htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
 1350         GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
 1351         aes_encrypt_intel(keysched, aes_rounds, J0, J0);
 1352 
 1353         gcm_xor_avx((uint8_t *)J0, ghash);
 1354         clear_fpu_regs();
 1355         kfpu_end();
 1356 
 1357         /* Output remainder. */
 1358         if (rem_len > 0) {
 1359                 rv = crypto_put_output_data(remainder, out, rem_len);
 1360                 if (rv != CRYPTO_SUCCESS)
 1361                         return (rv);
 1362         }
 1363         out->cd_offset += rem_len;
 1364         ctx->gcm_remainder_len = 0;
 1365         rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
 1366         if (rv != CRYPTO_SUCCESS)
 1367                 return (rv);
 1368 
 1369         out->cd_offset += ctx->gcm_tag_len;
 1370         /* Clear sensitive data in the context before returning. */
 1371         gcm_clear_ctx(ctx);
 1372         return (CRYPTO_SUCCESS);
 1373 }
 1374 
 1375 /*
  1376  * Finalize decryption: So far we have only accumulated ciphertext, so now
  1377  * we decrypt it here in place.
 1378  */
 1379 static int
 1380 gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
 1381 {
 1382         ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
 1383         ASSERT3U(block_size, ==, 16);
 1384 
 1385         size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
 1386         size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
 1387         uint8_t *datap = ctx->gcm_pt_buf;
 1388         const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
 1389         uint32_t *cb = (uint32_t *)ctx->gcm_cb;
 1390         uint64_t *ghash = ctx->gcm_ghash;
 1391         uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
 1392         int rv = CRYPTO_SUCCESS;
 1393         size_t bleft, done;
 1394 
 1395         /*
 1396          * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
 1397          * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
 1398          * GCM_AVX_MIN_DECRYPT_BYTES.
 1399          */
 1400         for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
 1401                 kfpu_begin();
 1402                 done = aesni_gcm_decrypt(datap, datap, chunk_size,
 1403                     (const void *)key, ctx->gcm_cb, ghash);
 1404                 clear_fpu_regs();
 1405                 kfpu_end();
 1406                 if (done != chunk_size) {
 1407                         return (CRYPTO_FAILED);
 1408                 }
 1409                 datap += done;
 1410         }
 1411         /* Decrypt remainder, which is less than chunk size, in one go. */
 1412         kfpu_begin();
 1413         if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
 1414                 done = aesni_gcm_decrypt(datap, datap, bleft,
 1415                     (const void *)key, ctx->gcm_cb, ghash);
 1416                 if (done == 0) {
 1417                         clear_fpu_regs();
 1418                         kfpu_end();
 1419                         return (CRYPTO_FAILED);
 1420                 }
 1421                 datap += done;
 1422                 bleft -= done;
 1423         }
 1424         ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
 1425 
 1426         /*
 1427          * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
 1428          * decrypt them block by block.
 1429          */
 1430         while (bleft > 0) {
 1431                 /* Incomplete last block. */
 1432                 if (bleft < block_size) {
 1433                         uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
 1434 
 1435                         memset(lastb, 0, block_size);
 1436                         memcpy(lastb, datap, bleft);
 1437                         /* GHASH the zero-padded ciphertext block, then decrypt it. */
 1438                         GHASH_AVX(ctx, lastb, block_size);
 1439                         aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
 1440                         for (size_t i = 0; i < bleft; i++) {
 1441                                 datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
 1442                         }
 1443                         break;
 1444                 }
 1445                 /* GHASH the ciphertext block, then decrypt it in place. */
 1446                 GHASH_AVX(ctx, datap, block_size);
 1447                 aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
 1448                 gcm_xor_avx((uint8_t *)tmp, datap);
 1449                 gcm_incr_counter_block(ctx);
 1450 
 1451                 datap += block_size;
 1452                 bleft -= block_size;
 1453         }
 1454         if (rv != CRYPTO_SUCCESS) {
 1455                 clear_fpu_regs();
 1456                 kfpu_end();
 1457                 return (rv);
 1458         }
 1459         /* Decryption done, finish the tag. */
 1460         ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
 1461         GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
 1462         aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
 1463             (uint32_t *)ctx->gcm_J0);
 1464 
 1465         gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
 1466 
 1467         /* We are done with the FPU; restore its state. */
 1468         clear_fpu_regs();
 1469         kfpu_end();
 1470 
 1471         /* Compare the input authentication tag with what we calculated. */
 1472         if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
 1473                 /* They don't match. */
 1474                 return (CRYPTO_INVALID_MAC);
 1475         }
 1476         rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
 1477         if (rv != CRYPTO_SUCCESS) {
 1478                 return (rv);
 1479         }
 1480         out->cd_offset += pt_len;
 1481         gcm_clear_ctx(ctx);
 1482         return (CRYPTO_SUCCESS);
 1483 }
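
/*
 * Illustrative sketch (not part of the original gcm.c): during decryption the
 * ciphertext and the appended tag are accumulated together in gcm_pt_buf, so
 * pt_len = gcm_processed_data_len - gcm_tag_len and the received tag starts at
 * &gcm_pt_buf[pt_len].  A minimal stand-alone version of that final check,
 * with hypothetical names and never compiled here:
 */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Return 0 if the tag appended to the accumulated buffer matches the computed
 * tag, nonzero otherwise (mirrors the memcmp() above).
 */
static int
gcm_check_tag_sketch(const uint8_t *buf, size_t buf_len,
    const uint8_t *computed_tag, size_t tag_len)
{
        if (buf_len < tag_len)
                return (-1);
        return (memcmp(buf + (buf_len - tag_len), computed_tag, tag_len));
}
#endif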
 1484 
 1485 /*
 1486  * Initialize the GCM parameters H, Htable and the counter block. Save the
 1487  * initial counter block.
 1488  */
 1489 static int
 1490 gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
 1491     unsigned char *auth_data, size_t auth_data_len, size_t block_size)
 1492 {
 1493         uint8_t *cb = (uint8_t *)ctx->gcm_cb;
 1494         uint64_t *H = ctx->gcm_H;
 1495         const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
 1496         int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
 1497         uint8_t *datap = auth_data;
 1498         size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
 1499         size_t bleft;
 1500 
 1501         ASSERT(block_size == GCM_BLOCK_LEN);
 1502 
 1503         /* Init H (encrypt zero block) and create the initial counter block. */
 1504         memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
 1505         memset(H, 0, sizeof (ctx->gcm_H));
 1506         kfpu_begin();
 1507         aes_encrypt_intel(keysched, aes_rounds,
 1508             (const uint32_t *)H, (uint32_t *)H);
 1509 
 1510         gcm_init_htab_avx(ctx->gcm_Htable, H);
 1511 
 1512         if (iv_len == 12) {
 1513                 memcpy(cb, iv, 12);
 1514                 cb[12] = 0;
 1515                 cb[13] = 0;
 1516                 cb[14] = 0;
 1517                 cb[15] = 1;
 1518                 /* We need the ICB later. */
 1519                 memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
 1520         } else {
 1521                 /*
 1522                  * Most consumers use 12-byte IVs, so it's OK to use the
 1523                  * original routines for other IV sizes; just avoid nesting
 1524                  * kfpu_begin() calls.
 1525                  */
 1526                 clear_fpu_regs();
 1527                 kfpu_end();
 1528                 gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
 1529                     aes_copy_block, aes_xor_block);
 1530                 kfpu_begin();
 1531         }
 1532 
 1533         /* OpenSSL post-increments the counter; adjust for that. */
 1534         gcm_incr_counter_block(ctx);
 1535 
 1536         /* GHASH the AAD in chunks of chunk_size bytes. */
 1537         for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
 1538                 GHASH_AVX(ctx, datap, chunk_size);
 1539                 datap += chunk_size;
 1540                 clear_fpu_regs();
 1541                 kfpu_end();
 1542                 kfpu_begin();
 1543         }
 1544         /* GHASH the remainder and handle a possible incomplete GCM block. */
 1545         if (bleft > 0) {
 1546                 size_t incomp = bleft % block_size;
 1547 
 1548                 bleft -= incomp;
 1549                 if (bleft > 0) {
 1550                         GHASH_AVX(ctx, datap, bleft);
 1551                         datap += bleft;
 1552                 }
 1553                 if (incomp > 0) {
 1554                         /* Zero pad and hash incomplete last block. */
 1555                         uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
 1556 
 1557                         memset(authp, 0, block_size);
 1558                         memcpy(authp, datap, incomp);
 1559                         GHASH_AVX(ctx, authp, block_size);
 1560                 }
 1561         }
 1562         clear_fpu_regs();
 1563         kfpu_end();
 1564         return (CRYPTO_SUCCESS);
 1565 }
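
/*
 * Illustrative sketch (not part of the original gcm.c): for the common 96-bit
 * IV case handled above, the initial counter block is simply
 * J0 = IV || 0x00000001, and gcm_incr_counter_block() then bumps the
 * big-endian low 32 bits so the first data block is processed with counter
 * value 2, matching OpenSSL's post-increment convention.  A stand-alone
 * version of those two steps, with hypothetical names and never compiled here:
 */
#if 0
#include <stdint.h>
#include <string.h>

/* Build J0 = IV || 0x00000001 for a 12-byte (96-bit) IV. */
static void
gcm_build_j0_sketch(uint8_t j0[16], const uint8_t iv[12])
{
        memcpy(j0, iv, 12);
        j0[12] = 0;
        j0[13] = 0;
        j0[14] = 0;
        j0[15] = 1;
}

/* Increment the big-endian 32-bit counter held in the last 4 bytes. */
static void
gcm_incr_counter_sketch(uint8_t cb[16])
{
        for (int i = 15; i >= 12; i--) {
                if (++cb[i] != 0)
                        break;  /* stop once there is no carry */
        }
}
#endif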
 1566 
 1567 #if defined(_KERNEL)
 1568 static int
 1569 icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
 1570 {
 1571         unsigned long val;
 1572         char val_rounded[16];
 1573         int error = 0;
 1574 
 1575         error = kstrtoul(buf, 0, &val);
 1576         if (error)
 1577                 return (error);
 1578 
 1579         val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
 1580 
 1581         if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
 1582                 return (-EINVAL);
 1583 
 1584         snprintf(val_rounded, 16, "%u", (uint32_t)val);
 1585         error = param_set_uint(val_rounded, kp);
 1586         return (error);
 1587 }
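
/*
 * Illustrative sketch (not part of the original gcm.c): the handler above
 * rounds the requested chunk size down to a multiple of
 * GCM_AVX_MIN_DECRYPT_BYTES with the usual (val / m) * m idiom before range
 * checking it.  A stand-alone version, never compiled here and using 96
 * purely as an illustrative multiple (not necessarily the real value of
 * GCM_AVX_MIN_DECRYPT_BYTES):
 */
#if 0
/* Round val down to the nearest multiple of m (m > 0). */
static unsigned long
round_down_to_multiple_sketch(unsigned long val, unsigned long m)
{
        return ((val / m) * m);
}
/* e.g. round_down_to_multiple_sketch(100000, 96) == 99936 */
#endif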
 1588 
 1589 module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
 1590     param_get_uint, &gcm_avx_chunk_size, 0644);
 1591 
 1592 MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
 1593         "How many bytes to process while owning the FPU");
 1594 
 1595 #endif /* defined(_KERNEL) */
 1596 #endif /* ifdef CAN_USE_GCM_ASM */
