The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/aesni/aesni_wrap.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
    3  * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
    4  * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
    5  * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
    6  * Copyright (c) 2014 The FreeBSD Foundation
    7  * All rights reserved.
    8  *
    9  * Portions of this software were developed by John-Mark Gurney
   10  * under sponsorship of the FreeBSD Foundation and
   11  * Rubicon Communications, LLC (Netgate).
   12  * 
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __FBSDID("$FreeBSD$");
   37 
   38 #include <sys/param.h>
   39 #include <sys/libkern.h>
   40 #include <sys/malloc.h>
   41 #include <sys/proc.h>
   42 #include <sys/systm.h>
   43 #include <crypto/aesni/aesni.h>
   44 
   45 #include <opencrypto/gmac.h>
   46 
   47 #include "aesencdec.h"
   48 #include <smmintrin.h>
   49 
   50 MALLOC_DECLARE(M_AESNI);
   51 
   /*
    * Eight consecutive 128-bit blocks viewed as a single object.  The bulk
    * loops in this file overlay it on caller-supplied byte buffers (uint8_t
    * pointers cast to struct blocks8 pointers) so that eight blocks can be
    * handed to aesni_enc8()/aesni_dec8() in one call.
    * NOTE(review): __packed presumably drops the alignment requirement so
    * those buffers need not be 16-byte aligned -- confirm.
    */
   52 struct blocks8 {
   53         __m128i blk[8];
   54 } __packed;
   55 
   56 void
   57 aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
   58     const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
   59 {
   60         __m128i tot, ivreg;
   61         size_t i;
   62 
   63         len /= AES_BLOCK_LEN;
   64         ivreg = _mm_loadu_si128((const __m128i *)iv);
   65         for (i = 0; i < len; i++) {
   66                 tot = aesni_enc(rounds - 1, key_schedule,
   67                     _mm_loadu_si128((const __m128i *)from) ^ ivreg);
   68                 ivreg = tot;
   69                 _mm_storeu_si128((__m128i *)to, tot);
   70                 from += AES_BLOCK_LEN;
   71                 to += AES_BLOCK_LEN;
   72         }
   73 }
   74 
   75 void
   76 aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
   77     uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
   78 {
   79         __m128i blocks[8];
   80         struct blocks8 *blks;
   81         __m128i ivreg, nextiv;
   82         size_t i, j, cnt;
   83 
   84         ivreg = _mm_loadu_si128((const __m128i *)iv);
   85         cnt = len / AES_BLOCK_LEN / 8;
   86         for (i = 0; i < cnt; i++) {
   87                 blks = (struct blocks8 *)buf;
   88                 aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
   89                     blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
   90                     blks->blk[6], blks->blk[7], &blocks[0]);
   91                 for (j = 0; j < 8; j++) {
   92                         nextiv = blks->blk[j];
   93                         blks->blk[j] = blocks[j] ^ ivreg;
   94                         ivreg = nextiv;
   95                 }
   96                 buf += AES_BLOCK_LEN * 8;
   97         }
   98         i *= 8;
   99         cnt = len / AES_BLOCK_LEN;
  100         for (; i < cnt; i++) {
  101                 nextiv = _mm_loadu_si128((void *)buf);
  102                 _mm_storeu_si128((void *)buf,
  103                     aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
  104                 ivreg = nextiv;
  105                 buf += AES_BLOCK_LEN;
  106         }
  107 }
  108 
  109 void
  110 aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
  111     const uint8_t *from, uint8_t *to)
  112 {
  113         __m128i tot;
  114         __m128i tout[8];
  115         struct blocks8 *top;
  116         const struct blocks8 *blks;
  117         size_t i, cnt;
  118 
  119         cnt = len / AES_BLOCK_LEN / 8;
  120         for (i = 0; i < cnt; i++) {
  121                 blks = (const struct blocks8 *)from;
  122                 top = (struct blocks8 *)to;
  123                 aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
  124                     blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
  125                     blks->blk[6], blks->blk[7], tout);
  126                 top->blk[0] = tout[0];
  127                 top->blk[1] = tout[1];
  128                 top->blk[2] = tout[2];
  129                 top->blk[3] = tout[3];
  130                 top->blk[4] = tout[4];
  131                 top->blk[5] = tout[5];
  132                 top->blk[6] = tout[6];
  133                 top->blk[7] = tout[7];
  134                 from += AES_BLOCK_LEN * 8;
  135                 to += AES_BLOCK_LEN * 8;
  136         }
  137         i *= 8;
  138         cnt = len / AES_BLOCK_LEN;
  139         for (; i < cnt; i++) {
  140                 tot = aesni_enc(rounds - 1, key_schedule,
  141                     _mm_loadu_si128((const __m128i *)from));
  142                 _mm_storeu_si128((__m128i *)to, tot);
  143                 from += AES_BLOCK_LEN;
  144                 to += AES_BLOCK_LEN;
  145         }
  146 }
  147 
  148 void
  149 aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
  150     const uint8_t *from, uint8_t *to)
  151 {
  152         __m128i tot;
  153         __m128i tout[8];
  154         const struct blocks8 *blks;
  155         struct blocks8 *top;
  156         size_t i, cnt;
  157 
  158         cnt = len / AES_BLOCK_LEN / 8;
  159         for (i = 0; i < cnt; i++) {
  160                 blks = (const struct blocks8 *)from;
  161                 top = (struct blocks8 *)to;
  162                 aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
  163                     blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
  164                     blks->blk[6], blks->blk[7], tout);
  165                 top->blk[0] = tout[0];
  166                 top->blk[1] = tout[1];
  167                 top->blk[2] = tout[2];
  168                 top->blk[3] = tout[3];
  169                 top->blk[4] = tout[4];
  170                 top->blk[5] = tout[5];
  171                 top->blk[6] = tout[6];
  172                 top->blk[7] = tout[7];
  173                 from += AES_BLOCK_LEN * 8;
  174                 to += AES_BLOCK_LEN * 8;
  175         }
  176         i *= 8;
  177         cnt = len / AES_BLOCK_LEN;
  178         for (; i < cnt; i++) {
  179                 tot = aesni_dec(rounds - 1, key_schedule,
  180                     _mm_loadu_si128((const __m128i *)from));
  181                 _mm_storeu_si128((__m128i *)to, tot);
  182                 from += AES_BLOCK_LEN;
  183                 to += AES_BLOCK_LEN;
  184         }
  185 }
  186 
  /*
   * Increment a 128-bit counter kept in mixed-endian form: the low 64 bits
   * of the counter live in the upper 64-bit lane of the register, which is
   * the layout produced by the byte swap in aesni_encrypt_icm().  A wrap of
   * that lane to zero carries into the lower lane.
   */
  static inline __m128i
  nextc(__m128i x)
  {
          const __m128i zero = _mm_setzero_si128();
          const __m128i upper_lane_one = _mm_setr_epi32(0, 0, 1, 0);
          __m128i carry;

          /* Add one to the upper lane only. */
          x = _mm_add_epi64(x, upper_lane_one);

          /* All-ones in every lane that is now zero. */
          carry = _mm_cmpeq_epi64(x, zero);
          /* Move the upper lane's flag into the lower lane, clear the upper. */
          carry = _mm_unpackhi_epi64(carry, zero);

          /* Subtracting all-ones (-1) adds the carry to the lower lane. */
          return _mm_sub_epi64(x, carry);
  }
  204 
  205 void
  206 aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len,
  207     const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
  208 {
  209         __m128i tot;
  210         __m128i tmp1, tmp2, tmp3, tmp4;
  211         __m128i tmp5, tmp6, tmp7, tmp8;
  212         __m128i ctr1, ctr2, ctr3, ctr4;
  213         __m128i ctr5, ctr6, ctr7, ctr8;
  214         __m128i BSWAP_EPI64;
  215         __m128i tout[8];
  216         __m128i block;
  217         struct blocks8 *top;
  218         const struct blocks8 *blks;
  219         size_t i, cnt, resid;
  220 
  221         BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);
  222 
  223         ctr1 = _mm_loadu_si128((const __m128i *)iv);
  224         ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
  225 
  226         cnt = len / AES_BLOCK_LEN / 8;
  227         for (i = 0; i < cnt; i++) {
  228                 tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
  229                 ctr2 = nextc(ctr1);
  230                 tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
  231                 ctr3 = nextc(ctr2);
  232                 tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
  233                 ctr4 = nextc(ctr3);
  234                 tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
  235                 ctr5 = nextc(ctr4);
  236                 tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
  237                 ctr6 = nextc(ctr5);
  238                 tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
  239                 ctr7 = nextc(ctr6);
  240                 tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
  241                 ctr8 = nextc(ctr7);
  242                 tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
  243                 ctr1 = nextc(ctr8);
  244 
  245                 blks = (const struct blocks8 *)from;
  246                 top = (struct blocks8 *)to;
  247                 aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4,
  248                     tmp5, tmp6, tmp7, tmp8, tout);
  249 
  250                 top->blk[0] = blks->blk[0] ^ tout[0];
  251                 top->blk[1] = blks->blk[1] ^ tout[1];
  252                 top->blk[2] = blks->blk[2] ^ tout[2];
  253                 top->blk[3] = blks->blk[3] ^ tout[3];
  254                 top->blk[4] = blks->blk[4] ^ tout[4];
  255                 top->blk[5] = blks->blk[5] ^ tout[5];
  256                 top->blk[6] = blks->blk[6] ^ tout[6];
  257                 top->blk[7] = blks->blk[7] ^ tout[7];
  258 
  259                 from += AES_BLOCK_LEN * 8;
  260                 to += AES_BLOCK_LEN * 8;
  261         }
  262         i *= 8;
  263         cnt = len / AES_BLOCK_LEN;
  264         for (; i < cnt; i++) {
  265                 tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
  266                 ctr1 = nextc(ctr1);
  267 
  268                 tot = aesni_enc(rounds - 1, key_schedule, tmp1);
  269 
  270                 tot = tot ^ _mm_loadu_si128((const __m128i *)from);
  271                 _mm_storeu_si128((__m128i *)to, tot);
  272 
  273                 from += AES_BLOCK_LEN;
  274                 to += AES_BLOCK_LEN;
  275         }
  276 
  277         /*
  278          * Handle remaining partial round.  Copy the remaining payload onto the
  279          * stack to ensure that the full block can be loaded safely.
  280          */
  281         resid = len % AES_BLOCK_LEN;
  282         if (resid != 0) {
  283                 tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
  284                 tot = aesni_enc(rounds - 1, key_schedule, tmp1);
  285                 block = _mm_setzero_si128();
  286                 memcpy(&block, from, resid);
  287                 tot = tot ^ _mm_loadu_si128(&block);
  288                 memcpy(to, &tot, resid);
  289                 explicit_bzero(&block, sizeof(block));
  290         }
  291 }
  292 
  #define AES_XTS_BLOCKSIZE       16
  #define AES_XTS_IVSIZE          8
  #define AES_XTS_ALPHA           0x87    /* GF(2^128) generator polynomial */

  /*
   * Advance the XTS tweak by one step: shift the 128-bit value left by one
   * bit and, when a bit falls off the top, XOR AES_XTS_ALPHA into the
   * lowest 32-bit word.
   */
  static inline __m128i
  xts_crank_lfsr(__m128i inp)
  {
          const __m128i carry_in = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
          __m128i spill, shifted;

          /*
           * Rotate the 32-bit words up by one position so that each lane
           * holds the word below it (lane 0 gets the top word), then smear
           * each word's top bit across its lane and keep only the value
           * that must be XORed into that lane.
           */
          spill = _mm_shuffle_epi32(inp, 0x93);
          spill = _mm_srai_epi32(spill, 31);
          spill = _mm_and_si128(spill, carry_in);

          /* Shift every word left by one and fold the carried bits in. */
          shifted = _mm_slli_epi32(inp, 1);
          return _mm_xor_si128(shifted, spill);
  }
  314 
  315 static void
  316 aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
  317     const uint8_t *from, uint8_t *to, int do_encrypt)
  318 {
  319         __m128i block;
  320 
  321         block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;
  322 
  323         if (do_encrypt)
  324                 block = aesni_enc(rounds - 1, key_schedule, block);
  325         else
  326                 block = aesni_dec(rounds - 1, key_schedule, block);
  327 
  328         _mm_storeu_si128((__m128i *)to, block ^ *tweak);
  329 
  330         *tweak = xts_crank_lfsr(*tweak);
  331 }
  332 
  333 static void
  334 aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
  335     const uint8_t *from, uint8_t *to, int do_encrypt)
  336 {
  337         __m128i tmptweak;
  338         __m128i a, b, c, d, e, f, g, h;
  339         __m128i tweaks[8];
  340         __m128i tmp[8];
  341         __m128i *top;
  342         const __m128i *fromp;
  343 
  344         tmptweak = *tweak;
  345 
  346         /*
  347          * unroll the loop.  This lets gcc put values directly in the
  348          * register and saves memory accesses.
  349          */
  350         fromp = (const __m128i *)from;
  351 #define PREPINP(v, pos)                                         \
  352                 do {                                            \
  353                         tweaks[(pos)] = tmptweak;               \
  354                         (v) = _mm_loadu_si128(&fromp[pos]) ^    \
  355                             tmptweak;                           \
  356                         tmptweak = xts_crank_lfsr(tmptweak);    \
  357                 } while (0)
  358         PREPINP(a, 0);
  359         PREPINP(b, 1);
  360         PREPINP(c, 2);
  361         PREPINP(d, 3);
  362         PREPINP(e, 4);
  363         PREPINP(f, 5);
  364         PREPINP(g, 6);
  365         PREPINP(h, 7);
  366         *tweak = tmptweak;
  367 
  368         if (do_encrypt)
  369                 aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
  370                     tmp);
  371         else
  372                 aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
  373                     tmp);
  374 
  375         top = (__m128i *)to;
  376         _mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
  377         _mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
  378         _mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
  379         _mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
  380         _mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
  381         _mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
  382         _mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
  383         _mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
  384 }
  385 
  386 static void
  387 aesni_crypt_xts(int rounds, const __m128i *data_schedule,
  388     const __m128i *tweak_schedule, size_t len, const uint8_t *from,
  389     uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
  390 {
  391         __m128i tweakreg;
  392         uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
  393         size_t i, cnt;
  394 
  395         /*
  396          * Prepare tweak as E_k2(IV). IV is specified as LE representation
  397          * of a 64-bit block number which we allow to be passed in directly.
  398          */
  399 #if BYTE_ORDER == LITTLE_ENDIAN
  400         bcopy(iv, tweak, AES_XTS_IVSIZE);
  401         /* Last 64 bits of IV are always zero. */
  402         bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
  403 #else
  404 #error Only LITTLE_ENDIAN architectures are supported.
  405 #endif
  406         tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
  407         tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
  408 
  409         cnt = len / AES_XTS_BLOCKSIZE / 8;
  410         for (i = 0; i < cnt; i++) {
  411                 aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
  412                     from, to, do_encrypt);
  413                 from += AES_XTS_BLOCKSIZE * 8;
  414                 to += AES_XTS_BLOCKSIZE * 8;
  415         }
  416         i *= 8;
  417         cnt = len / AES_XTS_BLOCKSIZE;
  418         for (; i < cnt; i++) {
  419                 aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
  420                     from, to, do_encrypt);
  421                 from += AES_XTS_BLOCKSIZE;
  422                 to += AES_XTS_BLOCKSIZE;
  423         }
  424 }
  425 
  426 void
  427 aesni_encrypt_xts(int rounds, const void *data_schedule,
  428     const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
  429     const uint8_t iv[static AES_BLOCK_LEN])
  430 {
  431 
  432         aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
  433             iv, 1);
  434 }
  435 
  436 void
  437 aesni_decrypt_xts(int rounds, const void *data_schedule,
  438     const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
  439     const uint8_t iv[static AES_BLOCK_LEN])
  440 {
  441 
  442         aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
  443             iv, 0);
  444 }
  445 
  446 void
  447 aesni_cipher_setup_common(struct aesni_session *ses,
  448     const struct crypto_session_params *csp, const uint8_t *key, int keylen)
  449 {
  450         int decsched;
  451 
  452         decsched = 1;
  453 
  454         switch (csp->csp_cipher_alg) {
  455         case CRYPTO_AES_ICM:
  456         case CRYPTO_AES_NIST_GCM_16:
  457         case CRYPTO_AES_CCM_16:
  458                 decsched = 0;
  459                 break;
  460         }
  461 
  462         if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
  463                 keylen /= 2;
  464 
  465         switch (keylen * 8) {
  466         case 128:
  467                 ses->rounds = AES128_ROUNDS;
  468                 break;
  469         case 192:
  470                 ses->rounds = AES192_ROUNDS;
  471                 break;
  472         case 256:
  473                 ses->rounds = AES256_ROUNDS;
  474                 break;
  475         default:
  476                 panic("shouldn't happen");
  477         }
  478 
  479         aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
  480         if (decsched)
  481                 aesni_set_deckey(ses->enc_schedule, ses->dec_schedule,
  482                     ses->rounds);
  483 
  484         if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
  485                 aesni_set_enckey(key + keylen, ses->xts_schedule,
  486                     ses->rounds);
  487 }

Cache object: 9a1ace3b8774c9f11aa2d4bd8e29b373


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.