The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/aesni/intel_sha256.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*******************************************************************************
    2 * Copyright (c) 2013, Intel Corporation 
    3 * 
    4 * All rights reserved. 
    5 * 
    6 * Redistribution and use in source and binary forms, with or without
    7 * modification, are permitted provided that the following conditions are
    8 * met: 
    9 * 
   10 * * Redistributions of source code must retain the above copyright
   11 *   notice, this list of conditions and the following disclaimer.  
   12 * 
   13 * * Redistributions in binary form must reproduce the above copyright
   14 *   notice, this list of conditions and the following disclaimer in the
   15 *   documentation and/or other materials provided with the
   16 *   distribution. 
   17 * 
   18 * * Neither the name of the Intel Corporation nor the names of its
   19 *   contributors may be used to endorse or promote products derived from
   20 *   this software without specific prior written permission. 
   21 * 
   22 * 
   23 * THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
   24 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
   27 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   28 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   29 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   34 ********************************************************************************
   35 *
   36 * Intel SHA Extensions optimized implementation of a SHA-256 update function 
   37 *
   38 * The function takes a pointer to the current hash values, a pointer to the 
   39 * input data, and a number of 64 byte blocks to process.  Once all blocks have 
   40 * been processed, the digest pointer is  updated with the resulting hash value.
   41 * The function only processes complete blocks, there is no functionality to 
   42 * store partial blocks.  All message padding and hash value initialization must
   43 * be done outside the update function.  
   44 *
   45 * The indented lines in the loop are instructions related to rounds processing.
   46 * The non-indented lines are instructions related to the message schedule.
   47 *
   48 * Author: Sean Gulley <sean.m.gulley@intel.com>
   49 * Date:   July 2013
   50 *
   51 ********************************************************************************
   52 *
   53 * Example compiler command line:
   54 * icc intel_sha_extensions_sha256_intrinsic.c
   55 * gcc -msha -msse4 intel_sha_extensions_sha256_intrinsic.c
   56 *
   57 *******************************************************************************/
   58 #include <sys/cdefs.h>
   59 __FBSDID("$FreeBSD$");
   60 
   61 #include <sys/types.h>
   62 #include <crypto/aesni/aesni_os.h>
   63 #include <crypto/aesni/sha_sse.h>
   64 
   65 #include <immintrin.h>
   66 
   67 void intel_sha256_step(uint32_t *digest, const char *data, uint32_t num_blks) {
   68    __m128i state0, state1;
   69    __m128i msg;
   70    __m128i msgtmp0, msgtmp1, msgtmp2, msgtmp3;
   71    __m128i tmp;
   72    __m128i shuf_mask;
   73    __m128i abef_save, cdgh_save;
   74 
   75    // Load initial hash values
   76    // Need to reorder these appropriately
   77    // DCBA, HGFE -> ABEF, CDGH
   78    tmp    = _mm_loadu_si128((__m128i*) digest);
   79    state1 = _mm_loadu_si128((__m128i*) (digest+4));
   80 
   81    tmp    = _mm_shuffle_epi32(tmp, 0xB1);       // CDAB
   82    state1 = _mm_shuffle_epi32(state1, 0x1B);    // EFGH
   83    state0 = _mm_alignr_epi8(tmp, state1, 8);    // ABEF
   84    state1 = _mm_blend_epi16(state1, tmp, 0xF0); // CDGH
   85 
   86    shuf_mask = _mm_set_epi64x(0x0c0d0e0f08090a0bull, 0x0405060700010203ull);
   87 
   88    while (num_blks > 0) {
   89       // Save hash values for addition after rounds
   90       abef_save = state0;
   91       cdgh_save = state1;
   92 
   93       // Rounds 0-3
   94       msg     = _mm_loadu_si128((const __m128i*) data);
   95       msgtmp0 = _mm_shuffle_epi8(msg, shuf_mask);
   96          msg    = _mm_add_epi32(msgtmp0, 
   97                   _mm_set_epi64x(0xE9B5DBA5B5C0FBCFull, 0x71374491428A2F98ull));
   98          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
   99          msg    = _mm_shuffle_epi32(msg, 0x0E);
  100          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  101 
  102       // Rounds 4-7
  103       msgtmp1 = _mm_loadu_si128((const __m128i*) (data+16));
  104       msgtmp1 = _mm_shuffle_epi8(msgtmp1, shuf_mask);
  105          msg    = _mm_add_epi32(msgtmp1, 
  106                   _mm_set_epi64x(0xAB1C5ED5923F82A4ull, 0x59F111F13956C25Bull));
  107          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  108          msg    = _mm_shuffle_epi32(msg, 0x0E);
  109          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  110       msgtmp0 = _mm_sha256msg1_epu32(msgtmp0, msgtmp1);
  111 
  112       // Rounds 8-11
  113       msgtmp2 = _mm_loadu_si128((const __m128i*) (data+32));
  114       msgtmp2 = _mm_shuffle_epi8(msgtmp2, shuf_mask);
  115          msg    = _mm_add_epi32(msgtmp2, 
  116                   _mm_set_epi64x(0x550C7DC3243185BEull, 0x12835B01D807AA98ull));
  117          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  118          msg    = _mm_shuffle_epi32(msg, 0x0E);
  119          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  120       msgtmp1 = _mm_sha256msg1_epu32(msgtmp1, msgtmp2);
  121 
  122       // Rounds 12-15
  123       msgtmp3 = _mm_loadu_si128((const __m128i*) (data+48));
  124       msgtmp3 = _mm_shuffle_epi8(msgtmp3, shuf_mask);
  125          msg    = _mm_add_epi32(msgtmp3, 
  126                   _mm_set_epi64x(0xC19BF1749BDC06A7ull, 0x80DEB1FE72BE5D74ull));
  127          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  128       tmp     = _mm_alignr_epi8(msgtmp3, msgtmp2, 4);
  129       msgtmp0 = _mm_add_epi32(msgtmp0, tmp);
  130       msgtmp0 = _mm_sha256msg2_epu32(msgtmp0, msgtmp3);
  131          msg    = _mm_shuffle_epi32(msg, 0x0E);
  132          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  133       msgtmp2 = _mm_sha256msg1_epu32(msgtmp2, msgtmp3);
  134 
  135       // Rounds 16-19
  136          msg    = _mm_add_epi32(msgtmp0, 
  137                   _mm_set_epi64x(0x240CA1CC0FC19DC6ull, 0xEFBE4786E49B69C1ull));
  138          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  139       tmp     = _mm_alignr_epi8(msgtmp0, msgtmp3, 4);
  140       msgtmp1 = _mm_add_epi32(msgtmp1, tmp);
  141       msgtmp1 = _mm_sha256msg2_epu32(msgtmp1, msgtmp0);
  142          msg    = _mm_shuffle_epi32(msg, 0x0E);
  143          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  144       msgtmp3 = _mm_sha256msg1_epu32(msgtmp3, msgtmp0);
  145 
  146       // Rounds 20-23
  147          msg    = _mm_add_epi32(msgtmp1, 
  148                   _mm_set_epi64x(0x76F988DA5CB0A9DCull, 0x4A7484AA2DE92C6Full));
  149          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  150       tmp     = _mm_alignr_epi8(msgtmp1, msgtmp0, 4);
  151       msgtmp2 = _mm_add_epi32(msgtmp2, tmp);
  152       msgtmp2 = _mm_sha256msg2_epu32(msgtmp2, msgtmp1);
  153          msg    = _mm_shuffle_epi32(msg, 0x0E);
  154          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  155       msgtmp0 = _mm_sha256msg1_epu32(msgtmp0, msgtmp1);
  156 
  157       // Rounds 24-27
  158          msg    = _mm_add_epi32(msgtmp2, 
  159                   _mm_set_epi64x(0xBF597FC7B00327C8ull, 0xA831C66D983E5152ull));
  160          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  161       tmp     = _mm_alignr_epi8(msgtmp2, msgtmp1, 4);
  162       msgtmp3 = _mm_add_epi32(msgtmp3, tmp);
  163       msgtmp3 = _mm_sha256msg2_epu32(msgtmp3, msgtmp2);
  164          msg    = _mm_shuffle_epi32(msg, 0x0E);
  165          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  166       msgtmp1 = _mm_sha256msg1_epu32(msgtmp1, msgtmp2);
  167 
  168       // Rounds 28-31
  169          msg    = _mm_add_epi32(msgtmp3, 
  170                   _mm_set_epi64x(0x1429296706CA6351ull, 0xD5A79147C6E00BF3ull));
  171          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  172       tmp     = _mm_alignr_epi8(msgtmp3, msgtmp2, 4);
  173       msgtmp0 = _mm_add_epi32(msgtmp0, tmp);
  174       msgtmp0 = _mm_sha256msg2_epu32(msgtmp0, msgtmp3);
  175          msg    = _mm_shuffle_epi32(msg, 0x0E);
  176          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  177       msgtmp2 = _mm_sha256msg1_epu32(msgtmp2, msgtmp3);
  178 
  179       // Rounds 32-35
  180          msg    = _mm_add_epi32(msgtmp0, 
  181                   _mm_set_epi64x(0x53380D134D2C6DFCull, 0x2E1B213827B70A85ull));
  182          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  183       tmp     = _mm_alignr_epi8(msgtmp0, msgtmp3, 4);
  184       msgtmp1 = _mm_add_epi32(msgtmp1, tmp);
  185       msgtmp1 = _mm_sha256msg2_epu32(msgtmp1, msgtmp0);
  186          msg    = _mm_shuffle_epi32(msg, 0x0E);
  187          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  188       msgtmp3 = _mm_sha256msg1_epu32(msgtmp3, msgtmp0);
  189 
  190       // Rounds 36-39
  191          msg    = _mm_add_epi32(msgtmp1, 
  192                   _mm_set_epi64x(0x92722C8581C2C92Eull, 0x766A0ABB650A7354ull));
  193          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  194       tmp     = _mm_alignr_epi8(msgtmp1, msgtmp0, 4);
  195       msgtmp2 = _mm_add_epi32(msgtmp2, tmp);
  196       msgtmp2 = _mm_sha256msg2_epu32(msgtmp2, msgtmp1);
  197          msg    = _mm_shuffle_epi32(msg, 0x0E);
  198          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  199       msgtmp0 = _mm_sha256msg1_epu32(msgtmp0, msgtmp1);
  200 
  201       // Rounds 40-43
  202          msg    = _mm_add_epi32(msgtmp2, 
  203                   _mm_set_epi64x(0xC76C51A3C24B8B70ull, 0xA81A664BA2BFE8A1ull));
  204          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  205       tmp     = _mm_alignr_epi8(msgtmp2, msgtmp1, 4);
  206       msgtmp3 = _mm_add_epi32(msgtmp3, tmp);
  207       msgtmp3 = _mm_sha256msg2_epu32(msgtmp3, msgtmp2);
  208          msg    = _mm_shuffle_epi32(msg, 0x0E);
  209          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  210       msgtmp1 = _mm_sha256msg1_epu32(msgtmp1, msgtmp2);
  211 
  212       // Rounds 44-47
  213          msg    = _mm_add_epi32(msgtmp3, 
  214                   _mm_set_epi64x(0x106AA070F40E3585ull, 0xD6990624D192E819ull));
  215          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  216       tmp     = _mm_alignr_epi8(msgtmp3, msgtmp2, 4);
  217       msgtmp0 = _mm_add_epi32(msgtmp0, tmp);
  218       msgtmp0 = _mm_sha256msg2_epu32(msgtmp0, msgtmp3);
  219          msg    = _mm_shuffle_epi32(msg, 0x0E);
  220          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  221       msgtmp2 = _mm_sha256msg1_epu32(msgtmp2, msgtmp3);
  222 
  223       // Rounds 48-51
  224          msg    = _mm_add_epi32(msgtmp0, 
  225                   _mm_set_epi64x(0x34B0BCB52748774Cull, 0x1E376C0819A4C116ull));
  226          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  227       tmp     = _mm_alignr_epi8(msgtmp0, msgtmp3, 4);
  228       msgtmp1 = _mm_add_epi32(msgtmp1, tmp);
  229       msgtmp1 = _mm_sha256msg2_epu32(msgtmp1, msgtmp0);
  230          msg    = _mm_shuffle_epi32(msg, 0x0E);
  231          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  232       msgtmp3 = _mm_sha256msg1_epu32(msgtmp3, msgtmp0);
  233 
  234       // Rounds 52-55
  235          msg    = _mm_add_epi32(msgtmp1, 
  236                   _mm_set_epi64x(0x682E6FF35B9CCA4Full, 0x4ED8AA4A391C0CB3ull));
  237          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  238       tmp     = _mm_alignr_epi8(msgtmp1, msgtmp0, 4);
  239       msgtmp2 = _mm_add_epi32(msgtmp2, tmp);
  240       msgtmp2 = _mm_sha256msg2_epu32(msgtmp2, msgtmp1);
  241          msg    = _mm_shuffle_epi32(msg, 0x0E);
  242          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  243 
  244       // Rounds 56-59
  245          msg    = _mm_add_epi32(msgtmp2, 
  246                   _mm_set_epi64x(0x8CC7020884C87814ull, 0x78A5636F748F82EEull));
  247          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  248       tmp     = _mm_alignr_epi8(msgtmp2, msgtmp1, 4);
  249       msgtmp3 = _mm_add_epi32(msgtmp3, tmp);
  250       msgtmp3 = _mm_sha256msg2_epu32(msgtmp3, msgtmp2);
  251          msg    = _mm_shuffle_epi32(msg, 0x0E);
  252          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  253 
  254       // Rounds 60-63
  255          msg    = _mm_add_epi32(msgtmp3, 
  256                   _mm_set_epi64x(0xC67178F2BEF9A3F7ull, 0xA4506CEB90BEFFFAull));
  257          state1 = _mm_sha256rnds2_epu32(state1, state0, msg);
  258          msg    = _mm_shuffle_epi32(msg, 0x0E);
  259          state0 = _mm_sha256rnds2_epu32(state0, state1, msg);
  260 
  261       // Add current hash values with previously saved
  262       state0 = _mm_add_epi32(state0, abef_save);
  263       state1 = _mm_add_epi32(state1, cdgh_save);
  264 
  265       data += 64;
  266       num_blks--;
  267    }
  268 
  269    // Write hash values back in the correct order
  270    tmp    = _mm_shuffle_epi32(state0, 0x1B);    // FEBA
  271    state1 = _mm_shuffle_epi32(state1, 0xB1);    // DCHG
  272    state0 = _mm_blend_epi16(tmp, state1, 0xF0); // DCBA
  273    state1 = _mm_alignr_epi8(state1, tmp, 8);    // ABEF
  274 
  275    _mm_store_si128((__m128i*) digest, state0);
  276    _mm_store_si128((__m128i*) (digest+4), state1);
  277 }
  278 

Cache object: 116ab02f49ab006c0ba59b961605b025


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.