The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/crypto/aesni/intel_sha1.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*******************************************************************************
    2 * Copyright (c) 2013, Intel Corporation 
    3 * 
    4 * All rights reserved. 
    5 * 
    6 * Redistribution and use in source and binary forms, with or without
    7 * modification, are permitted provided that the following conditions are
    8 * met: 
    9 * 
   10 * * Redistributions of source code must retain the above copyright
   11 *   notice, this list of conditions and the following disclaimer.  
   12 * 
   13 * * Redistributions in binary form must reproduce the above copyright
   14 *   notice, this list of conditions and the following disclaimer in the
   15 *   documentation and/or other materials provided with the
   16 *   distribution. 
   17 * 
   18 * * Neither the name of the Intel Corporation nor the names of its
   19 *   contributors may be used to endorse or promote products derived from
   20 *   this software without specific prior written permission. 
   21 * 
   22 * 
   23 * THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
   24 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
   27 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   28 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   29 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   34 ********************************************************************************
   35 *
   36 * Intel SHA Extensions optimized implementation of a SHA-1 update function 
   37 * 
   38 * The function takes a pointer to the current hash values, a pointer to the 
   39 * input data, and a number of 64 byte blocks to process.  Once all blocks have 
   40 * been processed, the digest pointer is  updated with the resulting hash value.
   41 * The function only processes complete blocks, there is no functionality to 
   42 * store partial blocks.  All message padding and hash value initialization must
   43 * be done outside the update function.  
   44 * 
   45 * The indented lines in the loop are instructions related to rounds processing.
   46 * The non-indented lines are instructions related to the message schedule.
   47 * 
   48 * Author: Sean Gulley <sean.m.gulley@intel.com>
   49 * Date:   July 2013
   50 *
   51 ********************************************************************************
   52 *
   53 * Example compiler command line:
   54 * icc intel_sha_extensions_sha1_intrinsic.c
   55 * gcc -msha -msse4 intel_sha_extensions_sha1_intrinsic.c
   56 *
   57 *******************************************************************************/
   58 #include <sys/cdefs.h>
   59 __FBSDID("$FreeBSD$");
   60 
   61 #include <sys/types.h>
   62 #include <crypto/aesni/aesni_os.h>
   63 #include <crypto/aesni/sha_sse.h>
   64 
   65 #include <immintrin.h>
   66 
   67 void intel_sha1_step(uint32_t *digest, const char *data, uint32_t num_blks) {
   68    __m128i abcd, e0, e1;
   69    __m128i abcd_save, e_save;
   70    __m128i msg0, msg1, msg2, msg3;
   71    __m128i shuf_mask, e_mask;
   72 
   73 #if 0
   74    e_mask    = _mm_set_epi64x(0xFFFFFFFF00000000ull, 0x0000000000000000ull);
   75 #else
   76    (void)e_mask;
   77    e0        = _mm_set_epi64x(0, 0);
   78 #endif
   79    shuf_mask = _mm_set_epi64x(0x0001020304050607ull, 0x08090a0b0c0d0e0full);
   80 
   81    // Load initial hash values
   82    abcd      = _mm_loadu_si128((__m128i*) digest);
   83    e0        = _mm_insert_epi32(e0, *(digest+4), 3);
   84    abcd      = _mm_shuffle_epi32(abcd, 0x1B);
   85 #if 0
   86    e0        = _mm_and_si128(e0, e_mask);
   87 #endif
   88 
   89    while (num_blks > 0) {
   90       // Save hash values for addition after rounds
   91       abcd_save = abcd;
   92       e_save    = e0;
   93 
   94       // Rounds 0-3
   95       msg0 = _mm_loadu_si128((const __m128i*) data);
   96       msg0 = _mm_shuffle_epi8(msg0, shuf_mask);
   97          e0   = _mm_add_epi32(e0, msg0);
   98          e1   = abcd;
   99          abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
  100 
  101       // Rounds 4-7
  102       msg1 = _mm_loadu_si128((const __m128i*) (data+16));
  103       msg1 = _mm_shuffle_epi8(msg1, shuf_mask);
  104          e1   = _mm_sha1nexte_epu32(e1, msg1);
  105          e0   = abcd;
  106          abcd = _mm_sha1rnds4_epu32(abcd, e1, 0);
  107       msg0 = _mm_sha1msg1_epu32(msg0, msg1);
  108 
  109       // Rounds 8-11
  110       msg2 = _mm_loadu_si128((const __m128i*) (data+32));
  111       msg2 = _mm_shuffle_epi8(msg2, shuf_mask);
  112          e0   = _mm_sha1nexte_epu32(e0, msg2);
  113          e1   = abcd;
  114          abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
  115       msg1 = _mm_sha1msg1_epu32(msg1, msg2);
  116       msg0 = _mm_xor_si128(msg0, msg2);
  117 
  118       // Rounds 12-15
  119       msg3 = _mm_loadu_si128((const __m128i*) (data+48));
  120       msg3 = _mm_shuffle_epi8(msg3, shuf_mask);
  121          e1   = _mm_sha1nexte_epu32(e1, msg3);
  122          e0   = abcd;
  123       msg0 = _mm_sha1msg2_epu32(msg0, msg3);
  124          abcd = _mm_sha1rnds4_epu32(abcd, e1, 0);
  125       msg2 = _mm_sha1msg1_epu32(msg2, msg3);
  126       msg1 = _mm_xor_si128(msg1, msg3);
  127 
  128       // Rounds 16-19
  129          e0   = _mm_sha1nexte_epu32(e0, msg0);
  130          e1   = abcd;
  131       msg1 = _mm_sha1msg2_epu32(msg1, msg0);
  132          abcd = _mm_sha1rnds4_epu32(abcd, e0, 0);
  133       msg3 = _mm_sha1msg1_epu32(msg3, msg0);
  134       msg2 = _mm_xor_si128(msg2, msg0);
  135 
  136       // Rounds 20-23
  137          e1   = _mm_sha1nexte_epu32(e1, msg1);
  138          e0   = abcd;
  139       msg2 = _mm_sha1msg2_epu32(msg2, msg1);
  140          abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
  141       msg0 = _mm_sha1msg1_epu32(msg0, msg1);
  142       msg3 = _mm_xor_si128(msg3, msg1);
  143         
  144       // Rounds 24-27
  145          e0   = _mm_sha1nexte_epu32(e0, msg2);
  146          e1   = abcd;
  147       msg3 = _mm_sha1msg2_epu32(msg3, msg2);
  148          abcd = _mm_sha1rnds4_epu32(abcd, e0, 1);
  149       msg1 = _mm_sha1msg1_epu32(msg1, msg2);
  150       msg0 = _mm_xor_si128(msg0, msg2);
  151 
  152       // Rounds 28-31
  153          e1   = _mm_sha1nexte_epu32(e1, msg3);
  154          e0   = abcd;
  155       msg0 = _mm_sha1msg2_epu32(msg0, msg3);
  156          abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
  157       msg2 = _mm_sha1msg1_epu32(msg2, msg3);
  158       msg1 = _mm_xor_si128(msg1, msg3);
  159 
  160       // Rounds 32-35
  161          e0   = _mm_sha1nexte_epu32(e0, msg0);
  162          e1   = abcd;
  163       msg1 = _mm_sha1msg2_epu32(msg1, msg0);
  164          abcd = _mm_sha1rnds4_epu32(abcd, e0, 1);
  165       msg3 = _mm_sha1msg1_epu32(msg3, msg0);
  166       msg2 = _mm_xor_si128(msg2, msg0);
  167 
  168       // Rounds 36-39
  169          e1   = _mm_sha1nexte_epu32(e1, msg1);
  170          e0   = abcd;
  171       msg2 = _mm_sha1msg2_epu32(msg2, msg1);
  172          abcd = _mm_sha1rnds4_epu32(abcd, e1, 1);
  173       msg0 = _mm_sha1msg1_epu32(msg0, msg1);
  174       msg3 = _mm_xor_si128(msg3, msg1);
  175         
  176       // Rounds 40-43
  177          e0   = _mm_sha1nexte_epu32(e0, msg2);
  178          e1   = abcd;
  179       msg3 = _mm_sha1msg2_epu32(msg3, msg2);
  180          abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
  181       msg1 = _mm_sha1msg1_epu32(msg1, msg2);
  182       msg0 = _mm_xor_si128(msg0, msg2);
  183 
  184       // Rounds 44-47
  185          e1   = _mm_sha1nexte_epu32(e1, msg3);
  186          e0   = abcd;
  187       msg0 = _mm_sha1msg2_epu32(msg0, msg3);
  188          abcd = _mm_sha1rnds4_epu32(abcd, e1, 2);
  189       msg2 = _mm_sha1msg1_epu32(msg2, msg3);
  190       msg1 = _mm_xor_si128(msg1, msg3);
  191 
  192       // Rounds 48-51
  193          e0   = _mm_sha1nexte_epu32(e0, msg0);
  194          e1   = abcd;
  195       msg1 = _mm_sha1msg2_epu32(msg1, msg0);
  196          abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
  197       msg3 = _mm_sha1msg1_epu32(msg3, msg0);
  198       msg2 = _mm_xor_si128(msg2, msg0);
  199 
  200       // Rounds 52-55
  201          e1   = _mm_sha1nexte_epu32(e1, msg1);
  202          e0   = abcd;
  203       msg2 = _mm_sha1msg2_epu32(msg2, msg1);
  204          abcd = _mm_sha1rnds4_epu32(abcd, e1, 2);
  205       msg0 = _mm_sha1msg1_epu32(msg0, msg1);
  206       msg3 = _mm_xor_si128(msg3, msg1);
  207         
  208       // Rounds 56-59
  209          e0   = _mm_sha1nexte_epu32(e0, msg2);
  210          e1   = abcd;
  211       msg3 = _mm_sha1msg2_epu32(msg3, msg2);
  212          abcd = _mm_sha1rnds4_epu32(abcd, e0, 2);
  213       msg1 = _mm_sha1msg1_epu32(msg1, msg2);
  214       msg0 = _mm_xor_si128(msg0, msg2);
  215 
  216       // Rounds 60-63
  217          e1   = _mm_sha1nexte_epu32(e1, msg3);
  218          e0   = abcd;
  219       msg0 = _mm_sha1msg2_epu32(msg0, msg3);
  220          abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
  221       msg2 = _mm_sha1msg1_epu32(msg2, msg3);
  222       msg1 = _mm_xor_si128(msg1, msg3);
  223 
  224       // Rounds 64-67
  225          e0   = _mm_sha1nexte_epu32(e0, msg0);
  226          e1   = abcd;
  227       msg1 = _mm_sha1msg2_epu32(msg1, msg0);
  228          abcd = _mm_sha1rnds4_epu32(abcd, e0, 3);
  229       msg3 = _mm_sha1msg1_epu32(msg3, msg0);
  230       msg2 = _mm_xor_si128(msg2, msg0);
  231 
  232       // Rounds 68-71
  233          e1   = _mm_sha1nexte_epu32(e1, msg1);
  234          e0   = abcd;
  235       msg2 = _mm_sha1msg2_epu32(msg2, msg1);
  236          abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
  237       msg3 = _mm_xor_si128(msg3, msg1);
  238         
  239       // Rounds 72-75
  240          e0   = _mm_sha1nexte_epu32(e0, msg2);
  241          e1   = abcd;
  242       msg3 = _mm_sha1msg2_epu32(msg3, msg2);
  243          abcd = _mm_sha1rnds4_epu32(abcd, e0, 3);
  244 
  245       // Rounds 76-79
  246          e1   = _mm_sha1nexte_epu32(e1, msg3);
  247          e0   = abcd;
  248          abcd = _mm_sha1rnds4_epu32(abcd, e1, 3);
  249 
  250       // Add current hash values with previously saved
  251       e0   = _mm_sha1nexte_epu32(e0, e_save);
  252       abcd = _mm_add_epi32(abcd, abcd_save);
  253 
  254       data += 64;
  255       num_blks--;
  256    }
  257 
  258    abcd = _mm_shuffle_epi32(abcd, 0x1B);
  259    _mm_store_si128((__m128i*) digest, abcd);
  260    *(digest+4) = _mm_extract_epi32(e0, 3);
  261 }
  262 

Cache object: 5f30ba78cdf4eb3c0eaab54ed035e694


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.